From bfe136aefa59ee0b677c325ac56eec19eadf39a7 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Tue, 28 Apr 2026 19:57:01 +0800 Subject: [PATCH 01/43] update the logic of tool parser. Signed-off-by: Yuchuan finish the support for vllm with xgr built-in stag. Signed-off-by: Yuchuan refactor. Signed-off-by: Yuchuan fix. Signed-off-by: Yuchuan fix the detection for the thinking mode. Signed-off-by: Yuchuan add test. Signed-off-by: Yuchuan refactor the structure. Signed-off-by: Yuchuan rename the symbols. Signed-off-by: Yuchuan add the support for more models. Signed-off-by: Yuchuan --- .../test_qwen3coder_tool_parser.py | 54 +++++++++++++++++++ vllm/tool_parsers/abstract_tool_parser.py | 38 +++++++++++-- vllm/tool_parsers/deepseekv32_tool_parser.py | 18 +++++++ vllm/tool_parsers/kimi_k2_tool_parser.py | 20 ++++++- vllm/tool_parsers/openai_tool_parser.py | 18 +++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 20 +++++++ 6 files changed, 162 insertions(+), 6 deletions(-) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index c62e95830243..9678c88e75a0 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -6,6 +6,7 @@ import pytest from openai.types.responses.function_tool import FunctionTool +from xgrammar import StructuralTag from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -1146,3 +1147,56 @@ def test_no_double_serialization_string_args(qwen3_tool_parser): args = json.loads(raw_arguments) assert args["message"] == "hello world" assert '\\"hello world\\"' not in raw_arguments + + +def test_support_builtin_structural_tag(qwen3_tool_parser: Qwen3CoderToolParser): + assert qwen3_tool_parser.support_structural_tag() is True + + +def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( + qwen3_tool_parser: Qwen3CoderToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + ) + tag = qwen3_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + +@pytest.mark.parametrize("include_reasoning", [True, False]) +def test_adjust_request_auto_structural_tag_is_json_string( + qwen3_tool_parser: Qwen3CoderToolParser, + sample_tools: list[ChatCompletionToolsParam], + include_reasoning: bool, +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + include_reasoning=include_reasoning, + ) + out = qwen3_tool_parser.adjust_request(req) + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None + assert isinstance(out.structured_outputs.structural_tag, str) + loaded = json.loads(out.structured_outputs.structural_tag) + assert isinstance(loaded, dict) + + +def test_adjust_request_required_uses_json_schema_not_structural_tag( + qwen3_tool_parser: Qwen3CoderToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + out = qwen3_tool_parser.adjust_request(req) + assert out.structured_outputs.structural_tag is None diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 75181d8dfac6..5738d02dcf31 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -5,6 +5,9 @@ import os from collections.abc import Callable, Sequence from functools import cached_property +import json + +from xgrammar import StructuralTag from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, @@ -83,17 +86,19 @@ def vocab(self) -> dict[str, int]: return self.model_tokenizer.get_vocab() def adjust_request( - self, request: ChatCompletionRequest | ResponsesRequest + self, + request: ChatCompletionRequest | ResponsesRequest, ) -> ChatCompletionRequest | ResponsesRequest: - """ - Static method that used to adjust the request parameters. - """ + + # If there are no tools, return the request as is. if not request.tools: return request + + # Step 1: set structured output params when tool constraints are derived + # from the tool schema. json_schema_from_tool = get_json_schema_from_tools( tool_choice=request.tool_choice, tools=request.tools ) - # Set structured output params for tool calling if json_schema_from_tool is not None: if isinstance(request, ChatCompletionRequest): # tool_choice: "Forced Function" or "required" will override @@ -118,9 +123,32 @@ def adjust_request( strict=True, ) ) + + return request + # Only ChatCompletionRequest is supported for Step 2. + if not isinstance(request, ChatCompletionRequest): + return request + + + # Step 2: apply xgrammar's built-in tool calling support. + if self.support_structural_tag() and request.tool_choice == "auto": + structure_tag = self.get_structural_tag(request) + request.structured_outputs = StructuredOutputsParams( + structural_tag=json.dumps(structure_tag.model_dump()), + ) return request + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag: + raise NotImplementedError( + "ToolParser.get_xgrammar_builtin_structural_tag is not implemented" + ) + + def support_structural_tag(self) -> bool: + return False + def extract_tool_calls( self, model_output: str, request: ChatCompletionRequest ) -> ExtractedToolCallInformation: diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index b8623592365c..c3efc8ef0139 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -28,6 +28,8 @@ ) from vllm.tool_parsers.utils import partial_tag_overlap +from xgrammar import StructuralTag, get_builtin_structural_tag + logger = init_logger(__name__) @@ -319,3 +321,19 @@ def extract_tool_calls_streaming( return DeltaMessage(content="") return None + + def support_structural_tag(self) -> bool: + return True + + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag: + # Config for xgrammar's built-in structural tagging. + dict_tools = [tool.model_dump() for tool in request.tools] + thinking_mode = request.include_reasoning + return get_builtin_structural_tag( + model="deepseek_v3_2", + reasoning=True, + tools=dict_tools, + force_empty_reasoning=not thinking_mode, + ) diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index 7ddd8fa7a80d..02325eda8641 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -25,6 +25,8 @@ ) from vllm.tool_parsers.utils import partial_tag_overlap +from xgrammar import StructuralTag, get_builtin_structural_tag + logger = init_logger(__name__) @@ -273,4 +275,20 @@ def extract_tool_calls_streaming( except Exception: logger.exception("Error trying to handle streaming tool call.") - return None + return None # do not stream a delta. skip this token ID. + + def support_structural_tag(self) -> bool: + return True + + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag: + # Config for xgrammar's built-in structural tagging. + dict_tools = [tool.model_dump() for tool in request.tools] + thinking_mode = request.include_reasoning + return get_builtin_structural_tag( + model="kimi", + reasoning=True, + tools=dict_tools, + force_empty_reasoning=not thinking_mode, + ) diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index ee6dd70718b3..01e036385ab2 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -20,6 +20,8 @@ ToolParser, ) +from xgrammar import StructuralTag, get_builtin_structural_tag + if TYPE_CHECKING: from vllm.tokenizers import TokenizerLike else: @@ -112,3 +114,19 @@ def extract_tool_calls_streaming( raise NotImplementedError( "Not being used, manual parsing in serving_chat.py" # noqa: E501 ) + + def support_structural_tag(self) -> bool: + return True + + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag: + # Config for xgrammar's built-in structural tagging. + dict_tools = [tool.model_dump() for tool in request.tools] + thinking_mode = request.include_reasoning + return get_builtin_structural_tag( + model="harmony", + reasoning=True, + tools=dict_tools, + force_empty_reasoning=not thinking_mode, + ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 7b089ceffbc0..394bdc458d4e 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -11,6 +11,7 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) + from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, DeltaMessage, @@ -27,6 +28,8 @@ ) from vllm.tool_parsers.utils import find_tool_properties +from xgrammar import StructuralTag, get_builtin_structural_tag + logger = init_logger(__name__) @@ -681,3 +684,20 @@ def extract_tool_calls_streaming( return result return None + + + def support_structural_tag(self) -> bool: + return True + + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag: + # Config for xgrammar's built-in structural tagging. + dict_tools = [tool.model_dump() for tool in request.tools] + thinking_mode = request.include_reasoning + return get_builtin_structural_tag( + model="qwen_coder", + reasoning=True, + tools=dict_tools, + force_empty_reasoning=not thinking_mode, + ) From 76899e1d20ae23dc5785a542ebd5b4a662e61047 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Sun, 5 Apr 2026 11:18:40 +0800 Subject: [PATCH 02/43] finish the test. Signed-off-by: Yuchuan --- .../test_deepseekv32_tool_parser.py | 96 +++++++++++++++++++ .../tool_parsers/test_kimi_k2_tool_parser.py | 95 ++++++++++++++++++ tests/tool_parsers/test_openai_tool_parser.py | 95 ++++++++++++++++++ 3 files changed, 286 insertions(+) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index 6145253d9f90..f2f7e82064c9 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -10,6 +10,7 @@ from unittest.mock import MagicMock import pytest +from xgrammar import StructuralTag from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.entrypoints.openai.chat_completion.protocol import ( @@ -17,6 +18,10 @@ FunctionDefinition, ) from vllm.tokenizers import get_tokenizer +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser # --------------------------------------------------------------------------- @@ -48,6 +53,43 @@ def make_request(tools=None) -> MagicMock: return req +@pytest.fixture +def sample_tools() -> list[ChatCompletionToolsParam]: + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, + }, + }, + ), + ] + + # Shorthand for the DSML tokens used throughout FC_START = "<|DSML|function_calls>" FC_END = "" @@ -797,3 +839,57 @@ def test_convert_param_value_checked_helper(parser): assert parser._convert_param_value("null", "integer") is None assert parser._convert_param_value("null", "boolean") is None assert parser._convert_param_value("null", "object") is None + + +def test_support_builtin_structural_tag(): + assert make_parser().support_structural_tag() is True + + +def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( + sample_tools: list[ChatCompletionToolsParam], +) -> None: + parser = make_parser() + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + +@pytest.mark.parametrize("include_reasoning", [True, False]) +def test_adjust_request_auto_structural_tag_is_json_string( + sample_tools: list[ChatCompletionToolsParam], + include_reasoning: bool, +) -> None: + parser = make_parser() + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + include_reasoning=include_reasoning, + ) + out = parser.adjust_request(req) + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None + assert isinstance(out.structured_outputs.structural_tag, str) + loaded = json.loads(out.structured_outputs.structural_tag) + assert isinstance(loaded, dict) + + +def test_adjust_request_required_uses_json_schema_not_structural_tag( + sample_tools: list[ChatCompletionToolsParam], +) -> None: + parser = make_parser() + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + out = parser.adjust_request(req) + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is None diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index b56032b91c17..5125dd7d6431 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -6,11 +6,16 @@ from unittest.mock import MagicMock import pytest +from xgrammar import StructuralTag from tests.tool_parsers.utils import ( run_tool_extraction, run_tool_extraction_streaming, ) +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) @@ -20,6 +25,43 @@ MODEL = "moonshotai/Kimi-K2-Instruct" +@pytest.fixture +def sample_tools() -> list[ChatCompletionToolsParam]: + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, + }, + }, + ), + ] + + @pytest.fixture(scope="module") def kimi_k2_tokenizer(): return get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True) @@ -580,3 +622,56 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert len(rec.tool_calls) == 1 assert rec.tool_calls[0].function.name == "get_weather" assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} + + +def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser): + assert kimi_k2_tool_parser.support_structural_tag() is True + + +def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( + kimi_k2_tool_parser: KimiK2ToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + ) + tag = kimi_k2_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + +@pytest.mark.parametrize("include_reasoning", [True, False]) +def test_adjust_request_auto_structural_tag_is_json_string( + kimi_k2_tool_parser: KimiK2ToolParser, + sample_tools: list[ChatCompletionToolsParam], + include_reasoning: bool, +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + include_reasoning=include_reasoning, + ) + out = kimi_k2_tool_parser.adjust_request(req) + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None + assert isinstance(out.structured_outputs.structural_tag, str) + loaded = json.loads(out.structured_outputs.structural_tag) + assert isinstance(loaded, dict) + + +def test_adjust_request_required_uses_json_schema_not_structural_tag( + kimi_k2_tool_parser: KimiK2ToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + out = kimi_k2_tool_parser.adjust_request(req) + assert out.structured_outputs.structural_tag is None diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index e9e39ef4c029..904f4dbcae49 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -13,7 +13,12 @@ SystemContent, load_harmony_encoding, ) +from xgrammar import StructuralTag +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser @@ -37,6 +42,43 @@ def harmony_encoding(): return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) +@pytest.fixture +def sample_tools() -> list[ChatCompletionToolsParam]: + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, + }, + }, + ), + ] + + def assert_tool_calls( actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall], @@ -261,3 +303,56 @@ def test_extract_tool_calls_with_content( ] assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) assert extracted_info.content == final_content + + +def test_support_builtin_structural_tag(openai_tool_parser: OpenAIToolParser): + assert openai_tool_parser.support_structural_tag() is True + + +def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( + openai_tool_parser: OpenAIToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + ) + tag = openai_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + +@pytest.mark.parametrize("include_reasoning", [True, False]) +def test_adjust_request_auto_structural_tag_is_json_string( + openai_tool_parser: OpenAIToolParser, + sample_tools: list[ChatCompletionToolsParam], + include_reasoning: bool, +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + include_reasoning=include_reasoning, + ) + out = openai_tool_parser.adjust_request(req) + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None + assert isinstance(out.structured_outputs.structural_tag, str) + loaded = json.loads(out.structured_outputs.structural_tag) + assert isinstance(loaded, dict) + + +def test_adjust_request_required_uses_json_schema_not_structural_tag( + openai_tool_parser: OpenAIToolParser, + sample_tools: list[ChatCompletionToolsParam], +) -> None: + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + out = openai_tool_parser.adjust_request(req) + assert out.structured_outputs.structural_tag is None From 5a984a124e4c6a86a92c6554a1c565d346b4616d Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Tue, 28 Apr 2026 20:12:39 +0800 Subject: [PATCH 03/43] update the qwen_coder. Signed-off-by: Yuchuan --- vllm/tool_parsers/abstract_tool_parser.py | 43 +++++++++++++++++--- vllm/tool_parsers/deepseekv32_tool_parser.py | 16 +------- vllm/tool_parsers/kimi_k2_tool_parser.py | 18 ++------ vllm/tool_parsers/openai_tool_parser.py | 18 ++------ vllm/tool_parsers/qwen3coder_tool_parser.py | 16 +------- 5 files changed, 48 insertions(+), 63 deletions(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 5738d02dcf31..2d52bdca1546 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -7,7 +7,7 @@ from functools import cached_property import json -from xgrammar import StructuralTag +from xgrammar import StructuralTag, get_model_structural_tag from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, @@ -132,19 +132,52 @@ def adjust_request( # Step 2: apply xgrammar's built-in tool calling support. - if self.support_structural_tag() and request.tool_choice == "auto": + # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required". + need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" + if self.support_structural_tag() and need_tool_calling: structure_tag = self.get_structural_tag(request) request.structured_outputs = StructuredOutputsParams( structural_tag=json.dumps(structure_tag.model_dump()), ) return request + + def get_model_structural_tag_id(self) -> str: + """ + Return the model ID for the builtin structural tag. + """ + raise NotImplementedError() + + def empty_thinking_as_non_thinking(self) -> bool: + """ + It decides how to handle non-thinking mode. If True, non-thinking mode will force the + LLM output an empty thinking. If False, thinking tags like or are not + allowed and will not be output by the LLM. + """ + return True def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag: - raise NotImplementedError( - "ToolParser.get_xgrammar_builtin_structural_tag is not implemented" - ) + + model_id = self.get_model_structural_tag_id() + thinking_mode = request.include_reasoning + + if thinking_mode: + return get_model_structural_tag( + model=model_id, + tools=request.tools, + tool_choice=request.tool_choice, + reasoning=True, + force_empty_reasoning=False, + ) + else: + return get_model_structural_tag( + model=model_id, + tools=request.tools, + tool_choice=request.tool_choice, + reasoning=not self.empty_thinking_as_non_thinking(), + force_empty_reasoning=self.empty_thinking_as_non_thinking(), + ) def support_structural_tag(self) -> bool: return False diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index c3efc8ef0139..63e3cd59ab51 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -28,8 +28,6 @@ ) from vllm.tool_parsers.utils import partial_tag_overlap -from xgrammar import StructuralTag, get_builtin_structural_tag - logger = init_logger(__name__) @@ -325,15 +323,5 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag: - # Config for xgrammar's built-in structural tagging. - dict_tools = [tool.model_dump() for tool in request.tools] - thinking_mode = request.include_reasoning - return get_builtin_structural_tag( - model="deepseek_v3_2", - reasoning=True, - tools=dict_tools, - force_empty_reasoning=not thinking_mode, - ) + def get_model_structural_tag_id(self) -> str: + return "deepseek_v3_2" \ No newline at end of file diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index 02325eda8641..319f2c19f4b6 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -25,8 +25,6 @@ ) from vllm.tool_parsers.utils import partial_tag_overlap -from xgrammar import StructuralTag, get_builtin_structural_tag - logger = init_logger(__name__) @@ -279,16 +277,6 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag: - # Config for xgrammar's built-in structural tagging. - dict_tools = [tool.model_dump() for tool in request.tools] - thinking_mode = request.include_reasoning - return get_builtin_structural_tag( - model="kimi", - reasoning=True, - tools=dict_tools, - force_empty_reasoning=not thinking_mode, - ) + + def get_model_structural_tag_id(self) -> str: + return "kimi" diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index 01e036385ab2..dc906a762a77 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -20,8 +20,6 @@ ToolParser, ) -from xgrammar import StructuralTag, get_builtin_structural_tag - if TYPE_CHECKING: from vllm.tokenizers import TokenizerLike else: @@ -117,16 +115,6 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag: - # Config for xgrammar's built-in structural tagging. - dict_tools = [tool.model_dump() for tool in request.tools] - thinking_mode = request.include_reasoning - return get_builtin_structural_tag( - model="harmony", - reasoning=True, - tools=dict_tools, - force_empty_reasoning=not thinking_mode, - ) + + def get_model_structural_tag_id(self) -> str: + return "harmony" diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 394bdc458d4e..ed24752bfd37 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -28,8 +28,6 @@ ) from vllm.tool_parsers.utils import find_tool_properties -from xgrammar import StructuralTag, get_builtin_structural_tag - logger = init_logger(__name__) @@ -689,15 +687,5 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag: - # Config for xgrammar's built-in structural tagging. - dict_tools = [tool.model_dump() for tool in request.tools] - thinking_mode = request.include_reasoning - return get_builtin_structural_tag( - model="qwen_coder", - reasoning=True, - tools=dict_tools, - force_empty_reasoning=not thinking_mode, - ) + def get_model_structural_tag_id(self) -> str: + return "qwen_coder" From 95e64e7891dafe716af38b33b87e507de7fb43fc Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 11:28:55 +0800 Subject: [PATCH 04/43] update the logic of get stag. Signed-off-by: Yuchuan --- vllm/tool_parsers/abstract_tool_parser.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 2d52bdca1546..e895f69a50c2 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -18,6 +18,7 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -133,7 +134,7 @@ def adjust_request( # Step 2: apply xgrammar's built-in tool calling support. # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required". - need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" + need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) if self.support_structural_tag() and need_tool_calling: structure_tag = self.get_structural_tag(request) request.structured_outputs = StructuredOutputsParams( @@ -161,20 +162,31 @@ def get_structural_tag( model_id = self.get_model_structural_tag_id() thinking_mode = request.include_reasoning + tool_choice_type = ( + "forced" if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice + ) + tool_dicts = [] + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + for tool in request.tools: + if tool.function.name == request.tool_choice.function.name: + tool_dicts.append(tool.model_dump()) + else: + tool_dicts = [tool.model_dump() for tool in request.tools] if thinking_mode: return get_model_structural_tag( model=model_id, - tools=request.tools, - tool_choice=request.tool_choice, + tools=tool_dicts, + tool_choice=tool_choice_type, reasoning=True, force_empty_reasoning=False, ) else: return get_model_structural_tag( model=model_id, - tools=request.tools, - tool_choice=request.tool_choice, + tools=tool_dicts, + tool_choice=tool_choice_type, reasoning=not self.empty_thinking_as_non_thinking(), force_empty_reasoning=self.empty_thinking_as_non_thinking(), ) From 7a3bbd0e18db453c45caf5cda22d895b4d2ff595 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 11:32:17 +0800 Subject: [PATCH 05/43] update the test. Signed-off-by: Yuchuan --- .../test_deepseekv32_tool_parser.py | 22 +++++++++++++++++ .../tool_parsers/test_kimi_k2_tool_parser.py | 24 ++++++++++++++++++- tests/tool_parsers/test_openai_tool_parser.py | 23 +++++++++++++++++- .../test_qwen3coder_tool_parser.py | 21 ++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index f2f7e82064c9..80bcb9cf6b63 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -21,6 +21,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser @@ -857,6 +859,26 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( ) tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) + + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + if sample_tools: + tool = sample_tools[0] + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index 5125dd7d6431..e94ab8299f01 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -15,6 +15,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -640,7 +642,27 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( ) tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) - + + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + tag = kimi_k2_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + if sample_tools: + + tool = sample_tools[0] + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + ) + tag = kimi_k2_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index 904f4dbcae49..56ec4fafa0e0 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -18,6 +18,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer @@ -321,7 +323,26 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( ) tag = openai_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) - + + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + tag = openai_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + if sample_tools: + tool = sample_tools[0] + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + ) + tag = openai_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 9678c88e75a0..a77fa657140f 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -11,6 +11,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -1166,6 +1168,25 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + tag = qwen3_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + if sample_tools: + tool = sample_tools[0] + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + ) + tag = qwen3_tool_parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( From 3f5e0f3767c29abff9586969c2a053f54a4d3296 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:05:52 +0800 Subject: [PATCH 06/43] fix the tool_choice type. Signed-off-by: Yuchuan --- vllm/tool_parsers/abstract_tool_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index e895f69a50c2..f671a866bd0a 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -163,7 +163,7 @@ def get_structural_tag( model_id = self.get_model_structural_tag_id() thinking_mode = request.include_reasoning tool_choice_type = ( - "forced" if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice + request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice ) tool_dicts = [] From db9ccc6b272357764a9452435ce40689c85da1aa Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:18:43 +0800 Subject: [PATCH 07/43] fix the test. Signed-off-by: Yuchuan --- tests/tool_parsers/test_deepseekv32_tool_parser.py | 9 ++++++--- tests/tool_parsers/test_kimi_k2_tool_parser.py | 9 ++++++--- tests/tool_parsers/test_openai_tool_parser.py | 9 ++++++--- tests/tool_parsers/test_qwen3coder_tool_parser.py | 9 ++++++--- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index 80bcb9cf6b63..a55d245054db 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -21,8 +21,6 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser @@ -875,7 +873,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice={ + "type": "function", + "function": { + "name": tool.function.name, + }, + }, ) tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index e94ab8299f01..fa1def007216 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -15,8 +15,6 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -659,7 +657,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice={ + "type": "function", + "function": { + "name": tool.function.name, + }, + }, ) tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index 56ec4fafa0e0..babdafdf54e9 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -18,8 +18,6 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer @@ -339,7 +337,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice={ + "type": "function", + "function": { + "name": tool.function.name, + }, + }, ) tag = openai_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index a77fa657140f..21063348063d 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -11,8 +11,6 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -1183,7 +1181,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice={ + "type": "function", + "function": { + "name": tool.function.name, + }, + }, ) tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) From 8b246f9ac75a7be4b154d811416771cd8a981e61 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:20:42 +0800 Subject: [PATCH 08/43] Revert "fix the test." This reverts commit db9ccc6b272357764a9452435ce40689c85da1aa. --- tests/tool_parsers/test_deepseekv32_tool_parser.py | 9 +++------ tests/tool_parsers/test_kimi_k2_tool_parser.py | 9 +++------ tests/tool_parsers/test_openai_tool_parser.py | 9 +++------ tests/tool_parsers/test_qwen3coder_tool_parser.py | 9 +++------ 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index a55d245054db..80bcb9cf6b63 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -21,6 +21,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser @@ -873,12 +875,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice={ - "type": "function", - "function": { - "name": tool.function.name, - }, - }, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index fa1def007216..e94ab8299f01 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -15,6 +15,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -657,12 +659,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice={ - "type": "function", - "function": { - "name": tool.function.name, - }, - }, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index babdafdf54e9..56ec4fafa0e0 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -18,6 +18,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer @@ -337,12 +339,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice={ - "type": "function", - "function": { - "name": tool.function.name, - }, - }, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) tag = openai_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 21063348063d..a77fa657140f 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -11,6 +11,8 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, + ChatCompletionNamedToolChoiceParam, + ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -1181,12 +1183,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice={ - "type": "function", - "function": { - "name": tool.function.name, - }, - }, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) From a5a5277c593e78dbdc51a5c37146bc1c561a1212 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:23:21 +0800 Subject: [PATCH 09/43] fix the validation. Signed-off-by: Yuchuan --- vllm/entrypoints/openai/chat_completion/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 01d2df88d69b..3976ee24287d 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -715,7 +715,7 @@ def check_tool_usage(cls, data): # OR that it's set to "auto" or "required" if data["tool_choice"] not in ["auto", "required"] and not isinstance( data["tool_choice"], dict - ): + ) and not isinstance(data["tool_choice"], ChatCompletionNamedToolChoiceParam): raise ValueError( f"Invalid value for `tool_choice`: {data['tool_choice']}! " 'Only named tools, "none", "auto" or "required" ' From 2de7bbdc250d1353162208900ed9a3d9acd9eb22 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:48:33 +0800 Subject: [PATCH 10/43] fix the test. Signed-off-by: Yuchuan --- .../tool_parsers/test_kimi_k2_tool_parser.py | 3 ++ .../test_qwen3coder_tool_parser.py | 37 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index e94ab8299f01..a23d38aa0e5b 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -507,6 +507,7 @@ def test_sets_skip_special_tokens_false(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = [{"type": "function", "function": {"name": "test"}}] request.tool_choice = "auto" + request.include_reasoning = True request.skip_special_tokens = True result = parser.adjust_request(request) @@ -516,6 +517,7 @@ def test_no_change_when_tool_choice_none(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = [{"type": "function", "function": {"name": "test"}}] request.tool_choice = "none" + request.include_reasoning = True request.skip_special_tokens = True result = parser.adjust_request(request) @@ -525,6 +527,7 @@ def test_no_change_when_no_tools(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = None request.tool_choice = "auto" + request.include_reasoning = False request.skip_special_tokens = True result = parser.adjust_request(request) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index a77fa657140f..0c1b5061ccbb 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -111,6 +111,26 @@ def sample_tools(request): ] +def _as_chat_completion_tools( + tools: list[ChatCompletionToolsParam | FunctionTool], +) -> list[ChatCompletionToolsParam]: + normalized: list[ChatCompletionToolsParam] = [] + for tool in tools: + if isinstance(tool, ChatCompletionToolsParam): + normalized.append(tool) + else: + normalized.append( + ChatCompletionToolsParam( + type="function", + function={ + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + }, + )) + return normalized + + def assert_tool_calls( actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall] ): @@ -1159,10 +1179,11 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: + request_tools = _as_chat_completion_tools(sample_tools) req = ChatCompletionRequest( messages=[], model="m", - tools=sample_tools, + tools=request_tools, tool_choice="auto", ) tag = qwen3_tool_parser.get_structural_tag(req) @@ -1171,18 +1192,18 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( req = ChatCompletionRequest( messages=[], model="m", - tools=sample_tools, + tools=request_tools, tool_choice="required", ) tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) - if sample_tools: - tool = sample_tools[0] + if request_tools: + tool = request_tools[0] req = ChatCompletionRequest( messages=[], model="m", - tools=sample_tools, + tools=request_tools, tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) tag = qwen3_tool_parser.get_structural_tag(req) @@ -1194,10 +1215,11 @@ def test_adjust_request_auto_structural_tag_is_json_string( sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, ) -> None: + request_tools = _as_chat_completion_tools(sample_tools) req = ChatCompletionRequest( messages=[], model="m", - tools=sample_tools, + tools=request_tools, tool_choice="auto", include_reasoning=include_reasoning, ) @@ -1213,10 +1235,11 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: + request_tools = _as_chat_completion_tools(sample_tools) req = ChatCompletionRequest( messages=[], model="m", - tools=sample_tools, + tools=request_tools, tool_choice="required", ) out = qwen3_tool_parser.adjust_request(req) From e70a7205c6ace6125415bff258ddcc305bb87a05 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:51:10 +0800 Subject: [PATCH 11/43] update the version of xgr. Signed-off-by: Yuchuan --- requirements/common.txt | 2 +- requirements/test/rocm.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 5d4519204ee9..acd7fcb80012 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -24,7 +24,7 @@ outlines_core == 0.2.14 # required for outlines backend disk cache diskcache == 5.6.3 lark == 1.2.2 -xgrammar >= 0.1.32, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" +xgrammar >= 0.1.34, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt index ca33e2d09aa0..f87ea438bc28 100644 --- a/requirements/test/rocm.txt +++ b/requirements/test/rocm.txt @@ -1597,7 +1597,7 @@ wrapt==2.1.2 # via smart-open xarray==2026.2.0 # via rioxarray -xgrammar==0.1.33 +xgrammar==0.1.34 # via # -c requirements/common.txt # -r requirements/test/../common.txt From 7dfbd4d4c9ba85905e6066941f2fe4c36cc8833f Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:54:35 +0800 Subject: [PATCH 12/43] fix the tool type. Signed-off-by: Yuchuan --- vllm/tool_parsers/abstract_tool_parser.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index f671a866bd0a..02ba453bc169 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -159,21 +159,32 @@ def empty_thinking_as_non_thinking(self) -> bool: def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag: - + + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + model_id = self.get_model_structural_tag_id() thinking_mode = request.include_reasoning tool_choice_type = ( request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice ) tool_dicts = [] - + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): for tool in request.tools: - if tool.function.name == request.tool_choice.function.name: - tool_dicts.append(tool.model_dump()) + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + tool_dicts.append(tool_dict) else: - tool_dicts = [tool.model_dump() for tool in request.tools] - + tool_dicts = [_tool_to_dict(tool) for tool in request.tools] + if thinking_mode: return get_model_structural_tag( model=model_id, From f06ccda912a0873518551722fcc2c94a91743344 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 14:58:44 +0800 Subject: [PATCH 13/43] fix the test. Signed-off-by: Yuchuan --- .../tool_parsers/test_kimi_k2_tool_parser.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index a23d38aa0e5b..f14b1be0993c 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -22,7 +22,7 @@ ChatCompletionRequest, ) from vllm.tokenizers import get_tokenizer -from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser +from vllm.tool_parsers.parser import KimiK2ToolParser MODEL = "moonshotai/Kimi-K2-Instruct" @@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} -def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser): - assert kimi_k2_tool_parser.support_structural_tag() is True +def test_support_builtin_structural_tag(parser: KimiK2ToolParser): + assert parser.support_structural_tag() is True def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( @@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="auto", ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) req = ChatCompletionRequest( @@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="required", ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) if sample_tools: @@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, ) -> None: @@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( tool_choice="auto", include_reasoning=include_reasoning, ) - out = kimi_k2_tool_parser.adjust_request(req) + out = parser.adjust_request(req) assert out.structured_outputs is not None assert out.structured_outputs.structural_tag is not None assert isinstance(out.structured_outputs.structural_tag, str) @@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( def test_adjust_request_required_uses_json_schema_not_structural_tag( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( @@ -698,5 +698,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag( tools=sample_tools, tool_choice="required", ) - out = kimi_k2_tool_parser.adjust_request(req) + out = parser.adjust_request(req) assert out.structured_outputs.structural_tag is None From f7c8c9173dd58450a2c040bfabf560cafe8c2535 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 15:01:37 +0800 Subject: [PATCH 14/43] fix the import. Signed-off-by: Yuchuan --- .../tool_parsers/test_kimi_k2_tool_parser.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index f14b1be0993c..19fa4fd680bf 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -22,7 +22,7 @@ ChatCompletionRequest, ) from vllm.tokenizers import get_tokenizer -from vllm.tool_parsers.parser import KimiK2ToolParser +from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser MODEL = "moonshotai/Kimi-K2-Instruct" @@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} -def test_support_builtin_structural_tag(parser: KimiK2ToolParser): - assert parser.support_structural_tag() is True +def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser): + assert kimi_k2_tool_parser.support_structural_tag() is True def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - parser: KimiK2ToolParser, + kimi_k2_tool_parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( @@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="auto", ) - tag = parser.get_structural_tag(req) + tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) req = ChatCompletionRequest( @@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="required", ) - tag = parser.get_structural_tag(req) + tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) if sample_tools: @@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) - tag = parser.get_structural_tag(req) + tag = kimi_k2_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( - parser: KimiK2ToolParser, + kimi_k2_tool_parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, ) -> None: @@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( tool_choice="auto", include_reasoning=include_reasoning, ) - out = parser.adjust_request(req) + out = kimi_k2_tool_parser.adjust_request(req) assert out.structured_outputs is not None assert out.structured_outputs.structural_tag is not None assert isinstance(out.structured_outputs.structural_tag, str) @@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( def test_adjust_request_required_uses_json_schema_not_structural_tag( - parser: KimiK2ToolParser, + kimi_k2_tool_parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( From 5fbb503c56e32c9696c1faab074ead91f385d7fc Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 15:02:39 +0800 Subject: [PATCH 15/43] fix the import. Signed-off-by: Yuchuan --- tests/tool_parsers/test_kimi_k2_tool_parser.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index 19fa4fd680bf..ff7fc5b0c8c9 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} -def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser): - assert kimi_k2_tool_parser.support_structural_tag() is True +def test_support_builtin_structural_tag(parser: KimiK2ToolParser): + assert parser.support_structural_tag() is True def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( @@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="auto", ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) req = ChatCompletionRequest( @@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice="required", ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) if sample_tools: @@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( tools=sample_tools, tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), ) - tag = kimi_k2_tool_parser.get_structural_tag(req) + tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_structural_tag_is_json_string( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, ) -> None: @@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( tool_choice="auto", include_reasoning=include_reasoning, ) - out = kimi_k2_tool_parser.adjust_request(req) + out = parser.adjust_request(req) assert out.structured_outputs is not None assert out.structured_outputs.structural_tag is not None assert isinstance(out.structured_outputs.structural_tag, str) @@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( def test_adjust_request_required_uses_json_schema_not_structural_tag( - kimi_k2_tool_parser: KimiK2ToolParser, + parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: req = ChatCompletionRequest( From 8a09479c5068c5d3c02f32f36faebfd1258750d2 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 16:55:09 +0800 Subject: [PATCH 16/43] update the api. Signed-off-by: Yuchuan --- vllm/tool_parsers/abstract_tool_parser.py | 59 +------------------- vllm/tool_parsers/deepseekv32_tool_parser.py | 33 ++++++++++- vllm/tool_parsers/deepseekv4_tool_parser.py | 36 +++++++++++- vllm/tool_parsers/kimi_k2_tool_parser.py | 35 +++++++++++- vllm/tool_parsers/openai_tool_parser.py | 36 +++++++++++- vllm/tool_parsers/qwen3coder_tool_parser.py | 33 ++++++++++- 6 files changed, 165 insertions(+), 67 deletions(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 02ba453bc169..b2c88d1449bb 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -7,7 +7,7 @@ from functools import cached_property import json -from xgrammar import StructuralTag, get_model_structural_tag +from xgrammar import StructuralTag from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, @@ -142,65 +142,12 @@ def adjust_request( ) return request - def get_model_structural_tag_id(self) -> str: - """ - Return the model ID for the builtin structural tag. - """ - raise NotImplementedError() - - def empty_thinking_as_non_thinking(self) -> bool: - """ - It decides how to handle non-thinking mode. If True, non-thinking mode will force the - LLM output an empty thinking. If False, thinking tags like or are not - allowed and will not be output by the LLM. - """ - return True - def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag: - - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - model_id = self.get_model_structural_tag_id() - thinking_mode = request.include_reasoning - tool_choice_type = ( - request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice + raise NotImplementedError( + "ToolParser.get_structural_tag has not been implemented!" ) - tool_dicts = [] - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - tool_dicts.append(tool_dict) - else: - tool_dicts = [_tool_to_dict(tool) for tool in request.tools] - - if thinking_mode: - return get_model_structural_tag( - model=model_id, - tools=tool_dicts, - tool_choice=tool_choice_type, - reasoning=True, - force_empty_reasoning=False, - ) - else: - return get_model_structural_tag( - model=model_id, - tools=tool_dicts, - tool_choice=tool_choice_type, - reasoning=not self.empty_thinking_as_non_thinking(), - force_empty_reasoning=self.empty_thinking_as_non_thinking(), - ) def support_structural_tag(self) -> bool: return False diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index 63e3cd59ab51..783ec9aa928f 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -7,9 +7,12 @@ from typing import Any import regex as re +from xgrammar import StructuralTag, get_model_structural_tag from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, + ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -323,5 +326,31 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - def get_model_structural_tag_id(self) -> str: - return "deepseek_v3_2" \ No newline at end of file + def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + converted_tool_choice = request.tool_choice.model_dump() + converted_tools = [] + for tool in request.tools: + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + converted_tools.append(tool_dict) + else: + converted_tool_choice = request.tool_choice + converted_tools = [_tool_to_dict(tool) for tool in request.tools] + + return get_model_structural_tag( + model="deepseek_v3_2", + tools=converted_tools, + tool_choice=converted_tool_choice, + reasoning=request.include_reasoning, + ) diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 45a9c1302578..0c9401e54601 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -2,7 +2,12 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser - +from xgrammar import StructuralTag, get_model_structural_tag +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, + ChatCompletionRequest, + ChatCompletionToolsParam, +) class DeepSeekV4ToolParser(DeepSeekV32ToolParser): """ @@ -14,3 +19,32 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser): tool_call_start_token: str = "<|DSML|tool_calls>" tool_call_end_token: str = "" + + def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + converted_tool_choice = request.tool_choice.model_dump() + converted_tools = [] + for tool in request.tools: + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + converted_tools.append(tool_dict) + else: + converted_tool_choice = request.tool_choice + converted_tools = [_tool_to_dict(tool) for tool in request.tools] + + return get_model_structural_tag( + model="deepseek_v4", + tools=converted_tools, + tool_choice=converted_tool_choice, + reasoning=request.include_reasoning, + ) diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index 319f2c19f4b6..35df1df2ad49 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -4,9 +4,12 @@ from collections.abc import Sequence import regex as re +from xgrammar import StructuralTag, get_model_structural_tag from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, + ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -277,6 +280,32 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - - def get_model_structural_tag_id(self) -> str: - return "kimi" + + def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + converted_tool_choice = request.tool_choice.model_dump() + converted_tools = [] + for tool in request.tools: + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + converted_tools.append(tool_dict) + else: + converted_tool_choice = request.tool_choice + converted_tools = [_tool_to_dict(tool) for tool in request.tools] + + return get_model_structural_tag( + model="kimi", + tools=converted_tools, + tool_choice=converted_tool_choice, + reasoning=request.include_reasoning, + ) diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index dc906a762a77..152fdf2ce677 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -4,8 +4,12 @@ from collections.abc import Sequence from typing import TYPE_CHECKING +from xgrammar import StructuralTag, get_model_structural_tag + from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, + ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -115,6 +119,32 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - - def get_model_structural_tag_id(self) -> str: - return "harmony" + + def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + converted_tool_choice = request.tool_choice.model_dump() + converted_tools = [] + for tool in request.tools: + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + converted_tools.append(tool_dict) + else: + converted_tool_choice = request.tool_choice + converted_tools = [_tool_to_dict(tool) for tool in request.tools] + + return get_model_structural_tag( + model="harmony", + tools=converted_tools, + tool_choice=converted_tool_choice, + reasoning=request.include_reasoning, + ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index ed24752bfd37..ce2b4a3069e9 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -7,9 +7,12 @@ from typing import Any import regex as re +from xgrammar import StructuralTag, get_model_structural_tag from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, + ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( @@ -687,5 +690,31 @@ def extract_tool_calls_streaming( def support_structural_tag(self) -> bool: return True - def get_model_structural_tag_id(self) -> str: - return "qwen_coder" + def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: + if isinstance(tool, dict): + return tool + if hasattr(tool, "model_dump"): + return tool.model_dump() + if hasattr(tool, "dict"): + return tool.dict() + raise TypeError(f"Unsupported tool type: {type(tool)}") + + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): + converted_tool_choice = request.tool_choice.model_dump() + converted_tools = [] + for tool in request.tools: + tool_dict = _tool_to_dict(tool) + tool_name = tool_dict.get("function", {}).get("name") + if tool_name == request.tool_choice.function.name: + converted_tools.append(tool_dict) + else: + converted_tool_choice = request.tool_choice + converted_tools = [_tool_to_dict(tool) for tool in request.tools] + + return get_model_structural_tag( + model="qwen_coder", + tools=converted_tools, + tool_choice=converted_tool_choice, + reasoning=request.include_reasoning, + ) From 098b80c9e9e415e73cd0ee9f139308f2ac205c71 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 17:02:21 +0800 Subject: [PATCH 17/43] add v4 tests. Signed-off-by: Yuchuan --- .../test_deepseekv4_tool_parser.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index 631d0fb97b33..b369c364fead 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -8,6 +8,13 @@ from vllm.tool_parsers import ToolParserManager from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, + ChatCompletionRequest, + ChatCompletionToolsParam, + ChatCompletionNamedFunction, +) +from xgrammar import StructuralTag MOCK_TOKENIZER = MagicMock() MOCK_TOKENIZER.get_vocab.return_value = {} @@ -121,3 +128,40 @@ def test_streaming_extracts_complete_invokes(): ] assert names == ["search"] assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"} + +def test_support_builtin_structural_tag(): + assert make_parser().support_structural_tag() is True + + +def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( + sample_tools: list[ChatCompletionToolsParam], +) -> None: + parser = make_parser() + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="auto", + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice="required", + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) + + if sample_tools: + tool = sample_tools[0] + req = ChatCompletionRequest( + messages=[], + model="m", + tools=sample_tools, + tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) \ No newline at end of file From 15c99cb111e626d885d7f54a0b57dc9ff10168db Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 17:13:41 +0800 Subject: [PATCH 18/43] update. Signed-off-by: Yuchuan --- .../test_deepseekv32_tool_parser.py | 4 --- .../test_deepseekv4_tool_parser.py | 4 --- .../tool_parsers/test_kimi_k2_tool_parser.py | 4 --- tests/tool_parsers/test_openai_tool_parser.py | 4 --- .../test_qwen3coder_tool_parser.py | 4 --- vllm/tool_parsers/abstract_tool_parser.py | 25 ++++++++++--------- vllm/tool_parsers/deepseekv32_tool_parser.py | 7 +++--- vllm/tool_parsers/deepseekv4_tool_parser.py | 4 ++- vllm/tool_parsers/kimi_k2_tool_parser.py | 7 +++--- vllm/tool_parsers/openai_tool_parser.py | 7 +++--- vllm/tool_parsers/qwen3coder_tool_parser.py | 7 +++--- 11 files changed, 28 insertions(+), 49 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index 80bcb9cf6b63..c5ed3ae5a3fd 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -843,10 +843,6 @@ def test_convert_param_value_checked_helper(parser): assert parser._convert_param_value("null", "object") is None -def test_support_builtin_structural_tag(): - assert make_parser().support_structural_tag() is True - - def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( sample_tools: list[ChatCompletionToolsParam], ) -> None: diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index b369c364fead..6849254a6672 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -129,10 +129,6 @@ def test_streaming_extracts_complete_invokes(): assert names == ["search"] assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"} -def test_support_builtin_structural_tag(): - assert make_parser().support_structural_tag() is True - - def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( sample_tools: list[ChatCompletionToolsParam], ) -> None: diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index ff7fc5b0c8c9..20794df89ee4 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -629,10 +629,6 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} -def test_support_builtin_structural_tag(parser: KimiK2ToolParser): - assert parser.support_structural_tag() is True - - def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index 56ec4fafa0e0..2fc24df11b1a 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -307,10 +307,6 @@ def test_extract_tool_calls_with_content( assert extracted_info.content == final_content -def test_support_builtin_structural_tag(openai_tool_parser: OpenAIToolParser): - assert openai_tool_parser.support_structural_tag() is True - - def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( openai_tool_parser: OpenAIToolParser, sample_tools: list[ChatCompletionToolsParam], diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 0c1b5061ccbb..e31c30992c72 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1171,10 +1171,6 @@ def test_no_double_serialization_string_args(qwen3_tool_parser): assert '\\"hello world\\"' not in raw_arguments -def test_support_builtin_structural_tag(qwen3_tool_parser: Qwen3CoderToolParser): - assert qwen3_tool_parser.support_structural_tag() is True - - def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index b2c88d1449bb..0988f52c4569 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -134,23 +134,24 @@ def adjust_request( # Step 2: apply xgrammar's built-in tool calling support. # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required". - need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) - if self.support_structural_tag() and need_tool_calling: + need_tool_calling = ( + request.tool_choice == "auto" + or request.tool_choice == "required" + or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) + ) + if need_tool_calling: structure_tag = self.get_structural_tag(request) - request.structured_outputs = StructuredOutputsParams( - structural_tag=json.dumps(structure_tag.model_dump()), - ) + if structure_tag is not None: + request.structured_outputs = StructuredOutputsParams( + structural_tag=json.dumps(structure_tag.model_dump()), + ) + return request def get_structural_tag( self, request: ChatCompletionRequest - ) -> StructuralTag: - raise NotImplementedError( - "ToolParser.get_structural_tag has not been implemented!" - ) - - def support_structural_tag(self) -> bool: - return False + ) -> StructuralTag | None: + return None def extract_tool_calls( self, model_output: str, request: ChatCompletionRequest diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index 783ec9aa928f..7a3e8635abc8 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -323,10 +323,9 @@ def extract_tool_calls_streaming( return None - def support_structural_tag(self) -> bool: - return True - - def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag | None: def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: if isinstance(tool, dict): return tool diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 0c9401e54601..2e6927f759fc 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -20,7 +20,9 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser): tool_call_start_token: str = "<|DSML|tool_calls>" tool_call_end_token: str = "" - def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag | None: def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: if isinstance(tool, dict): return tool diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index 35df1df2ad49..b580c371a980 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -278,10 +278,9 @@ def extract_tool_calls_streaming( logger.exception("Error trying to handle streaming tool call.") return None # do not stream a delta. skip this token ID. - def support_structural_tag(self) -> bool: - return True - - def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag | None: def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: if isinstance(tool, dict): return tool diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index 152fdf2ce677..d308c038ae3b 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -117,10 +117,9 @@ def extract_tool_calls_streaming( "Not being used, manual parsing in serving_chat.py" # noqa: E501 ) - def support_structural_tag(self) -> bool: - return True - - def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag | None: def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: if isinstance(tool, dict): return tool diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index ce2b4a3069e9..4a42aa87c57b 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -687,10 +687,9 @@ def extract_tool_calls_streaming( return None - def support_structural_tag(self) -> bool: - return True - - def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag: + def get_structural_tag( + self, request: ChatCompletionRequest + ) -> StructuralTag | None: def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: if isinstance(tool, dict): return tool From 93fc4b43716016e05430605664ce4ea3cc54688c Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 17:27:36 +0800 Subject: [PATCH 19/43] update hte priority. Signed-off-by: Yuchuan --- .../tool_parsers/test_kimi_k2_tool_parser.py | 5 ++- tests/tool_parsers/test_openai_tool_parser.py | 5 ++- .../test_qwen3coder_tool_parser.py | 5 ++- vllm/tool_parsers/abstract_tool_parser.py | 42 +++++++++---------- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index 20794df89ee4..d9509b25a946 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -684,7 +684,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( assert isinstance(loaded, dict) -def test_adjust_request_required_uses_json_schema_not_structural_tag( +def test_adjust_request_required_prefers_structural_tag( parser: KimiK2ToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: @@ -695,4 +695,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag( tool_choice="required", ) out = parser.adjust_request(req) - assert out.structured_outputs.structural_tag is None + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index 2fc24df11b1a..d4536d1c5e3d 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -361,7 +361,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( assert isinstance(loaded, dict) -def test_adjust_request_required_uses_json_schema_not_structural_tag( +def test_adjust_request_required_prefers_structural_tag( openai_tool_parser: OpenAIToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: @@ -372,4 +372,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag( tool_choice="required", ) out = openai_tool_parser.adjust_request(req) - assert out.structured_outputs.structural_tag is None + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index e31c30992c72..aad7c2e4b512 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1227,7 +1227,7 @@ def test_adjust_request_auto_structural_tag_is_json_string( assert isinstance(loaded, dict) -def test_adjust_request_required_uses_json_schema_not_structural_tag( +def test_adjust_request_required_prefers_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: @@ -1239,4 +1239,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag( tool_choice="required", ) out = qwen3_tool_parser.adjust_request(req) - assert out.structured_outputs.structural_tag is None + assert out.structured_outputs is not None + assert out.structured_outputs.structural_tag is not None diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 0988f52c4569..e7351199b511 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -95,8 +95,27 @@ def adjust_request( if not request.tools: return request - # Step 1: set structured output params when tool constraints are derived - # from the tool schema. + # Step 1 (highest priority for ChatCompletionRequest): apply + # xgrammar's built-in structural tag support. + if isinstance(request, ChatCompletionRequest): + # XGrammar will support tool_choice="none" in the future. + # Currently, we only support tool_choice="auto" and + # tool_choice="required". + need_tool_calling = ( + request.tool_choice == "auto" + or request.tool_choice == "required" + or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) + ) + if need_tool_calling: + structure_tag = self.get_structural_tag(request) + if structure_tag is not None: + request.structured_outputs = StructuredOutputsParams( + structural_tag=json.dumps(structure_tag.model_dump()), + ) + return request + + # Step 2: set structured output params when tool constraints are + # derived from the tool schema. json_schema_from_tool = get_json_schema_from_tools( tool_choice=request.tool_choice, tools=request.tools ) @@ -124,28 +143,9 @@ def adjust_request( strict=True, ) ) - - return request - # Only ChatCompletionRequest is supported for Step 2. - if not isinstance(request, ChatCompletionRequest): return request - - # Step 2: apply xgrammar's built-in tool calling support. - # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required". - need_tool_calling = ( - request.tool_choice == "auto" - or request.tool_choice == "required" - or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) - ) - if need_tool_calling: - structure_tag = self.get_structural_tag(request) - if structure_tag is not None: - request.structured_outputs = StructuredOutputsParams( - structural_tag=json.dumps(structure_tag.model_dump()), - ) - return request def get_structural_tag( From cbc745ebaefe709337407c9af792b6000eaabd55 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 18:01:03 +0800 Subject: [PATCH 20/43] fix the test. Signed-off-by: Yuchuan --- .../test_deepseekv4_tool_parser.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index 6849254a6672..2074cb66d073 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -26,6 +26,43 @@ PARAM_START = '<|DSML|parameter name="' PARAM_END = "" +@pytest.fixture +def sample_tools() -> list[ChatCompletionToolsParam]: + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, + }, + }, + ), + ] + + def make_parser(tools=None) -> DeepSeekV4ToolParser: return DeepSeekV4ToolParser(MOCK_TOKENIZER, tools=tools) From 894871f7d02397bf0e7e9a8ad1bfcd2c546b321a Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 18:01:48 +0800 Subject: [PATCH 21/43] fix the import. Signed-off-by: Yuchuan --- tests/tool_parsers/test_deepseekv4_tool_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index 2074cb66d073..b3e737ddc6a1 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -5,6 +5,7 @@ import json from unittest.mock import MagicMock +import pytest from vllm.tool_parsers import ToolParserManager from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser From b3bf271ff3dfc51d33ae9418d2aaa11d1748b16c Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Wed, 29 Apr 2026 23:00:28 +0800 Subject: [PATCH 22/43] update the version of xgr. Signed-off-by: Yuchuan --- requirements/common.txt | 2 +- requirements/test/rocm.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index acd7fcb80012..652738eebe74 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -24,7 +24,7 @@ outlines_core == 0.2.14 # required for outlines backend disk cache diskcache == 5.6.3 lark == 1.2.2 -xgrammar >= 0.1.34, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" +xgrammar >= 0.2.0, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt index f87ea438bc28..8ba78ee128b5 100644 --- a/requirements/test/rocm.txt +++ b/requirements/test/rocm.txt @@ -1597,7 +1597,7 @@ wrapt==2.1.2 # via smart-open xarray==2026.2.0 # via rioxarray -xgrammar==0.1.34 +xgrammar==0.2.0 # via # -c requirements/common.txt # -r requirements/test/../common.txt From 1ecff43eba8525502f49e710c16370b7da484fa5 Mon Sep 17 00:00:00 2001 From: mgoin Date: Fri, 1 May 2026 15:41:40 +0000 Subject: [PATCH 23/43] Lint Signed-off-by: mgoin --- vllm/entrypoints/openai/chat_completion/protocol.py | 10 +++++++--- vllm/tool_parsers/qwen3coder_tool_parser.py | 7 ++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 03a473aa4cf6..140a2fe566da 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -739,9 +739,13 @@ def check_tool_usage(cls, data): # make sure that tool choice is either a named tool # OR that it's set to "auto" or "required" - if data["tool_choice"] not in ["auto", "required"] and not isinstance( - data["tool_choice"], dict - ) and not isinstance(data["tool_choice"], ChatCompletionNamedToolChoiceParam): + if ( + data["tool_choice"] not in ["auto", "required"] + and not isinstance(data["tool_choice"], dict) + and not isinstance( + data["tool_choice"], ChatCompletionNamedToolChoiceParam + ) + ): raise ValueError( f"Invalid value for `tool_choice`: {data['tool_choice']}! " 'Only named tools, "none", "auto" or "required" ' diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 4a42aa87c57b..729aa123bfea 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -14,7 +14,6 @@ ChatCompletionRequest, ChatCompletionToolsParam, ) - from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, DeltaMessage, @@ -685,8 +684,7 @@ def extract_tool_calls_streaming( return result return None - - + def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag | None: @@ -699,6 +697,9 @@ def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: return tool.dict() raise TypeError(f"Unsupported tool type: {type(tool)}") + if not request.tools: + return None + if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): converted_tool_choice = request.tool_choice.model_dump() converted_tools = [] From 6ca893e2391982f52f1d1642bd8f7bed7a6427d1 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 05:12:52 -0400 Subject: [PATCH 24/43] Move structural tag builders into vLLM --- .../test_deepseekv32_tool_parser.py | 63 +--- .../test_deepseekv4_tool_parser.py | 2 +- .../tool_parsers/test_kimi_k2_tool_parser.py | 71 ---- tests/tool_parsers/test_openai_tool_parser.py | 70 ---- .../test_qwen3coder_tool_parser.py | 4 +- vllm/tool_parsers/abstract_tool_parser.py | 5 +- vllm/tool_parsers/deepseekv32_tool_parser.py | 33 -- vllm/tool_parsers/deepseekv4_tool_parser.py | 35 +- vllm/tool_parsers/kimi_k2_tool_parser.py | 33 -- vllm/tool_parsers/openai_tool_parser.py | 34 -- vllm/tool_parsers/qwen3coder_tool_parser.py | 35 +- vllm/tool_parsers/structural_tag_registry.py | 327 ++++++++++++++++++ 12 files changed, 344 insertions(+), 368 deletions(-) create mode 100644 vllm/tool_parsers/structural_tag_registry.py diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index a620cf5bbec6..f82d1d739a07 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -10,7 +10,6 @@ from unittest.mock import MagicMock import pytest -from xgrammar import StructuralTag from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.entrypoints.openai.chat_completion.protocol import ( @@ -18,12 +17,7 @@ FunctionDefinition, ) from vllm.tokenizers import get_tokenizer -from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionRequest, - ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, -) +from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser # --------------------------------------------------------------------------- @@ -867,61 +861,6 @@ def test_convert_param_value_checked_helper(parser): assert parser._convert_param_value("null", "object") is None -def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - sample_tools: list[ChatCompletionToolsParam], -) -> None: - parser = make_parser() - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - if sample_tools: - tool = sample_tools[0] - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - -@pytest.mark.parametrize("include_reasoning", [True, False]) -def test_adjust_request_auto_structural_tag_is_json_string( - sample_tools: list[ChatCompletionToolsParam], - include_reasoning: bool, -) -> None: - parser = make_parser() - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - include_reasoning=include_reasoning, - ) - out = parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is not None - assert isinstance(out.structured_outputs.structural_tag, str) - loaded = json.loads(out.structured_outputs.structural_tag) - assert isinstance(loaded, dict) - - def test_adjust_request_required_uses_json_schema_not_structural_tag( sample_tools: list[ChatCompletionToolsParam], ) -> None: diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index b3e737ddc6a1..095ed7eb17a2 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -167,7 +167,7 @@ def test_streaming_extracts_complete_invokes(): assert names == ["search"] assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"} -def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( +def test_get_vllm_registry_structural_tag_returns_structural_tag( sample_tools: list[ChatCompletionToolsParam], ) -> None: parser = make_parser() diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index d9509b25a946..5552a977d47d 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -6,7 +6,6 @@ from unittest.mock import MagicMock import pytest -from xgrammar import StructuralTag from tests.tool_parsers.utils import ( run_tool_extraction, @@ -15,8 +14,6 @@ from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -629,71 +626,3 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} -def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - parser: KimiK2ToolParser, - sample_tools: list[ChatCompletionToolsParam], -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - if sample_tools: - - tool = sample_tools[0] - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), - ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - -@pytest.mark.parametrize("include_reasoning", [True, False]) -def test_adjust_request_auto_structural_tag_is_json_string( - parser: KimiK2ToolParser, - sample_tools: list[ChatCompletionToolsParam], - include_reasoning: bool, -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - include_reasoning=include_reasoning, - ) - out = parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is not None - assert isinstance(out.structured_outputs.structural_tag, str) - loaded = json.loads(out.structured_outputs.structural_tag) - assert isinstance(loaded, dict) - - -def test_adjust_request_required_prefers_structural_tag( - parser: KimiK2ToolParser, - sample_tools: list[ChatCompletionToolsParam], -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - out = parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is not None diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index d4536d1c5e3d..b5a365b495ca 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -13,13 +13,10 @@ SystemContent, load_harmony_encoding, ) -from xgrammar import StructuralTag from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer @@ -307,70 +304,3 @@ def test_extract_tool_calls_with_content( assert extracted_info.content == final_content -def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( - openai_tool_parser: OpenAIToolParser, - sample_tools: list[ChatCompletionToolsParam], -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - ) - tag = openai_tool_parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - tag = openai_tool_parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - - if sample_tools: - tool = sample_tools[0] - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), - ) - tag = openai_tool_parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) - -@pytest.mark.parametrize("include_reasoning", [True, False]) -def test_adjust_request_auto_structural_tag_is_json_string( - openai_tool_parser: OpenAIToolParser, - sample_tools: list[ChatCompletionToolsParam], - include_reasoning: bool, -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="auto", - include_reasoning=include_reasoning, - ) - out = openai_tool_parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is not None - assert isinstance(out.structured_outputs.structural_tag, str) - loaded = json.loads(out.structured_outputs.structural_tag) - assert isinstance(loaded, dict) - - -def test_adjust_request_required_prefers_structural_tag( - openai_tool_parser: OpenAIToolParser, - sample_tools: list[ChatCompletionToolsParam], -) -> None: - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - out = openai_tool_parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is not None diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index aad7c2e4b512..6e6842c40785 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1171,7 +1171,7 @@ def test_no_double_serialization_string_args(qwen3_tool_parser): assert '\\"hello world\\"' not in raw_arguments -def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( +def test_get_vllm_registry_structural_tag_returns_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: @@ -1206,7 +1206,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag( assert isinstance(tag, StructuralTag) @pytest.mark.parametrize("include_reasoning", [True, False]) -def test_adjust_request_auto_structural_tag_is_json_string( +def test_adjust_request_auto_uses_vllm_registry_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index e7351199b511..6eb15a444fc0 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -96,11 +96,8 @@ def adjust_request( return request # Step 1 (highest priority for ChatCompletionRequest): apply - # xgrammar's built-in structural tag support. + # vLLM-owned structural tag support for model-specific tool formats. if isinstance(request, ChatCompletionRequest): - # XGrammar will support tool_choice="none" in the future. - # Currently, we only support tool_choice="auto" and - # tool_choice="required". need_tool_calling = ( request.tool_choice == "auto" or request.tool_choice == "required" diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index e7cc21c8533f..87a1f88cd67a 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -7,12 +7,9 @@ from typing import Any import regex as re -from xgrammar import StructuralTag, get_model_structural_tag from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -324,33 +321,3 @@ def extract_tool_calls_streaming( return None - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag | None: - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - converted_tool_choice = request.tool_choice.model_dump() - converted_tools = [] - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - converted_tools.append(tool_dict) - else: - converted_tool_choice = request.tool_choice - converted_tools = [_tool_to_dict(tool) for tool in request.tools] - - return get_model_structural_tag( - model="deepseek_v3_2", - tools=converted_tools, - tool_choice=converted_tool_choice, - reasoning=request.include_reasoning, - ) diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 2e6927f759fc..12791fc7b2ea 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -1,13 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser -from xgrammar import StructuralTag, get_model_structural_tag +from xgrammar import StructuralTag + from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, - ChatCompletionToolsParam, ) +from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser +from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag class DeepSeekV4ToolParser(DeepSeekV32ToolParser): """ @@ -23,30 +23,9 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser): def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag | None: - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - converted_tool_choice = request.tool_choice.model_dump() - converted_tools = [] - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - converted_tools.append(tool_dict) - else: - converted_tool_choice = request.tool_choice - converted_tools = [_tool_to_dict(tool) for tool in request.tools] - return get_model_structural_tag( model="deepseek_v4", - tools=converted_tools, - tool_choice=converted_tool_choice, + tools=request.tools, + tool_choice=request.tool_choice, reasoning=request.include_reasoning, - ) + ) diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index b580c371a980..31f29900c2b3 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -4,12 +4,9 @@ from collections.abc import Sequence import regex as re -from xgrammar import StructuralTag, get_model_structural_tag from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -278,33 +275,3 @@ def extract_tool_calls_streaming( logger.exception("Error trying to handle streaming tool call.") return None # do not stream a delta. skip this token ID. - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag | None: - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - converted_tool_choice = request.tool_choice.model_dump() - converted_tools = [] - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - converted_tools.append(tool_dict) - else: - converted_tool_choice = request.tool_choice - converted_tools = [_tool_to_dict(tool) for tool in request.tools] - - return get_model_structural_tag( - model="kimi", - tools=converted_tools, - tool_choice=converted_tool_choice, - reasoning=request.include_reasoning, - ) diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index d308c038ae3b..57bfa9915e86 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -4,12 +4,8 @@ from collections.abc import Sequence from typing import TYPE_CHECKING -from xgrammar import StructuralTag, get_model_structural_tag - from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -117,33 +113,3 @@ def extract_tool_calls_streaming( "Not being used, manual parsing in serving_chat.py" # noqa: E501 ) - def get_structural_tag( - self, request: ChatCompletionRequest - ) -> StructuralTag | None: - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - converted_tool_choice = request.tool_choice.model_dump() - converted_tools = [] - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - converted_tools.append(tool_dict) - else: - converted_tool_choice = request.tool_choice - converted_tools = [_tool_to_dict(tool) for tool in request.tools] - - return get_model_structural_tag( - model="harmony", - tools=converted_tools, - tool_choice=converted_tool_choice, - reasoning=request.include_reasoning, - ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 729aa123bfea..b55c2b1274f9 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -7,12 +7,10 @@ from typing import Any import regex as re -from xgrammar import StructuralTag, get_model_structural_tag +from xgrammar import StructuralTag from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaFunctionCall, @@ -28,6 +26,7 @@ Tool, ToolParser, ) +from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag from vllm.tool_parsers.utils import find_tool_properties logger = init_logger(__name__) @@ -688,33 +687,9 @@ def extract_tool_calls_streaming( def get_structural_tag( self, request: ChatCompletionRequest ) -> StructuralTag | None: - def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict: - if isinstance(tool, dict): - return tool - if hasattr(tool, "model_dump"): - return tool.model_dump() - if hasattr(tool, "dict"): - return tool.dict() - raise TypeError(f"Unsupported tool type: {type(tool)}") - - if not request.tools: - return None - - if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): - converted_tool_choice = request.tool_choice.model_dump() - converted_tools = [] - for tool in request.tools: - tool_dict = _tool_to_dict(tool) - tool_name = tool_dict.get("function", {}).get("name") - if tool_name == request.tool_choice.function.name: - converted_tools.append(tool_dict) - else: - converted_tool_choice = request.tool_choice - converted_tools = [_tool_to_dict(tool) for tool in request.tools] - return get_model_structural_tag( - model="qwen_coder", - tools=converted_tools, - tool_choice=converted_tool_choice, + model="qwen_3_6", + tools=request.tools, + tool_choice=request.tool_choice, reasoning=request.include_reasoning, ) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py new file mode 100644 index 000000000000..3c7e9b7fc4eb --- /dev/null +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -0,0 +1,327 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from collections.abc import Callable +from typing import Any, Literal + +from xgrammar import StructuralTag +from xgrammar.structural_tag import ( + AnyTextFormat, + ConstStringFormat, + JSONSchemaFormat, + QwenXMLParameterFormat, + SequenceFormat, + TagFormat, + TagsWithSeparatorFormat, + TriggeredTagsFormat, +) + +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, + ChatCompletionToolsParam, +) + +SimplifiedToolChoice = Literal["auto", "required", "forced"] +ToolChoice = ( + Literal["none", "auto", "required"] | ChatCompletionNamedToolChoiceParam | None +) +StructuralTagBuilder = Callable[ + [list[ChatCompletionToolsParam], SimplifiedToolChoice, bool], + StructuralTag, +] + +_structural_tag_registry: dict[str, StructuralTagBuilder] = {} + + +def register_model_structural_tag(name: str): + """Register a vLLM-owned model-specific structural tag builder.""" + + def decorator(func: StructuralTagBuilder) -> StructuralTagBuilder: + _structural_tag_registry[name] = func + return func + + return decorator + + +def get_model_structural_tag( + model: str, + tools: list[ChatCompletionToolsParam] | None, + tool_choice: ToolChoice, + reasoning: bool, +) -> StructuralTag | None: + """Build a structural tag from vLLM-owned model-specific builders.""" + + builder = _structural_tag_registry.get(model) + if builder is None: + supported = list(_structural_tag_registry.keys()) + raise ValueError(f"Unknown format type: {model}, supported types: {supported}") + + normalized_tools, simplified_tool_choice = _normalize_tool_choice( + tools=tools, + tool_choice=tool_choice, + ) + if not normalized_tools: + return None + + return builder(normalized_tools, simplified_tool_choice, reasoning) + + +def _normalize_tool_choice( + tools: list[ChatCompletionToolsParam] | None, + tool_choice: ToolChoice, +) -> tuple[list[ChatCompletionToolsParam], SimplifiedToolChoice]: + """Normalize vLLM ChatCompletion tool_choice for structural tag builders.""" + + if not tools: + return [], "auto" + + if tool_choice is None or tool_choice == "none": + return [], "auto" + + if tool_choice == "auto": + return tools, "auto" + + if tool_choice == "required": + return tools, "required" + + if isinstance(tool_choice, ChatCompletionNamedToolChoiceParam): + tool_name = tool_choice.function.name + filtered_tools = [ + tool for tool in tools if tool.function.name == tool_name + ] + if not filtered_tools: + raise ValueError( + f"The tool with name '{tool_name}' is not found in the tools list." + ) + return filtered_tools, "forced" + + raise ValueError(f"Unsupported tool_choice for structural tag: {tool_choice}") + + +def _get_function_parameters(function: Any) -> dict[str, Any] | bool: + """Return the JSON schema used for constrained tool arguments.""" + + if getattr(function, "strict", None) is False: + return True + if function.parameters is None: + return True + return function.parameters + + +def _build_deepseek_dsml_structural_tag( + tools: list[ChatCompletionToolsParam], + tool_choice: SimplifiedToolChoice, + reasoning: bool, + function_calls_begin: str, + function_calls_end: str, + function_calls_trigger: str, +) -> StructuralTag: + invoke_begin_prefix = '<|DSML|invoke name="' + invoke_begin_suffix = '">\n' + invoke_end = "\n" + tool_calls_prefix = "\n\n" + think_tag_end = "" + think_exclude_tokens = ["", ""] + xml_style = "deepseek_xml" + + if tool_choice == "auto": + tags = [] + for tool in tools: + function = tool.function + parameters = _get_function_parameters(function) + tags.append( + TagFormat( + begin=invoke_begin_prefix + function.name + invoke_begin_suffix, + content=JSONSchemaFormat( + json_schema=parameters, + style=xml_style, + ), + end=invoke_end, + ) + ) + + if tags: + function_calling_tags = TagsWithSeparatorFormat( + tags=tags, + separator="\n", + at_least_one=True, + ) + suffix_tag = TriggeredTagsFormat( + triggers=[function_calls_trigger], + tags=[ + TagFormat( + begin=function_calls_begin, + content=function_calling_tags, + end=function_calls_end, + ) + ], + excludes=think_exclude_tokens, + ) + else: + suffix_tag = AnyTextFormat(excludes=think_exclude_tokens) + + elif tool_choice == "forced": + if not tools: + raise ValueError("Forced tool choice must resolve to exactly one tool.") + function = tools[0].function + suffix_tag = SequenceFormat( + elements=[ + ConstStringFormat(value=tool_calls_prefix + function_calls_begin), + TagFormat( + begin=invoke_begin_prefix + function.name + invoke_begin_suffix, + content=JSONSchemaFormat( + json_schema=_get_function_parameters(function), + style=xml_style, + ), + end=invoke_end, + ), + ConstStringFormat(value=function_calls_end), + ] + ) + + elif tool_choice == "required": + tags = [] + for tool in tools: + function = tool.function + parameters = _get_function_parameters(function) + tags.append( + TagFormat( + begin=invoke_begin_prefix + function.name + invoke_begin_suffix, + content=JSONSchemaFormat( + json_schema=parameters, + style=xml_style, + ), + end=invoke_end, + ) + ) + assert len(tags) > 0 + suffix_tag = SequenceFormat( + elements=[ + ConstStringFormat(value=tool_calls_prefix + function_calls_begin), + TagsWithSeparatorFormat( + tags=tags, + separator="\n", + at_least_one=True, + ), + ConstStringFormat(value=function_calls_end), + ] + ) + + if not reasoning: + return StructuralTag(format=suffix_tag) + + prefix_tag = TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end) + return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag])) + + +@register_model_structural_tag("deepseek_v4") +def get_deepseek_v4_structural_tag( + tools: list[ChatCompletionToolsParam], + tool_choice: SimplifiedToolChoice, + reasoning: bool, +) -> StructuralTag: + """Build DeepSeek V4 structural tags.""" + + return _build_deepseek_dsml_structural_tag( + tools=tools, + tool_choice=tool_choice, + reasoning=reasoning, + function_calls_begin="<|DSML|tool_calls>\n", + function_calls_end="", + function_calls_trigger="<|DSML|tool_calls>", + ) + + +def _build_qwen_xml_structural_tag( + tools: list[ChatCompletionToolsParam], + tool_choice: SimplifiedToolChoice, + reasoning: bool, + include_reasoning_prefix: bool, +) -> StructuralTag: + tool_call_begin_prefix = "\n", ""] + + if tool_choice == "auto": + tags = [] + for tool in tools: + function = tool.function + parameters = _get_function_parameters(function) + tags.append( + TagFormat( + begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}", + content=QwenXMLParameterFormat(json_schema=parameters), + end=tool_call_end, + ) + ) + + if tags: + suffix_tag = TriggeredTagsFormat( + triggers=[tool_call_trigger], + tags=tags, + excludes=think_exclude_tokens, + ) + else: + suffix_tag = AnyTextFormat(excludes=think_exclude_tokens) + + elif tool_choice == "forced": + if not tools: + raise ValueError("Forced tool choice must resolve to exactly one tool.") + function = tools[0].function + suffix_tag = TagFormat( + begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}", + content=QwenXMLParameterFormat( + json_schema=_get_function_parameters(function) + ), + end=tool_call_end, + ) + + elif tool_choice == "required": + tags = [] + for tool in tools: + function = tool.function + parameters = _get_function_parameters(function) + tags.append( + TagFormat( + begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}", + content=QwenXMLParameterFormat(json_schema=parameters), + end=tool_call_end, + ) + ) + assert len(tags) > 0 + suffix_tag = TagsWithSeparatorFormat( + tags=tags, + separator="", + at_least_one=True, + ) + + if not include_reasoning_prefix or not reasoning: + return StructuralTag(format=suffix_tag) + + prefix_tag = SequenceFormat( + elements=[ + TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end), + ConstStringFormat(value=think_suffix), + ] + ) + return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag])) + + +@register_model_structural_tag("qwen_3_6") +def get_qwen_3_6_structural_tag( + tools: list[ChatCompletionToolsParam], + tool_choice: SimplifiedToolChoice, + reasoning: bool, +) -> StructuralTag: + """Build Qwen3.6 structural tags.""" + + return _build_qwen_xml_structural_tag( + tools=tools, + tool_choice=tool_choice, + reasoning=reasoning, + include_reasoning_prefix=True, + ) From 9ae54785ef4157c619f47da59c2918f0ad10ed87 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 05:16:06 -0400 Subject: [PATCH 25/43] Drop non-target structural tag changes --- .../test_deepseekv32_tool_parser.py | 53 ------------------- .../tool_parsers/test_kimi_k2_tool_parser.py | 46 ---------------- tests/tool_parsers/test_openai_tool_parser.py | 43 --------------- vllm/tool_parsers/deepseekv32_tool_parser.py | 1 - vllm/tool_parsers/kimi_k2_tool_parser.py | 3 +- vllm/tool_parsers/openai_tool_parser.py | 1 - 6 files changed, 1 insertion(+), 146 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index f82d1d739a07..c547795e7bf2 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -17,7 +17,6 @@ FunctionDefinition, ) from vllm.tokenizers import get_tokenizer -from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser # --------------------------------------------------------------------------- @@ -49,43 +48,6 @@ def make_request(tools=None) -> MagicMock: return req -@pytest.fixture -def sample_tools() -> list[ChatCompletionToolsParam]: - return [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": { - "type": "object", - "properties": { - "city": {"type": "string", "description": "The city name"}, - "state": {"type": "string", "description": "The state code"}, - "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, - }, - "required": ["city", "state"], - }, - }, - ), - ChatCompletionToolsParam( - type="function", - function={ - "name": "calculate_area", - "description": "Calculate area of a shape", - "parameters": { - "type": "object", - "properties": { - "shape": {"type": "string"}, - "dimensions": {"type": "object"}, - "precision": {"type": "integer"}, - }, - }, - }, - ), - ] - - # Shorthand for the DSML tokens used throughout FC_START = "<|DSML|function_calls>" FC_END = "" @@ -859,18 +821,3 @@ def test_convert_param_value_checked_helper(parser): assert parser._convert_param_value("null", "integer") is None assert parser._convert_param_value("null", "boolean") is None assert parser._convert_param_value("null", "object") is None - - -def test_adjust_request_required_uses_json_schema_not_structural_tag( - sample_tools: list[ChatCompletionToolsParam], -) -> None: - parser = make_parser() - req = ChatCompletionRequest( - messages=[], - model="m", - tools=sample_tools, - tool_choice="required", - ) - out = parser.adjust_request(req) - assert out.structured_outputs is not None - assert out.structured_outputs.structural_tag is None diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index 5552a977d47d..b56032b91c17 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -11,10 +11,6 @@ run_tool_extraction, run_tool_extraction_streaming, ) -from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionRequest, - ChatCompletionToolsParam, -) from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) @@ -24,43 +20,6 @@ MODEL = "moonshotai/Kimi-K2-Instruct" -@pytest.fixture -def sample_tools() -> list[ChatCompletionToolsParam]: - return [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": { - "type": "object", - "properties": { - "city": {"type": "string", "description": "The city name"}, - "state": {"type": "string", "description": "The state code"}, - "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, - }, - "required": ["city", "state"], - }, - }, - ), - ChatCompletionToolsParam( - type="function", - function={ - "name": "calculate_area", - "description": "Calculate area of a shape", - "parameters": { - "type": "object", - "properties": { - "shape": {"type": "string"}, - "dimensions": {"type": "object"}, - "precision": {"type": "integer"}, - }, - }, - }, - ), - ] - - @pytest.fixture(scope="module") def kimi_k2_tokenizer(): return get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True) @@ -504,7 +463,6 @@ def test_sets_skip_special_tokens_false(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = [{"type": "function", "function": {"name": "test"}}] request.tool_choice = "auto" - request.include_reasoning = True request.skip_special_tokens = True result = parser.adjust_request(request) @@ -514,7 +472,6 @@ def test_no_change_when_tool_choice_none(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = [{"type": "function", "function": {"name": "test"}}] request.tool_choice = "none" - request.include_reasoning = True request.skip_special_tokens = True result = parser.adjust_request(request) @@ -524,7 +481,6 @@ def test_no_change_when_no_tools(self, parser): request = MagicMock(spec=ChatCompletionRequest) request.tools = None request.tool_choice = "auto" - request.include_reasoning = False request.skip_special_tokens = True result = parser.adjust_request(request) @@ -624,5 +580,3 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer): assert len(rec.tool_calls) == 1 assert rec.tool_calls[0].function.name == "get_weather" assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"} - - diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index b5a365b495ca..e9e39ef4c029 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -14,10 +14,6 @@ load_harmony_encoding, ) -from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionRequest, - ChatCompletionToolsParam, -) from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser @@ -41,43 +37,6 @@ def harmony_encoding(): return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) -@pytest.fixture -def sample_tools() -> list[ChatCompletionToolsParam]: - return [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": { - "type": "object", - "properties": { - "city": {"type": "string", "description": "The city name"}, - "state": {"type": "string", "description": "The state code"}, - "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, - }, - "required": ["city", "state"], - }, - }, - ), - ChatCompletionToolsParam( - type="function", - function={ - "name": "calculate_area", - "description": "Calculate area of a shape", - "parameters": { - "type": "object", - "properties": { - "shape": {"type": "string"}, - "dimensions": {"type": "object"}, - "precision": {"type": "integer"}, - }, - }, - }, - ), - ] - - def assert_tool_calls( actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall], @@ -302,5 +261,3 @@ def test_extract_tool_calls_with_content( ] assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) assert extracted_info.content == final_content - - diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index 87a1f88cd67a..02182e22935a 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -320,4 +320,3 @@ def extract_tool_calls_streaming( return DeltaMessage(content="") return None - diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py index 31f29900c2b3..7ddd8fa7a80d 100644 --- a/vllm/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/tool_parsers/kimi_k2_tool_parser.py @@ -273,5 +273,4 @@ def extract_tool_calls_streaming( except Exception: logger.exception("Error trying to handle streaming tool call.") - return None # do not stream a delta. skip this token ID. - + return None diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index 57bfa9915e86..ee6dd70718b3 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -112,4 +112,3 @@ def extract_tool_calls_streaming( raise NotImplementedError( "Not being used, manual parsing in serving_chat.py" # noqa: E501 ) - From d962b8084e1c5554c7d0438ee82a51a90fca8f03 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 05:17:58 -0400 Subject: [PATCH 26/43] Centralize structural tag xgrammar imports --- vllm/tool_parsers/abstract_tool_parser.py | 4 +--- vllm/tool_parsers/deepseekv4_tool_parser.py | 4 +--- vllm/tool_parsers/qwen3coder_tool_parser.py | 3 +-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 6eb15a444fc0..81bf0bb0d2f3 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -7,8 +7,6 @@ from functools import cached_property import json -from xgrammar import StructuralTag - from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, ResponseTextConfig, @@ -147,7 +145,7 @@ def adjust_request( def get_structural_tag( self, request: ChatCompletionRequest - ) -> StructuralTag | None: + ): return None def extract_tool_calls( diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 12791fc7b2ea..69b35bab526e 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from xgrammar import StructuralTag - from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) @@ -22,7 +20,7 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser): def get_structural_tag( self, request: ChatCompletionRequest - ) -> StructuralTag | None: + ): return get_model_structural_tag( model="deepseek_v4", tools=request.tools, diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index b55c2b1274f9..f548fdb07e0e 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -7,7 +7,6 @@ from typing import Any import regex as re -from xgrammar import StructuralTag from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, @@ -686,7 +685,7 @@ def extract_tool_calls_streaming( def get_structural_tag( self, request: ChatCompletionRequest - ) -> StructuralTag | None: + ): return get_model_structural_tag( model="qwen_3_6", tools=request.tools, From 7d908320ee04d3429f3615b8c5e9df2f426df8b6 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 05:36:19 -0400 Subject: [PATCH 27/43] Rename Qwen structural tag key --- vllm/tool_parsers/qwen3coder_tool_parser.py | 2 +- vllm/tool_parsers/structural_tag_registry.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index f548fdb07e0e..a896606cf049 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -687,7 +687,7 @@ def get_structural_tag( self, request: ChatCompletionRequest ): return get_model_structural_tag( - model="qwen_3_6", + model="qwen_3_5", tools=request.tools, tool_choice=request.tool_choice, reasoning=request.include_reasoning, diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 3c7e9b7fc4eb..ad5da6e9cf3f 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -86,9 +86,7 @@ def _normalize_tool_choice( if isinstance(tool_choice, ChatCompletionNamedToolChoiceParam): tool_name = tool_choice.function.name - filtered_tools = [ - tool for tool in tools if tool.function.name == tool_name - ] + filtered_tools = [tool for tool in tools if tool.function.name == tool_name] if not filtered_tools: raise ValueError( f"The tool with name '{tool_name}' is not found in the tools list." @@ -311,13 +309,17 @@ def _build_qwen_xml_structural_tag( return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag])) -@register_model_structural_tag("qwen_3_6") -def get_qwen_3_6_structural_tag( +@register_model_structural_tag("qwen_3_5") +def get_qwen_3_5_structural_tag( tools: list[ChatCompletionToolsParam], tool_choice: SimplifiedToolChoice, reasoning: bool, ) -> StructuralTag: - """Build Qwen3.6 structural tags.""" + """Build Qwen XML structural tags. + + This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with + Qwen variants that use the same XML tool-call format. + """ return _build_qwen_xml_structural_tag( tools=tools, From 760e5af5ec4c006f57a5dbf5e0f1ef29c4c36fd2 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 05:40:48 -0400 Subject: [PATCH 28/43] Inline structural tag builders --- vllm/tool_parsers/structural_tag_registry.py | 61 ++++++-------------- 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index ad5da6e9cf3f..108546f0c3d1 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -1,6 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# Model-specific structural tag builders adapted from XGrammar's +# builtin structural tag implementations: +# https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py + from collections.abc import Callable from typing import Any, Literal @@ -106,18 +110,21 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool: return function.parameters -def _build_deepseek_dsml_structural_tag( +@register_model_structural_tag("deepseek_v4") +def get_deepseek_v4_structural_tag( tools: list[ChatCompletionToolsParam], tool_choice: SimplifiedToolChoice, reasoning: bool, - function_calls_begin: str, - function_calls_end: str, - function_calls_trigger: str, ) -> StructuralTag: + """Build DeepSeek V4 structural tags.""" + invoke_begin_prefix = '<|DSML|invoke name="' invoke_begin_suffix = '">\n' invoke_end = "\n" tool_calls_prefix = "\n\n" + function_calls_begin = "<|DSML|tool_calls>\n" + function_calls_end = "" + function_calls_trigger = "<|DSML|tool_calls>" think_tag_end = "" think_exclude_tokens = ["", ""] xml_style = "deepseek_xml" @@ -212,30 +219,18 @@ def _build_deepseek_dsml_structural_tag( return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag])) -@register_model_structural_tag("deepseek_v4") -def get_deepseek_v4_structural_tag( +@register_model_structural_tag("qwen_3_5") +def get_qwen_3_5_structural_tag( tools: list[ChatCompletionToolsParam], tool_choice: SimplifiedToolChoice, reasoning: bool, ) -> StructuralTag: - """Build DeepSeek V4 structural tags.""" - - return _build_deepseek_dsml_structural_tag( - tools=tools, - tool_choice=tool_choice, - reasoning=reasoning, - function_calls_begin="<|DSML|tool_calls>\n", - function_calls_end="", - function_calls_trigger="<|DSML|tool_calls>", - ) + """Build Qwen XML structural tags. + This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with + Qwen variants that use the same XML tool-call format. + """ -def _build_qwen_xml_structural_tag( - tools: list[ChatCompletionToolsParam], - tool_choice: SimplifiedToolChoice, - reasoning: bool, - include_reasoning_prefix: bool, -) -> StructuralTag: tool_call_begin_prefix = "\n StructuralTag: - """Build Qwen XML structural tags. - - This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with - Qwen variants that use the same XML tool-call format. - """ - - return _build_qwen_xml_structural_tag( - tools=tools, - tool_choice=tool_choice, - reasoning=reasoning, - include_reasoning_prefix=True, - ) From 4bd7d7217b6fd0789380d3e8595a93eb2b899c25 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 06:38:14 -0400 Subject: [PATCH 29/43] Stop Qwen 3.5 structural tag after first tool call Pass `stop_after_first=True` to the `TriggeredTagsFormat` used in the `auto` branch of `get_qwen_3_5_structural_tag` so the constrained generation closes the tool-call section once a single call is emitted, matching the parser's expectation of one tool call per response. Also add a short comment in `ToolParser.adjust_request` clarifying the purpose of the JSON-schema-from-tools branch. Signed-off-by: Ubospica --- vllm/tool_parsers/abstract_tool_parser.py | 1 + vllm/tool_parsers/structural_tag_registry.py | 1 + 2 files changed, 2 insertions(+) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 81bf0bb0d2f3..310752d696aa 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -114,6 +114,7 @@ def adjust_request( json_schema_from_tool = get_json_schema_from_tools( tool_choice=request.tool_choice, tools=request.tools ) + # Set structured output params for tool calling if json_schema_from_tool is not None: if isinstance(request, ChatCompletionRequest): # tool_choice: "Forced Function" or "required" will override diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 108546f0c3d1..7fae6cf59e96 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -257,6 +257,7 @@ def get_qwen_3_5_structural_tag( triggers=[tool_call_trigger], tags=tags, excludes=think_exclude_tokens, + stop_after_first=True, ) else: suffix_tag = AnyTextFormat(excludes=think_exclude_tokens) From 1e94b999e139dfcfddb6ef03dcb3dba61a9eea51 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 07:14:42 -0400 Subject: [PATCH 30/43] Fix Qwen structural tag parsing --- vllm/tool_parsers/qwen3coder_tool_parser.py | 6 +++++- vllm/tool_parsers/structural_tag_registry.py | 19 ++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index a896606cf049..142a406308b1 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -32,6 +32,8 @@ class Qwen3CoderToolParser(ToolParser): + supports_required_and_named: bool = False + def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None): super().__init__(tokenizer, tools) @@ -690,5 +692,7 @@ def get_structural_tag( model="qwen_3_5", tools=request.tools, tool_choice=request.tool_choice, - reasoning=request.include_reasoning, + # The reasoning parser gates structured output until reasoning ends. + # Constrain only the post-reasoning tool-call suffix here. + reasoning=False, ) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 7fae6cf59e96..276b0e77db8a 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -13,6 +13,7 @@ AnyTextFormat, ConstStringFormat, JSONSchemaFormat, + OrFormat, QwenXMLParameterFormat, SequenceFormat, TagFormat, @@ -253,11 +254,19 @@ def get_qwen_3_5_structural_tag( ) if tags: - suffix_tag = TriggeredTagsFormat( - triggers=[tool_call_trigger], - tags=tags, - excludes=think_exclude_tokens, - stop_after_first=True, + # In auto mode, allow either text-only output or exactly one XML + # tool call. TriggeredTagsFormat can permit free text after a tag, + # which allows repeated tool calls for Qwen3.5. + suffix_tag = OrFormat( + elements=[ + AnyTextFormat(excludes=think_exclude_tokens + [" Date: Sun, 3 May 2026 08:22:35 -0400 Subject: [PATCH 31/43] Normalize tool schemas for Qwen structural tags --- vllm/tool_parsers/structural_tag_registry.py | 51 +++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 276b0e77db8a..5febcd0e192e 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -108,7 +108,56 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool: return True if function.parameters is None: return True - return function.parameters + return _normalize_json_schema_for_xgrammar(function.parameters) + + +def _normalize_json_schema_for_xgrammar(schema: Any) -> Any: + """Normalize common non-standard tool schema aliases to JSON Schema.""" + + if isinstance(schema, list): + return [_normalize_json_schema_for_xgrammar(item) for item in schema] + if not isinstance(schema, dict): + return schema + + metadata_keys = { + "description", + "default", + "examples", + "title", + } + normalized = { + key: _normalize_json_schema_for_xgrammar(value) + for key, value in schema.items() + if key not in metadata_keys + } + + schema_type = normalized.get("type") + type_aliases = { + "dict": "object", + "map": "object", + "list": "array", + "tuple": "array", + "str": "string", + "int": "integer", + "float": "number", + "bool": "boolean", + } + if isinstance(schema_type, str): + normalized["type"] = type_aliases.get(schema_type, schema_type) + elif isinstance(schema_type, list): + normalized["type"] = [ + type_aliases.get(item, item) if isinstance(item, str) else item + for item in schema_type + ] + + enum_values = normalized.get("enum") + if isinstance(enum_values, list) and any( + isinstance(value, str) and ("/" in value or "\\" in value) + for value in enum_values + ): + normalized.pop("enum", None) + + return normalized @register_model_structural_tag("deepseek_v4") From e4285c749e05474aca512aae3b7c558f8fe75eb1 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 08:41:13 -0400 Subject: [PATCH 32/43] Allow multiple Qwen structural tool calls --- vllm/tool_parsers/structural_tag_registry.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 5febcd0e192e..13988ee178e8 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -13,7 +13,6 @@ AnyTextFormat, ConstStringFormat, JSONSchemaFormat, - OrFormat, QwenXMLParameterFormat, SequenceFormat, TagFormat, @@ -303,19 +302,10 @@ def get_qwen_3_5_structural_tag( ) if tags: - # In auto mode, allow either text-only output or exactly one XML - # tool call. TriggeredTagsFormat can permit free text after a tag, - # which allows repeated tool calls for Qwen3.5. - suffix_tag = OrFormat( - elements=[ - AnyTextFormat(excludes=think_exclude_tokens + [" Date: Mon, 4 May 2026 00:23:03 +0800 Subject: [PATCH 33/43] format. Signed-off-by: Yuchuan --- .../test_deepseekv4_tool_parser.py | 22 +++++++++++-------- .../test_qwen3coder_tool_parser.py | 12 ++++++---- vllm/tool_parsers/abstract_tool_parser.py | 11 ++++------ vllm/tool_parsers/deepseekv4_tool_parser.py | 7 +++--- vllm/tool_parsers/qwen3coder_tool_parser.py | 4 +--- 5 files changed, 29 insertions(+), 27 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index 095ed7eb17a2..901bae7ec283 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -5,17 +5,18 @@ import json from unittest.mock import MagicMock + import pytest +from xgrammar import StructuralTag -from vllm.tool_parsers import ToolParserManager -from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedFunction, ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedFunction, ) -from xgrammar import StructuralTag +from vllm.tool_parsers import ToolParserManager +from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser MOCK_TOKENIZER = MagicMock() MOCK_TOKENIZER.get_vocab.return_value = {} @@ -27,6 +28,7 @@ PARAM_START = '<|DSML|parameter name="' PARAM_END = "" + @pytest.fixture def sample_tools() -> list[ChatCompletionToolsParam]: return [ @@ -64,7 +66,6 @@ def sample_tools() -> list[ChatCompletionToolsParam]: ] - def make_parser(tools=None) -> DeepSeekV4ToolParser: return DeepSeekV4ToolParser(MOCK_TOKENIZER, tools=tools) @@ -167,6 +168,7 @@ def test_streaming_extracts_complete_invokes(): assert names == ["search"] assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"} + def test_get_vllm_registry_structural_tag_returns_structural_tag( sample_tools: list[ChatCompletionToolsParam], ) -> None: @@ -179,7 +181,7 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( ) tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) - + req = ChatCompletionRequest( messages=[], model="m", @@ -188,14 +190,16 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( ) tag = parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) - + if sample_tools: tool = sample_tools[0] req = ChatCompletionRequest( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice=ChatCompletionNamedToolChoiceParam( + function=ChatCompletionNamedFunction(name=tool.function.name) + ), ) tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) \ No newline at end of file + assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 6e6842c40785..8f4a7c31eddd 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -9,10 +9,10 @@ from xgrammar import StructuralTag from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedFunction, + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, - ChatCompletionNamedFunction, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -127,7 +127,8 @@ def _as_chat_completion_tools( "description": tool.description, "parameters": tool.parameters, }, - )) + ) + ) return normalized @@ -1200,11 +1201,14 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( messages=[], model="m", tools=request_tools, - tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)), + tool_choice=ChatCompletionNamedToolChoiceParam( + function=ChatCompletionNamedFunction(name=tool.function.name) + ), ) tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) + @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_uses_vllm_registry_structural_tag( qwen3_tool_parser: Qwen3CoderToolParser, diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 310752d696aa..c8b43f79d588 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -2,10 +2,10 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import importlib +import json import os from collections.abc import Callable, Sequence from functools import cached_property -import json from openai.types.responses import ( ResponseFormatTextJSONSchemaConfig, @@ -14,9 +14,9 @@ from openai.types.responses.function_tool import FunctionTool from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, ChatCompletionRequest, ChatCompletionToolsParam, - ChatCompletionNamedToolChoiceParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -88,7 +88,6 @@ def adjust_request( self, request: ChatCompletionRequest | ResponsesRequest, ) -> ChatCompletionRequest | ResponsesRequest: - # If there are no tools, return the request as is. if not request.tools: return request @@ -143,10 +142,8 @@ def adjust_request( return request return request - - def get_structural_tag( - self, request: ChatCompletionRequest - ): + + def get_structural_tag(self, request: ChatCompletionRequest): return None def extract_tool_calls( diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 69b35bab526e..8d1df704a3a2 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -7,6 +7,7 @@ from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag + class DeepSeekV4ToolParser(DeepSeekV32ToolParser): """ DeepSeek V4 DSML tool parser. @@ -17,10 +18,8 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser): tool_call_start_token: str = "<|DSML|tool_calls>" tool_call_end_token: str = "" - - def get_structural_tag( - self, request: ChatCompletionRequest - ): + + def get_structural_tag(self, request: ChatCompletionRequest): return get_model_structural_tag( model="deepseek_v4", tools=request.tools, diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 142a406308b1..1204ea6fba38 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -685,9 +685,7 @@ def extract_tool_calls_streaming( return None - def get_structural_tag( - self, request: ChatCompletionRequest - ): + def get_structural_tag(self, request: ChatCompletionRequest): return get_model_structural_tag( model="qwen_3_5", tools=request.tools, From 260dea23ae5503cbfcb8896662508cdd46c0de4a Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Mon, 4 May 2026 00:50:45 +0800 Subject: [PATCH 34/43] add the requirement. Signed-off-by: Yuchuan --- requirements/test/rocm.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt index 05b36860d753..8445634ded40 100644 --- a/requirements/test/rocm.txt +++ b/requirements/test/rocm.txt @@ -42,6 +42,8 @@ anyio==4.13.0 # sse-starlette # starlette # watchfiles +apache-tvm-ffi==0.1.10 + # via xgrammar arctic-inference==0.1.1 # via -r requirements/test/rocm.in argcomplete==3.6.3 @@ -1264,6 +1266,7 @@ typing-extensions==4.15.0 # alembic # anthropic # anyio + # apache-tvm-ffi # azure-core # azure-identity # azure-storage-blob From c6f98c564a4832defa76a0c83e84d1e6fe50249a Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Mon, 4 May 2026 02:57:31 +0800 Subject: [PATCH 35/43] avoid overwriting user's setting. Signed-off-by: Yuchuan remove the protocol change. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan finish the env. Signed-off-by: Yuchuan doc. Signed-off-by: Yuchuan doc. Signed-off-by: Yuchuan fix the logic. Signed-off-by: Yuchuan avoid overwriting user's setting. Signed-off-by: Yuchuan remove the protocol change. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan fix test. Signed-off-by: Yuchuan finish the env. Signed-off-by: Yuchuan doc. Signed-off-by: Yuchuan doc. Signed-off-by: Yuchuan fix the logic. Signed-off-by: Yuchuan test the env. Signed-off-by: Yuchuan --- .../test_deepseekv4_tool_parser.py | 10 +++++----- .../test_qwen3coder_tool_parser.py | 6 +++--- .../openai/chat_completion/protocol.py | 8 ++------ vllm/envs.py | 7 +++++++ vllm/tool_parsers/abstract_tool_parser.py | 19 +++++++++++++------ 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py index 901bae7ec283..cc77a1f77756 100644 --- a/tests/tool_parsers/test_deepseekv4_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py @@ -197,9 +197,9 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( messages=[], model="m", tools=sample_tools, - tool_choice=ChatCompletionNamedToolChoiceParam( - function=ChatCompletionNamedFunction(name=tool.function.name) - ), ) - tag = parser.get_structural_tag(req) - assert isinstance(tag, StructuralTag) + req.tool_choice = ChatCompletionNamedToolChoiceParam( + function=ChatCompletionNamedFunction(name=tool.function.name) + ) + tag = parser.get_structural_tag(req) + assert isinstance(tag, StructuralTag) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 8f4a7c31eddd..d50a7b9b769e 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1201,9 +1201,9 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( messages=[], model="m", tools=request_tools, - tool_choice=ChatCompletionNamedToolChoiceParam( - function=ChatCompletionNamedFunction(name=tool.function.name) - ), + ) + req.tool_choice = ChatCompletionNamedToolChoiceParam( + function=ChatCompletionNamedFunction(name=tool.function.name) ) tag = qwen3_tool_parser.get_structural_tag(req) assert isinstance(tag, StructuralTag) diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 140a2fe566da..c92cc13da01f 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -739,12 +739,8 @@ def check_tool_usage(cls, data): # make sure that tool choice is either a named tool # OR that it's set to "auto" or "required" - if ( - data["tool_choice"] not in ["auto", "required"] - and not isinstance(data["tool_choice"], dict) - and not isinstance( - data["tool_choice"], ChatCompletionNamedToolChoiceParam - ) + if data["tool_choice"] not in ["auto", "required"] and not isinstance( + data["tool_choice"], dict ): raise ValueError( f"Invalid value for `tool_choice`: {data['tool_choice']}! " diff --git a/vllm/envs.py b/vllm/envs.py index 4191cd6a9743..ec777e2bf9a4 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -226,6 +226,7 @@ VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_SYSTEM_START_DATE: str | None = None VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False + VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = True VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True @@ -1591,6 +1592,12 @@ def _get_or_set_default() -> str: "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool( int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0")) ), + # When 1,the model structural tags will be used to enforce the model + # output conforming to the model's tool-calling format and schema. + # Default 1 (on). + "VLLM_ENFORCE_STRICT_TOOL_CALLING": lambda: bool( + int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "1")) + ), # Add optional custom scopes for profiling, disable to avoid overheads "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool( int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0")) diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index c8b43f79d588..c3438082a72d 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -25,6 +25,7 @@ from vllm.entrypoints.openai.responses.protocol import ( ResponsesRequest, ) +from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING from vllm.logger import init_logger from vllm.sampling_params import ( StructuredOutputsParams, @@ -94,7 +95,10 @@ def adjust_request( # Step 1 (highest priority for ChatCompletionRequest): apply # vLLM-owned structural tag support for model-specific tool formats. - if isinstance(request, ChatCompletionRequest): + if ( + isinstance(request, ChatCompletionRequest) + and VLLM_ENFORCE_STRICT_TOOL_CALLING + ): need_tool_calling = ( request.tool_choice == "auto" or request.tool_choice == "required" @@ -103,9 +107,14 @@ def adjust_request( if need_tool_calling: structure_tag = self.get_structural_tag(request) if structure_tag is not None: - request.structured_outputs = StructuredOutputsParams( - structural_tag=json.dumps(structure_tag.model_dump()), - ) + if request.structured_outputs is None: + request.structured_outputs = StructuredOutputsParams( + structural_tag=json.dumps(structure_tag.model_dump()), + ) + else: + request.structured_outputs.structural_tag = json.dumps( + structure_tag.model_dump() + ) return request # Step 2: set structured output params when tool constraints are @@ -139,8 +148,6 @@ def adjust_request( ) ) - return request - return request def get_structural_tag(self, request: ChatCompletionRequest): From 306d2204b0908e5f1b70bfd6bce57619dcf02d55 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Sun, 3 May 2026 21:04:23 -0400 Subject: [PATCH 36/43] a --- vllm/tool_parsers/structural_tag_registry.py | 54 ++------------------ 1 file changed, 3 insertions(+), 51 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 13988ee178e8..20b4ff9652db 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -101,62 +101,14 @@ def _normalize_tool_choice( def _get_function_parameters(function: Any) -> dict[str, Any] | bool: - """Return the JSON schema used for constrained tool arguments.""" + """Return the JSON schema used for constrained tool arguments. + """ if getattr(function, "strict", None) is False: return True if function.parameters is None: return True - return _normalize_json_schema_for_xgrammar(function.parameters) - - -def _normalize_json_schema_for_xgrammar(schema: Any) -> Any: - """Normalize common non-standard tool schema aliases to JSON Schema.""" - - if isinstance(schema, list): - return [_normalize_json_schema_for_xgrammar(item) for item in schema] - if not isinstance(schema, dict): - return schema - - metadata_keys = { - "description", - "default", - "examples", - "title", - } - normalized = { - key: _normalize_json_schema_for_xgrammar(value) - for key, value in schema.items() - if key not in metadata_keys - } - - schema_type = normalized.get("type") - type_aliases = { - "dict": "object", - "map": "object", - "list": "array", - "tuple": "array", - "str": "string", - "int": "integer", - "float": "number", - "bool": "boolean", - } - if isinstance(schema_type, str): - normalized["type"] = type_aliases.get(schema_type, schema_type) - elif isinstance(schema_type, list): - normalized["type"] = [ - type_aliases.get(item, item) if isinstance(item, str) else item - for item in schema_type - ] - - enum_values = normalized.get("enum") - if isinstance(enum_values, list) and any( - isinstance(value, str) and ("/" in value or "\\" in value) - for value in enum_values - ): - normalized.pop("enum", None) - - return normalized + return function.parameters @register_model_structural_tag("deepseek_v4") From 5fb2f35f3e84317939900370105a6a4202cdb4a1 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Mon, 4 May 2026 09:23:16 +0800 Subject: [PATCH 37/43] format. Signed-off-by: Yuchuan --- vllm/tool_parsers/structural_tag_registry.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 20b4ff9652db..108546f0c3d1 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -101,8 +101,7 @@ def _normalize_tool_choice( def _get_function_parameters(function: Any) -> dict[str, Any] | bool: - """Return the JSON schema used for constrained tool arguments. - """ + """Return the JSON schema used for constrained tool arguments.""" if getattr(function, "strict", None) is False: return True From e6fa4e3e6d56e319c4d89f4683ecb59149579432 Mon Sep 17 00:00:00 2001 From: Yuchuan Date: Mon, 4 May 2026 09:34:28 +0800 Subject: [PATCH 38/43] set the flag off as default. Signed-off-by: Yuchuan --- vllm/envs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index ec777e2bf9a4..acd5f7932f20 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -226,7 +226,7 @@ VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_SYSTEM_START_DATE: str | None = None VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False - VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = True + VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True @@ -1594,9 +1594,9 @@ def _get_or_set_default() -> str: ), # When 1,the model structural tags will be used to enforce the model # output conforming to the model's tool-calling format and schema. - # Default 1 (on). + # Default 0 (off). "VLLM_ENFORCE_STRICT_TOOL_CALLING": lambda: bool( - int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "1")) + int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "0")) ), # Add optional custom scopes for profiling, disable to avoid overheads "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool( From dfda37c10436b9f5704235221a2d4966593a4d39 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Mon, 4 May 2026 06:20:16 -0400 Subject: [PATCH 39/43] update and fix bug Signed-off-by: Ubospica --- vllm/entrypoints/openai/api_server.py | 15 ++++++ vllm/tool_parsers/deepseekv4_tool_parser.py | 7 ++- vllm/tool_parsers/qwen3coder_tool_parser.py | 9 ++-- vllm/tool_parsers/structural_tag_registry.py | 55 +++++++++++++++----- 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 9aac19e2fda5..da2ec10284c5 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -321,6 +321,21 @@ async def init_app_state( supported_tasks: tuple["SupportedTask", ...] | None = None, ) -> None: vllm_config = engine_client.vllm_config + + # Propagate enable_in_reasoning to the API-server process. The engine core + # runs in a separate process, so the contextvar that backs + # `get_current_vllm_config_or_none()` is None on this stack. Tool parsers + # call `get_enable_structured_outputs_in_reasoning()` during request + # handling and need to see the real flag, otherwise they silently fall + # back to False and mismatch the engine-side bitmask gating. + from vllm.tool_parsers.structural_tag_registry import ( + set_enable_structured_outputs_in_reasoning, + ) + + set_enable_structured_outputs_in_reasoning( + vllm_config.structured_outputs_config.enable_in_reasoning + ) + if supported_tasks is None: warnings.warn( "The 'supported_tasks' parameter was not provided to " diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py index 8d1df704a3a2..e32451cd8bbd 100644 --- a/vllm/tool_parsers/deepseekv4_tool_parser.py +++ b/vllm/tool_parsers/deepseekv4_tool_parser.py @@ -5,7 +5,10 @@ ChatCompletionRequest, ) from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser -from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag +from vllm.tool_parsers.structural_tag_registry import ( + get_enable_structured_outputs_in_reasoning, + get_model_structural_tag, +) class DeepSeekV4ToolParser(DeepSeekV32ToolParser): @@ -24,5 +27,5 @@ def get_structural_tag(self, request: ChatCompletionRequest): model="deepseek_v4", tools=request.tools, tool_choice=request.tool_choice, - reasoning=request.include_reasoning, + reasoning=get_enable_structured_outputs_in_reasoning(), ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 1204ea6fba38..73850b2ab0c5 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -25,7 +25,10 @@ Tool, ToolParser, ) -from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag +from vllm.tool_parsers.structural_tag_registry import ( + get_enable_structured_outputs_in_reasoning, + get_model_structural_tag, +) from vllm.tool_parsers.utils import find_tool_properties logger = init_logger(__name__) @@ -690,7 +693,5 @@ def get_structural_tag(self, request: ChatCompletionRequest): model="qwen_3_5", tools=request.tools, tool_choice=request.tool_choice, - # The reasoning parser gates structured output until reasoning ends. - # Constrain only the post-reasoning tool-call suffix here. - reasoning=False, + reasoning=get_enable_structured_outputs_in_reasoning(), ) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 108546f0c3d1..dc625b8809c5 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -5,6 +5,7 @@ # builtin structural tag implementations: # https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py +import time from collections.abc import Callable from typing import Any, Literal @@ -13,7 +14,6 @@ AnyTextFormat, ConstStringFormat, JSONSchemaFormat, - QwenXMLParameterFormat, SequenceFormat, TagFormat, TagsWithSeparatorFormat, @@ -110,6 +110,31 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool: return function.parameters +_enable_structured_outputs_in_reasoning: bool = False + + +def set_enable_structured_outputs_in_reasoning(enabled: bool) -> None: + """Publish the engine's ``enable_in_reasoning`` flag to tool parsers. + + Called once during APIServer startup so request-time parsers can read + it without going through the EngineCore-only contextvar. + """ + + global _enable_structured_outputs_in_reasoning + _enable_structured_outputs_in_reasoning = bool(enabled) + + +def get_enable_structured_outputs_in_reasoning() -> bool: + """Whether structured outputs are active during the reasoning phase. + + When ``True``, the structural tag will cover the reasoning part: + ``...`` prefix (if available); when ``False`` (default), the tag only + constrains the post-reasoning suffix. + """ + + return _enable_structured_outputs_in_reasoning + + @register_model_structural_tag("deepseek_v4") def get_deepseek_v4_structural_tag( tools: list[ChatCompletionToolsParam], @@ -230,7 +255,6 @@ def get_qwen_3_5_structural_tag( This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with Qwen variants that use the same XML tool-call format. """ - tool_call_begin_prefix = "\n Date: Mon, 4 May 2026 09:12:46 -0400 Subject: [PATCH 40/43] update --- vllm/tool_parsers/structural_tag_registry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index dc625b8809c5..754cc52361c5 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -5,7 +5,6 @@ # builtin structural tag implementations: # https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py -import time from collections.abc import Callable from typing import Any, Literal From 376b84ebc5733eee189055be0caa6c9b2c9cb1d7 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Mon, 4 May 2026 09:12:57 -0400 Subject: [PATCH 41/43] update --- vllm/tool_parsers/structural_tag_registry.py | 21 +++++++------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 754cc52361c5..513352e12fda 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -8,21 +8,14 @@ from collections.abc import Callable from typing import Any, Literal -from xgrammar import StructuralTag -from xgrammar.structural_tag import ( - AnyTextFormat, - ConstStringFormat, - JSONSchemaFormat, - SequenceFormat, - TagFormat, - TagsWithSeparatorFormat, - TriggeredTagsFormat, -) - from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, - ChatCompletionToolsParam, -) + ChatCompletionNamedToolChoiceParam, ChatCompletionToolsParam) + +from xgrammar import StructuralTag +from xgrammar.structural_tag import (AnyTextFormat, ConstStringFormat, + JSONSchemaFormat, SequenceFormat, + TagFormat, TagsWithSeparatorFormat, + TriggeredTagsFormat) SimplifiedToolChoice = Literal["auto", "required", "forced"] ToolChoice = ( From 45d43b68f06258602b0f7d3caf2c9d0071ed417f Mon Sep 17 00:00:00 2001 From: Ubospica Date: Mon, 4 May 2026 10:21:52 -0400 Subject: [PATCH 42/43] format. --- vllm/tool_parsers/structural_tag_registry.py | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py index 513352e12fda..754cc52361c5 100644 --- a/vllm/tool_parsers/structural_tag_registry.py +++ b/vllm/tool_parsers/structural_tag_registry.py @@ -8,14 +8,21 @@ from collections.abc import Callable from typing import Any, Literal -from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionNamedToolChoiceParam, ChatCompletionToolsParam) - from xgrammar import StructuralTag -from xgrammar.structural_tag import (AnyTextFormat, ConstStringFormat, - JSONSchemaFormat, SequenceFormat, - TagFormat, TagsWithSeparatorFormat, - TriggeredTagsFormat) +from xgrammar.structural_tag import ( + AnyTextFormat, + ConstStringFormat, + JSONSchemaFormat, + SequenceFormat, + TagFormat, + TagsWithSeparatorFormat, + TriggeredTagsFormat, +) + +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, + ChatCompletionToolsParam, +) SimplifiedToolChoice = Literal["auto", "required", "forced"] ToolChoice = ( From ad4395a56077a78a9d84cdc370e173b286d6b61c Mon Sep 17 00:00:00 2001 From: sfeng33 <4florafeng@gmail.com> Date: Mon, 4 May 2026 16:32:52 +0000 Subject: [PATCH 43/43] Fix failing qwen3coder test Signed-off-by: sfeng33 <4florafeng@gmail.com> --- tests/tool_parsers/test_qwen3coder_tool_parser.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index d50a7b9b769e..26bbf1a044bc 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1211,10 +1211,15 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag( @pytest.mark.parametrize("include_reasoning", [True, False]) def test_adjust_request_auto_uses_vllm_registry_structural_tag( + monkeypatch: pytest.MonkeyPatch, qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], include_reasoning: bool, ) -> None: + monkeypatch.setattr( + "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING", + True, + ) request_tools = _as_chat_completion_tools(sample_tools) req = ChatCompletionRequest( messages=[], @@ -1232,9 +1237,14 @@ def test_adjust_request_auto_uses_vllm_registry_structural_tag( def test_adjust_request_required_prefers_structural_tag( + monkeypatch: pytest.MonkeyPatch, qwen3_tool_parser: Qwen3CoderToolParser, sample_tools: list[ChatCompletionToolsParam], ) -> None: + monkeypatch.setattr( + "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING", + True, + ) request_tools = _as_chat_completion_tools(sample_tools) req = ChatCompletionRequest( messages=[],