From bfe136aefa59ee0b677c325ac56eec19eadf39a7 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Tue, 28 Apr 2026 19:57:01 +0800
Subject: [PATCH 01/43] update the logic of tool parser.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

finish the support for vLLM with xgrammar's built-in structural tag.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

refactor.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix the detection for the thinking mode.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

add test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

refactor the structure.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

rename the symbols.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

add the support for more models.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_qwen3coder_tool_parser.py            | 54 +++++++++++++++++++
 vllm/tool_parsers/abstract_tool_parser.py     | 38 +++++++++++--
 vllm/tool_parsers/deepseekv32_tool_parser.py  | 18 +++++++
 vllm/tool_parsers/kimi_k2_tool_parser.py      | 20 ++++++-
 vllm/tool_parsers/openai_tool_parser.py       | 18 +++++++
 vllm/tool_parsers/qwen3coder_tool_parser.py   | 20 +++++++
 6 files changed, 162 insertions(+), 6 deletions(-)

diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index c62e95830243..9678c88e75a0 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -6,6 +6,7 @@
 
 import pytest
 from openai.types.responses.function_tool import FunctionTool
+from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -1146,3 +1147,56 @@ def test_no_double_serialization_string_args(qwen3_tool_parser):
     args = json.loads(raw_arguments)
     assert args["message"] == "hello world"
     assert '\\"hello world\\"' not in raw_arguments
+
+
+def test_support_builtin_structural_tag(qwen3_tool_parser: Qwen3CoderToolParser):
+    assert qwen3_tool_parser.support_structural_tag() is True
+
+
+def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = qwen3_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_structural_tag_is_json_string(
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = qwen3_tool_parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)
+    loaded = json.loads(out.structured_outputs.structural_tag)
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_uses_json_schema_not_structural_tag(
+    qwen3_tool_parser: Qwen3CoderToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    out = qwen3_tool_parser.adjust_request(req)
+    assert out.structured_outputs.structural_tag is None
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 75181d8dfac6..5738d02dcf31 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -5,6 +5,9 @@
 import os
 from collections.abc import Callable, Sequence
 from functools import cached_property
+import json
+
+from xgrammar import StructuralTag
 
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
@@ -83,17 +86,19 @@ def vocab(self) -> dict[str, int]:
         return self.model_tokenizer.get_vocab()
 
     def adjust_request(
-        self, request: ChatCompletionRequest | ResponsesRequest
+        self,
+        request: ChatCompletionRequest | ResponsesRequest,
     ) -> ChatCompletionRequest | ResponsesRequest:
-        """
-        Static method that used to adjust the request parameters.
-        """
+
+        # If there are no tools, return the request as is.
         if not request.tools:
             return request
+
+        # Step 1: set structured output params when tool constraints are derived
+        # from the tool schema.
         json_schema_from_tool = get_json_schema_from_tools(
             tool_choice=request.tool_choice, tools=request.tools
         )
-        # Set structured output params for tool calling
         if json_schema_from_tool is not None:
             if isinstance(request, ChatCompletionRequest):
                 # tool_choice: "Forced Function" or "required" will override
@@ -118,9 +123,32 @@ def adjust_request(
                         strict=True,
                     )
                 )
+                
+            return request
 
+        # Only ChatCompletionRequest is supported for Step 2.
+        if not isinstance(request, ChatCompletionRequest):
+            return request
+
+
+        # Step 2: apply xgrammar's built-in tool calling support.
+        if self.support_structural_tag() and request.tool_choice == "auto":
+            structure_tag = self.get_structural_tag(request)
+            request.structured_outputs = StructuredOutputsParams(
+                structural_tag=json.dumps(structure_tag.model_dump()),
+            )
         return request
 
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag:
+        raise NotImplementedError(
+            "ToolParser.get_xgrammar_builtin_structural_tag is not implemented"
+        )
+
+    def support_structural_tag(self) -> bool:
+        return False
+
     def extract_tool_calls(
         self, model_output: str, request: ChatCompletionRequest
     ) -> ExtractedToolCallInformation:
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index b8623592365c..c3efc8ef0139 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -28,6 +28,8 @@
 )
 from vllm.tool_parsers.utils import partial_tag_overlap
 
+from xgrammar import StructuralTag, get_builtin_structural_tag
+
 logger = init_logger(__name__)
 
 
@@ -319,3 +321,19 @@ def extract_tool_calls_streaming(
             return DeltaMessage(content="")
 
         return None
+
+    def support_structural_tag(self) -> bool:
+        return True
+
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag:
+        # Config for xgrammar's built-in structural tagging.
+        dict_tools = [tool.model_dump() for tool in request.tools]
+        thinking_mode = request.include_reasoning
+        return get_builtin_structural_tag(
+            model="deepseek_v3_2",
+            reasoning=True,
+            tools=dict_tools,
+            force_empty_reasoning=not thinking_mode,
+        )
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index 7ddd8fa7a80d..02325eda8641 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -25,6 +25,8 @@
 )
 from vllm.tool_parsers.utils import partial_tag_overlap
 
+from xgrammar import StructuralTag, get_builtin_structural_tag
+
 logger = init_logger(__name__)
 
 
@@ -273,4 +275,20 @@ def extract_tool_calls_streaming(
 
         except Exception:
             logger.exception("Error trying to handle streaming tool call.")
-            return None
+            return None  # do not stream a delta. skip this token ID.
+
+    def support_structural_tag(self) -> bool:
+        return True
+
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag:
+        # Config for xgrammar's built-in structural tagging.
+        dict_tools = [tool.model_dump() for tool in request.tools]
+        thinking_mode = request.include_reasoning
+        return get_builtin_structural_tag(
+            model="kimi",
+            reasoning=True,
+            tools=dict_tools,
+            force_empty_reasoning=not thinking_mode,
+        )
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index ee6dd70718b3..01e036385ab2 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -20,6 +20,8 @@
     ToolParser,
 )
 
+from xgrammar import StructuralTag, get_builtin_structural_tag
+
 if TYPE_CHECKING:
     from vllm.tokenizers import TokenizerLike
 else:
@@ -112,3 +114,19 @@ def extract_tool_calls_streaming(
         raise NotImplementedError(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
         )
+
+    def support_structural_tag(self) -> bool:
+        return True
+
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag:
+        # Config for xgrammar's built-in structural tagging.
+        dict_tools = [tool.model_dump() for tool in request.tools]
+        thinking_mode = request.include_reasoning
+        return get_builtin_structural_tag(
+            model="harmony",
+            reasoning=True,
+            tools=dict_tools,
+            force_empty_reasoning=not thinking_mode,
+        )
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 7b089ceffbc0..394bdc458d4e 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -11,6 +11,7 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
 )
+
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
     DeltaMessage,
@@ -27,6 +28,8 @@
 )
 from vllm.tool_parsers.utils import find_tool_properties
 
+from xgrammar import StructuralTag, get_builtin_structural_tag
+
 logger = init_logger(__name__)
 
 
@@ -681,3 +684,20 @@ def extract_tool_calls_streaming(
                 return result
 
         return None
+    
+    
+    def support_structural_tag(self) -> bool:
+        return True
+
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag:
+        # Config for xgrammar's built-in structural tagging.
+        dict_tools = [tool.model_dump() for tool in request.tools]
+        thinking_mode = request.include_reasoning
+        return get_builtin_structural_tag(
+            model="qwen_coder",
+            reasoning=True,
+            tools=dict_tools,
+            force_empty_reasoning=not thinking_mode,
+        )

From 76899e1d20ae23dc5785a542ebd5b4a662e61047 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Sun, 5 Apr 2026 11:18:40 +0800
Subject: [PATCH 02/43] finish the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv32_tool_parser.py           | 96 +++++++++++++++++++
 .../tool_parsers/test_kimi_k2_tool_parser.py  | 95 ++++++++++++++++++
 tests/tool_parsers/test_openai_tool_parser.py | 95 ++++++++++++++++++
 3 files changed, 286 insertions(+)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index 6145253d9f90..f2f7e82064c9 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -10,6 +10,7 @@
 from unittest.mock import MagicMock
 
 import pytest
+from xgrammar import StructuralTag
 
 from tests.tool_parsers.utils import run_tool_extraction_streaming
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -17,6 +18,10 @@
     FunctionDefinition,
 )
 from vllm.tokenizers import get_tokenizer
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
 # ---------------------------------------------------------------------------
@@ -48,6 +53,43 @@ def make_request(tools=None) -> MagicMock:
     return req
 
 
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionToolsParam]:
+    return [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city name"},
+                        "state": {"type": "string", "description": "The state code"},
+                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+                    },
+                    "required": ["city", "state"],
+                },
+            },
+        ),
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "calculate_area",
+                "description": "Calculate area of a shape",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "shape": {"type": "string"},
+                        "dimensions": {"type": "object"},
+                        "precision": {"type": "integer"},
+                    },
+                },
+            },
+        ),
+    ]
+
+
 # Shorthand for the DSML tokens used throughout
 FC_START = "<｜DSML｜function_calls>"
 FC_END = "</｜DSML｜function_calls>"
@@ -797,3 +839,57 @@ def test_convert_param_value_checked_helper(parser):
     assert parser._convert_param_value("null", "integer") is None
     assert parser._convert_param_value("null", "boolean") is None
     assert parser._convert_param_value("null", "object") is None
+
+
+def test_support_builtin_structural_tag():
+    assert make_parser().support_structural_tag() is True
+
+
+def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    parser = make_parser()
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_structural_tag_is_json_string(
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    parser = make_parser()
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)
+    loaded = json.loads(out.structured_outputs.structural_tag)
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_uses_json_schema_not_structural_tag(
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    parser = make_parser()
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    out = parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is None
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index b56032b91c17..5125dd7d6431 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -6,11 +6,16 @@
 from unittest.mock import MagicMock
 
 import pytest
+from xgrammar import StructuralTag
 
 from tests.tool_parsers.utils import (
     run_tool_extraction,
     run_tool_extraction_streaming,
 )
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
 )
@@ -20,6 +25,43 @@
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
 
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionToolsParam]:
+    return [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city name"},
+                        "state": {"type": "string", "description": "The state code"},
+                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+                    },
+                    "required": ["city", "state"],
+                },
+            },
+        ),
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "calculate_area",
+                "description": "Calculate area of a shape",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "shape": {"type": "string"},
+                        "dimensions": {"type": "object"},
+                        "precision": {"type": "integer"},
+                    },
+                },
+            },
+        ),
+    ]
+
+
 @pytest.fixture(scope="module")
 def kimi_k2_tokenizer():
     return get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True)
@@ -580,3 +622,56 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert len(rec.tool_calls) == 1
         assert rec.tool_calls[0].function.name == "get_weather"
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
+
+
+def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser):
+    assert kimi_k2_tool_parser.support_structural_tag() is True
+
+
+def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+    kimi_k2_tool_parser: KimiK2ToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_structural_tag_is_json_string(
+    kimi_k2_tool_parser: KimiK2ToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = kimi_k2_tool_parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)
+    loaded = json.loads(out.structured_outputs.structural_tag)
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_uses_json_schema_not_structural_tag(
+    kimi_k2_tool_parser: KimiK2ToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    out = kimi_k2_tool_parser.adjust_request(req)
+    assert out.structured_outputs.structural_tag is None
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index e9e39ef4c029..904f4dbcae49 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -13,7 +13,12 @@
     SystemContent,
     load_harmony_encoding,
 )
+from xgrammar import StructuralTag
 
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
 from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser
@@ -37,6 +42,43 @@ def harmony_encoding():
     return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
 
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionToolsParam]:
+    return [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city name"},
+                        "state": {"type": "string", "description": "The state code"},
+                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+                    },
+                    "required": ["city", "state"],
+                },
+            },
+        ),
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "calculate_area",
+                "description": "Calculate area of a shape",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "shape": {"type": "string"},
+                        "dimensions": {"type": "object"},
+                        "precision": {"type": "integer"},
+                    },
+                },
+            },
+        ),
+    ]
+
+
 def assert_tool_calls(
     actual_tool_calls: list[ToolCall],
     expected_tool_calls: list[ToolCall],
@@ -261,3 +303,56 @@ def test_extract_tool_calls_with_content(
     ]
     assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
     assert extracted_info.content == final_content
+
+
+def test_support_builtin_structural_tag(openai_tool_parser: OpenAIToolParser):
+    assert openai_tool_parser.support_structural_tag() is True
+
+
+def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+    openai_tool_parser: OpenAIToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = openai_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_structural_tag_is_json_string(
+    openai_tool_parser: OpenAIToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = openai_tool_parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)
+    loaded = json.loads(out.structured_outputs.structural_tag)
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_uses_json_schema_not_structural_tag(
+    openai_tool_parser: OpenAIToolParser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    out = openai_tool_parser.adjust_request(req)
+    assert out.structured_outputs.structural_tag is None

From 5a984a124e4c6a86a92c6554a1c565d346b4616d Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Tue, 28 Apr 2026 20:12:39 +0800
Subject: [PATCH 03/43] update the qwen_coder.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py    | 43 +++++++++++++++++---
 vllm/tool_parsers/deepseekv32_tool_parser.py | 16 +-------
 vllm/tool_parsers/kimi_k2_tool_parser.py     | 18 ++------
 vllm/tool_parsers/openai_tool_parser.py      | 18 ++------
 vllm/tool_parsers/qwen3coder_tool_parser.py  | 16 +-------
 5 files changed, 48 insertions(+), 63 deletions(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 5738d02dcf31..2d52bdca1546 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -7,7 +7,7 @@
 from functools import cached_property
 import json
 
-from xgrammar import StructuralTag
+from xgrammar import StructuralTag, get_model_structural_tag
 
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
@@ -132,19 +132,52 @@ def adjust_request(
 
 
         # Step 2: apply xgrammar's built-in tool calling support.
-        if self.support_structural_tag() and request.tool_choice == "auto":
+        # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required".
+        need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required"
+        if self.support_structural_tag() and need_tool_calling:
             structure_tag = self.get_structural_tag(request)
             request.structured_outputs = StructuredOutputsParams(
                 structural_tag=json.dumps(structure_tag.model_dump()),
             )
         return request
+    
+    def get_model_structural_tag_id(self) -> str:
+        """
+        Return the model ID for the builtin structural tag.
+        """
+        raise NotImplementedError()
+
+    def empty_thinking_as_non_thinking(self) -> bool:
+        """
+        It decides how to handle non-thinking mode. If True, non-thinking mode will force the
+        LLM output an empty thinking. If False, thinking tags like <think> or </think> are not
+        allowed and will not be output by the LLM.
+        """
+        return True
 
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag:
-        raise NotImplementedError(
-            "ToolParser.get_xgrammar_builtin_structural_tag is not implemented"
-        )
+        
+        model_id = self.get_model_structural_tag_id()
+        thinking_mode = request.include_reasoning
+        
+        if thinking_mode:
+            return get_model_structural_tag(
+                model=model_id,
+                tools=request.tools,
+                tool_choice=request.tool_choice,
+                reasoning=True,
+                force_empty_reasoning=False,
+            )
+        else:
+            return get_model_structural_tag(
+                model=model_id,
+                tools=request.tools,
+                tool_choice=request.tool_choice,
+                reasoning=not self.empty_thinking_as_non_thinking(),
+                force_empty_reasoning=self.empty_thinking_as_non_thinking(),
+            )
 
     def support_structural_tag(self) -> bool:
         return False
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index c3efc8ef0139..63e3cd59ab51 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -28,8 +28,6 @@
 )
 from vllm.tool_parsers.utils import partial_tag_overlap
 
-from xgrammar import StructuralTag, get_builtin_structural_tag
-
 logger = init_logger(__name__)
 
 
@@ -325,15 +323,5 @@ def extract_tool_calls_streaming(
     def support_structural_tag(self) -> bool:
         return True
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag:
-        # Config for xgrammar's built-in structural tagging.
-        dict_tools = [tool.model_dump() for tool in request.tools]
-        thinking_mode = request.include_reasoning
-        return get_builtin_structural_tag(
-            model="deepseek_v3_2",
-            reasoning=True,
-            tools=dict_tools,
-            force_empty_reasoning=not thinking_mode,
-        )
+    def get_model_structural_tag_id(self) -> str:
+        return "deepseek_v3_2"
\ No newline at end of file
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index 02325eda8641..319f2c19f4b6 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -25,8 +25,6 @@
 )
 from vllm.tool_parsers.utils import partial_tag_overlap
 
-from xgrammar import StructuralTag, get_builtin_structural_tag
-
 logger = init_logger(__name__)
 
 
@@ -279,16 +277,6 @@ def extract_tool_calls_streaming(
 
     def support_structural_tag(self) -> bool:
         return True
-
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag:
-        # Config for xgrammar's built-in structural tagging.
-        dict_tools = [tool.model_dump() for tool in request.tools]
-        thinking_mode = request.include_reasoning
-        return get_builtin_structural_tag(
-            model="kimi",
-            reasoning=True,
-            tools=dict_tools,
-            force_empty_reasoning=not thinking_mode,
-        )
+    
+    def get_model_structural_tag_id(self) -> str:
+        return "kimi"
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 01e036385ab2..dc906a762a77 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -20,8 +20,6 @@
     ToolParser,
 )
 
-from xgrammar import StructuralTag, get_builtin_structural_tag
-
 if TYPE_CHECKING:
     from vllm.tokenizers import TokenizerLike
 else:
@@ -117,16 +115,6 @@ def extract_tool_calls_streaming(
 
     def support_structural_tag(self) -> bool:
         return True
-
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag:
-        # Config for xgrammar's built-in structural tagging.
-        dict_tools = [tool.model_dump() for tool in request.tools]
-        thinking_mode = request.include_reasoning
-        return get_builtin_structural_tag(
-            model="harmony",
-            reasoning=True,
-            tools=dict_tools,
-            force_empty_reasoning=not thinking_mode,
-        )
+    
+    def get_model_structural_tag_id(self) -> str:
+        return "harmony"
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 394bdc458d4e..ed24752bfd37 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -28,8 +28,6 @@
 )
 from vllm.tool_parsers.utils import find_tool_properties
 
-from xgrammar import StructuralTag, get_builtin_structural_tag
-
 logger = init_logger(__name__)
 
 
@@ -689,15 +687,5 @@ def extract_tool_calls_streaming(
     def support_structural_tag(self) -> bool:
         return True
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag:
-        # Config for xgrammar's built-in structural tagging.
-        dict_tools = [tool.model_dump() for tool in request.tools]
-        thinking_mode = request.include_reasoning
-        return get_builtin_structural_tag(
-            model="qwen_coder",
-            reasoning=True,
-            tools=dict_tools,
-            force_empty_reasoning=not thinking_mode,
-        )
+    def get_model_structural_tag_id(self) -> str:
+        return "qwen_coder"

From 95e64e7891dafe716af38b33b87e507de7fb43fc Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 11:28:55 +0800
Subject: [PATCH 04/43] handle named tool_choice when building the structural tag.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 2d52bdca1546..e895f69a50c2 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -18,6 +18,7 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -133,7 +134,7 @@ def adjust_request(
 
         # Step 2: apply xgrammar's built-in tool calling support.
         # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required".
-        need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required"
+        need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
         if self.support_structural_tag() and need_tool_calling:
             structure_tag = self.get_structural_tag(request)
             request.structured_outputs = StructuredOutputsParams(
@@ -161,20 +162,31 @@ def get_structural_tag(
         
         model_id = self.get_model_structural_tag_id()
         thinking_mode = request.include_reasoning
+        tool_choice_type = (
+            "forced" if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice
+        )
+        tool_dicts = []
+        
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            for tool in request.tools:
+                if tool.function.name == request.tool_choice.function.name:
+                    tool_dicts.append(tool.model_dump())
+        else:
+            tool_dicts = [tool.model_dump() for tool in request.tools]
         
         if thinking_mode:
             return get_model_structural_tag(
                 model=model_id,
-                tools=request.tools,
-                tool_choice=request.tool_choice,
+                tools=tool_dicts,
+                tool_choice=tool_choice_type,
                 reasoning=True,
                 force_empty_reasoning=False,
             )
         else:
             return get_model_structural_tag(
                 model=model_id,
-                tools=request.tools,
-                tool_choice=request.tool_choice,
+                tools=tool_dicts,
+                tool_choice=tool_choice_type,
                 reasoning=not self.empty_thinking_as_non_thinking(),
                 force_empty_reasoning=self.empty_thinking_as_non_thinking(),
             )

From 7a3bbd0e18db453c45caf5cda22d895b4d2ff595 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 11:32:17 +0800
Subject: [PATCH 05/43] update the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv32_tool_parser.py           | 22 +++++++++++++++++
 .../tool_parsers/test_kimi_k2_tool_parser.py  | 24 ++++++++++++++++++-
 tests/tool_parsers/test_openai_tool_parser.py | 23 +++++++++++++++++-
 .../test_qwen3coder_tool_parser.py            | 21 ++++++++++++++++
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index f2f7e82064c9..80bcb9cf6b63 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -21,6 +21,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
@@ -857,6 +859,26 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     )
     tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
+    
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+    
+    if sample_tools:
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+        )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
 
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 5125dd7d6431..e94ab8299f01 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -15,6 +15,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -640,7 +642,27 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     )
     tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
-
+    
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+    
+    if sample_tools:
+
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+        )
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index 904f4dbcae49..56ec4fafa0e0 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -18,6 +18,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
@@ -321,7 +323,26 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     )
     tag = openai_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
-
+    
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = openai_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+    
+    if sample_tools:
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+        )
+    tag = openai_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)    
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 9678c88e75a0..a77fa657140f 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -11,6 +11,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -1166,6 +1168,25 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     tag = qwen3_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
 
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = qwen3_tool_parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+
+    if sample_tools:
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+        )
+        tag = qwen3_tool_parser.get_structural_tag(req)
+        assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(

From 3f5e0f3767c29abff9586969c2a053f54a4d3296 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:05:52 +0800
Subject: [PATCH 06/43] pass named tool_choice to xgrammar as a dumped dict instead of "forced".

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index e895f69a50c2..f671a866bd0a 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -163,7 +163,7 @@ def get_structural_tag(
         model_id = self.get_model_structural_tag_id()
         thinking_mode = request.include_reasoning
         tool_choice_type = (
-            "forced" if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice
+            request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice
         )
         tool_dicts = []
         

From db9ccc6b272357764a9452435ce40689c85da1aa Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:18:43 +0800
Subject: [PATCH 07/43] fix the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 tests/tool_parsers/test_deepseekv32_tool_parser.py | 9 ++++++---
 tests/tool_parsers/test_kimi_k2_tool_parser.py     | 9 ++++++---
 tests/tool_parsers/test_openai_tool_parser.py      | 9 ++++++---
 tests/tool_parsers/test_qwen3coder_tool_parser.py  | 9 ++++++---
 4 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index 80bcb9cf6b63..a55d245054db 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -21,8 +21,6 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
@@ -875,7 +873,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice={
+                "type": "function",
+                "function": {
+                    "name": tool.function.name,
+                },
+            },
         )
     tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index e94ab8299f01..fa1def007216 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -15,8 +15,6 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -659,7 +657,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice={
+                "type": "function",
+                "function": {
+                    "name": tool.function.name,
+                },
+            },
         )
     tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index 56ec4fafa0e0..babdafdf54e9 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -18,8 +18,6 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
@@ -339,7 +337,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice={
+                "type": "function",
+                "function": {
+                    "name": tool.function.name,
+                },
+            },
         )
     tag = openai_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)    
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index a77fa657140f..21063348063d 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -11,8 +11,6 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -1183,7 +1181,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice={
+                "type": "function",
+                "function": {
+                    "name": tool.function.name,
+                },
+            },
         )
         tag = qwen3_tool_parser.get_structural_tag(req)
         assert isinstance(tag, StructuralTag)

From 8b246f9ac75a7be4b154d811416771cd8a981e61 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:20:42 +0800
Subject: [PATCH 08/43] Revert "fix the test."

This reverts commit db9ccc6b272357764a9452435ce40689c85da1aa.
---
 tests/tool_parsers/test_deepseekv32_tool_parser.py | 9 +++------
 tests/tool_parsers/test_kimi_k2_tool_parser.py     | 9 +++------
 tests/tool_parsers/test_openai_tool_parser.py      | 9 +++------
 tests/tool_parsers/test_qwen3coder_tool_parser.py  | 9 +++------
 4 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index a55d245054db..80bcb9cf6b63 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -21,6 +21,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
@@ -873,12 +875,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice={
-                "type": "function",
-                "function": {
-                    "name": tool.function.name,
-                },
-            },
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
     tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index fa1def007216..e94ab8299f01 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -15,6 +15,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -657,12 +659,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice={
-                "type": "function",
-                "function": {
-                    "name": tool.function.name,
-                },
-            },
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
     tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index babdafdf54e9..56ec4fafa0e0 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -18,6 +18,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
@@ -337,12 +339,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice={
-                "type": "function",
-                "function": {
-                    "name": tool.function.name,
-                },
-            },
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
     tag = openai_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)    
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 21063348063d..a77fa657140f 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -11,6 +11,8 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -1181,12 +1183,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice={
-                "type": "function",
-                "function": {
-                    "name": tool.function.name,
-                },
-            },
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
         tag = qwen3_tool_parser.get_structural_tag(req)
         assert isinstance(tag, StructuralTag)

From a5a5277c593e78dbdc51a5c37146bc1c561a1212 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:23:21 +0800
Subject: [PATCH 09/43] accept ChatCompletionNamedToolChoiceParam instances in tool_choice validation.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/entrypoints/openai/chat_completion/protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py
index 01d2df88d69b..3976ee24287d 100644
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -715,7 +715,7 @@ def check_tool_usage(cls, data):
             # OR that it's set to "auto" or "required"
             if data["tool_choice"] not in ["auto", "required"] and not isinstance(
                 data["tool_choice"], dict
-            ):
+            ) and not isinstance(data["tool_choice"], ChatCompletionNamedToolChoiceParam):
                 raise ValueError(
                     f"Invalid value for `tool_choice`: {data['tool_choice']}! "
                     'Only named tools, "none", "auto" or "required" '

From 2de7bbdc250d1353162208900ed9a3d9acd9eb22 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:48:33 +0800
Subject: [PATCH 10/43] fix the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../tool_parsers/test_kimi_k2_tool_parser.py  |  3 ++
 .../test_qwen3coder_tool_parser.py            | 37 +++++++++++++++----
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index e94ab8299f01..a23d38aa0e5b 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -507,6 +507,7 @@ def test_sets_skip_special_tokens_false(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = [{"type": "function", "function": {"name": "test"}}]
         request.tool_choice = "auto"
+        request.include_reasoning = True
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
@@ -516,6 +517,7 @@ def test_no_change_when_tool_choice_none(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = [{"type": "function", "function": {"name": "test"}}]
         request.tool_choice = "none"
+        request.include_reasoning = True
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
@@ -525,6 +527,7 @@ def test_no_change_when_no_tools(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = None
         request.tool_choice = "auto"
+        request.include_reasoning = False
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index a77fa657140f..0c1b5061ccbb 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -111,6 +111,26 @@ def sample_tools(request):
         ]
 
 
+def _as_chat_completion_tools(
+    tools: list[ChatCompletionToolsParam | FunctionTool],
+) -> list[ChatCompletionToolsParam]:
+    normalized: list[ChatCompletionToolsParam] = []
+    for tool in tools:
+        if isinstance(tool, ChatCompletionToolsParam):
+            normalized.append(tool)
+        else:
+            normalized.append(
+                ChatCompletionToolsParam(
+                    type="function",
+                    function={
+                        "name": tool.name,
+                        "description": tool.description,
+                        "parameters": tool.parameters,
+                    },
+                ))
+    return normalized
+
+
 def assert_tool_calls(
     actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall]
 ):
@@ -1159,10 +1179,11 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
+    request_tools = _as_chat_completion_tools(sample_tools)
     req = ChatCompletionRequest(
         messages=[],
         model="m",
-        tools=sample_tools,
+        tools=request_tools,
         tool_choice="auto",
     )
     tag = qwen3_tool_parser.get_structural_tag(req)
@@ -1171,18 +1192,18 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     req = ChatCompletionRequest(
         messages=[],
         model="m",
-        tools=sample_tools,
+        tools=request_tools,
         tool_choice="required",
     )
     tag = qwen3_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
 
-    if sample_tools:
-        tool = sample_tools[0]
+    if request_tools:
+        tool = request_tools[0]
         req = ChatCompletionRequest(
             messages=[],
             model="m",
-            tools=sample_tools,
+            tools=request_tools,
             tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
         tag = qwen3_tool_parser.get_structural_tag(req)
@@ -1194,10 +1215,11 @@ def test_adjust_request_auto_structural_tag_is_json_string(
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
 ) -> None:
+    request_tools = _as_chat_completion_tools(sample_tools)
     req = ChatCompletionRequest(
         messages=[],
         model="m",
-        tools=sample_tools,
+        tools=request_tools,
         tool_choice="auto",
         include_reasoning=include_reasoning,
     )
@@ -1213,10 +1235,11 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
+    request_tools = _as_chat_completion_tools(sample_tools)
     req = ChatCompletionRequest(
         messages=[],
         model="m",
-        tools=sample_tools,
+        tools=request_tools,
         tool_choice="required",
     )
     out = qwen3_tool_parser.adjust_request(req)

From e70a7205c6ace6125415bff258ddcc305bb87a05 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:51:10 +0800
Subject: [PATCH 11/43] bump the xgrammar requirement to 0.1.34.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 requirements/common.txt    | 2 +-
 requirements/test/rocm.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements/common.txt b/requirements/common.txt
index 5d4519204ee9..acd7fcb80012 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -24,7 +24,7 @@ outlines_core == 0.2.14
 # required for outlines backend disk cache
 diskcache == 5.6.3
 lark == 1.2.2
-xgrammar >= 0.1.32, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+xgrammar >= 0.1.34, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
 typing_extensions >= 4.10
 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
 partial-json-parser # used for parsing partial JSON outputs
diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt
index ca33e2d09aa0..f87ea438bc28 100644
--- a/requirements/test/rocm.txt
+++ b/requirements/test/rocm.txt
@@ -1597,7 +1597,7 @@ wrapt==2.1.2
     # via smart-open
 xarray==2026.2.0
     # via rioxarray
-xgrammar==0.1.33
+xgrammar==0.1.34
     # via
     #   -c requirements/common.txt
     #   -r requirements/test/../common.txt

From 7dfbd4d4c9ba85905e6066941f2fe4c36cc8833f Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:54:35 +0800
Subject: [PATCH 12/43] accept both dict and pydantic tools in get_structural_tag.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index f671a866bd0a..02ba453bc169 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -159,21 +159,32 @@ def empty_thinking_as_non_thinking(self) -> bool:
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag:
-        
+
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
         model_id = self.get_model_structural_tag_id()
         thinking_mode = request.include_reasoning
         tool_choice_type = (
             request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice
         )
         tool_dicts = []
-        
+
         if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
             for tool in request.tools:
-                if tool.function.name == request.tool_choice.function.name:
-                    tool_dicts.append(tool.model_dump())
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    tool_dicts.append(tool_dict)
         else:
-            tool_dicts = [tool.model_dump() for tool in request.tools]
-        
+            tool_dicts = [_tool_to_dict(tool) for tool in request.tools]
+
         if thinking_mode:
             return get_model_structural_tag(
                 model=model_id,

From f06ccda912a0873518551722fcc2c94a91743344 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 14:58:44 +0800
Subject: [PATCH 13/43] fix the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../tool_parsers/test_kimi_k2_tool_parser.py  | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index a23d38aa0e5b..f14b1be0993c 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -22,7 +22,7 @@
     ChatCompletionRequest,
 )
 from vllm.tokenizers import get_tokenizer
-from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
+from vllm.tool_parsers.parser import KimiK2ToolParser
 
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
@@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
 
-def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser):
-    assert kimi_k2_tool_parser.support_structural_tag() is True
+def test_support_builtin_structural_tag(parser: KimiK2ToolParser):
+    assert parser.support_structural_tag() is True
 
 
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(
@@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="auto",
     )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     req = ChatCompletionRequest(
@@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="required",
     )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     if sample_tools:
@@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             tools=sample_tools,
             tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
 ) -> None:
@@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
         tool_choice="auto",
         include_reasoning=include_reasoning,
     )
-    out = kimi_k2_tool_parser.adjust_request(req)
+    out = parser.adjust_request(req)
     assert out.structured_outputs is not None
     assert out.structured_outputs.structural_tag is not None
     assert isinstance(out.structured_outputs.structural_tag, str)
@@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
 
 
 def test_adjust_request_required_uses_json_schema_not_structural_tag(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(
@@ -698,5 +698,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag(
         tools=sample_tools,
         tool_choice="required",
     )
-    out = kimi_k2_tool_parser.adjust_request(req)
+    out = parser.adjust_request(req)
     assert out.structured_outputs.structural_tag is None

From f7c8c9173dd58450a2c040bfabf560cafe8c2535 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 15:01:37 +0800
Subject: [PATCH 14/43] fix the import.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../tool_parsers/test_kimi_k2_tool_parser.py  | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index f14b1be0993c..19fa4fd680bf 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -22,7 +22,7 @@
     ChatCompletionRequest,
 )
 from vllm.tokenizers import get_tokenizer
-from vllm.tool_parsers.parser import KimiK2ToolParser
+from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
 
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
@@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
 
-def test_support_builtin_structural_tag(parser: KimiK2ToolParser):
-    assert parser.support_structural_tag() is True
+def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser):
+    assert kimi_k2_tool_parser.support_structural_tag() is True
 
 
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    parser: KimiK2ToolParser,
+    kimi_k2_tool_parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(
@@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="auto",
     )
-    tag = parser.get_structural_tag(req)
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     req = ChatCompletionRequest(
@@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="required",
     )
-    tag = parser.get_structural_tag(req)
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     if sample_tools:
@@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             tools=sample_tools,
             tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
-    tag = parser.get_structural_tag(req)
+    tag = kimi_k2_tool_parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(
-    parser: KimiK2ToolParser,
+    kimi_k2_tool_parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
 ) -> None:
@@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
         tool_choice="auto",
         include_reasoning=include_reasoning,
     )
-    out = parser.adjust_request(req)
+    out = kimi_k2_tool_parser.adjust_request(req)
     assert out.structured_outputs is not None
     assert out.structured_outputs.structural_tag is not None
     assert isinstance(out.structured_outputs.structural_tag, str)
@@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
 
 
 def test_adjust_request_required_uses_json_schema_not_structural_tag(
-    parser: KimiK2ToolParser,
+    kimi_k2_tool_parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(

From 5fbb503c56e32c9696c1faab074ead91f385d7fc Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 15:02:39 +0800
Subject: [PATCH 15/43] rename the kimi k2 test parser argument to `parser`.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 tests/tool_parsers/test_kimi_k2_tool_parser.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 19fa4fd680bf..ff7fc5b0c8c9 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -629,12 +629,12 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
 
-def test_support_builtin_structural_tag(kimi_k2_tool_parser: KimiK2ToolParser):
-    assert kimi_k2_tool_parser.support_structural_tag() is True
+def test_support_builtin_structural_tag(parser: KimiK2ToolParser):
+    assert parser.support_structural_tag() is True
 
 
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(
@@ -643,7 +643,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="auto",
     )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     req = ChatCompletionRequest(
@@ -652,7 +652,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         tools=sample_tools,
         tool_choice="required",
     )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
     
     if sample_tools:
@@ -664,12 +664,12 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
             tools=sample_tools,
             tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
         )
-    tag = kimi_k2_tool_parser.get_structural_tag(req)
+    tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_structural_tag_is_json_string(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
 ) -> None:
@@ -680,7 +680,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
         tool_choice="auto",
         include_reasoning=include_reasoning,
     )
-    out = kimi_k2_tool_parser.adjust_request(req)
+    out = parser.adjust_request(req)
     assert out.structured_outputs is not None
     assert out.structured_outputs.structural_tag is not None
     assert isinstance(out.structured_outputs.structural_tag, str)
@@ -689,7 +689,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
 
 
 def test_adjust_request_required_uses_json_schema_not_structural_tag(
-    kimi_k2_tool_parser: KimiK2ToolParser,
+    parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     req = ChatCompletionRequest(

From 8a09479c5068c5d3c02f32f36faebfd1258750d2 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 16:55:09 +0800
Subject: [PATCH 16/43] move get_structural_tag from the abstract parser into each tool parser.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py    | 59 +-------------------
 vllm/tool_parsers/deepseekv32_tool_parser.py | 33 ++++++++++-
 vllm/tool_parsers/deepseekv4_tool_parser.py  | 36 +++++++++++-
 vllm/tool_parsers/kimi_k2_tool_parser.py     | 35 +++++++++++-
 vllm/tool_parsers/openai_tool_parser.py      | 36 +++++++++++-
 vllm/tool_parsers/qwen3coder_tool_parser.py  | 33 ++++++++++-
 6 files changed, 165 insertions(+), 67 deletions(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 02ba453bc169..b2c88d1449bb 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -7,7 +7,7 @@
 from functools import cached_property
 import json
 
-from xgrammar import StructuralTag, get_model_structural_tag
+from xgrammar import StructuralTag
 
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
@@ -142,65 +142,12 @@ def adjust_request(
             )
         return request
     
-    def get_model_structural_tag_id(self) -> str:
-        """
-        Return the model ID for the builtin structural tag.
-        """
-        raise NotImplementedError()
-
-    def empty_thinking_as_non_thinking(self) -> bool:
-        """
-        It decides how to handle non-thinking mode. If True, non-thinking mode will force the
-        LLM output an empty thinking. If False, thinking tags like <think> or </think> are not
-        allowed and will not be output by the LLM.
-        """
-        return True
-
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag:
-
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        model_id = self.get_model_structural_tag_id()
-        thinking_mode = request.include_reasoning
-        tool_choice_type = (
-            request.tool_choice.model_dump() if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) else request.tool_choice
+        raise NotImplementedError(
+            "ToolParser.get_structural_tag has not been implemented!"
         )
-        tool_dicts = []
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    tool_dicts.append(tool_dict)
-        else:
-            tool_dicts = [_tool_to_dict(tool) for tool in request.tools]
-
-        if thinking_mode:
-            return get_model_structural_tag(
-                model=model_id,
-                tools=tool_dicts,
-                tool_choice=tool_choice_type,
-                reasoning=True,
-                force_empty_reasoning=False,
-            )
-        else:
-            return get_model_structural_tag(
-                model=model_id,
-                tools=tool_dicts,
-                tool_choice=tool_choice_type,
-                reasoning=not self.empty_thinking_as_non_thinking(),
-                force_empty_reasoning=self.empty_thinking_as_non_thinking(),
-            )
 
     def support_structural_tag(self) -> bool:
         return False
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index 63e3cd59ab51..783ec9aa928f 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -7,9 +7,12 @@
 from typing import Any
 
 import regex as re
+from xgrammar import StructuralTag, get_model_structural_tag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
+    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
@@ -323,5 +326,31 @@ def extract_tool_calls_streaming(
     def support_structural_tag(self) -> bool:
         return True
 
-    def get_model_structural_tag_id(self) -> str:
-        return "deepseek_v3_2"
\ No newline at end of file
+    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            converted_tool_choice = request.tool_choice.model_dump()
+            converted_tools = []
+            for tool in request.tools:
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    converted_tools.append(tool_dict)
+        else:
+            converted_tool_choice = request.tool_choice
+            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
+
+        return get_model_structural_tag(
+            model="deepseek_v3_2",
+            tools=converted_tools,
+            tool_choice=converted_tool_choice,
+            reasoning=request.include_reasoning,
+        )
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 45a9c1302578..0c9401e54601 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -2,7 +2,12 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
-
+from xgrammar import StructuralTag, get_model_structural_tag
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
 
 class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     """
@@ -14,3 +19,32 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
 
     tool_call_start_token: str = "<｜DSML｜tool_calls>"
     tool_call_end_token: str = "</｜DSML｜tool_calls>"
+    
+    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            converted_tool_choice = request.tool_choice.model_dump()
+            converted_tools = []
+            for tool in request.tools:
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    converted_tools.append(tool_dict)
+        else:
+            converted_tool_choice = request.tool_choice
+            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
+
+        return get_model_structural_tag(
+            model="deepseek_v4",
+            tools=converted_tools,
+            tool_choice=converted_tool_choice,
+            reasoning=request.include_reasoning,
+        ) 
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index 319f2c19f4b6..35df1df2ad49 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -4,9 +4,12 @@
 from collections.abc import Sequence
 
 import regex as re
+from xgrammar import StructuralTag, get_model_structural_tag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
+    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
@@ -277,6 +280,32 @@ def extract_tool_calls_streaming(
 
     def support_structural_tag(self) -> bool:
         return True
-    
-    def get_model_structural_tag_id(self) -> str:
-        return "kimi"
+
+    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            converted_tool_choice = request.tool_choice.model_dump()
+            converted_tools = []
+            for tool in request.tools:
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    converted_tools.append(tool_dict)
+        else:
+            converted_tool_choice = request.tool_choice
+            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
+
+        return get_model_structural_tag(
+            model="kimi",
+            tools=converted_tools,
+            tool_choice=converted_tool_choice,
+            reasoning=request.include_reasoning,
+        )
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index dc906a762a77..152fdf2ce677 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -4,8 +4,12 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
+from xgrammar import StructuralTag, get_model_structural_tag
+
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
+    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -115,6 +119,32 @@ def extract_tool_calls_streaming(
 
     def support_structural_tag(self) -> bool:
         return True
-    
-    def get_model_structural_tag_id(self) -> str:
-        return "harmony"
+
+    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            converted_tool_choice = request.tool_choice.model_dump()
+            converted_tools = []
+            for tool in request.tools:
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    converted_tools.append(tool_dict)
+        else:
+            converted_tool_choice = request.tool_choice
+            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
+
+        return get_model_structural_tag(
+            model="harmony",
+            tools=converted_tools,
+            tool_choice=converted_tool_choice,
+            reasoning=request.include_reasoning,
+        )
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index ed24752bfd37..ce2b4a3069e9 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -7,9 +7,12 @@
 from typing import Any
 
 import regex as re
+from xgrammar import StructuralTag, get_model_structural_tag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
+    ChatCompletionToolsParam,
 )
 
 from vllm.entrypoints.openai.engine.protocol import (
@@ -687,5 +690,31 @@ def extract_tool_calls_streaming(
     def support_structural_tag(self) -> bool:
         return True
 
-    def get_model_structural_tag_id(self) -> str:
-        return "qwen_coder"
+    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
+            if isinstance(tool, dict):
+                return tool
+            if hasattr(tool, "model_dump"):
+                return tool.model_dump()
+            if hasattr(tool, "dict"):
+                return tool.dict()
+            raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
+            converted_tool_choice = request.tool_choice.model_dump()
+            converted_tools = []
+            for tool in request.tools:
+                tool_dict = _tool_to_dict(tool)
+                tool_name = tool_dict.get("function", {}).get("name")
+                if tool_name == request.tool_choice.function.name:
+                    converted_tools.append(tool_dict)
+        else:
+            converted_tool_choice = request.tool_choice
+            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
+
+        return get_model_structural_tag(
+            model="qwen_coder",
+            tools=converted_tools,
+            tool_choice=converted_tool_choice,
+            reasoning=request.include_reasoning,
+        )

From 098b80c9e9e415e73cd0ee9f139308f2ac205c71 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 17:02:21 +0800
Subject: [PATCH 17/43] add v4 tests.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv4_tool_parser.py            | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index 631d0fb97b33..b369c364fead 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -8,6 +8,13 @@
 
 from vllm.tool_parsers import ToolParserManager
 from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+    ChatCompletionNamedFunction,
+)
+from xgrammar import StructuralTag
 
 MOCK_TOKENIZER = MagicMock()
 MOCK_TOKENIZER.get_vocab.return_value = {}
@@ -121,3 +128,40 @@ def test_streaming_extracts_complete_invokes():
     ]
     assert names == ["search"]
     assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"}
+
+def test_support_builtin_structural_tag():
+    assert make_parser().support_structural_tag() is True
+
+
+def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    parser = make_parser()
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="auto",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+    
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=sample_tools,
+        tool_choice="required",
+    )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
+    
+    if sample_tools:
+        tool = sample_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=sample_tools,
+            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+        )
+    tag = parser.get_structural_tag(req)
+    assert isinstance(tag, StructuralTag)
\ No newline at end of file

From 15c99cb111e626d885d7f54a0b57dc9ff10168db Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 17:13:41 +0800
Subject: [PATCH 18/43] drop support_structural_tag; get_structural_tag returns None by default.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv32_tool_parser.py           |  4 ---
 .../test_deepseekv4_tool_parser.py            |  4 ---
 .../tool_parsers/test_kimi_k2_tool_parser.py  |  4 ---
 tests/tool_parsers/test_openai_tool_parser.py |  4 ---
 .../test_qwen3coder_tool_parser.py            |  4 ---
 vllm/tool_parsers/abstract_tool_parser.py     | 25 ++++++++++---------
 vllm/tool_parsers/deepseekv32_tool_parser.py  |  7 +++---
 vllm/tool_parsers/deepseekv4_tool_parser.py   |  4 ++-
 vllm/tool_parsers/kimi_k2_tool_parser.py      |  7 +++---
 vllm/tool_parsers/openai_tool_parser.py       |  7 +++---
 vllm/tool_parsers/qwen3coder_tool_parser.py   |  7 +++---
 11 files changed, 28 insertions(+), 49 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index 80bcb9cf6b63..c5ed3ae5a3fd 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -843,10 +843,6 @@ def test_convert_param_value_checked_helper(parser):
     assert parser._convert_param_value("null", "object") is None
 
 
-def test_support_builtin_structural_tag():
-    assert make_parser().support_structural_tag() is True
-
-
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index b369c364fead..6849254a6672 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -129,10 +129,6 @@ def test_streaming_extracts_complete_invokes():
     assert names == ["search"]
     assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"}
 
-def test_support_builtin_structural_tag():
-    assert make_parser().support_structural_tag() is True
-
-
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index ff7fc5b0c8c9..20794df89ee4 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -629,10 +629,6 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
 
-def test_support_builtin_structural_tag(parser: KimiK2ToolParser):
-    assert parser.support_structural_tag() is True
-
-
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index 56ec4fafa0e0..2fc24df11b1a 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -307,10 +307,6 @@ def test_extract_tool_calls_with_content(
     assert extracted_info.content == final_content
 
 
-def test_support_builtin_structural_tag(openai_tool_parser: OpenAIToolParser):
-    assert openai_tool_parser.support_structural_tag() is True
-
-
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     openai_tool_parser: OpenAIToolParser,
     sample_tools: list[ChatCompletionToolsParam],
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 0c1b5061ccbb..e31c30992c72 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1171,10 +1171,6 @@ def test_no_double_serialization_string_args(qwen3_tool_parser):
     assert '\\"hello world\\"' not in raw_arguments
 
 
-def test_support_builtin_structural_tag(qwen3_tool_parser: Qwen3CoderToolParser):
-    assert qwen3_tool_parser.support_structural_tag() is True
-
-
 def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index b2c88d1449bb..0988f52c4569 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -134,23 +134,24 @@ def adjust_request(
 
         # Step 2: apply xgrammar's built-in tool calling support.
         # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required".
-        need_tool_calling = request.tool_choice == "auto" or request.tool_choice == "required" or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
-        if self.support_structural_tag() and need_tool_calling:
+        need_tool_calling = (
+            request.tool_choice == "auto"
+            or request.tool_choice == "required"
+            or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
+        )
+        if need_tool_calling:
             structure_tag = self.get_structural_tag(request)
-            request.structured_outputs = StructuredOutputsParams(
-                structural_tag=json.dumps(structure_tag.model_dump()),
-            )
+            if structure_tag is not None:
+                request.structured_outputs = StructuredOutputsParams(
+                    structural_tag=json.dumps(structure_tag.model_dump()),
+                )
+
         return request
     
     def get_structural_tag(
         self, request: ChatCompletionRequest
-    ) -> StructuralTag:
-        raise NotImplementedError(
-            "ToolParser.get_structural_tag has not been implemented!"
-        )
-
-    def support_structural_tag(self) -> bool:
-        return False
+    ) -> StructuralTag | None:
+        return None
 
     def extract_tool_calls(
         self, model_output: str, request: ChatCompletionRequest
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index 783ec9aa928f..7a3e8635abc8 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -323,10 +323,9 @@ def extract_tool_calls_streaming(
 
         return None
 
-    def support_structural_tag(self) -> bool:
-        return True
-
-    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag | None:
         def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
             if isinstance(tool, dict):
                 return tool
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 0c9401e54601..2e6927f759fc 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -20,7 +20,9 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     tool_call_start_token: str = "<｜DSML｜tool_calls>"
     tool_call_end_token: str = "</｜DSML｜tool_calls>"
     
-    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag | None:
         def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
             if isinstance(tool, dict):
                 return tool
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index 35df1df2ad49..b580c371a980 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -278,10 +278,9 @@ def extract_tool_calls_streaming(
             logger.exception("Error trying to handle streaming tool call.")
             return None  # do not stream a delta. skip this token ID.
 
-    def support_structural_tag(self) -> bool:
-        return True
-
-    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag | None:
         def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
             if isinstance(tool, dict):
                 return tool
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 152fdf2ce677..d308c038ae3b 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -117,10 +117,9 @@ def extract_tool_calls_streaming(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
         )
 
-    def support_structural_tag(self) -> bool:
-        return True
-
-    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag | None:
         def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
             if isinstance(tool, dict):
                 return tool
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index ce2b4a3069e9..4a42aa87c57b 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -687,10 +687,9 @@ def extract_tool_calls_streaming(
         return None
     
     
-    def support_structural_tag(self) -> bool:
-        return True
-
-    def get_structural_tag(self, request: ChatCompletionRequest) -> StructuralTag:
+    def get_structural_tag(
+        self, request: ChatCompletionRequest
+    ) -> StructuralTag | None:
         def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
             if isinstance(tool, dict):
                 return tool

From 93fc4b43716016e05430605664ce4ea3cc54688c Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 17:27:36 +0800
Subject: [PATCH 19/43] update the priority.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../tool_parsers/test_kimi_k2_tool_parser.py  |  5 ++-
 tests/tool_parsers/test_openai_tool_parser.py |  5 ++-
 .../test_qwen3coder_tool_parser.py            |  5 ++-
 vllm/tool_parsers/abstract_tool_parser.py     | 42 +++++++++----------
 4 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 20794df89ee4..d9509b25a946 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -684,7 +684,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
     assert isinstance(loaded, dict)
 
 
-def test_adjust_request_required_uses_json_schema_not_structural_tag(
+def test_adjust_request_required_prefers_structural_tag(
     parser: KimiK2ToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
@@ -695,4 +695,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag(
         tool_choice="required",
     )
     out = parser.adjust_request(req)
-    assert out.structured_outputs.structural_tag is None
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index 2fc24df11b1a..d4536d1c5e3d 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -361,7 +361,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
     assert isinstance(loaded, dict)
 
 
-def test_adjust_request_required_uses_json_schema_not_structural_tag(
+def test_adjust_request_required_prefers_structural_tag(
     openai_tool_parser: OpenAIToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
@@ -372,4 +372,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag(
         tool_choice="required",
     )
     out = openai_tool_parser.adjust_request(req)
-    assert out.structured_outputs.structural_tag is None
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index e31c30992c72..aad7c2e4b512 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1227,7 +1227,7 @@ def test_adjust_request_auto_structural_tag_is_json_string(
     assert isinstance(loaded, dict)
 
 
-def test_adjust_request_required_uses_json_schema_not_structural_tag(
+def test_adjust_request_required_prefers_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
@@ -1239,4 +1239,5 @@ def test_adjust_request_required_uses_json_schema_not_structural_tag(
         tool_choice="required",
     )
     out = qwen3_tool_parser.adjust_request(req)
-    assert out.structured_outputs.structural_tag is None
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 0988f52c4569..e7351199b511 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -95,8 +95,27 @@ def adjust_request(
         if not request.tools:
             return request
 
-        # Step 1: set structured output params when tool constraints are derived
-        # from the tool schema.
+        # Step 1 (highest priority for ChatCompletionRequest): apply
+        # xgrammar's built-in structural tag support.
+        if isinstance(request, ChatCompletionRequest):
+            # XGrammar will support tool_choice="none" in the future.
+            # Currently, we only support tool_choice="auto" and
+            # tool_choice="required".
+            need_tool_calling = (
+                request.tool_choice == "auto"
+                or request.tool_choice == "required"
+                or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
+            )
+            if need_tool_calling:
+                structure_tag = self.get_structural_tag(request)
+                if structure_tag is not None:
+                    request.structured_outputs = StructuredOutputsParams(
+                        structural_tag=json.dumps(structure_tag.model_dump()),
+                    )
+                    return request
+
+        # Step 2: set structured output params when tool constraints are
+        # derived from the tool schema.
         json_schema_from_tool = get_json_schema_from_tools(
             tool_choice=request.tool_choice, tools=request.tools
         )
@@ -124,28 +143,9 @@ def adjust_request(
                         strict=True,
                     )
                 )
-                
-            return request
 
-        # Only ChatCompletionRequest is supported for Step 2.
-        if not isinstance(request, ChatCompletionRequest):
             return request
 
-
-        # Step 2: apply xgrammar's built-in tool calling support.
-        # XGrammar will support tool_choice="none" in the future. Currently, we only support tool_choice="auto" and tool_choice="required".
-        need_tool_calling = (
-            request.tool_choice == "auto"
-            or request.tool_choice == "required"
-            or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
-        )
-        if need_tool_calling:
-            structure_tag = self.get_structural_tag(request)
-            if structure_tag is not None:
-                request.structured_outputs = StructuredOutputsParams(
-                    structural_tag=json.dumps(structure_tag.model_dump()),
-                )
-
         return request
     
     def get_structural_tag(

From cbc745ebaefe709337407c9af792b6000eaabd55 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 18:01:03 +0800
Subject: [PATCH 20/43] fix the test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv4_tool_parser.py            | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index 6849254a6672..2074cb66d073 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -26,6 +26,43 @@
 PARAM_START = '<｜DSML｜parameter name="'
 PARAM_END = "</｜DSML｜parameter>"
 
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionToolsParam]:
+    return [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city name"},
+                        "state": {"type": "string", "description": "The state code"},
+                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+                    },
+                    "required": ["city", "state"],
+                },
+            },
+        ),
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "calculate_area",
+                "description": "Calculate area of a shape",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "shape": {"type": "string"},
+                        "dimensions": {"type": "object"},
+                        "precision": {"type": "integer"},
+                    },
+                },
+            },
+        ),
+    ]
+
+
 
 def make_parser(tools=None) -> DeepSeekV4ToolParser:
     return DeepSeekV4ToolParser(MOCK_TOKENIZER, tools=tools)

From 894871f7d02397bf0e7e9a8ad1bfcd2c546b321a Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 18:01:48 +0800
Subject: [PATCH 21/43] fix the import.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 tests/tool_parsers/test_deepseekv4_tool_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index 2074cb66d073..b3e737ddc6a1 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -5,6 +5,7 @@
 
 import json
 from unittest.mock import MagicMock
+import pytest
 
 from vllm.tool_parsers import ToolParserManager
 from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser

From b3bf271ff3dfc51d33ae9418d2aaa11d1748b16c Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Wed, 29 Apr 2026 23:00:28 +0800
Subject: [PATCH 22/43] update the version of xgr.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 requirements/common.txt    | 2 +-
 requirements/test/rocm.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements/common.txt b/requirements/common.txt
index acd7fcb80012..652738eebe74 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -24,7 +24,7 @@ outlines_core == 0.2.14
 # required for outlines backend disk cache
 diskcache == 5.6.3
 lark == 1.2.2
-xgrammar >= 0.1.34, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
+xgrammar >= 0.2.0, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
 typing_extensions >= 4.10
 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
 partial-json-parser # used for parsing partial JSON outputs
diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt
index f87ea438bc28..8ba78ee128b5 100644
--- a/requirements/test/rocm.txt
+++ b/requirements/test/rocm.txt
@@ -1597,7 +1597,7 @@ wrapt==2.1.2
     # via smart-open
 xarray==2026.2.0
     # via rioxarray
-xgrammar==0.1.34
+xgrammar==0.2.0
     # via
     #   -c requirements/common.txt
     #   -r requirements/test/../common.txt

From 1ecff43eba8525502f49e710c16370b7da484fa5 Mon Sep 17 00:00:00 2001
From: mgoin <mgoin64@gmail.com>
Date: Fri, 1 May 2026 15:41:40 +0000
Subject: [PATCH 23/43] Lint

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 vllm/entrypoints/openai/chat_completion/protocol.py | 10 +++++++---
 vllm/tool_parsers/qwen3coder_tool_parser.py         |  7 ++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py
index 03a473aa4cf6..140a2fe566da 100644
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -739,9 +739,13 @@ def check_tool_usage(cls, data):
 
             # make sure that tool choice is either a named tool
             # OR that it's set to "auto" or "required"
-            if data["tool_choice"] not in ["auto", "required"] and not isinstance(
-                data["tool_choice"], dict
-            ) and not isinstance(data["tool_choice"], ChatCompletionNamedToolChoiceParam):
+            if (
+                data["tool_choice"] not in ["auto", "required"]
+                and not isinstance(data["tool_choice"], dict)
+                and not isinstance(
+                    data["tool_choice"], ChatCompletionNamedToolChoiceParam
+                )
+            ):
                 raise ValueError(
                     f"Invalid value for `tool_choice`: {data['tool_choice']}! "
                     'Only named tools, "none", "auto" or "required" '
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 4a42aa87c57b..729aa123bfea 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -14,7 +14,6 @@
     ChatCompletionRequest,
     ChatCompletionToolsParam,
 )
-
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
     DeltaMessage,
@@ -685,8 +684,7 @@ def extract_tool_calls_streaming(
                 return result
 
         return None
-    
-    
+
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag | None:
@@ -699,6 +697,9 @@ def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
                 return tool.dict()
             raise TypeError(f"Unsupported tool type: {type(tool)}")
 
+        if not request.tools:
+            return None
+
         if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
             converted_tool_choice = request.tool_choice.model_dump()
             converted_tools = []

From 6ca893e2391982f52f1d1642bd8f7bed7a6427d1 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 05:12:52 -0400
Subject: [PATCH 24/43] Move structural tag builders into vLLM

---
 .../test_deepseekv32_tool_parser.py           |  63 +---
 .../test_deepseekv4_tool_parser.py            |   2 +-
 .../tool_parsers/test_kimi_k2_tool_parser.py  |  71 ----
 tests/tool_parsers/test_openai_tool_parser.py |  70 ----
 .../test_qwen3coder_tool_parser.py            |   4 +-
 vllm/tool_parsers/abstract_tool_parser.py     |   5 +-
 vllm/tool_parsers/deepseekv32_tool_parser.py  |  33 --
 vllm/tool_parsers/deepseekv4_tool_parser.py   |  35 +-
 vllm/tool_parsers/kimi_k2_tool_parser.py      |  33 --
 vllm/tool_parsers/openai_tool_parser.py       |  34 --
 vllm/tool_parsers/qwen3coder_tool_parser.py   |  35 +-
 vllm/tool_parsers/structural_tag_registry.py  | 327 ++++++++++++++++++
 12 files changed, 344 insertions(+), 368 deletions(-)
 create mode 100644 vllm/tool_parsers/structural_tag_registry.py

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index a620cf5bbec6..f82d1d739a07 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -10,7 +10,6 @@
 from unittest.mock import MagicMock
 
 import pytest
-from xgrammar import StructuralTag
 
 from tests.tool_parsers.utils import run_tool_extraction_streaming
 from vllm.entrypoints.openai.chat_completion.protocol import (
@@ -18,12 +17,7 @@
     FunctionDefinition,
 )
 from vllm.tokenizers import get_tokenizer
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionRequest,
-    ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
-)
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
 # ---------------------------------------------------------------------------
@@ -867,61 +861,6 @@ def test_convert_param_value_checked_helper(parser):
     assert parser._convert_param_value("null", "object") is None
 
 
-def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    parser = make_parser()
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-    )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    if sample_tools:
-        tool = sample_tools[0]
-        req = ChatCompletionRequest(
-            messages=[],
-            model="m",
-            tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
-        )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-
-
-@pytest.mark.parametrize("include_reasoning", [True, False])
-def test_adjust_request_auto_structural_tag_is_json_string(
-    sample_tools: list[ChatCompletionToolsParam],
-    include_reasoning: bool,
-) -> None:
-    parser = make_parser()
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-        include_reasoning=include_reasoning,
-    )
-    out = parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
-    assert isinstance(out.structured_outputs.structural_tag, str)
-    loaded = json.loads(out.structured_outputs.structural_tag)
-    assert isinstance(loaded, dict)
-
-
 def test_adjust_request_required_uses_json_schema_not_structural_tag(
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index b3e737ddc6a1..095ed7eb17a2 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -167,7 +167,7 @@ def test_streaming_extracts_complete_invokes():
     assert names == ["search"]
     assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"}
 
-def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+def test_get_vllm_registry_structural_tag_returns_structural_tag(
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
     parser = make_parser()
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index d9509b25a946..5552a977d47d 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -6,7 +6,6 @@
 from unittest.mock import MagicMock
 
 import pytest
-from xgrammar import StructuralTag
 
 from tests.tool_parsers.utils import (
     run_tool_extraction,
@@ -15,8 +14,6 @@
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -629,71 +626,3 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
 
 
-def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    parser: KimiK2ToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-    )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    if sample_tools:
-
-        tool = sample_tools[0]
-        req = ChatCompletionRequest(
-            messages=[],
-            model="m",
-            tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
-        )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-
-@pytest.mark.parametrize("include_reasoning", [True, False])
-def test_adjust_request_auto_structural_tag_is_json_string(
-    parser: KimiK2ToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-    include_reasoning: bool,
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-        include_reasoning=include_reasoning,
-    )
-    out = parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
-    assert isinstance(out.structured_outputs.structural_tag, str)
-    loaded = json.loads(out.structured_outputs.structural_tag)
-    assert isinstance(loaded, dict)
-
-
-def test_adjust_request_required_prefers_structural_tag(
-    parser: KimiK2ToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    out = parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index d4536d1c5e3d..b5a365b495ca 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -13,13 +13,10 @@
     SystemContent,
     load_harmony_encoding,
 )
-from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
@@ -307,70 +304,3 @@ def test_extract_tool_calls_with_content(
     assert extracted_info.content == final_content
 
 
-def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
-    openai_tool_parser: OpenAIToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-    )
-    tag = openai_tool_parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    tag = openai_tool_parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
-    
-    if sample_tools:
-        tool = sample_tools[0]
-        req = ChatCompletionRequest(
-            messages=[],
-            model="m",
-            tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
-        )
-    tag = openai_tool_parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)    
-
-@pytest.mark.parametrize("include_reasoning", [True, False])
-def test_adjust_request_auto_structural_tag_is_json_string(
-    openai_tool_parser: OpenAIToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-    include_reasoning: bool,
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="auto",
-        include_reasoning=include_reasoning,
-    )
-    out = openai_tool_parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
-    assert isinstance(out.structured_outputs.structural_tag, str)
-    loaded = json.loads(out.structured_outputs.structural_tag)
-    assert isinstance(loaded, dict)
-
-
-def test_adjust_request_required_prefers_structural_tag(
-    openai_tool_parser: OpenAIToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    out = openai_tool_parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index aad7c2e4b512..6e6842c40785 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1171,7 +1171,7 @@ def test_no_double_serialization_string_args(qwen3_tool_parser):
     assert '\\"hello world\\"' not in raw_arguments
 
 
-def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
+def test_get_vllm_registry_structural_tag_returns_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
@@ -1206,7 +1206,7 @@ def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
         assert isinstance(tag, StructuralTag)
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
-def test_adjust_request_auto_structural_tag_is_json_string(
+def test_adjust_request_auto_uses_vllm_registry_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index e7351199b511..6eb15a444fc0 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -96,11 +96,8 @@ def adjust_request(
             return request
 
         # Step 1 (highest priority for ChatCompletionRequest): apply
-        # xgrammar's built-in structural tag support.
+        # vLLM-owned structural tag support for model-specific tool formats.
         if isinstance(request, ChatCompletionRequest):
-            # XGrammar will support tool_choice="none" in the future.
-            # Currently, we only support tool_choice="auto" and
-            # tool_choice="required".
             need_tool_calling = (
                 request.tool_choice == "auto"
                 or request.tool_choice == "required"
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index e7cc21c8533f..87a1f88cd67a 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -7,12 +7,9 @@
 from typing import Any
 
 import regex as re
-from xgrammar import StructuralTag, get_model_structural_tag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
@@ -324,33 +321,3 @@ def extract_tool_calls_streaming(
 
         return None
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            converted_tool_choice = request.tool_choice.model_dump()
-            converted_tools = []
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    converted_tools.append(tool_dict)
-        else:
-            converted_tool_choice = request.tool_choice
-            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
-
-        return get_model_structural_tag(
-            model="deepseek_v3_2",
-            tools=converted_tools,
-            tool_choice=converted_tool_choice,
-            reasoning=request.include_reasoning,
-        )
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 2e6927f759fc..12791fc7b2ea 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
-from xgrammar import StructuralTag, get_model_structural_tag
+from xgrammar import StructuralTag
+
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
 )
+from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
+from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag
 
 class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     """
@@ -23,30 +23,9 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag | None:
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            converted_tool_choice = request.tool_choice.model_dump()
-            converted_tools = []
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    converted_tools.append(tool_dict)
-        else:
-            converted_tool_choice = request.tool_choice
-            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
-
         return get_model_structural_tag(
             model="deepseek_v4",
-            tools=converted_tools,
-            tool_choice=converted_tool_choice,
+            tools=request.tools,
+            tool_choice=request.tool_choice,
             reasoning=request.include_reasoning,
-        ) 
+        )
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index b580c371a980..31f29900c2b3 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -4,12 +4,9 @@
 from collections.abc import Sequence
 
 import regex as re
-from xgrammar import StructuralTag, get_model_structural_tag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
@@ -278,33 +275,3 @@ def extract_tool_calls_streaming(
             logger.exception("Error trying to handle streaming tool call.")
             return None  # do not stream a delta. skip this token ID.
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            converted_tool_choice = request.tool_choice.model_dump()
-            converted_tools = []
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    converted_tools.append(tool_dict)
-        else:
-            converted_tool_choice = request.tool_choice
-            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
-
-        return get_model_structural_tag(
-            model="kimi",
-            tools=converted_tools,
-            tool_choice=converted_tool_choice,
-            reasoning=request.include_reasoning,
-        )
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index d308c038ae3b..57bfa9915e86 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -4,12 +4,8 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from xgrammar import StructuralTag, get_model_structural_tag
-
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -117,33 +113,3 @@ def extract_tool_calls_streaming(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
         )
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            converted_tool_choice = request.tool_choice.model_dump()
-            converted_tools = []
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    converted_tools.append(tool_dict)
-        else:
-            converted_tool_choice = request.tool_choice
-            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
-
-        return get_model_structural_tag(
-            model="harmony",
-            tools=converted_tools,
-            tool_choice=converted_tool_choice,
-            reasoning=request.include_reasoning,
-        )
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 729aa123bfea..b55c2b1274f9 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -7,12 +7,10 @@
 from typing import Any
 
 import regex as re
-from xgrammar import StructuralTag, get_model_structural_tag
+from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaFunctionCall,
@@ -28,6 +26,7 @@
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag
 from vllm.tool_parsers.utils import find_tool_properties
 
 logger = init_logger(__name__)
@@ -688,33 +687,9 @@ def extract_tool_calls_streaming(
     def get_structural_tag(
         self, request: ChatCompletionRequest
     ) -> StructuralTag | None:
-        def _tool_to_dict(tool: ChatCompletionToolsParam | dict) -> dict:
-            if isinstance(tool, dict):
-                return tool
-            if hasattr(tool, "model_dump"):
-                return tool.model_dump()
-            if hasattr(tool, "dict"):
-                return tool.dict()
-            raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        if not request.tools:
-            return None
-
-        if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam):
-            converted_tool_choice = request.tool_choice.model_dump()
-            converted_tools = []
-            for tool in request.tools:
-                tool_dict = _tool_to_dict(tool)
-                tool_name = tool_dict.get("function", {}).get("name")
-                if tool_name == request.tool_choice.function.name:
-                    converted_tools.append(tool_dict)
-        else:
-            converted_tool_choice = request.tool_choice
-            converted_tools = [_tool_to_dict(tool) for tool in request.tools]
-
         return get_model_structural_tag(
-            model="qwen_coder",
-            tools=converted_tools,
-            tool_choice=converted_tool_choice,
+            model="qwen_3_6",
+            tools=request.tools,
+            tool_choice=request.tool_choice,
             reasoning=request.include_reasoning,
         )
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
new file mode 100644
index 000000000000..3c7e9b7fc4eb
--- /dev/null
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -0,0 +1,327 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable
+from typing import Any, Literal
+
+from xgrammar import StructuralTag
+from xgrammar.structural_tag import (
+    AnyTextFormat,
+    ConstStringFormat,
+    JSONSchemaFormat,
+    QwenXMLParameterFormat,
+    SequenceFormat,
+    TagFormat,
+    TagsWithSeparatorFormat,
+    TriggeredTagsFormat,
+)
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionToolsParam,
+)
+
+SimplifiedToolChoice = Literal["auto", "required", "forced"]  # "forced" = named tool
+ToolChoice = (  # raw tool_choice values accepted from the request
+    Literal["none", "auto", "required"] | ChatCompletionNamedToolChoiceParam | None
+)
+StructuralTagBuilder = Callable[  # (tools, tool_choice, reasoning) -> tag
+    [list[ChatCompletionToolsParam], SimplifiedToolChoice, bool],
+    StructuralTag,
+]
+
+_structural_tag_registry: dict[str, StructuralTagBuilder] = {}  # model name -> builder
+
+
+def register_model_structural_tag(name: str):
+    """Register a vLLM-owned model-specific structural tag builder."""
+
+    def decorator(func: StructuralTagBuilder) -> StructuralTagBuilder:
+        _structural_tag_registry[name] = func  # a later registration overwrites
+        return func  # returned unchanged, so the builder stays directly callable
+
+    return decorator
+
+
+def get_model_structural_tag(
+    model: str,
+    tools: list[ChatCompletionToolsParam] | None,
+    tool_choice: ToolChoice,
+    reasoning: bool,
+) -> StructuralTag | None:
+    """Build a structural tag from vLLM-owned model-specific builders."""
+
+    builder = _structural_tag_registry.get(model)
+    if builder is None:  # model name was never registered; raised even without tools
+        supported = list(_structural_tag_registry.keys())
+        raise ValueError(f"Unknown format type: {model}, supported types: {supported}")
+
+    normalized_tools, simplified_tool_choice = _normalize_tool_choice(
+        tools=tools,
+        tool_choice=tool_choice,
+    )
+    if not normalized_tools:  # no tools, or tool_choice is None / "none"
+        return None
+
+    return builder(normalized_tools, simplified_tool_choice, reasoning)
+
+
+def _normalize_tool_choice(
+    tools: list[ChatCompletionToolsParam] | None,
+    tool_choice: ToolChoice,
+) -> tuple[list[ChatCompletionToolsParam], SimplifiedToolChoice]:
+    """Normalize vLLM ChatCompletion tool_choice for structural tag builders."""
+
+    if not tools:
+        return [], "auto"  # empty tool list; the choice value is a placeholder
+
+    if tool_choice is None or tool_choice == "none":
+        return [], "auto"  # empty tool list; the choice value is a placeholder
+
+    if tool_choice == "auto":
+        return tools, "auto"
+
+    if tool_choice == "required":
+        return tools, "required"
+
+    if isinstance(tool_choice, ChatCompletionNamedToolChoiceParam):
+        tool_name = tool_choice.function.name
+        filtered_tools = [  # keeps every tool with that name, duplicates too
+            tool for tool in tools if tool.function.name == tool_name
+        ]
+        if not filtered_tools:
+            raise ValueError(
+                f"The tool with name '{tool_name}' is not found in the tools list."
+            )
+        return filtered_tools, "forced"
+
+    raise ValueError(f"Unsupported tool_choice for structural tag: {tool_choice}")
+
+
+def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
+    """Return the JSON schema used for constrained tool arguments."""
+
+    if getattr(function, "strict", None) is False:
+        return True  # strict explicitly disabled: boolean schema instead of a dict
+    if function.parameters is None:
+        return True  # no declared parameters: boolean schema instead of a dict
+    return function.parameters
+
+
+def _build_deepseek_dsml_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+    function_calls_begin: str,
+    function_calls_end: str,
+    function_calls_trigger: str,
+) -> StructuralTag:  # shared builder for DSML-style tool-call blocks
+    invoke_begin_prefix = '<｜DSML｜invoke name="'
+    invoke_begin_suffix = '">\n'
+    invoke_end = "</｜DSML｜invoke>\n"
+    tool_calls_prefix = "\n\n"  # prepended in the "forced"/"required" branches
+    think_tag_end = "</think>"
+    think_exclude_tokens = ["<think>", "</think>"]
+    xml_style = "deepseek_xml"
+
+    if tool_choice == "auto":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=parameters,
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                )
+            )
+
+        if tags:  # empty only when called directly with tools=[]
+            function_calling_tags = TagsWithSeparatorFormat(
+                tags=tags,
+                separator="\n",
+                at_least_one=True,
+            )
+            suffix_tag = TriggeredTagsFormat(
+                triggers=[function_calls_trigger],
+                tags=[
+                    TagFormat(
+                        begin=function_calls_begin,
+                        content=function_calling_tags,
+                        end=function_calls_end,
+                    )
+                ],
+                excludes=think_exclude_tokens,
+            )
+        else:
+            suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)
+
+    elif tool_choice == "forced":
+        if not tools:  # NOTE(review): len(tools) > 1 is accepted; tools[0] wins
+            raise ValueError("Forced tool choice must resolve to exactly one tool.")
+        function = tools[0].function
+        suffix_tag = SequenceFormat(
+            elements=[
+                ConstStringFormat(value=tool_calls_prefix + function_calls_begin),
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=_get_function_parameters(function),
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                ),
+                ConstStringFormat(value=function_calls_end),
+            ]
+        )
+
+    elif tool_choice == "required":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=invoke_begin_prefix + function.name + invoke_begin_suffix,
+                    content=JSONSchemaFormat(
+                        json_schema=parameters,
+                        style=xml_style,
+                    ),
+                    end=invoke_end,
+                )
+            )
+        assert len(tags) > 0  # NOTE(review): assert is skipped under python -O
+        suffix_tag = SequenceFormat(
+            elements=[
+                ConstStringFormat(value=tool_calls_prefix + function_calls_begin),
+                TagsWithSeparatorFormat(
+                    tags=tags,
+                    separator="\n",
+                    at_least_one=True,
+                ),
+                ConstStringFormat(value=function_calls_end),
+            ]
+        )
+
+    if not reasoning:  # otherwise any text up to "</think>" is allowed first
+        return StructuralTag(format=suffix_tag)
+
+    prefix_tag = TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end)
+    return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
+
+
+@register_model_structural_tag("deepseek_v4")
+def get_deepseek_v4_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+) -> StructuralTag:
+    """Build DeepSeek V4 structural tags."""
+
+    return _build_deepseek_dsml_structural_tag(
+        tools=tools,
+        tool_choice=tool_choice,
+        reasoning=reasoning,
+        function_calls_begin="<｜DSML｜tool_calls>\n",
+        function_calls_end="</｜DSML｜tool_calls>",
+        function_calls_trigger="<｜DSML｜tool_calls>",  # begin marker minus its newline
+    )
+
+
+def _build_qwen_xml_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+    include_reasoning_prefix: bool,
+) -> StructuralTag:  # shared builder for <tool_call>/<function=...> tool calls
+    tool_call_begin_prefix = "<tool_call>\n<function="
+    tool_call_begin_suffix = ">\n"
+    tool_call_end = "\n</function>\n</tool_call>"
+    tool_call_trigger = "<tool_call>\n<function="
+    think_tag_end = "</think>"
+    think_suffix = "\n\n"  # fixed text that must follow "</think>"
+    think_exclude_tokens = ["<think>", "</think>"]
+
+    if tool_choice == "auto":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+                    content=QwenXMLParameterFormat(json_schema=parameters),
+                    end=tool_call_end,
+                )
+            )
+
+        if tags:  # empty only when called directly with tools=[]
+            suffix_tag = TriggeredTagsFormat(
+                triggers=[tool_call_trigger],
+                tags=tags,
+                excludes=think_exclude_tokens,
+            )
+        else:
+            suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)
+
+    elif tool_choice == "forced":
+        if not tools:  # NOTE(review): len(tools) > 1 is accepted; tools[0] wins
+            raise ValueError("Forced tool choice must resolve to exactly one tool.")
+        function = tools[0].function
+        suffix_tag = TagFormat(
+            begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+            content=QwenXMLParameterFormat(
+                json_schema=_get_function_parameters(function)
+            ),
+            end=tool_call_end,
+        )
+
+    elif tool_choice == "required":
+        tags = []
+        for tool in tools:
+            function = tool.function
+            parameters = _get_function_parameters(function)
+            tags.append(
+                TagFormat(
+                    begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
+                    content=QwenXMLParameterFormat(json_schema=parameters),
+                    end=tool_call_end,
+                )
+            )
+        assert len(tags) > 0  # NOTE(review): assert is skipped under python -O
+        suffix_tag = TagsWithSeparatorFormat(
+            tags=tags,
+            separator="",  # consecutive calls are emitted back to back
+            at_least_one=True,
+        )
+
+    if not include_reasoning_prefix or not reasoning:  # both needed for a prefix
+        return StructuralTag(format=suffix_tag)
+
+    prefix_tag = SequenceFormat(
+        elements=[
+            TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end),
+            ConstStringFormat(value=think_suffix),
+        ]
+    )
+    return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
+
+
+@register_model_structural_tag("qwen_3_6")
+def get_qwen_3_6_structural_tag(
+    tools: list[ChatCompletionToolsParam],
+    tool_choice: SimplifiedToolChoice,
+    reasoning: bool,
+) -> StructuralTag:
+    """Build Qwen3.6 structural tags."""
+
+    return _build_qwen_xml_structural_tag(
+        tools=tools,
+        tool_choice=tool_choice,
+        reasoning=reasoning,
+        include_reasoning_prefix=True,  # so `reasoning` alone decides the prefix
+    )

From 9ae54785ef4157c619f47da59c2918f0ad10ed87 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 05:16:06 -0400
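Subject: [PATCH 25/43] Drop non-target structural tag changes

Remove the structural tag leftovers from the parsers this series does not
target (DeepSeek V3.2, Kimi K2, OpenAI):

- tests: drop the `sample_tools` fixtures and their imports, the DeepSeek
  V3.2 `adjust_request` structural tag test, and the `include_reasoning`
  attributes set on the Kimi K2 mock requests.
- parsers: drop the trailing blank lines at the end of the three parser
  modules and the inline comment on the Kimi K2 streaming `return None`.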

---
 .../test_deepseekv32_tool_parser.py           | 53 -------------------
 .../tool_parsers/test_kimi_k2_tool_parser.py  | 46 ----------------
 tests/tool_parsers/test_openai_tool_parser.py | 43 ---------------
 vllm/tool_parsers/deepseekv32_tool_parser.py  |  1 -
 vllm/tool_parsers/kimi_k2_tool_parser.py      |  3 +-
 vllm/tool_parsers/openai_tool_parser.py       |  1 -
 6 files changed, 1 insertion(+), 146 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index f82d1d739a07..c547795e7bf2 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -17,7 +17,6 @@
     FunctionDefinition,
 )
 from vllm.tokenizers import get_tokenizer
-from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 
 # ---------------------------------------------------------------------------
@@ -49,43 +48,6 @@ def make_request(tools=None) -> MagicMock:
     return req
 
 
-@pytest.fixture
-def sample_tools() -> list[ChatCompletionToolsParam]:
-    return [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "get_current_weather",
-                "description": "Get the current weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {"type": "string", "description": "The city name"},
-                        "state": {"type": "string", "description": "The state code"},
-                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
-                    },
-                    "required": ["city", "state"],
-                },
-            },
-        ),
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "calculate_area",
-                "description": "Calculate area of a shape",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "shape": {"type": "string"},
-                        "dimensions": {"type": "object"},
-                        "precision": {"type": "integer"},
-                    },
-                },
-            },
-        ),
-    ]
-
-
 # Shorthand for the DSML tokens used throughout
 FC_START = "<｜DSML｜function_calls>"
 FC_END = "</｜DSML｜function_calls>"
@@ -859,18 +821,3 @@ def test_convert_param_value_checked_helper(parser):
     assert parser._convert_param_value("null", "integer") is None
     assert parser._convert_param_value("null", "boolean") is None
     assert parser._convert_param_value("null", "object") is None
-
-
-def test_adjust_request_required_uses_json_schema_not_structural_tag(
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    parser = make_parser()
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=sample_tools,
-        tool_choice="required",
-    )
-    out = parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is None
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 5552a977d47d..b56032b91c17 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -11,10 +11,6 @@
     run_tool_extraction,
     run_tool_extraction_streaming,
 )
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionRequest,
-    ChatCompletionToolsParam,
-)
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
 )
@@ -24,43 +20,6 @@
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
 
-@pytest.fixture
-def sample_tools() -> list[ChatCompletionToolsParam]:
-    return [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "get_current_weather",
-                "description": "Get the current weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {"type": "string", "description": "The city name"},
-                        "state": {"type": "string", "description": "The state code"},
-                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
-                    },
-                    "required": ["city", "state"],
-                },
-            },
-        ),
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "calculate_area",
-                "description": "Calculate area of a shape",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "shape": {"type": "string"},
-                        "dimensions": {"type": "object"},
-                        "precision": {"type": "integer"},
-                    },
-                },
-            },
-        ),
-    ]
-
-
 @pytest.fixture(scope="module")
 def kimi_k2_tokenizer():
     return get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True)
@@ -504,7 +463,6 @@ def test_sets_skip_special_tokens_false(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = [{"type": "function", "function": {"name": "test"}}]
         request.tool_choice = "auto"
-        request.include_reasoning = True
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
@@ -514,7 +472,6 @@ def test_no_change_when_tool_choice_none(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = [{"type": "function", "function": {"name": "test"}}]
         request.tool_choice = "none"
-        request.include_reasoning = True
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
@@ -524,7 +481,6 @@ def test_no_change_when_no_tools(self, parser):
         request = MagicMock(spec=ChatCompletionRequest)
         request.tools = None
         request.tool_choice = "auto"
-        request.include_reasoning = False
         request.skip_special_tokens = True
 
         result = parser.adjust_request(request)
@@ -624,5 +580,3 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
         assert len(rec.tool_calls) == 1
         assert rec.tool_calls[0].function.name == "get_weather"
         assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}
-
-
diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index b5a365b495ca..e9e39ef4c029 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -14,10 +14,6 @@
     load_harmony_encoding,
 )
 
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionRequest,
-    ChatCompletionToolsParam,
-)
 from vllm.entrypoints.openai.engine.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
 from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser
@@ -41,43 +37,6 @@ def harmony_encoding():
     return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 
 
-@pytest.fixture
-def sample_tools() -> list[ChatCompletionToolsParam]:
-    return [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "get_current_weather",
-                "description": "Get the current weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {"type": "string", "description": "The city name"},
-                        "state": {"type": "string", "description": "The state code"},
-                        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
-                    },
-                    "required": ["city", "state"],
-                },
-            },
-        ),
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "calculate_area",
-                "description": "Calculate area of a shape",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "shape": {"type": "string"},
-                        "dimensions": {"type": "object"},
-                        "precision": {"type": "integer"},
-                    },
-                },
-            },
-        ),
-    ]
-
-
 def assert_tool_calls(
     actual_tool_calls: list[ToolCall],
     expected_tool_calls: list[ToolCall],
@@ -302,5 +261,3 @@ def test_extract_tool_calls_with_content(
     ]
     assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
     assert extracted_info.content == final_content
-
-
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index 87a1f88cd67a..02182e22935a 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -320,4 +320,3 @@ def extract_tool_calls_streaming(
             return DeltaMessage(content="")
 
         return None
-
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index 31f29900c2b3..7ddd8fa7a80d 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -273,5 +273,4 @@ def extract_tool_calls_streaming(
 
         except Exception:
             logger.exception("Error trying to handle streaming tool call.")
-            return None  # do not stream a delta. skip this token ID.
-
+            return None
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 57bfa9915e86..ee6dd70718b3 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -112,4 +112,3 @@ def extract_tool_calls_streaming(
         raise NotImplementedError(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
         )
-

From d962b8084e1c5554c7d0438ee82a51a90fca8f03 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 05:17:58 -0400
Subject: [PATCH 26/43] Centralize structural tag xgrammar imports

---
 vllm/tool_parsers/abstract_tool_parser.py   | 4 +---
 vllm/tool_parsers/deepseekv4_tool_parser.py | 4 +---
 vllm/tool_parsers/qwen3coder_tool_parser.py | 3 +--
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 6eb15a444fc0..81bf0bb0d2f3 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -7,8 +7,6 @@
 from functools import cached_property
 import json
 
-from xgrammar import StructuralTag
-
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
     ResponseTextConfig,
@@ -147,7 +145,7 @@ def adjust_request(
     
     def get_structural_tag(
         self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
+    ):
         return None
 
     def extract_tool_calls(
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 12791fc7b2ea..69b35bab526e 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from xgrammar import StructuralTag
-
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
 )
@@ -22,7 +20,7 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     
     def get_structural_tag(
         self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
+    ):
         return get_model_structural_tag(
             model="deepseek_v4",
             tools=request.tools,
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index b55c2b1274f9..f548fdb07e0e 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -7,7 +7,6 @@
 from typing import Any
 
 import regex as re
-from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -686,7 +685,7 @@ def extract_tool_calls_streaming(
 
     def get_structural_tag(
         self, request: ChatCompletionRequest
-    ) -> StructuralTag | None:
+    ):
         return get_model_structural_tag(
             model="qwen_3_6",
             tools=request.tools,

From 7d908320ee04d3429f3615b8c5e9df2f426df8b6 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 05:36:19 -0400
Subject: [PATCH 27/43] Rename Qwen structural tag key

---
 vllm/tool_parsers/qwen3coder_tool_parser.py  |  2 +-
 vllm/tool_parsers/structural_tag_registry.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index f548fdb07e0e..a896606cf049 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -687,7 +687,7 @@ def get_structural_tag(
         self, request: ChatCompletionRequest
     ):
         return get_model_structural_tag(
-            model="qwen_3_6",
+            model="qwen_3_5",
             tools=request.tools,
             tool_choice=request.tool_choice,
             reasoning=request.include_reasoning,
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 3c7e9b7fc4eb..ad5da6e9cf3f 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -86,9 +86,7 @@ def _normalize_tool_choice(
 
     if isinstance(tool_choice, ChatCompletionNamedToolChoiceParam):
         tool_name = tool_choice.function.name
-        filtered_tools = [
-            tool for tool in tools if tool.function.name == tool_name
-        ]
+        filtered_tools = [tool for tool in tools if tool.function.name == tool_name]
         if not filtered_tools:
             raise ValueError(
                 f"The tool with name '{tool_name}' is not found in the tools list."
@@ -311,13 +309,17 @@ def _build_qwen_xml_structural_tag(
     return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
 
 
-@register_model_structural_tag("qwen_3_6")
-def get_qwen_3_6_structural_tag(
+@register_model_structural_tag("qwen_3_5")
+def get_qwen_3_5_structural_tag(
     tools: list[ChatCompletionToolsParam],
     tool_choice: SimplifiedToolChoice,
     reasoning: bool,
 ) -> StructuralTag:
-    """Build Qwen3.6 structural tags."""
+    """Build Qwen XML structural tags.
+
+    This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with
+    Qwen variants that use the same XML tool-call format.
+    """
 
     return _build_qwen_xml_structural_tag(
         tools=tools,

From 760e5af5ec4c006f57a5dbf5e0f1ef29c4c36fd2 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 05:40:48 -0400
Subject: [PATCH 28/43] Inline structural tag builders

---
 vllm/tool_parsers/structural_tag_registry.py | 61 ++++++--------------
 1 file changed, 18 insertions(+), 43 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index ad5da6e9cf3f..108546f0c3d1 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+# Model-specific structural tag builders adapted from XGrammar's
+# builtin structural tag implementations:
+# https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py
+
 from collections.abc import Callable
 from typing import Any, Literal
 
@@ -106,18 +110,21 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
     return function.parameters
 
 
-def _build_deepseek_dsml_structural_tag(
+@register_model_structural_tag("deepseek_v4")
+def get_deepseek_v4_structural_tag(
     tools: list[ChatCompletionToolsParam],
     tool_choice: SimplifiedToolChoice,
     reasoning: bool,
-    function_calls_begin: str,
-    function_calls_end: str,
-    function_calls_trigger: str,
 ) -> StructuralTag:
+    """Build DeepSeek V4 structural tags."""
+
     invoke_begin_prefix = '<｜DSML｜invoke name="'
     invoke_begin_suffix = '">\n'
     invoke_end = "</｜DSML｜invoke>\n"
     tool_calls_prefix = "\n\n"
+    function_calls_begin = "<｜DSML｜tool_calls>\n"
+    function_calls_end = "</｜DSML｜tool_calls>"
+    function_calls_trigger = "<｜DSML｜tool_calls>"
     think_tag_end = "</think>"
     think_exclude_tokens = ["<think>", "</think>"]
     xml_style = "deepseek_xml"
@@ -212,30 +219,18 @@ def _build_deepseek_dsml_structural_tag(
     return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
 
 
-@register_model_structural_tag("deepseek_v4")
-def get_deepseek_v4_structural_tag(
+@register_model_structural_tag("qwen_3_5")
+def get_qwen_3_5_structural_tag(
     tools: list[ChatCompletionToolsParam],
     tool_choice: SimplifiedToolChoice,
     reasoning: bool,
 ) -> StructuralTag:
-    """Build DeepSeek V4 structural tags."""
-
-    return _build_deepseek_dsml_structural_tag(
-        tools=tools,
-        tool_choice=tool_choice,
-        reasoning=reasoning,
-        function_calls_begin="<｜DSML｜tool_calls>\n",
-        function_calls_end="</｜DSML｜tool_calls>",
-        function_calls_trigger="<｜DSML｜tool_calls>",
-    )
+    """Build Qwen XML structural tags.
 
+    This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with
+    Qwen variants that use the same XML tool-call format.
+    """
 
-def _build_qwen_xml_structural_tag(
-    tools: list[ChatCompletionToolsParam],
-    tool_choice: SimplifiedToolChoice,
-    reasoning: bool,
-    include_reasoning_prefix: bool,
-) -> StructuralTag:
     tool_call_begin_prefix = "<tool_call>\n<function="
     tool_call_begin_suffix = ">\n"
     tool_call_end = "\n</function>\n</tool_call>"
@@ -297,7 +292,7 @@ def _build_qwen_xml_structural_tag(
             at_least_one=True,
         )
 
-    if not include_reasoning_prefix or not reasoning:
+    if not reasoning:
         return StructuralTag(format=suffix_tag)
 
     prefix_tag = SequenceFormat(
@@ -307,23 +302,3 @@ def _build_qwen_xml_structural_tag(
         ]
     )
     return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
-
-
-@register_model_structural_tag("qwen_3_5")
-def get_qwen_3_5_structural_tag(
-    tools: list[ChatCompletionToolsParam],
-    tool_choice: SimplifiedToolChoice,
-    reasoning: bool,
-) -> StructuralTag:
-    """Build Qwen XML structural tags.
-
-    This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with
-    Qwen variants that use the same XML tool-call format.
-    """
-
-    return _build_qwen_xml_structural_tag(
-        tools=tools,
-        tool_choice=tool_choice,
-        reasoning=reasoning,
-        include_reasoning_prefix=True,
-    )

From 4bd7d7217b6fd0789380d3e8595a93eb2b899c25 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 06:38:14 -0400
Subject: [PATCH 29/43] Stop Qwen 3.5 structural tag after first tool call

Pass `stop_after_first=True` to the `TriggeredTagsFormat` used in the
`auto` branch of `get_qwen_3_5_structural_tag` so the constrained
generation closes the tool-call section once a single call is emitted,
matching the parser's expectation of one tool call per response.

Also add a short comment in `ToolParser.adjust_request` clarifying the
purpose of the JSON-schema-from-tools branch.

Signed-off-by: Ubospica <ubospica@gmail.com>
---
 vllm/tool_parsers/abstract_tool_parser.py    | 1 +
 vllm/tool_parsers/structural_tag_registry.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 81bf0bb0d2f3..310752d696aa 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -114,6 +114,7 @@ def adjust_request(
         json_schema_from_tool = get_json_schema_from_tools(
             tool_choice=request.tool_choice, tools=request.tools
         )
+        # Set structured output params for tool calling
         if json_schema_from_tool is not None:
             if isinstance(request, ChatCompletionRequest):
                 # tool_choice: "Forced Function" or "required" will override
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 108546f0c3d1..7fae6cf59e96 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -257,6 +257,7 @@ def get_qwen_3_5_structural_tag(
                 triggers=[tool_call_trigger],
                 tags=tags,
                 excludes=think_exclude_tokens,
+                stop_after_first=True,
             )
         else:
             suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)

From 1e94b999e139dfcfddb6ef03dcb3dba61a9eea51 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 07:14:42 -0400
Subject: [PATCH 30/43] Fix Qwen structural tag parsing

---
 vllm/tool_parsers/qwen3coder_tool_parser.py  |  6 +++++-
 vllm/tool_parsers/structural_tag_registry.py | 19 ++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index a896606cf049..142a406308b1 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -32,6 +32,8 @@
 
 
 class Qwen3CoderToolParser(ToolParser):
+    supports_required_and_named: bool = False
+
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)
 
@@ -690,5 +692,7 @@ def get_structural_tag(
             model="qwen_3_5",
             tools=request.tools,
             tool_choice=request.tool_choice,
-            reasoning=request.include_reasoning,
+            # The reasoning parser gates structured output until reasoning ends.
+            # Constrain only the post-reasoning tool-call suffix here.
+            reasoning=False,
         )
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 7fae6cf59e96..276b0e77db8a 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -13,6 +13,7 @@
     AnyTextFormat,
     ConstStringFormat,
     JSONSchemaFormat,
+    OrFormat,
     QwenXMLParameterFormat,
     SequenceFormat,
     TagFormat,
@@ -253,11 +254,19 @@ def get_qwen_3_5_structural_tag(
             )
 
         if tags:
-            suffix_tag = TriggeredTagsFormat(
-                triggers=[tool_call_trigger],
-                tags=tags,
-                excludes=think_exclude_tokens,
-                stop_after_first=True,
+            # In auto mode, allow either text-only output or exactly one XML
+            # tool call. TriggeredTagsFormat can permit free text after a tag,
+            # which allows repeated tool calls for Qwen3.5.
+            suffix_tag = OrFormat(
+                elements=[
+                    AnyTextFormat(excludes=think_exclude_tokens + ["<tool"]),
+                    TagsWithSeparatorFormat(
+                        tags=tags,
+                        separator="",
+                        at_least_one=True,
+                        stop_after_first=True,
+                    ),
+                ]
             )
         else:
             suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)

From deaf07aa1c3eef0ebdfb84db198e7a49e2093bb8 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 08:22:35 -0400
Subject: [PATCH 31/43] Normalize tool schemas for Qwen structural tags

---
 vllm/tool_parsers/structural_tag_registry.py | 51 +++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 276b0e77db8a..5febcd0e192e 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -108,7 +108,56 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
         return True
     if function.parameters is None:
         return True
-    return function.parameters
+    return _normalize_json_schema_for_xgrammar(function.parameters)
+
+
+def _normalize_json_schema_for_xgrammar(schema: Any) -> Any:
+    """Normalize common non-standard tool schema aliases to JSON Schema."""
+
+    if isinstance(schema, list):
+        return [_normalize_json_schema_for_xgrammar(item) for item in schema]
+    if not isinstance(schema, dict):
+        return schema
+
+    metadata_keys = {
+        "description",
+        "default",
+        "examples",
+        "title",
+    }
+    normalized = {
+        key: _normalize_json_schema_for_xgrammar(value)
+        for key, value in schema.items()
+        if key not in metadata_keys
+    }
+
+    schema_type = normalized.get("type")
+    type_aliases = {
+        "dict": "object",
+        "map": "object",
+        "list": "array",
+        "tuple": "array",
+        "str": "string",
+        "int": "integer",
+        "float": "number",
+        "bool": "boolean",
+    }
+    if isinstance(schema_type, str):
+        normalized["type"] = type_aliases.get(schema_type, schema_type)
+    elif isinstance(schema_type, list):
+        normalized["type"] = [
+            type_aliases.get(item, item) if isinstance(item, str) else item
+            for item in schema_type
+        ]
+
+    enum_values = normalized.get("enum")
+    if isinstance(enum_values, list) and any(
+        isinstance(value, str) and ("/" in value or "\\" in value)
+        for value in enum_values
+    ):
+        normalized.pop("enum", None)
+
+    return normalized
 
 
 @register_model_structural_tag("deepseek_v4")

From e4285c749e05474aca512aae3b7c558f8fe75eb1 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 08:41:13 -0400
Subject: [PATCH 32/43] Allow multiple Qwen structural tool calls

---
 vllm/tool_parsers/structural_tag_registry.py | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 5febcd0e192e..13988ee178e8 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -13,7 +13,6 @@
     AnyTextFormat,
     ConstStringFormat,
     JSONSchemaFormat,
-    OrFormat,
     QwenXMLParameterFormat,
     SequenceFormat,
     TagFormat,
@@ -303,19 +302,10 @@ def get_qwen_3_5_structural_tag(
             )
 
         if tags:
-            # In auto mode, allow either text-only output or exactly one XML
-            # tool call. TriggeredTagsFormat can permit free text after a tag,
-            # which allows repeated tool calls for Qwen3.5.
-            suffix_tag = OrFormat(
-                elements=[
-                    AnyTextFormat(excludes=think_exclude_tokens + ["<tool"]),
-                    TagsWithSeparatorFormat(
-                        tags=tags,
-                        separator="",
-                        at_least_one=True,
-                        stop_after_first=True,
-                    ),
-                ]
+            suffix_tag = TriggeredTagsFormat(
+                triggers=[tool_call_trigger],
+                tags=tags,
+                excludes=think_exclude_tokens,
             )
         else:
             suffix_tag = AnyTextFormat(excludes=think_exclude_tokens)

From e6ec2365b8b9e45dfcff7e768b46748638ce1eb7 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Mon, 4 May 2026 00:23:03 +0800
Subject: [PATCH 33/43] format.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv4_tool_parser.py            | 22 +++++++++++--------
 .../test_qwen3coder_tool_parser.py            | 12 ++++++----
 vllm/tool_parsers/abstract_tool_parser.py     | 11 ++++------
 vllm/tool_parsers/deepseekv4_tool_parser.py   |  7 +++---
 vllm/tool_parsers/qwen3coder_tool_parser.py   |  4 +---
 5 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index 095ed7eb17a2..901bae7ec283 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -5,17 +5,18 @@
 
 import json
 from unittest.mock import MagicMock
+
 import pytest
+from xgrammar import StructuralTag
 
-from vllm.tool_parsers import ToolParserManager
-from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedFunction,
     ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedFunction,
 )
-from xgrammar import StructuralTag
+from vllm.tool_parsers import ToolParserManager
+from vllm.tool_parsers.deepseekv4_tool_parser import DeepSeekV4ToolParser
 
 MOCK_TOKENIZER = MagicMock()
 MOCK_TOKENIZER.get_vocab.return_value = {}
@@ -27,6 +28,7 @@
 PARAM_START = '<｜DSML｜parameter name="'
 PARAM_END = "</｜DSML｜parameter>"
 
+
 @pytest.fixture
 def sample_tools() -> list[ChatCompletionToolsParam]:
     return [
@@ -64,7 +66,6 @@ def sample_tools() -> list[ChatCompletionToolsParam]:
     ]
 
 
-
 def make_parser(tools=None) -> DeepSeekV4ToolParser:
     return DeepSeekV4ToolParser(MOCK_TOKENIZER, tools=tools)
 
@@ -167,6 +168,7 @@ def test_streaming_extracts_complete_invokes():
     assert names == ["search"]
     assert json.loads(reconstruct_args(deltas)) == {"query": "deepseek v4"}
 
+
 def test_get_vllm_registry_structural_tag_returns_structural_tag(
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
@@ -179,7 +181,7 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
     )
     tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
-    
+
     req = ChatCompletionRequest(
         messages=[],
         model="m",
@@ -188,14 +190,16 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
     )
     tag = parser.get_structural_tag(req)
     assert isinstance(tag, StructuralTag)
-    
+
     if sample_tools:
         tool = sample_tools[0]
         req = ChatCompletionRequest(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice=ChatCompletionNamedToolChoiceParam(
+                function=ChatCompletionNamedFunction(name=tool.function.name)
+            ),
         )
     tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
\ No newline at end of file
+    assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 6e6842c40785..8f4a7c31eddd 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -9,10 +9,10 @@
 from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedFunction,
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionNamedFunction,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -127,7 +127,8 @@ def _as_chat_completion_tools(
                         "description": tool.description,
                         "parameters": tool.parameters,
                     },
-                ))
+                )
+            )
     return normalized
 
 
@@ -1200,11 +1201,14 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=request_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(function=ChatCompletionNamedFunction(name=tool.function.name)),
+            tool_choice=ChatCompletionNamedToolChoiceParam(
+                function=ChatCompletionNamedFunction(name=tool.function.name)
+            ),
         )
         tag = qwen3_tool_parser.get_structural_tag(req)
         assert isinstance(tag, StructuralTag)
 
+
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_uses_vllm_registry_structural_tag(
     qwen3_tool_parser: Qwen3CoderToolParser,
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index 310752d696aa..c8b43f79d588 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -2,10 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import importlib
+import json
 import os
 from collections.abc import Callable, Sequence
 from functools import cached_property
-import json
 
 from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
@@ -14,9 +14,9 @@
 from openai.types.responses.function_tool import FunctionTool
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
     ChatCompletionToolsParam,
-    ChatCompletionNamedToolChoiceParam,
 )
 from vllm.entrypoints.openai.engine.protocol import (
     DeltaMessage,
@@ -88,7 +88,6 @@ def adjust_request(
         self,
         request: ChatCompletionRequest | ResponsesRequest,
     ) -> ChatCompletionRequest | ResponsesRequest:
-
         # If there are no tools, return the request as is.
         if not request.tools:
             return request
@@ -143,10 +142,8 @@ def adjust_request(
             return request
 
         return request
-    
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ):
+
+    def get_structural_tag(self, request: ChatCompletionRequest):
         return None
 
     def extract_tool_calls(
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 69b35bab526e..8d1df704a3a2 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -7,6 +7,7 @@
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
 from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag
 
+
 class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
     """
     DeepSeek V4 DSML tool parser.
@@ -17,10 +18,8 @@ class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
 
     tool_call_start_token: str = "<｜DSML｜tool_calls>"
     tool_call_end_token: str = "</｜DSML｜tool_calls>"
-    
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ):
+
+    def get_structural_tag(self, request: ChatCompletionRequest):
         return get_model_structural_tag(
             model="deepseek_v4",
             tools=request.tools,
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 142a406308b1..1204ea6fba38 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -685,9 +685,7 @@ def extract_tool_calls_streaming(
 
         return None
 
-    def get_structural_tag(
-        self, request: ChatCompletionRequest
-    ):
+    def get_structural_tag(self, request: ChatCompletionRequest):
         return get_model_structural_tag(
             model="qwen_3_5",
             tools=request.tools,

From 260dea23ae5503cbfcb8896662508cdd46c0de4a Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Mon, 4 May 2026 00:50:45 +0800
Subject: [PATCH 34/43] add the requirement.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 requirements/test/rocm.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt
index 05b36860d753..8445634ded40 100644
--- a/requirements/test/rocm.txt
+++ b/requirements/test/rocm.txt
@@ -42,6 +42,8 @@ anyio==4.13.0
     #   sse-starlette
     #   starlette
     #   watchfiles
+apache-tvm-ffi==0.1.10
+    # via xgrammar
 arctic-inference==0.1.1
     # via -r requirements/test/rocm.in
 argcomplete==3.6.3
@@ -1264,6 +1266,7 @@ typing-extensions==4.15.0
     #   alembic
     #   anthropic
     #   anyio
+    #   apache-tvm-ffi
     #   azure-core
     #   azure-identity
     #   azure-storage-blob

From c6f98c564a4832defa76a0c83e84d1e6fe50249a Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Mon, 4 May 2026 02:57:31 +0800
Subject: [PATCH 35/43] avoid overwriting user's setting.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

remove the protocol change.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

finish the env.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

doc.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

doc.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix the logic.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

avoid overwriting user's setting.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

remove the protocol change.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix test.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

finish the env.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

doc.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

doc.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

fix the logic.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>

test the env.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 .../test_deepseekv4_tool_parser.py            | 10 +++++-----
 .../test_qwen3coder_tool_parser.py            |  6 +++---
 .../openai/chat_completion/protocol.py        |  8 ++------
 vllm/envs.py                                  |  7 +++++++
 vllm/tool_parsers/abstract_tool_parser.py     | 19 +++++++++++++------
 5 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/tests/tool_parsers/test_deepseekv4_tool_parser.py b/tests/tool_parsers/test_deepseekv4_tool_parser.py
index 901bae7ec283..cc77a1f77756 100644
--- a/tests/tool_parsers/test_deepseekv4_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv4_tool_parser.py
@@ -197,9 +197,9 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=sample_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(
-                function=ChatCompletionNamedFunction(name=tool.function.name)
-            ),
         )
-    tag = parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
+        req.tool_choice = ChatCompletionNamedToolChoiceParam(
+            function=ChatCompletionNamedFunction(name=tool.function.name)
+        )
+        tag = parser.get_structural_tag(req)
+        assert isinstance(tag, StructuralTag)
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 8f4a7c31eddd..d50a7b9b769e 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1201,9 +1201,9 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
             messages=[],
             model="m",
             tools=request_tools,
-            tool_choice=ChatCompletionNamedToolChoiceParam(
-                function=ChatCompletionNamedFunction(name=tool.function.name)
-            ),
+        )
+        req.tool_choice = ChatCompletionNamedToolChoiceParam(
+            function=ChatCompletionNamedFunction(name=tool.function.name)
         )
         tag = qwen3_tool_parser.get_structural_tag(req)
         assert isinstance(tag, StructuralTag)
diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py
index 140a2fe566da..c92cc13da01f 100644
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -739,12 +739,8 @@ def check_tool_usage(cls, data):
 
             # make sure that tool choice is either a named tool
             # OR that it's set to "auto" or "required"
-            if (
-                data["tool_choice"] not in ["auto", "required"]
-                and not isinstance(data["tool_choice"], dict)
-                and not isinstance(
-                    data["tool_choice"], ChatCompletionNamedToolChoiceParam
-                )
+            if data["tool_choice"] not in ["auto", "required"] and not isinstance(
+                data["tool_choice"], dict
             ):
                 raise ValueError(
                     f"Invalid value for `tool_choice`: {data['tool_choice']}! "
diff --git a/vllm/envs.py b/vllm/envs.py
index 4191cd6a9743..ec777e2bf9a4 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -226,6 +226,7 @@
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_SYSTEM_START_DATE: str | None = None
     VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False
+    VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = True
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
     VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
@@ -1591,6 +1592,12 @@ def _get_or_set_default() -> str:
     "VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY": lambda: bool(
         int(os.getenv("VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY", "0"))
     ),
+    # When 1,the model structural tags will be used to enforce the model
+    # output conforming to the model's tool-calling format and schema.
+    # Default 1 (on).
+    "VLLM_ENFORCE_STRICT_TOOL_CALLING": lambda: bool(
+        int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "1"))
+    ),
     # Add optional custom scopes for profiling, disable to avoid overheads
     "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(
         int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index c8b43f79d588..c3438082a72d 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -25,6 +25,7 @@
 from vllm.entrypoints.openai.responses.protocol import (
     ResponsesRequest,
 )
+from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING
 from vllm.logger import init_logger
 from vllm.sampling_params import (
     StructuredOutputsParams,
@@ -94,7 +95,10 @@ def adjust_request(
 
         # Step 1 (highest priority for ChatCompletionRequest): apply
         # vLLM-owned structural tag support for model-specific tool formats.
-        if isinstance(request, ChatCompletionRequest):
+        if (
+            isinstance(request, ChatCompletionRequest)
+            and VLLM_ENFORCE_STRICT_TOOL_CALLING
+        ):
             need_tool_calling = (
                 request.tool_choice == "auto"
                 or request.tool_choice == "required"
@@ -103,9 +107,14 @@ def adjust_request(
             if need_tool_calling:
                 structure_tag = self.get_structural_tag(request)
                 if structure_tag is not None:
-                    request.structured_outputs = StructuredOutputsParams(
-                        structural_tag=json.dumps(structure_tag.model_dump()),
-                    )
+                    if request.structured_outputs is None:
+                        request.structured_outputs = StructuredOutputsParams(
+                            structural_tag=json.dumps(structure_tag.model_dump()),
+                        )
+                    else:
+                        request.structured_outputs.structural_tag = json.dumps(
+                            structure_tag.model_dump()
+                        )
                     return request
 
         # Step 2: set structured output params when tool constraints are
@@ -139,8 +148,6 @@ def adjust_request(
                     )
                 )
 
-            return request
-
         return request
 
     def get_structural_tag(self, request: ChatCompletionRequest):

From 306d2204b0908e5f1b70bfd6bce57619dcf02d55 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Sun, 3 May 2026 21:04:23 -0400
Subject: [PATCH 36/43] a

---
 vllm/tool_parsers/structural_tag_registry.py | 54 ++------------------
 1 file changed, 3 insertions(+), 51 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 13988ee178e8..20b4ff9652db 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -101,62 +101,14 @@ def _normalize_tool_choice(
 
 
 def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
-    """Return the JSON schema used for constrained tool arguments."""
+    """Return the JSON schema used for constrained tool arguments.
+    """
 
     if getattr(function, "strict", None) is False:
         return True
     if function.parameters is None:
         return True
-    return _normalize_json_schema_for_xgrammar(function.parameters)
-
-
-def _normalize_json_schema_for_xgrammar(schema: Any) -> Any:
-    """Normalize common non-standard tool schema aliases to JSON Schema."""
-
-    if isinstance(schema, list):
-        return [_normalize_json_schema_for_xgrammar(item) for item in schema]
-    if not isinstance(schema, dict):
-        return schema
-
-    metadata_keys = {
-        "description",
-        "default",
-        "examples",
-        "title",
-    }
-    normalized = {
-        key: _normalize_json_schema_for_xgrammar(value)
-        for key, value in schema.items()
-        if key not in metadata_keys
-    }
-
-    schema_type = normalized.get("type")
-    type_aliases = {
-        "dict": "object",
-        "map": "object",
-        "list": "array",
-        "tuple": "array",
-        "str": "string",
-        "int": "integer",
-        "float": "number",
-        "bool": "boolean",
-    }
-    if isinstance(schema_type, str):
-        normalized["type"] = type_aliases.get(schema_type, schema_type)
-    elif isinstance(schema_type, list):
-        normalized["type"] = [
-            type_aliases.get(item, item) if isinstance(item, str) else item
-            for item in schema_type
-        ]
-
-    enum_values = normalized.get("enum")
-    if isinstance(enum_values, list) and any(
-        isinstance(value, str) and ("/" in value or "\\" in value)
-        for value in enum_values
-    ):
-        normalized.pop("enum", None)
-
-    return normalized
+    return function.parameters
 
 
 @register_model_structural_tag("deepseek_v4")

From 5fb2f35f3e84317939900370105a6a4202cdb4a1 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Mon, 4 May 2026 09:23:16 +0800
Subject: [PATCH 37/43] format.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/tool_parsers/structural_tag_registry.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 20b4ff9652db..108546f0c3d1 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -101,8 +101,7 @@ def _normalize_tool_choice(
 
 
 def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
-    """Return the JSON schema used for constrained tool arguments.
-    """
+    """Return the JSON schema used for constrained tool arguments."""
 
     if getattr(function, "strict", None) is False:
         return True

From e6fa4e3e6d56e319c4d89f4683ecb59149579432 Mon Sep 17 00:00:00 2001
From: Yuchuan <yuchuan.7streams@gmail.com>
Date: Mon, 4 May 2026 09:34:28 +0800
Subject: [PATCH 38/43] set the flag off as default.

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
---
 vllm/envs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index ec777e2bf9a4..acd5f7932f20 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -226,7 +226,7 @@
     VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
     VLLM_SYSTEM_START_DATE: str | None = None
     VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY: bool = False
-    VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = True
+    VLLM_ENFORCE_STRICT_TOOL_CALLING: bool = False
     VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
     VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
     VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
@@ -1594,9 +1594,9 @@ def _get_or_set_default() -> str:
     ),
     # When 1,the model structural tags will be used to enforce the model
     # output conforming to the model's tool-calling format and schema.
-    # Default 1 (on).
+    # Default 0 (off).
     "VLLM_ENFORCE_STRICT_TOOL_CALLING": lambda: bool(
-        int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "1"))
+        int(os.getenv("VLLM_ENFORCE_STRICT_TOOL_CALLING", "0"))
     ),
     # Add optional custom scopes for profiling, disable to avoid overheads
     "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(

From dfda37c10436b9f5704235221a2d4966593a4d39 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Mon, 4 May 2026 06:20:16 -0400
Subject: [PATCH 39/43] update and fix bug

Signed-off-by: Ubospica <ubospica@gmail.com>
---
 vllm/entrypoints/openai/api_server.py        | 15 ++++++
 vllm/tool_parsers/deepseekv4_tool_parser.py  |  7 ++-
 vllm/tool_parsers/qwen3coder_tool_parser.py  |  9 ++--
 vllm/tool_parsers/structural_tag_registry.py | 55 +++++++++++++++-----
 4 files changed, 66 insertions(+), 20 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 9aac19e2fda5..da2ec10284c5 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -321,6 +321,21 @@ async def init_app_state(
     supported_tasks: tuple["SupportedTask", ...] | None = None,
 ) -> None:
     vllm_config = engine_client.vllm_config
+
+    # Propagate enable_in_reasoning to the API-server process. The engine core
+    # runs in a separate process, so the contextvar that backs
+    # `get_current_vllm_config_or_none()` is None in this process. Tool parsers
+    # call `get_enable_structured_outputs_in_reasoning()` during request
+    # handling and need to see the real flag; otherwise they silently fall
+    # back to False and mismatch the engine-side bitmask gating.
+    from vllm.tool_parsers.structural_tag_registry import (
+        set_enable_structured_outputs_in_reasoning,
+    )
+
+    set_enable_structured_outputs_in_reasoning(
+        vllm_config.structured_outputs_config.enable_in_reasoning
+    )
+
     if supported_tasks is None:
         warnings.warn(
             "The 'supported_tasks' parameter was not provided to "
diff --git a/vllm/tool_parsers/deepseekv4_tool_parser.py b/vllm/tool_parsers/deepseekv4_tool_parser.py
index 8d1df704a3a2..e32451cd8bbd 100644
--- a/vllm/tool_parsers/deepseekv4_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv4_tool_parser.py
@@ -5,7 +5,10 @@
     ChatCompletionRequest,
 )
 from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
-from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag
+from vllm.tool_parsers.structural_tag_registry import (
+    get_enable_structured_outputs_in_reasoning,
+    get_model_structural_tag,
+)
 
 
 class DeepSeekV4ToolParser(DeepSeekV32ToolParser):
@@ -24,5 +27,5 @@ def get_structural_tag(self, request: ChatCompletionRequest):
             model="deepseek_v4",
             tools=request.tools,
             tool_choice=request.tool_choice,
-            reasoning=request.include_reasoning,
+            reasoning=get_enable_structured_outputs_in_reasoning(),
         )
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 1204ea6fba38..73850b2ab0c5 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -25,7 +25,10 @@
     Tool,
     ToolParser,
 )
-from vllm.tool_parsers.structural_tag_registry import get_model_structural_tag
+from vllm.tool_parsers.structural_tag_registry import (
+    get_enable_structured_outputs_in_reasoning,
+    get_model_structural_tag,
+)
 from vllm.tool_parsers.utils import find_tool_properties
 
 logger = init_logger(__name__)
@@ -690,7 +693,5 @@ def get_structural_tag(self, request: ChatCompletionRequest):
             model="qwen_3_5",
             tools=request.tools,
             tool_choice=request.tool_choice,
-            # The reasoning parser gates structured output until reasoning ends.
-            # Constrain only the post-reasoning tool-call suffix here.
-            reasoning=False,
+            reasoning=get_enable_structured_outputs_in_reasoning(),
         )
diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 108546f0c3d1..dc625b8809c5 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -5,6 +5,7 @@
 # builtin structural tag implementations:
 # https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py
 
+import time
 from collections.abc import Callable
 from typing import Any, Literal
 
@@ -13,7 +14,6 @@
     AnyTextFormat,
     ConstStringFormat,
     JSONSchemaFormat,
-    QwenXMLParameterFormat,
     SequenceFormat,
     TagFormat,
     TagsWithSeparatorFormat,
@@ -110,6 +110,31 @@ def _get_function_parameters(function: Any) -> dict[str, Any] | bool:
     return function.parameters
 
 
+_enable_structured_outputs_in_reasoning: bool = False
+
+
+def set_enable_structured_outputs_in_reasoning(enabled: bool) -> None:
+    """Publish the engine's ``enable_in_reasoning`` flag to tool parsers.
+
+    Called once during APIServer startup so request-time parsers can read
+    it without going through the EngineCore-only contextvar.
+    """
+
+    global _enable_structured_outputs_in_reasoning
+    _enable_structured_outputs_in_reasoning = bool(enabled)
+
+
+def get_enable_structured_outputs_in_reasoning() -> bool:
+    """Whether structured outputs are active during the reasoning phase.
+
+    When ``True``, the structural tag also covers the reasoning part, i.e. the
+    ``<think>...</think>`` prefix (if available); when ``False`` (default), the
+    tag only constrains the post-reasoning suffix.
+    """
+
+    return _enable_structured_outputs_in_reasoning
+
+
 @register_model_structural_tag("deepseek_v4")
 def get_deepseek_v4_structural_tag(
     tools: list[ChatCompletionToolsParam],
@@ -230,7 +255,6 @@ def get_qwen_3_5_structural_tag(
     This format is used for Qwen3-Coder/Qwen3.5/Qwen3.6 and is compatible with
     Qwen variants that use the same XML tool-call format.
     """
-
     tool_call_begin_prefix = "<tool_call>\n<function="
     tool_call_begin_suffix = ">\n"
     tool_call_end = "\n</function>\n</tool_call>"
@@ -247,7 +271,7 @@ def get_qwen_3_5_structural_tag(
             tags.append(
                 TagFormat(
                     begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
-                    content=QwenXMLParameterFormat(json_schema=parameters),
+                    content=JSONSchemaFormat(json_schema=parameters, style="qwen_xml"),
                     end=tool_call_end,
                 )
             )
@@ -267,8 +291,9 @@ def get_qwen_3_5_structural_tag(
         function = tools[0].function
         suffix_tag = TagFormat(
             begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
-            content=QwenXMLParameterFormat(
-                json_schema=_get_function_parameters(function)
+            content=JSONSchemaFormat(
+                json_schema=_get_function_parameters(function),
+                style="qwen_xml",
             ),
             end=tool_call_end,
         )
@@ -281,7 +306,7 @@ def get_qwen_3_5_structural_tag(
             tags.append(
                 TagFormat(
                     begin=f"{tool_call_begin_prefix}{function.name}{tool_call_begin_suffix}",
-                    content=QwenXMLParameterFormat(json_schema=parameters),
+                    content=JSONSchemaFormat(json_schema=parameters, style="qwen_xml"),
                     end=tool_call_end,
                 )
             )
@@ -293,12 +318,14 @@ def get_qwen_3_5_structural_tag(
         )
 
     if not reasoning:
-        return StructuralTag(format=suffix_tag)
+        result = StructuralTag(format=suffix_tag)
+    else:
+        prefix_tag = SequenceFormat(
+            elements=[
+                TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end),
+                ConstStringFormat(value=think_suffix),
+            ]
+        )
+        result = StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
 
-    prefix_tag = SequenceFormat(
-        elements=[
-            TagFormat(begin="", content=AnyTextFormat(), end=think_tag_end),
-            ConstStringFormat(value=think_suffix),
-        ]
-    )
-    return StructuralTag(format=SequenceFormat(elements=[prefix_tag, suffix_tag]))
+    return result

From 07eb072bbc7b1013b65af41dab64a915e1c00400 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Mon, 4 May 2026 09:12:46 -0400
Subject: [PATCH 40/43] update

---
 vllm/tool_parsers/structural_tag_registry.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index dc625b8809c5..754cc52361c5 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -5,7 +5,6 @@
 # builtin structural tag implementations:
 # https://github.com/mlc-ai/xgrammar/blob/main/python/xgrammar/builtin_structural_tag.py
 
-import time
 from collections.abc import Callable
 from typing import Any, Literal
 

From 376b84ebc5733eee189055be0caa6c9b2c9cb1d7 Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Mon, 4 May 2026 09:12:57 -0400
Subject: [PATCH 41/43] update

---
 vllm/tool_parsers/structural_tag_registry.py | 21 +++++++-------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 754cc52361c5..513352e12fda 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -8,21 +8,14 @@
 from collections.abc import Callable
 from typing import Any, Literal
 
-from xgrammar import StructuralTag
-from xgrammar.structural_tag import (
-    AnyTextFormat,
-    ConstStringFormat,
-    JSONSchemaFormat,
-    SequenceFormat,
-    TagFormat,
-    TagsWithSeparatorFormat,
-    TriggeredTagsFormat,
-)
-
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam,
-    ChatCompletionToolsParam,
-)
+    ChatCompletionNamedToolChoiceParam, ChatCompletionToolsParam)
+
+from xgrammar import StructuralTag
+from xgrammar.structural_tag import (AnyTextFormat, ConstStringFormat,
+                                     JSONSchemaFormat, SequenceFormat,
+                                     TagFormat, TagsWithSeparatorFormat,
+                                     TriggeredTagsFormat)
 
 SimplifiedToolChoice = Literal["auto", "required", "forced"]
 ToolChoice = (

From 45d43b68f06258602b0f7d3caf2c9d0071ed417f Mon Sep 17 00:00:00 2001
From: Ubospica <ubospica@gmail.com>
Date: Mon, 4 May 2026 10:21:52 -0400
Subject: [PATCH 42/43] format.

---
 vllm/tool_parsers/structural_tag_registry.py | 21 +++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/vllm/tool_parsers/structural_tag_registry.py b/vllm/tool_parsers/structural_tag_registry.py
index 513352e12fda..754cc52361c5 100644
--- a/vllm/tool_parsers/structural_tag_registry.py
+++ b/vllm/tool_parsers/structural_tag_registry.py
@@ -8,14 +8,21 @@
 from collections.abc import Callable
 from typing import Any, Literal
 
-from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedToolChoiceParam, ChatCompletionToolsParam)
-
 from xgrammar import StructuralTag
-from xgrammar.structural_tag import (AnyTextFormat, ConstStringFormat,
-                                     JSONSchemaFormat, SequenceFormat,
-                                     TagFormat, TagsWithSeparatorFormat,
-                                     TriggeredTagsFormat)
+from xgrammar.structural_tag import (
+    AnyTextFormat,
+    ConstStringFormat,
+    JSONSchemaFormat,
+    SequenceFormat,
+    TagFormat,
+    TagsWithSeparatorFormat,
+    TriggeredTagsFormat,
+)
+
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionToolsParam,
+)
 
 SimplifiedToolChoice = Literal["auto", "required", "forced"]
 ToolChoice = (

From ad4395a56077a78a9d84cdc370e173b286d6b61c Mon Sep 17 00:00:00 2001
From: sfeng33 <4florafeng@gmail.com>
Date: Mon, 4 May 2026 16:32:52 +0000
Subject: [PATCH 43/43] Fix failing qwen3coder test

Signed-off-by: sfeng33 <4florafeng@gmail.com>
---
 tests/tool_parsers/test_qwen3coder_tool_parser.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index d50a7b9b769e..26bbf1a044bc 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1211,10 +1211,15 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
 
 @pytest.mark.parametrize("include_reasoning", [True, False])
 def test_adjust_request_auto_uses_vllm_registry_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
     include_reasoning: bool,
 ) -> None:
+    monkeypatch.setattr(
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
     request_tools = _as_chat_completion_tools(sample_tools)
     req = ChatCompletionRequest(
         messages=[],
@@ -1232,9 +1237,14 @@ def test_adjust_request_auto_uses_vllm_registry_structural_tag(
 
 
 def test_adjust_request_required_prefers_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
     qwen3_tool_parser: Qwen3CoderToolParser,
     sample_tools: list[ChatCompletionToolsParam],
 ) -> None:
+    monkeypatch.setattr(
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
     request_tools = _as_chat_completion_tools(sample_tools)
     req = ChatCompletionRequest(
         messages=[],