From 0985a564f4884e339ceda19b1b9667df77df6936 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Tue, 17 Mar 2026 12:16:55 -0400 Subject: [PATCH 01/10] feat(responses): unified tool_choice + structured output via triggered tags Extend prepare_structured_tag() to be the single authority for all generation constraints in GPT-OSS Harmony models: channel structure, tool enforcement, argument validation, and content constraints. tool_choice=required support: - New from_function_tool_to_tag() and tag_with_function_tools() helpers - prepare_structured_tag() extended with tool_choice, function_tools params - Channel blocking: omit <|channel|>final trigger to force tool calls - Remove NotImplementedError for non-auto tool_choice in Harmony path Absorbed from upstream PR #35904 (structured output + reasoning): - Content constraint embedding in <|channel|>final tag - _constraint_to_content_format() and _extract_response_format_schema() - struct_out is None branch (reasoning tags always applied) - inject_response_formats() for Harmony cookbook compliance - json_object format handling (was silently ignored) - Streaming .model_dump() alias bug fix Signed-off-by: Will Deines --- .../openai/parser/test_harmony_utils.py | 39 ++ .../openai/responses/test_response_formats.py | 96 ++++ .../openai/responses/test_sampling_params.py | 19 + .../responses/test_structured_output.py | 71 +++ .../reasoning/test_gptoss_reasoning_parser.py | 415 ++++++++++++++++++ .../openai/parser/harmony_utils.py | 20 + vllm/entrypoints/openai/responses/protocol.py | 4 + vllm/entrypoints/openai/responses/serving.py | 167 ++++++- vllm/reasoning/abs_reasoning_parsers.py | 18 +- vllm/reasoning/gptoss_reasoning_parser.py | 125 ++++-- 10 files changed, 926 insertions(+), 48 deletions(-) create mode 100644 tests/entrypoints/openai/responses/test_response_formats.py diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py index 
21b53dff1507..01ac12bd9408 100644 --- a/tests/entrypoints/openai/parser/test_harmony_utils.py +++ b/tests/entrypoints/openai/parser/test_harmony_utils.py @@ -10,6 +10,7 @@ get_encoding, get_system_message, has_custom_tools, + inject_response_formats, parse_chat_input_to_harmony_message, parse_chat_output, ) @@ -928,3 +929,41 @@ def test_reasoning_with_empty_content_returns_none(self): msg = response_input_to_harmony(item, prev_responses=[]) assert msg is None + + +class TestInjectResponseFormats: + def test_appends_to_existing_instructions(self): + result = inject_response_formats("You are helpful.", {"type": "object"}) + assert result.startswith("You are helpful.") + assert "# Response Formats" in result + assert '{"type":"object"}' in result + + def test_none_instructions_creates_section(self): + result = inject_response_formats(None, {"type": "object"}) + assert result.startswith("# Response Formats") + assert '{"type":"object"}' in result + + def test_custom_format_name(self): + result = inject_response_formats( + None, {"type": "object"}, format_name="order" + ) + assert "## order" in result + + def test_compact_json_no_spaces(self): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + result = inject_response_formats(None, schema) + assert ( + '{"type":"object","properties":{"name":{"type":"string"}}}' + in result + ) + + def test_section_separated_by_blank_lines(self): + result = inject_response_formats( + "Instructions here.", {"type": "object"} + ) + assert ( + "\n\n# Response Formats\n\n## structured_output\n\n" in result + ) diff --git a/tests/entrypoints/openai/responses/test_response_formats.py b/tests/entrypoints/openai/responses/test_response_formats.py new file mode 100644 index 000000000000..61681dfbf8ec --- /dev/null +++ b/tests/entrypoints/openai/responses/test_response_formats.py @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + 
+"""Tests for response format schema extraction and developer message injection. + +These tests verify that structured output schemas are correctly extracted from +ResponsesRequest and injected into the Harmony developer message per the +Harmony cookbook specification. +""" + +from openai.types.responses.response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig, +) + +from vllm.entrypoints.openai.responses.protocol import ( + ResponsesRequest, + ResponseTextConfig, +) +from vllm.entrypoints.openai.responses.serving import ( + _extract_response_format_schema, +) +from vllm.sampling_params import StructuredOutputsParams + + +def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig: + text_config = ResponseTextConfig() + text_config.format = ResponseFormatTextJSONSchemaConfig( + type="json_schema", + name="test_schema", + schema=schema, + ) + return text_config + + +class TestExtractResponseFormatSchema: + def test_extracts_from_text_format_json_schema(self): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + text=_make_json_schema_text_config(schema), + ) + result = _extract_response_format_schema(request) + assert result == schema + + def test_extracts_from_structured_outputs_json(self): + schema = { + "type": "object", + "properties": {"id": {"type": "integer"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + structured_outputs=StructuredOutputsParams(json=schema), + ) + result = _extract_response_format_schema(request) + assert result == schema + + def test_returns_none_for_text_format(self): + request = ResponsesRequest( + model="test-model", + input="test", + text=ResponseTextConfig(format={"type": "text"}), + ) + result = _extract_response_format_schema(request) + assert result is None + + def test_returns_none_for_no_format(self): + request = ResponsesRequest( + model="test-model", + 
input="test", + ) + result = _extract_response_format_schema(request) + assert result is None + + def test_text_format_takes_precedence(self): + """text.format.json_schema is checked before structured_outputs.""" + text_schema = { + "type": "object", + "properties": {"a": {"type": "string"}}, + } + so_schema = { + "type": "object", + "properties": {"b": {"type": "string"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + text=_make_json_schema_text_config(text_schema), + structured_outputs=StructuredOutputsParams(json=so_schema), + ) + result = _extract_response_format_schema(request) + assert result == text_schema diff --git a/tests/entrypoints/openai/responses/test_sampling_params.py b/tests/entrypoints/openai/responses/test_sampling_params.py index 87910271dd75..7509489ca3c4 100644 --- a/tests/entrypoints/openai/responses/test_sampling_params.py +++ b/tests/entrypoints/openai/responses/test_sampling_params.py @@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self): assert sampling_params.structured_outputs is not None assert sampling_params.structured_outputs.grammar == "root ::= 'hello'" + def test_json_object_format_produces_structured_outputs(self): + """Test that text.format.type=json_object creates StructuredOutputsParams.""" + from openai.types.shared.response_format_json_object import ( + ResponseFormatJSONObject, + ) + + text_config = ResponseTextConfig() + text_config.format = ResponseFormatJSONObject(type="json_object") + request = ResponsesRequest( + model="test-model", + input="test input", + text=text_config, + ) + + sampling_params = request.to_sampling_params(default_max_tokens=1000) + + assert sampling_params.structured_outputs is not None + assert sampling_params.structured_outputs.json_object is True + def test_structured_outputs_and_json_schema_conflict(self): """Test that specifying both structured_outputs and json_schema raises.""" structured_outputs = StructuredOutputsParams(grammar="root ::= 
'hello'") diff --git a/tests/entrypoints/openai/responses/test_structured_output.py b/tests/entrypoints/openai/responses/test_structured_output.py index db8b87768e44..16742708041c 100644 --- a/tests/entrypoints/openai/responses/test_structured_output.py +++ b/tests/entrypoints/openai/responses/test_structured_output.py @@ -6,6 +6,11 @@ import pytest from pydantic import BaseModel +from vllm.entrypoints.openai.responses.serving import ( + _constraint_to_content_format, +) +from vllm.sampling_params import StructuredOutputsParams + @pytest.mark.asyncio async def test_structured_output(client: openai.AsyncOpenAI): @@ -76,3 +81,69 @@ class CalendarEvent(BaseModel): assert len(participants) == 2 assert participants[0] == "Alice" assert participants[1] == "Bob" + + +class TestConstraintToContentFormat: + """Test _constraint_to_content_format helper.""" + + def test_json_schema_string_is_parsed(self): + """JSON schema passed as a string gets json.loads'd into a dict.""" + schema = {"type": "object", "properties": {"age": {"type": "integer"}}} + params = StructuredOutputsParams(json=json.dumps(schema)) + result = _constraint_to_content_format(params) + + assert result == {"type": "json_schema", "json_schema": schema} + + def test_json_schema_dict(self): + """JSON schema passed as a dict is used directly.""" + schema = {"type": "object", "properties": {"age": {"type": "integer"}}} + params = StructuredOutputsParams(json=schema) + result = _constraint_to_content_format(params) + + assert result == {"type": "json_schema", "json_schema": schema} + + def test_json_object(self): + """json_object maps to minimal JSON schema.""" + params = StructuredOutputsParams(json_object=True) + result = _constraint_to_content_format(params) + + assert result == { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + + def test_regex(self): + """Regex constraint is converted correctly.""" + params = StructuredOutputsParams(regex=r"\d+") + result = 
_constraint_to_content_format(params) + + assert result == {"type": "regex", "pattern": r"\d+"} + + def test_grammar(self): + """Grammar constraint is converted correctly.""" + params = StructuredOutputsParams(grammar="root ::= 'hello'") + result = _constraint_to_content_format(params) + + assert result == {"type": "grammar", "grammar": "root ::= 'hello'"} + + def test_choice(self): + """Choice constraint is converted correctly.""" + params = StructuredOutputsParams(choice=["yes", "no"]) + result = _constraint_to_content_format(params) + + assert result == { + "type": "or", + "elements": [ + {"type": "const_string", "value": "yes"}, + {"type": "const_string", "value": "no"}, + ], + } + + def test_structural_tag_only_returns_none(self): + """structural_tag is not a content constraint -- should return None.""" + params = StructuredOutputsParams( + structural_tag='{"type": "structural_tag"}' + ) + result = _constraint_to_content_format(params) + + assert result is None diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py b/tests/reasoning/test_gptoss_reasoning_parser.py index 3b1327acb688..1329e05e269f 100644 --- a/tests/reasoning/test_gptoss_reasoning_parser.py +++ b/tests/reasoning/test_gptoss_reasoning_parser.py @@ -12,7 +12,10 @@ from vllm.reasoning.gptoss_reasoning_parser import ( GptOssReasoningParser, from_builtin_tool_to_tag, + from_function_tool_to_tag, no_func_reasoning_tag, + tag_with_builtin_funcs, + tag_with_function_tools, ) REASONING_MODEL_NAME = "openai/gpt-oss-120b" @@ -280,3 +283,415 @@ def test_tag_format_consistency(self, reasoning_parser): assert tag["content"]["type"] == "any_text" assert tag["end"] == "<|end|>" assert tag["begin"].startswith("<|channel|>") + + # --- Fixtures for tool_choice / function_tools tests --- + + @pytest.fixture + def mock_tool_server_empty(self): + """Create a mock ToolServer with no tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(return_value=False) + return tool_server + + 
@pytest.fixture + def mock_tool_server_with_browser(self): + """Create a mock ToolServer with browser tool.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(side_effect=lambda tool: tool == "browser") + return tool_server + + @pytest.fixture + def mock_tool_server_with_all_tools(self): + """Create a mock ToolServer with all builtin tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock( + side_effect=lambda tool: tool in ["browser", "python", "container"] + ) + return tool_server + + # --- Tests from structured output PR --- + + def test_prepare_structured_tag_with_all_tools( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Test prepare_structured_tag with all builtin tools.""" + result = reasoning_parser.prepare_structured_tag( + None, mock_tool_server_with_all_tools + ) + parsed = json.loads(result) + + # Should have analysis tag + tags for all 3 tools (2 tags each) + assert len(parsed["format"]["tags"]) == 7 # 1 analysis + 6 tool tags + + # Check all tool tags are present + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + for tool in ["browser", "python", "container"]: + assert f"<|channel|>commentary to={tool}" in tag_begins + assert f"<|channel|>analysis to={tool}" in tag_begins + + def test_tag_with_builtin_funcs(self): + """Test tag_with_builtin_funcs function.""" + builtin_tools = ["browser", "python"] + result = tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tools) + + assert result["type"] == "structural_tag" + # Should have original analysis tag + 2 tags per tool + assert len(result["format"]["tags"]) == 5 # 1 + 2*2 + + # Should have added commentary trigger + assert "<|channel|>commentary to=" in result["format"]["triggers"] + assert "<|channel|>analysis" in result["format"]["triggers"] + + def test_tag_structure_invariants(self): + """Test that the basic tag structure follows expected format.""" + assert no_func_reasoning_tag["type"] == "structural_tag" + assert 
no_func_reasoning_tag["format"]["type"] == "triggered_tags" + assert no_func_reasoning_tag["format"]["stop_after_first"] is False + + # Verify analysis tag structure + analysis_tag = no_func_reasoning_tag["format"]["tags"][0] + assert analysis_tag["begin"] == "<|channel|>analysis<|message|>" + assert analysis_tag["content"]["type"] == "any_text" + assert analysis_tag["end"] == "<|end|>" + + def test_json_serialization_valid( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Test that all generated tags produce valid JSON.""" + # Test with no tool server + result1 = reasoning_parser.prepare_structured_tag(None, None) + json.loads(result1) # Should not raise + + # Test with empty tool server + empty_server = Mock(spec=ToolServer) + empty_server.has_tool = Mock(return_value=False) + result2 = reasoning_parser.prepare_structured_tag(None, empty_server) + json.loads(result2) # Should not raise + + # Test with tools + result3 = reasoning_parser.prepare_structured_tag( + None, mock_tool_server_with_all_tools + ) + json.loads(result3) # Should not raise + + @pytest.mark.parametrize("tool_name", ["browser", "python", "container"]) + def test_single_tool_integration(self, reasoning_parser, tool_name): + """Test integration with individual tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(side_effect=lambda tool: tool == tool_name) + + result = reasoning_parser.prepare_structured_tag(None, tool_server) + parsed = json.loads(result) + + # Should have 1 analysis + 2 tool-specific tags + assert len(parsed["format"]["tags"]) == 3 + + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + assert f"<|channel|>commentary to={tool_name}" in tag_begins + assert f"<|channel|>analysis to={tool_name}" in tag_begins + + # --- final_content_format tests --- + + def test_prepare_structured_tag_with_json_schema(self, reasoning_parser): + """Test that final channel tag has json_schema content constraint.""" + content_format = { + 
"type": "json_schema", + "json_schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + } + result = reasoning_parser.prepare_structured_tag( + None, None, final_content_format=content_format + ) + parsed = json.loads(result) + + # Should have analysis tag + final channel tag + assert len(parsed["format"]["tags"]) == 2 + + # Verify analysis tag is unchanged + assert ( + parsed["format"]["tags"][0]["begin"] + == "<|channel|>analysis<|message|>" + ) + assert parsed["format"]["tags"][0]["content"]["type"] == "any_text" + + # Verify final channel tag has the json_schema content constraint + final_tag = parsed["format"]["tags"][1] + assert final_tag["begin"] == "<|channel|>final<|message|>" + assert final_tag["end"] == "<|end|>" + assert final_tag["content"] == content_format + + # Verify triggers include both analysis and final + assert "<|channel|>analysis" in parsed["format"]["triggers"] + assert "<|channel|>final" in parsed["format"]["triggers"] + + def test_prepare_structured_tag_original_tag_ignores_constraint( + self, reasoning_parser + ): + """When original_tag is provided, final_content_format is ignored.""" + original_tag = '{"custom": "tag"}' + content_format = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + result = reasoning_parser.prepare_structured_tag( + original_tag, None, final_content_format=content_format + ) + + # Should return the original tag unchanged + assert result == original_tag + + def test_prepare_structured_tag_with_tools_and_constraint( + self, reasoning_parser, mock_tool_server_with_browser + ): + """Test that tools and content constraint coexist in the tag.""" + content_format = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_browser, + final_content_format=content_format, + ) + parsed = json.loads(result) + + # Should have analysis + 2 browser tags + final channel tag = 4 + assert 
len(parsed["format"]["tags"]) == 4 + + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + assert "<|channel|>analysis<|message|>" in tag_begins + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>analysis to=browser" in tag_begins + assert "<|channel|>final<|message|>" in tag_begins + + # Verify final tag has the constraint + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + assert final_tag["content"] == content_format + + # --- Function tool and tool_choice tests --- + + def test_function_tool_tags_on_both_channels(self): + """Verify from_function_tool_to_tag creates commentary + analysis.""" + tags = from_function_tool_to_tag("get_weather", None) + + assert len(tags) == 2 + assert ( + tags[0]["begin"] + == "<|channel|>commentary to=functions.get_weather<|message|>" + ) + assert ( + tags[1]["begin"] + == "<|channel|>analysis to=functions.get_weather<|message|>" + ) + assert tags[0]["end"] == "<|end|>" + assert tags[1]["end"] == "<|end|>" + # No parameters -> any_text + assert tags[0]["content"] == {"type": "any_text"} + assert tags[1]["content"] == {"type": "any_text"} + + def test_function_tool_json_schema_content(self): + """Verify JSON schema from tool parameters is used as content.""" + schema = { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + } + tags = from_function_tool_to_tag("get_weather", schema) + + expected_content = {"type": "json_schema", "json_schema": schema} + assert tags[0]["content"] == expected_content + assert tags[1]["content"] == expected_content + + def test_tool_choice_required_blocks_final(self, reasoning_parser): + """No final trigger/tag when tool_choice=required (no tools).""" + result = reasoning_parser.prepare_structured_tag( + None, None, tool_choice="required" + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert not any("final" in 
b for b in tag_begins) + assert "<|channel|>final" not in parsed["format"]["triggers"] + + def test_tool_choice_required_with_function_tools(self, reasoning_parser): + """Tool tags present but no final when tool_choice=required.""" + fn_tools = [ + {"name": "get_weather", "parameters": {"type": "object"}}, + ] + result = reasoning_parser.prepare_structured_tag( + None, None, tool_choice="required", function_tools=fn_tools + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Function tool tags present + assert ( + "<|channel|>commentary to=functions.get_weather<|message|>" + in tag_begins + ) + assert ( + "<|channel|>analysis to=functions.get_weather<|message|>" + in tag_begins + ) + # No final + assert not any("final" in b for b in tag_begins) + assert "<|channel|>final" not in parsed["format"]["triggers"] + + def test_tool_choice_required_ignores_final_content_format( + self, reasoning_parser + ): + """Final is blocked even when final_content_format is provided.""" + content_fmt = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + fn_tools = [{"name": "my_func"}] + result = reasoning_parser.prepare_structured_tag( + None, + None, + final_content_format=content_fmt, + tool_choice="required", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert not any("final" in b for b in tag_begins) + + def test_tool_choice_auto_with_tools_and_content_format( + self, reasoning_parser + ): + """Tool tags + final with content constraint for auto.""" + schema = {"type": "object", "properties": {"x": {"type": "integer"}}} + content_fmt = {"type": "json_schema", "json_schema": schema} + fn_tools = [{"name": "compute", "parameters": schema}] + + result = reasoning_parser.prepare_structured_tag( + None, + None, + final_content_format=content_fmt, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + 
tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Function tool tags + assert ( + "<|channel|>commentary to=functions.compute<|message|>" + in tag_begins + ) + # Final tag with content constraint + assert "<|channel|>final<|message|>" in tag_begins + assert "<|channel|>final" in parsed["format"]["triggers"] + + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + assert final_tag["content"] == content_fmt + + def test_tool_choice_auto_with_tools_final_is_any_text( + self, reasoning_parser + ): + """auto + function tools but no content format -> final allows free text.""" + fn_tools = [{"name": "get_weather", "parameters": {"type": "object"}}] + result = reasoning_parser.prepare_structured_tag( + None, + None, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + # No content format -> model can respond with any text + assert final_tag["content"] == {"type": "any_text"} + + def test_tool_choice_none_strips_tool_tags( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """No tool tags with tool_choice=none, analysis only.""" + fn_tools = [{"name": "get_weather"}] + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice="none", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only analysis tag, no tool tags + assert tag_begins == ["<|channel|>analysis<|message|>"] + assert parsed["format"]["triggers"] == ["<|channel|>analysis"] + + def test_mixed_builtin_and_function_tools( + self, reasoning_parser, mock_tool_server_with_browser + ): + """Both builtin and function tool tags coexist.""" + fn_tools = [{"name": "get_weather"}] + result = reasoning_parser.prepare_structured_tag( + None, + 
mock_tool_server_with_browser, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Builtin tool tags + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>analysis to=browser" in tag_begins + # Function tool tags + assert ( + "<|channel|>commentary to=functions.get_weather<|message|>" + in tag_begins + ) + assert ( + "<|channel|>analysis to=functions.get_weather<|message|>" + in tag_begins + ) + # Final tag (auto + function tools) + assert "<|channel|>final<|message|>" in tag_begins + # General commentary trigger covers both builtin and function + assert "<|channel|>commentary to=" in parsed["format"]["triggers"] + + def test_named_tool_choice(self, reasoning_parser): + """Only the named tool's tags present, final blocked.""" + fn_tools = [ + {"name": "get_weather", "parameters": {"type": "object"}}, + {"name": "get_stock", "parameters": {"type": "object"}}, + ] + result = reasoning_parser.prepare_structured_tag( + None, + None, + tool_choice={"type": "function", "name": "get_weather"}, + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only get_weather tags, not get_stock + assert ( + "<|channel|>commentary to=functions.get_weather<|message|>" + in tag_begins + ) + assert ( + "<|channel|>analysis to=functions.get_weather<|message|>" + in tag_begins + ) + assert not any("get_stock" in b for b in tag_begins) + # No final (named tool choice blocks final) + assert not any("final" in b for b in tag_begins) diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index 9b4264456c51..e8202516985e 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import datetime +import 
json from collections.abc import Iterable, Sequence from typing import Literal @@ -150,6 +151,25 @@ def get_developer_message( return dev_msg +def inject_response_formats( + instructions: str | None, + schema: dict, + format_name: str = "structured_output", +) -> str: + """Append a Harmony cookbook ``# Response Formats`` section. + + Per the cookbook, structured output schemas should appear in the + developer message under a ``# Response Formats`` heading so the + model knows what format to produce. This complements grammar + enforcement via structural tags. + """ + schema_json = json.dumps(schema, separators=(",", ":")) + section = f"\n\n# Response Formats\n\n## {format_name}\n\n{schema_json}" + if instructions: + return instructions + section + return section.lstrip("\n") + + def get_user_message(content: str) -> Message: return Message.from_role_and_content(Role.USER, content) diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py index a5f62bdd8c39..831fb1077243 100644 --- a/vllm/entrypoints/openai/responses/protocol.py +++ b/vllm/entrypoints/openai/responses/protocol.py @@ -346,6 +346,10 @@ def to_sampling_params( # --follow-imports skip hides the class definition but also hides # multiple third party conflicts, so best of both evils ) + elif response_format.type == "json_object": + structured_outputs = StructuredOutputsParams( + json_object=True # type: ignore[call-arg] + ) stop = self.stop if self.stop else [] if isinstance(stop, str): diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index b2428e97e20d..48d08a8784d8 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import asyncio +import json as json_mod import time import uuid from collections import deque @@ -65,6 +66,7 @@ get_system_message, 
get_user_message, has_custom_tools, + inject_response_formats, render_for_completion, ) from vllm.entrypoints.openai.responses.context import ( @@ -125,6 +127,54 @@ logger = init_logger(__name__) +def _extract_response_format_schema(request: ResponsesRequest) -> dict | None: + """Extract JSON schema from the request's structured output config.""" + if ( + request.text is not None + and request.text.format is not None + and request.text.format.type == "json_schema" + and request.text.format.schema_ is not None + ): + return request.text.format.schema_ + if ( + request.structured_outputs is not None + and request.structured_outputs.json is not None + ): + val = request.structured_outputs.json + if isinstance(val, str): + return json_mod.loads(val) + return val + return None + + +def _constraint_to_content_format( + params: StructuredOutputsParams, +) -> dict | None: + """Convert a StructuredOutputsParams constraint into an xgrammar + content format dict suitable for embedding in a structural tag.""" + if params.json is not None: + schema = ( + params.json + if isinstance(params.json, dict) + else json_mod.loads(params.json) + ) + return {"type": "json_schema", "json_schema": schema} + if params.json_object: + return {"type": "json_schema", "json_schema": {"type": "object"}} + if params.regex is not None: + return {"type": "regex", "pattern": params.regex} + if params.grammar is not None: + return {"type": "grammar", "grammar": params.grammar} + if params.choice is not None: + return { + "type": "or", + "elements": [ + {"type": "const_string", "value": c} for c in params.choice + ], + } + return None + + def _extract_allowed_tools_from_mcp_requests( tools: list[Tool], ) -> dict[str, list[str] | None]: @@ -470,21 +520,88 @@ async def create_responses( else: context = SimpleContext() + # Extract function tools for the reasoning parser + function_tools_for_parser = None + if request.tools: + ft = [ + { + "name": t.name, + **( + {"parameters": t.parameters} + if 
t.parameters + else {} + ), + } + for t in request.tools + if getattr(t, "type", None) == "function" + ] + if ft: + function_tools_for_parser = ft + if self.parser and self.parser.reasoning_parser_cls is not None: reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) - if ( - isinstance( - struct_out := sampling_params.structured_outputs, - StructuredOutputsParams, - ) - and struct_out.all_non_structural_tag_constraints_none() - ): - sampling_params.structured_outputs = replace( - struct_out, - structural_tag=reasoning_parser.prepare_structured_tag( - struct_out.structural_tag, self.tool_server - ), + struct_out = sampling_params.structured_outputs + + if isinstance(struct_out, StructuredOutputsParams): + if struct_out.all_non_structural_tag_constraints_none(): + # No content constraint — just apply reasoning + # channel tags + tool_choice + function tools + sampling_params.structured_outputs = replace( + struct_out, + structural_tag=( + reasoning_parser.prepare_structured_tag( + struct_out.structural_tag, + self.tool_server, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, + ) + ), + ) + else: + # Content constraint present (json, regex, + # grammar, choice, json_object). Embed it in the + # final channel tag within the structural tag. + content_fmt = _constraint_to_content_format( + struct_out + ) + if content_fmt is not None: + structural_tag = ( + reasoning_parser.prepare_structured_tag( + None, + self.tool_server, + final_content_format=content_fmt, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, + ) + ) + if structural_tag is not None: + # Clear content constraints, set + # structural_tag, but preserve options + # like disable_any_whitespace. 
+ sampling_params.structured_outputs = replace( + struct_out, + json=None, + regex=None, + choice=None, + grammar=None, + json_object=None, + structural_tag=structural_tag, + ) + elif struct_out is None: + # No structured output requested, but still need + # reasoning channel tags + tool_choice + function tools + tag = reasoning_parser.prepare_structured_tag( + None, + self.tool_server, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, ) + if tag is not None: + sampling_params.structured_outputs = ( + StructuredOutputsParams( + structural_tag=tag # type: ignore[call-arg] + ) + ) generator = self._generate_with_builtin_tools( request_id=request.request_id, engine_prompt=engine_prompt, @@ -712,11 +829,6 @@ def _make_request_with_harmony( request: ResponsesRequest, prev_response: ResponsesResponse | None, ): - if request.tool_choice != "auto": - raise NotImplementedError( - "Only 'auto' tool_choice is supported in response API with Harmony" - ) - messages = self._construct_input_messages_with_harmony(request, prev_response) prompt_token_ids = render_for_completion(messages) engine_prompt = token_inputs(prompt_token_ids) @@ -1143,9 +1255,24 @@ def _construct_input_messages_with_harmony( request, with_custom_tools, tool_types ) messages.append(sys_msg) - if with_custom_tools: + + # Determine if we need a developer message. + # Per Harmony cookbook: developer message holds instructions, + # function tools, AND response format schemas. 
+ response_format_schema = _extract_response_format_schema(request) + needs_dev_msg = ( + with_custom_tools or response_format_schema is not None + ) + + if needs_dev_msg: + dev_instructions = request.instructions + if response_format_schema is not None: + dev_instructions = inject_response_formats( + dev_instructions, response_format_schema + ) dev_msg = get_developer_message( - instructions=request.instructions, tools=request.tools + instructions=dev_instructions, + tools=request.tools if with_custom_tools else None, ) messages.append(dev_msg) messages += construct_harmony_previous_input_messages(request) @@ -1985,7 +2112,7 @@ def _increment_sequence_number_and_return( output=[], status="in_progress", usage=None, - ).model_dump() + ) yield _increment_sequence_number_and_return( ResponseCreatedEvent( type="response.created", diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index 5271a307075e..ee3acd92b675 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -154,10 +154,24 @@ def prepare_structured_tag( self, original_tag: str | None, tool_server: ToolServer | None, + final_content_format: dict | None = None, + tool_choice: str | dict | None = None, + function_tools: list[dict] | None = None, ) -> str | None: """ - Instance method that is implemented for preparing the structured tag - Otherwise, None is returned + Instance method that is implemented for preparing the structured tag. + Otherwise, None is returned. + + Args: + original_tag: An existing structural tag string, if any. + tool_server: The tool server for builtin tool support. + final_content_format: Optional xgrammar content format dict + (e.g. json_schema, regex) to embed in the <|channel|>final + tag for constraining the model's final output region. + tool_choice: The tool_choice setting from the request + ("auto", "required", "none", or a named tool dict). 
+ function_tools: List of function tool dicts with "name" and + optional "parameters" keys. """ return None diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 89299d4b12b8..016864ac6361 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import copy import json from collections.abc import Sequence from typing import TYPE_CHECKING @@ -52,8 +53,6 @@ def from_builtin_tool_to_tag(tool: str) -> list[dict]: def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) -> dict: - import copy - new_tag = copy.deepcopy(no_func_reasoning_tag) new_tag["format"]["triggers"].append("<|channel|>commentary to=") @@ -62,6 +61,45 @@ def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) return new_tag +def from_function_tool_to_tag(name: str, parameters: dict | None) -> list[dict]: + content = ( + {"type": "json_schema", "json_schema": parameters} + if parameters + else {"type": "any_text"} + ) + return [ + { + "begin": f"<|channel|>commentary to=functions.{name}<|message|>", + "content": content, + "end": "<|end|>", + }, + { + "begin": f"<|channel|>analysis to=functions.{name}<|message|>", + "content": content, + "end": "<|end|>", + }, + ] + + +def tag_with_function_tools( + base_tag: dict, function_tools: list[dict] +) -> dict: + new_tag = copy.deepcopy(base_tag) + + # Add commentary trigger for function tools if not already covered + # by the general commentary trigger (added by builtin tools). + if "<|channel|>commentary to=" not in new_tag["format"]["triggers"]: + new_tag["format"]["triggers"].append( + "<|channel|>commentary to=functions." 
+ ) + + for tool in function_tools: + new_tag["format"]["tags"].extend( + from_function_tool_to_tag(tool["name"], tool.get("parameters")) + ) + return new_tag + + class GptOssReasoningParser(ReasoningParser): """ Reasoning parser for GptOss model. @@ -158,30 +196,65 @@ def extract_reasoning( # This function prepares the structural tag to format reasoning output def prepare_structured_tag( - self, original_tag: str | None, tool_server: ToolServer | None + self, + original_tag: str | None, + tool_server: ToolServer | None, + final_content_format: dict | None = None, + tool_choice: str | dict | None = None, + function_tools: list[dict] | None = None, ) -> str | None: - if original_tag is None: - if tool_server is None: - return json.dumps(no_func_reasoning_tag) - else: - builtin_tool_list: list[str] = [] - if tool_server.has_tool("browser"): - builtin_tool_list.append("browser") - if tool_server.has_tool("python"): - builtin_tool_list.append("python") - if tool_server.has_tool("container"): - builtin_tool_list.append("container") - - if len(builtin_tool_list) > 0: - logger.info("Builtin_tool_list: %s", builtin_tool_list) - func_tag = json.dumps( - tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list) - ) - else: - logger.info("Builtin_tool_list is empty") - func_tag = json.dumps(no_func_reasoning_tag) - - return func_tag - else: + if original_tag is not None: # There is potential risk for appending the tag to the original tag return original_tag + + # Build base tag with analysis channel + base_tag = copy.deepcopy(no_func_reasoning_tag) + + # Add builtin tool tags (unless tool_choice is "none") + if tool_choice != "none" and tool_server is not None: + builtin_tool_list: list[str] = [] + if tool_server.has_tool("browser"): + builtin_tool_list.append("browser") + if tool_server.has_tool("python"): + builtin_tool_list.append("python") + if tool_server.has_tool("container"): + builtin_tool_list.append("container") + + if builtin_tool_list: + 
logger.info("Builtin_tool_list: %s", builtin_tool_list) + base_tag = tag_with_builtin_funcs(base_tag, builtin_tool_list) + else: + logger.info("Builtin_tool_list is empty") + + # Add function tool tags (unless tool_choice is "none") + effective_function_tools = None + if tool_choice != "none" and function_tools: + effective_function_tools = function_tools + # If named tool choice, filter to only the named tool + if isinstance(tool_choice, dict): + named = tool_choice.get("name") + effective_function_tools = [ + t for t in function_tools if t["name"] == named + ] + if effective_function_tools: + base_tag = tag_with_function_tools( + base_tag, effective_function_tools + ) + + # Add final channel tag unless tool_choice blocks it + if tool_choice != "required" and not isinstance(tool_choice, dict): + has_function_tools = bool(effective_function_tools) + if has_function_tools or final_content_format: + final_content = ( + final_content_format + if final_content_format + else {"type": "any_text"} + ) + base_tag["format"]["tags"].append({ + "begin": "<|channel|>final<|message|>", + "content": final_content, + "end": "<|end|>", + }) + base_tag["format"]["triggers"].append("<|channel|>final") + + return json.dumps(base_tag) From b1a04b3037f3525dc34b059e7081d8c3b9588dfb Mon Sep 17 00:00:00 2001 From: Will Deines Date: Tue, 17 Mar 2026 15:23:45 -0400 Subject: [PATCH 02/10] fix(responses): suppress tool descriptions in prompt when tool_choice=none When tool_choice=none, the structural tag grammar correctly blocks tool-calling channels, but the system/developer messages still described the tools to the model. The model would then attempt tool calls that leaked through the output parser. Introduce a tools_visible flag that is false when tool_choice=none, suppressing both the commentary channel in the system message and tool descriptions in the developer message. 
Signed-off-by: Will Deines --- vllm/entrypoints/openai/responses/serving.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 48d08a8784d8..672f4028b00a 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1251,8 +1251,17 @@ def _construct_input_messages_with_harmony( tool_types = extract_tool_types(request.tools) with_custom_tools = has_custom_tools(tool_types) + # When tool_choice=none, suppress tool awareness in the + # prompt so the model doesn't attempt tool calls. The + # structural tag grammar already blocks tool channels, but + # omitting tools from the system/developer messages + # prevents the model from even reasoning about calling them. + tools_visible = ( + with_custom_tools and request.tool_choice != "none" + ) + sys_msg = self._construct_harmony_system_input_message( - request, with_custom_tools, tool_types + request, tools_visible, tool_types ) messages.append(sys_msg) @@ -1261,7 +1270,7 @@ def _construct_input_messages_with_harmony( # function tools, AND response format schemas. 
response_format_schema = _extract_response_format_schema(request) needs_dev_msg = ( - with_custom_tools or response_format_schema is not None + tools_visible or response_format_schema is not None ) if needs_dev_msg: @@ -1272,7 +1281,7 @@ def _construct_input_messages_with_harmony( ) dev_msg = get_developer_message( instructions=dev_instructions, - tools=request.tools if with_custom_tools else None, + tools=request.tools if tools_visible else None, ) messages.append(dev_msg) messages += construct_harmony_previous_input_messages(request) From 6fe76535744e1109f6ae3ecff2634f4bbeaad05d Mon Sep 17 00:00:00 2001 From: Will Deines Date: Tue, 17 Mar 2026 16:12:22 -0400 Subject: [PATCH 03/10] fix(responses): enforce tool_choice=required via at_least_one grammar constraint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit triggered_tags allows free text between triggers, so omitting <|channel|>final from the tag list was not sufficient to prevent the model from using it. xgrammar's at_least_one=True forces the grammar to begin with a triggered channel immediately, blocking <|channel|>final and EOS at the token level. With this flag set on tool_choice=required or a named tool, only 2 tokens are allowed at generation start (the <|channel|> special token), and after that only analysis/commentary continuations — not "final" — are valid. Signed-off-by: Will Deines --- vllm/reasoning/gptoss_reasoning_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 016864ac6361..d7cea68d13b7 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -257,4 +257,10 @@ def prepare_structured_tag( }) base_tag["format"]["triggers"].append("<|channel|>final") + # For tool_choice=required or named tool, force at least one triggered + # tag. 
This blocks <|channel|>final and EOS at the grammar level until + # the model has emitted at least one tool-call channel. + if tool_choice == "required" or isinstance(tool_choice, dict): + base_tag["format"]["at_least_one"] = True + return json.dumps(base_tag) From d2ecd4abfd0e6b0dfb9b4514fde3980bc16878db Mon Sep 17 00:00:00 2001 From: Will Deines Date: Wed, 18 Mar 2026 07:16:53 -0400 Subject: [PATCH 04/10] =?UTF-8?q?fix(responses):=20fix=20tool=5Fchoice=3Dr?= =?UTF-8?q?equired=20grammar=20=E2=80=94=20remove=20pure=20analysis=20from?= =?UTF-8?q?=20at=5Fleast=5Fone=20scope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach (empty base with pre-set triggers) caused duplicate triggers after tag_with_function_tools ran, crashing xgrammar with 500. New approach: use the normal base tag, then filter out the pure analysis tag (<|channel|>analysis<|message|>) for required/named tool_choice before setting at_least_one=True. The analysis trigger is kept so analysis-to-functions tags remain reachable in triggered_tags_sub. Verified locally: grammar compiles, triggered_tags_first has only tool call options, after <|channel|> only analysis/commentary continuations allowed (11 tokens), final channel blocked. Signed-off-by: Will Deines --- vllm/reasoning/gptoss_reasoning_parser.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index d7cea68d13b7..2e1f522aae52 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -207,7 +207,6 @@ def prepare_structured_tag( # There is potential risk for appending the tag to the original tag return original_tag - # Build base tag with analysis channel base_tag = copy.deepcopy(no_func_reasoning_tag) # Add builtin tool tags (unless tool_choice is "none") @@ -261,6 +260,16 @@ def prepare_structured_tag( # tag. 
This blocks <|channel|>final and EOS at the grammar level until # the model has emitted at least one tool-call channel. if tool_choice == "required" or isinstance(tool_choice, dict): + # Remove the pure analysis tag (no recipient) from the tag list so + # that triggered_tags_first only contains function-call tags. The + # analysis trigger is kept so analysis-to-functions tags remain + # reachable in triggered_tags_sub. This prevents the model from + # satisfying at_least_one with a pure reasoning channel instead of + # an actual tool call. + base_tag["format"]["tags"] = [ + t for t in base_tag["format"]["tags"] + if t.get("begin") != "<|channel|>analysis<|message|>" + ] base_tag["format"]["at_least_one"] = True return json.dumps(base_tag) From a933921864dfd826ad5f38e8b649a71fc8090ea2 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Wed, 18 Mar 2026 09:19:41 -0400 Subject: [PATCH 05/10] =?UTF-8?q?style:=20fix=20pre-commit=20lint=20?= =?UTF-8?q?=E2=80=94=20ruff=20formatting=20and=20mypy=20type=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ruff: reformat long lines in tests and serving.py - mypy: annotate base_tag as dict[str, Any] in gptoss_reasoning_parser to allow nested dict indexing through copy.deepcopy result - mypy: suppress arg-type on list(name) in abs_reasoning_parsers where TypeIs narrowing is not respected under --python-version 3.10 Signed-off-by: Will Deines --- .../openai/parser/test_harmony_utils.py | 17 ++---- .../responses/test_structured_output.py | 4 +- .../reasoning/test_gptoss_reasoning_parser.py | 53 ++++--------------- vllm/entrypoints/openai/responses/serving.py | 42 +++++---------- vllm/reasoning/abs_reasoning_parsers.py | 3 +- vllm/reasoning/gptoss_reasoning_parser.py | 31 +++++------ 6 files changed, 45 insertions(+), 105 deletions(-) diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py index 
01ac12bd9408..f82eb1fdf47b 100644 --- a/tests/entrypoints/openai/parser/test_harmony_utils.py +++ b/tests/entrypoints/openai/parser/test_harmony_utils.py @@ -944,9 +944,7 @@ def test_none_instructions_creates_section(self): assert '{"type":"object"}' in result def test_custom_format_name(self): - result = inject_response_formats( - None, {"type": "object"}, format_name="order" - ) + result = inject_response_formats(None, {"type": "object"}, format_name="order") assert "## order" in result def test_compact_json_no_spaces(self): @@ -955,15 +953,8 @@ def test_compact_json_no_spaces(self): "properties": {"name": {"type": "string"}}, } result = inject_response_formats(None, schema) - assert ( - '{"type":"object","properties":{"name":{"type":"string"}}}' - in result - ) + assert '{"type":"object","properties":{"name":{"type":"string"}}}' in result def test_section_separated_by_blank_lines(self): - result = inject_response_formats( - "Instructions here.", {"type": "object"} - ) - assert ( - "\n\n# Response Formats\n\n## structured_output\n\n" in result - ) + result = inject_response_formats("Instructions here.", {"type": "object"}) + assert "\n\n# Response Formats\n\n## structured_output\n\n" in result diff --git a/tests/entrypoints/openai/responses/test_structured_output.py b/tests/entrypoints/openai/responses/test_structured_output.py index 16742708041c..1f155e15456e 100644 --- a/tests/entrypoints/openai/responses/test_structured_output.py +++ b/tests/entrypoints/openai/responses/test_structured_output.py @@ -141,9 +141,7 @@ def test_choice(self): def test_structural_tag_only_returns_none(self): """structural_tag is not a content constraint -- should return None.""" - params = StructuredOutputsParams( - structural_tag='{"type": "structural_tag"}' - ) + params = StructuredOutputsParams(structural_tag='{"type": "structural_tag"}') result = _constraint_to_content_format(params) assert result is None diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py 
b/tests/reasoning/test_gptoss_reasoning_parser.py index 1329e05e269f..500b03861cea 100644 --- a/tests/reasoning/test_gptoss_reasoning_parser.py +++ b/tests/reasoning/test_gptoss_reasoning_parser.py @@ -15,7 +15,6 @@ from_function_tool_to_tag, no_func_reasoning_tag, tag_with_builtin_funcs, - tag_with_function_tools, ) REASONING_MODEL_NAME = "openai/gpt-oss-120b" @@ -410,10 +409,7 @@ def test_prepare_structured_tag_with_json_schema(self, reasoning_parser): assert len(parsed["format"]["tags"]) == 2 # Verify analysis tag is unchanged - assert ( - parsed["format"]["tags"][0]["begin"] - == "<|channel|>analysis<|message|>" - ) + assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>" assert parsed["format"]["tags"][0]["content"]["type"] == "any_text" # Verify final channel tag has the json_schema content constraint @@ -531,21 +527,13 @@ def test_tool_choice_required_with_function_tools(self, reasoning_parser): tag_begins = [t["begin"] for t in parsed["format"]["tags"]] # Function tool tags present - assert ( - "<|channel|>commentary to=functions.get_weather<|message|>" - in tag_begins - ) - assert ( - "<|channel|>analysis to=functions.get_weather<|message|>" - in tag_begins - ) + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins # No final assert not any("final" in b for b in tag_begins) assert "<|channel|>final" not in parsed["format"]["triggers"] - def test_tool_choice_required_ignores_final_content_format( - self, reasoning_parser - ): + def test_tool_choice_required_ignores_final_content_format(self, reasoning_parser): """Final is blocked even when final_content_format is provided.""" content_fmt = { "type": "json_schema", @@ -564,9 +552,7 @@ def test_tool_choice_required_ignores_final_content_format( tag_begins = [t["begin"] for t in parsed["format"]["tags"]] assert not any("final" in b for b in tag_begins) - def 
test_tool_choice_auto_with_tools_and_content_format( - self, reasoning_parser - ): + def test_tool_choice_auto_with_tools_and_content_format(self, reasoning_parser): """Tool tags + final with content constraint for auto.""" schema = {"type": "object", "properties": {"x": {"type": "integer"}}} content_fmt = {"type": "json_schema", "json_schema": schema} @@ -583,10 +569,7 @@ def test_tool_choice_auto_with_tools_and_content_format( tag_begins = [t["begin"] for t in parsed["format"]["tags"]] # Function tool tags - assert ( - "<|channel|>commentary to=functions.compute<|message|>" - in tag_begins - ) + assert "<|channel|>commentary to=functions.compute<|message|>" in tag_begins # Final tag with content constraint assert "<|channel|>final<|message|>" in tag_begins assert "<|channel|>final" in parsed["format"]["triggers"] @@ -598,9 +581,7 @@ def test_tool_choice_auto_with_tools_and_content_format( ) assert final_tag["content"] == content_fmt - def test_tool_choice_auto_with_tools_final_is_any_text( - self, reasoning_parser - ): + def test_tool_choice_auto_with_tools_final_is_any_text(self, reasoning_parser): """auto + function tools but no content format -> final allows free text.""" fn_tools = [{"name": "get_weather", "parameters": {"type": "object"}}] result = reasoning_parser.prepare_structured_tag( @@ -655,14 +636,8 @@ def test_mixed_builtin_and_function_tools( assert "<|channel|>commentary to=browser" in tag_begins assert "<|channel|>analysis to=browser" in tag_begins # Function tool tags - assert ( - "<|channel|>commentary to=functions.get_weather<|message|>" - in tag_begins - ) - assert ( - "<|channel|>analysis to=functions.get_weather<|message|>" - in tag_begins - ) + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins # Final tag (auto + function tools) assert "<|channel|>final<|message|>" in tag_begins # General commentary trigger covers both builtin and 
function @@ -684,14 +659,8 @@ def test_named_tool_choice(self, reasoning_parser): tag_begins = [t["begin"] for t in parsed["format"]["tags"]] # Only get_weather tags, not get_stock - assert ( - "<|channel|>commentary to=functions.get_weather<|message|>" - in tag_begins - ) - assert ( - "<|channel|>analysis to=functions.get_weather<|message|>" - in tag_begins - ) + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins assert not any("get_stock" in b for b in tag_begins) # No final (named tool choice blocks final) assert not any("final" in b for b in tag_begins) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 672f4028b00a..6297cb6cbc33 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -168,9 +168,7 @@ def _constraint_to_content_format( if params.choice is not None: return { "type": "or", - "elements": [ - {"type": "const_string", "value": c} for c in params.choice - ], + "elements": [{"type": "const_string", "value": c} for c in params.choice], } return None @@ -526,11 +524,7 @@ async def create_responses( ft = [ { "name": t.name, - **( - {"parameters": t.parameters} - if t.parameters - else {} - ), + **({"parameters": t.parameters} if t.parameters else {}), } for t in request.tools if getattr(t, "type", None) == "function" @@ -561,18 +555,14 @@ async def create_responses( # Content constraint present (json, regex, # grammar, choice, json_object). Embed it in the # final channel tag within the structural tag. 
- content_fmt = _constraint_to_content_format( - struct_out - ) + content_fmt = _constraint_to_content_format(struct_out) if content_fmt is not None: - structural_tag = ( - reasoning_parser.prepare_structured_tag( - None, - self.tool_server, - final_content_format=content_fmt, - tool_choice=request.tool_choice, - function_tools=function_tools_for_parser, - ) + structural_tag = reasoning_parser.prepare_structured_tag( + None, + self.tool_server, + final_content_format=content_fmt, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, ) if structural_tag is not None: # Clear content constraints, set @@ -597,10 +587,8 @@ async def create_responses( function_tools=function_tools_for_parser, ) if tag is not None: - sampling_params.structured_outputs = ( - StructuredOutputsParams( - structural_tag=tag # type: ignore[call-arg] - ) + sampling_params.structured_outputs = StructuredOutputsParams( + structural_tag=tag # type: ignore[call-arg] ) generator = self._generate_with_builtin_tools( request_id=request.request_id, @@ -1256,9 +1244,7 @@ def _construct_input_messages_with_harmony( # structural tag grammar already blocks tool channels, but # omitting tools from the system/developer messages # prevents the model from even reasoning about calling them. - tools_visible = ( - with_custom_tools and request.tool_choice != "none" - ) + tools_visible = with_custom_tools and request.tool_choice != "none" sys_msg = self._construct_harmony_system_input_message( request, tools_visible, tool_types @@ -1269,9 +1255,7 @@ def _construct_input_messages_with_harmony( # Per Harmony cookbook: developer message holds instructions, # function tools, AND response format schemas. 
response_format_schema = _extract_response_format_schema(request) - needs_dev_msg = ( - tools_visible or response_format_schema is not None - ) + needs_dev_msg = tools_visible or response_format_schema is not None if needs_dev_msg: dev_instructions = request.instructions diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index ee3acd92b675..8edc45bbb6d9 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -312,7 +312,8 @@ def _decorator(obj: type[ReasoningParser]) -> type[ReasoningParser]: if isinstance(name, str): names = [name] elif is_list_of(name, str): - names = name + assert name is not None + names = list(name) else: names = [class_name] diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 2e1f522aae52..9b117501b453 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -3,7 +3,7 @@ import copy import json from collections.abc import Sequence -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from transformers import PreTrainedTokenizerBase @@ -81,17 +81,13 @@ def from_function_tool_to_tag(name: str, parameters: dict | None) -> list[dict]: ] -def tag_with_function_tools( - base_tag: dict, function_tools: list[dict] -) -> dict: +def tag_with_function_tools(base_tag: dict, function_tools: list[dict]) -> dict: new_tag = copy.deepcopy(base_tag) # Add commentary trigger for function tools if not already covered # by the general commentary trigger (added by builtin tools). if "<|channel|>commentary to=" not in new_tag["format"]["triggers"]: - new_tag["format"]["triggers"].append( - "<|channel|>commentary to=functions." 
- ) + new_tag["format"]["triggers"].append("<|channel|>commentary to=functions.") for tool in function_tools: new_tag["format"]["tags"].extend( @@ -207,7 +203,7 @@ def prepare_structured_tag( # There is potential risk for appending the tag to the original tag return original_tag - base_tag = copy.deepcopy(no_func_reasoning_tag) + base_tag: dict[str, Any] = copy.deepcopy(no_func_reasoning_tag) # Add builtin tool tags (unless tool_choice is "none") if tool_choice != "none" and tool_server is not None: @@ -236,9 +232,7 @@ def prepare_structured_tag( t for t in function_tools if t["name"] == named ] if effective_function_tools: - base_tag = tag_with_function_tools( - base_tag, effective_function_tools - ) + base_tag = tag_with_function_tools(base_tag, effective_function_tools) # Add final channel tag unless tool_choice blocks it if tool_choice != "required" and not isinstance(tool_choice, dict): @@ -249,11 +243,13 @@ def prepare_structured_tag( if final_content_format else {"type": "any_text"} ) - base_tag["format"]["tags"].append({ - "begin": "<|channel|>final<|message|>", - "content": final_content, - "end": "<|end|>", - }) + base_tag["format"]["tags"].append( + { + "begin": "<|channel|>final<|message|>", + "content": final_content, + "end": "<|end|>", + } + ) base_tag["format"]["triggers"].append("<|channel|>final") # For tool_choice=required or named tool, force at least one triggered @@ -267,7 +263,8 @@ def prepare_structured_tag( # satisfying at_least_one with a pure reasoning channel instead of # an actual tool call. 
base_tag["format"]["tags"] = [ - t for t in base_tag["format"]["tags"] + t + for t in base_tag["format"]["tags"] if t.get("begin") != "<|channel|>analysis<|message|>" ] base_tag["format"]["at_least_one"] = True From d23f78ff4c53d4544961ad5be2be52f8e19316c3 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Wed, 18 Mar 2026 14:20:45 -0400 Subject: [PATCH 06/10] =?UTF-8?q?fix:=20address=20P1=20review=20comments?= =?UTF-8?q?=20=E2=80=94=20three=20tool=5Fchoice=20bugs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Preserve developer instructions when tool_choice="none": add `request.instructions is not None` to `needs_dev_msg` condition so instructions flow through even when tools are hidden. 2. Suppress builtin tool descriptions for tool_choice="none": rename `with_custom_tools` param to `tools_visible` in `_construct_harmony_system_input_message` and gate browser/python/ container descriptions on it. 3. Exclude builtin channels for named function tool_choice: skip builtin tool tags when `isinstance(tool_choice, dict)` so they cannot satisfy the `at_least_one` grammar constraint instead of the named function. 
Signed-off-by: Will Deines --- .../responses/test_tool_choice_harmony.py | 110 ++++++++++++++++++ .../reasoning/test_gptoss_reasoning_parser.py | 58 +++++++++ vllm/entrypoints/openai/responses/serving.py | 24 +++- vllm/reasoning/gptoss_reasoning_parser.py | 11 +- 4 files changed, 195 insertions(+), 8 deletions(-) create mode 100644 tests/entrypoints/openai/responses/test_tool_choice_harmony.py diff --git a/tests/entrypoints/openai/responses/test_tool_choice_harmony.py b/tests/entrypoints/openai/responses/test_tool_choice_harmony.py new file mode 100644 index 000000000000..f873aeb5db24 --- /dev/null +++ b/tests/entrypoints/openai/responses/test_tool_choice_harmony.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for tool_choice handling in the Harmony-based Responses API. + +These tests verify that: +- Developer instructions are preserved when tool_choice="none" (Bug 1) +- Builtin tool descriptions are suppressed when tool_choice="none" (Bug 2) +""" + +from __future__ import annotations + +from unittest.mock import Mock + +from openai_harmony import Role, ToolNamespaceConfig + +from vllm.entrypoints.openai.parser.harmony_utils import ( + get_developer_message, + get_system_message, +) + + +class TestToolChoiceNoneInstructions: + """Bug 1: Developer instructions must not be dropped when + tool_choice='none' causes tools to be hidden.""" + + def test_developer_message_with_instructions_no_tools(self): + """get_developer_message must include instructions even when + tools=None (the condition that arises from tool_choice='none' + with no custom tools).""" + dev_msg = get_developer_message( + instructions="Be helpful and concise", tools=None + ) + assert dev_msg.author.role == Role.DEVELOPER + rendered = str(dev_msg) + assert "Be helpful and concise" in rendered + + def test_developer_message_with_instructions_and_tools(self): + """Baseline: instructions + tools both appear in 
the developer + message when tools are visible.""" + tool = Mock() + tool.type = "function" + tool.name = "get_weather" + tool.description = "Get weather" + tool.parameters = {"type": "object", "properties": {}} + + dev_msg = get_developer_message(instructions="Be helpful", tools=[tool]) + rendered = str(dev_msg) + assert "Be helpful" in rendered + assert "get_weather" in rendered + + def test_developer_message_no_instructions_no_tools(self): + """When neither instructions nor tools are provided, the + developer message is still valid (just empty content).""" + dev_msg = get_developer_message(instructions=None, tools=None) + assert dev_msg.author.role == Role.DEVELOPER + + +class TestToolChoiceNoneSystemMessage: + """Bug 2: Builtin tool descriptions in the system message must be + suppressed when tool_choice='none'.""" + + def test_system_message_no_tool_descriptions(self): + """When all tool descriptions are None (as happens when + tools_visible=False), the system message must not contain + tool descriptions.""" + sys_msg = get_system_message( + browser_description=None, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + # tools should be None or empty when no descriptions are provided + assert not sys_msg.content[0].tools + + def test_system_message_with_browser_description(self): + """Baseline: when a ToolNamespaceConfig is provided, it appears + in the system message tools.""" + browser_ns = ToolNamespaceConfig.browser() + sys_msg = get_system_message( + browser_description=browser_ns, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + assert "browser" in sys_msg.content[0].tools + + def test_system_message_with_python_description(self): + """Python tool description appears in system message when provided.""" + python_ns = ToolNamespaceConfig.python() + sys_msg = get_system_message( + 
browser_description=None, + python_description=python_ns, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + assert "python" in sys_msg.content[0].tools + + def test_none_descriptions_mean_no_tools(self): + """Passing None for all tool descriptions (as happens when + tools_visible=False) must result in no tools in the system msg.""" + sys_msg = get_system_message( + browser_description=None, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert not sys_msg.content[0].tools diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py b/tests/reasoning/test_gptoss_reasoning_parser.py index 500b03861cea..3b3671b71795 100644 --- a/tests/reasoning/test_gptoss_reasoning_parser.py +++ b/tests/reasoning/test_gptoss_reasoning_parser.py @@ -664,3 +664,61 @@ def test_named_tool_choice(self, reasoning_parser): assert not any("get_stock" in b for b in tag_begins) # No final (named tool choice blocks final) assert not any("final" in b for b in tag_begins) + + def test_named_tool_choice_excludes_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Named function tool_choice must exclude builtin tool tags. 
+ + With at_least_one=True, builtin channels (browser/python/container) + could satisfy the grammar constraint instead of the named function.""" + fn_tools = [{"name": "get_weather", "parameters": {"type": "object"}}] + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice={"type": "function", "name": "get_weather"}, + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Named function tags present + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + # No builtin tags + assert not any("to=browser" in b for b in tag_begins) + assert not any("to=python" in b for b in tag_begins) + assert not any("to=container" in b for b in tag_begins) + + def test_tool_choice_none_excludes_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """tool_choice='none' must suppress builtin tool tags even when + a tool_server with builtins is present.""" + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice="none", + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only the base analysis tag — no builtin channels + assert tag_begins == ["<|channel|>analysis<|message|>"] + + @pytest.mark.parametrize("tool_choice", ["auto", "required", None]) + def test_tool_choice_auto_required_include_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools, tool_choice + ): + """tool_choice='auto'/'required'/None should include builtin tool + tags when a tool_server has builtins.""" + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice=tool_choice, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>commentary to=python" in tag_begins 
+ assert "<|channel|>commentary to=container" in tag_begins diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 6297cb6cbc33..7f5729977392 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1178,7 +1178,10 @@ def _extract_system_message_from_request( return system_msg def _construct_harmony_system_input_message( - self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str] + self, + request: ResponsesRequest, + tools_visible: bool, + tool_types: set[str], ) -> OpenAIHarmonyMessage: model_identity = self._extract_system_message_from_request(request) @@ -1189,11 +1192,14 @@ def _construct_harmony_system_input_message( # Get filtered tool descriptions first. # If get_tool_description returns None (due to filtering), the tool is disabled. + # When tools_visible is False (e.g. tool_choice="none"), suppress all + # builtin tool descriptions so the model doesn't see them. 
browser_description = ( self.tool_server.get_tool_description( "browser", allowed_tools_map.get("web_search_preview") ) - if "web_search_preview" in tool_types + if tools_visible + and "web_search_preview" in tool_types and self.tool_server is not None and self.tool_server.has_tool("browser") else None @@ -1202,7 +1208,8 @@ def _construct_harmony_system_input_message( self.tool_server.get_tool_description( "python", allowed_tools_map.get("code_interpreter") ) - if "code_interpreter" in tool_types + if tools_visible + and "code_interpreter" in tool_types and self.tool_server is not None and self.tool_server.has_tool("python") else None @@ -1211,7 +1218,8 @@ def _construct_harmony_system_input_message( self.tool_server.get_tool_description( "container", allowed_tools_map.get("container") ) - if "container" in tool_types + if tools_visible + and "container" in tool_types and self.tool_server is not None and self.tool_server.has_tool("container") else None @@ -1224,7 +1232,7 @@ def _construct_harmony_system_input_message( python_description=python_description, container_description=container_description, instructions=request.instructions, - with_custom_tools=with_custom_tools, + with_custom_tools=tools_visible, ) return sys_msg @@ -1255,7 +1263,11 @@ def _construct_input_messages_with_harmony( # Per Harmony cookbook: developer message holds instructions, # function tools, AND response format schemas. 
response_format_schema = _extract_response_format_schema(request) - needs_dev_msg = tools_visible or response_format_schema is not None + needs_dev_msg = ( + tools_visible + or response_format_schema is not None + or request.instructions is not None + ) if needs_dev_msg: dev_instructions = request.instructions diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 9b117501b453..2d72930441bc 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -205,8 +205,15 @@ def prepare_structured_tag( base_tag: dict[str, Any] = copy.deepcopy(no_func_reasoning_tag) - # Add builtin tool tags (unless tool_choice is "none") - if tool_choice != "none" and tool_server is not None: + # Add builtin tool tags unless tool_choice is "none" or a named + # function dict — named forcing should only allow the specific + # function, not builtin channels that could satisfy at_least_one. + is_named_function_choice = isinstance(tool_choice, dict) + if ( + tool_choice != "none" + and not is_named_function_choice + and tool_server is not None + ): builtin_tool_list: list[str] = [] if tool_server.has_tool("browser"): builtin_tool_list.append("browser") From 0a5c9f9b534c99bf1d846c26f1bfebe8c595e3fa Mon Sep 17 00:00:00 2001 From: Will Deines Date: Thu, 19 Mar 2026 14:03:21 -0400 Subject: [PATCH 07/10] fix(responses): detect tool_choice violation when grammar enforcement fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tool_choice="required" or a named tool is specified, the grammar layer (at_least_one + final channel removal) is the primary enforcement. But if the parser discards all tokens or the model produces text instead of a tool call, the response silently returns status="completed" with no function call — violating the contract. 
Add _check_tool_choice_violation() as a post-generation safety net that detects when no ResponseFunctionToolCall is present in output and forces status="incomplete" with a diagnostic warning log. Signed-off-by: Will Deines --- vllm/entrypoints/openai/responses/serving.py | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 7f5729977392..f9e306777c8f 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -928,6 +928,7 @@ async def responses_full_generator( num_tool_output_tokens = 0 assert isinstance(context, (SimpleContext, HarmonyContext, ParsableContext)) + status = self._check_tool_choice_violation(request, output, status, context) num_prompt_tokens = context.num_prompt_tokens num_generated_tokens = context.num_output_tokens num_cached_tokens = context.num_cached_tokens @@ -1139,6 +1140,37 @@ def _make_response_output_items( ) ] + def _check_tool_choice_violation( + self, + request: ResponsesRequest, + output: list[ResponseOutputItem], + status: ResponseStatus, + context: ConversationContext, + ) -> ResponseStatus: + """Detect when tool_choice requires a function call but none was + produced. Returns ``"incomplete"`` if the constraint is violated, + otherwise returns *status* unchanged.""" + if request.tool_choice != "required" and not isinstance( + request.tool_choice, dict + ): + return status + has_function_call = any( + isinstance(item, ResponseFunctionToolCall) for item in output + ) + if not has_function_call: + logger.warning( + "tool_choice=%r but no function tool call in output " + "(output_items=%d, status=%s, finish_reason=%s, " + "output_tokens=%d). 
Grammar enforcement may have failed.", + request.tool_choice, + len(output), + status, + getattr(context, "finish_reason", None), + context.num_output_tokens, + ) + return "incomplete" + return status + def _make_response_output_items_with_harmony( self, context: HarmonyContext, From 19e4867308328f99bac0b6f9c3b252c8badc4443 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Thu, 19 Mar 2026 23:13:42 -0400 Subject: [PATCH 08/10] fix: resolve mypy error for num_output_tokens on ConversationContext ABC Signed-off-by: Will Deines --- vllm/entrypoints/openai/responses/serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 4a15b65350b0..3c061ed60899 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -1159,7 +1159,7 @@ def _check_tool_choice_violation( len(output), status, getattr(context, "finish_reason", None), - context.num_output_tokens, + getattr(context, "num_output_tokens", -1), ) return "incomplete" return status From 16b2394f65c589d98ad9134b71bf29d02fe83ed4 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Fri, 20 Mar 2026 13:20:32 -0400 Subject: [PATCH 09/10] feat: add structural tag support to Chat Completions for Harmony models Inject structural tags into sampling_params for the Chat Completions path, mirroring the existing Responses API implementation. Without this, tool_choice="auto" fails for Harmony models because the model stops after analysis reasoning and never emits the actual tool call. Also suppress tools in the prompt when tool_choice="none" for Harmony models, matching the Responses API behavior. 
Signed-off-by: Will Deines --- .../openai/chat_completion/serving.py | 87 ++++++++++++++++++- vllm/entrypoints/serve/render/serving.py | 6 +- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 62a0192e7b7a..edd3f6d90362 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -14,6 +14,7 @@ from fastapi import Request from partial_json_parser.core.options import Allow +from vllm.config.utils import replace from vllm.engine.protocol import EngineClient from vllm.entrypoints.chat_utils import ( ChatTemplateContentFormatOption, @@ -61,6 +62,7 @@ get_streamable_parser_for_assistant, parse_chat_output, ) +from vllm.entrypoints.openai.responses.serving import _constraint_to_content_format from vllm.entrypoints.openai.utils import maybe_filter_parallel_tool_calls from vllm.entrypoints.utils import get_max_tokens, should_include_usage from vllm.inputs.data import ProcessorInputs @@ -70,7 +72,11 @@ from vllm.parser import ParserManager from vllm.reasoning import ReasoningParser from vllm.renderers import ChatParams -from vllm.sampling_params import BeamSearchParams, SamplingParams +from vllm.sampling_params import ( + BeamSearchParams, + SamplingParams, + StructuredOutputsParams, +) from vllm.tokenizers import TokenizerLike from vllm.tool_parsers import ToolParser from vllm.tool_parsers.mistral_tool_parser import MistralToolCall @@ -227,6 +233,33 @@ async def create_chat_completion( tokenizer, chat_template_kwargs=chat_template_kwargs, # type: ignore[call-arg] ) + + # Pre-compute function tools and tool_choice for structural tags + function_tools_for_parser: list[dict] | None = None + tool_choice_for_parser: str | dict | None = None + if self.use_harmony and reasoning_parser is not None: + if request.tools: + ft = [ + { + "name": t.function.name, + **( + {"parameters": 
t.function.parameters} + if t.function.parameters + else {} + ), + } + for t in request.tools + ] + if ft: + function_tools_for_parser = ft + + # Convert ChatCompletionNamedToolChoiceParam to dict format + tc = request.tool_choice + if isinstance(tc, ChatCompletionNamedToolChoiceParam): + tool_choice_for_parser = {"name": tc.function.name} + else: + tool_choice_for_parser = tc + result = await self.render_chat_request(request) if isinstance(result, ErrorResponse): return result @@ -281,6 +314,58 @@ async def create_chat_completion( self.default_sampling_params, ) + # Inject structural tags for Harmony models + if ( + self.use_harmony + and reasoning_parser is not None + and isinstance(sampling_params, SamplingParams) + ): + struct_out = sampling_params.structured_outputs + if isinstance(struct_out, StructuredOutputsParams): + if struct_out.all_non_structural_tag_constraints_none(): + sampling_params.structured_outputs = replace( + struct_out, + structural_tag=( + reasoning_parser.prepare_structured_tag( + struct_out.structural_tag, + None, # tool_server + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + ), + ) + else: + content_fmt = _constraint_to_content_format(struct_out) + if content_fmt is not None: + structural_tag = reasoning_parser.prepare_structured_tag( + None, + None, # tool_server + final_content_format=content_fmt, + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + if structural_tag is not None: + sampling_params.structured_outputs = replace( + struct_out, + json=None, + regex=None, + choice=None, + grammar=None, + json_object=None, + structural_tag=structural_tag, + ) + elif struct_out is None: + tag = reasoning_parser.prepare_structured_tag( + None, + None, # tool_server + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + if tag is not None: + sampling_params.structured_outputs = StructuredOutputsParams( + structural_tag=tag # type: 
ignore[call-arg] + ) + self._log_inputs( sub_request_id, engine_prompt, diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py index d1c5acad8c72..26f437e6485c 100644 --- a/vllm/entrypoints/serve/render/serving.py +++ b/vllm/entrypoints/serve/render/serving.py @@ -245,8 +245,10 @@ async def render_chat( tool_parser=tool_parser, ) else: - # For GPT-OSS. - should_include_tools = tool_dicts is not None + # For GPT-OSS: always suppress tools when tool_choice="none" + should_include_tools = ( + tool_dicts is not None and request.tool_choice != "none" + ) conversation, engine_prompts = self._make_request_with_harmony( request, should_include_tools ) From 1b63a00f39cd0942be584bb9069c9e2d94611091 Mon Sep 17 00:00:00 2001 From: Will Deines Date: Fri, 20 Mar 2026 13:38:12 -0400 Subject: [PATCH 10/10] fix(test): update test_function_calling_required for tool_choice support tool_choice="required" now works (added in feat/responses-tool-choice-required), so the test should expect a successful function call instead of InternalServerError. 
Signed-off-by: Will Deines --- .../openai/responses/test_harmony.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index 74f3360df45f..b6f1ab71f4be 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -13,7 +13,7 @@ import pytest import pytest_asyncio import requests -from openai import InternalServerError, NotFoundError, OpenAI +from openai import NotFoundError, OpenAI from openai_harmony import Message from tests.utils import RemoteOpenAIServer @@ -697,15 +697,22 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_function_calling_required(client: OpenAI, model_name: str): + """tool_choice='required' must force at least one function call.""" tools = [GET_WEATHER_SCHEMA] - with pytest.raises(InternalServerError): - await client.responses.create( - model=model_name, - input="What's the weather like in Paris today?", - tools=tools, - tool_choice="required", - ) + response = await retry_for_tool_call( + client, + model=model_name, + expected_tool_type="function_call", + input="What's the weather like in Paris today?", + tools=tools, + tool_choice="required", + ) + tool_calls = [item for item in response.output if item.type == "function_call"] + assert tool_calls, ( + f"tool_choice='required' should force a function call, " + f"got: {[item.type for item in response.output]}" + ) @pytest.mark.asyncio