diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py index 21b53dff1507..f82eb1fdf47b 100644 --- a/tests/entrypoints/openai/parser/test_harmony_utils.py +++ b/tests/entrypoints/openai/parser/test_harmony_utils.py @@ -10,6 +10,7 @@ get_encoding, get_system_message, has_custom_tools, + inject_response_formats, parse_chat_input_to_harmony_message, parse_chat_output, ) @@ -928,3 +929,32 @@ def test_reasoning_with_empty_content_returns_none(self): msg = response_input_to_harmony(item, prev_responses=[]) assert msg is None + + +class TestInjectResponseFormats: + def test_appends_to_existing_instructions(self): + result = inject_response_formats("You are helpful.", {"type": "object"}) + assert result.startswith("You are helpful.") + assert "# Response Formats" in result + assert '{"type":"object"}' in result + + def test_none_instructions_creates_section(self): + result = inject_response_formats(None, {"type": "object"}) + assert result.startswith("# Response Formats") + assert '{"type":"object"}' in result + + def test_custom_format_name(self): + result = inject_response_formats(None, {"type": "object"}, format_name="order") + assert "## order" in result + + def test_compact_json_no_spaces(self): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + result = inject_response_formats(None, schema) + assert '{"type":"object","properties":{"name":{"type":"string"}}}' in result + + def test_section_separated_by_blank_lines(self): + result = inject_response_formats("Instructions here.", {"type": "object"}) + assert "\n\n# Response Formats\n\n## structured_output\n\n" in result diff --git a/tests/entrypoints/openai/responses/test_harmony.py b/tests/entrypoints/openai/responses/test_harmony.py index 74f3360df45f..b6f1ab71f4be 100644 --- a/tests/entrypoints/openai/responses/test_harmony.py +++ b/tests/entrypoints/openai/responses/test_harmony.py @@ -13,7 +13,7 @@ import pytest import pytest_asyncio import requests -from openai import InternalServerError, NotFoundError, OpenAI +from openai import NotFoundError, OpenAI from openai_harmony import Message from tests.utils import RemoteOpenAIServer @@ -697,15 +697,22 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_function_calling_required(client: OpenAI, model_name: str): + """tool_choice='required' must force at least one function call.""" tools = [GET_WEATHER_SCHEMA] - with pytest.raises(InternalServerError): - await client.responses.create( - model=model_name, - input="What's the weather like in Paris today?", - tools=tools, - tool_choice="required", - ) + response = await retry_for_tool_call( + client, + model=model_name, + expected_tool_type="function_call", + input="What's the weather like in Paris today?", + tools=tools, + tool_choice="required", + ) + tool_calls = [item for item in response.output if item.type == "function_call"] + assert tool_calls, ( + f"tool_choice='required' should force a function call, " + f"got: {[item.type for item in response.output]}" + ) @pytest.mark.asyncio diff --git a/tests/entrypoints/openai/responses/test_response_formats.py b/tests/entrypoints/openai/responses/test_response_formats.py new file mode 100644 index 000000000000..61681dfbf8ec --- /dev/null +++ b/tests/entrypoints/openai/responses/test_response_formats.py @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +"""Tests for response format schema extraction and developer message injection. + +These tests verify that structured output schemas are correctly extracted from +ResponsesRequest and injected into the Harmony developer message per the +Harmony cookbook specification. +""" + +from openai.types.responses.response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig, +) + +from vllm.entrypoints.openai.responses.protocol import ( + ResponsesRequest, + ResponseTextConfig, +) +from vllm.entrypoints.openai.responses.serving import ( + _extract_response_format_schema, +) +from vllm.sampling_params import StructuredOutputsParams + + +def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig: + text_config = ResponseTextConfig() + text_config.format = ResponseFormatTextJSONSchemaConfig( + type="json_schema", + name="test_schema", + schema=schema, + ) + return text_config + + +class TestExtractResponseFormatSchema: + def test_extracts_from_text_format_json_schema(self): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + text=_make_json_schema_text_config(schema), + ) + result = _extract_response_format_schema(request) + assert result == schema + + def test_extracts_from_structured_outputs_json(self): + schema = { + "type": "object", + "properties": {"id": {"type": "integer"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + structured_outputs=StructuredOutputsParams(json=schema), + ) + result = _extract_response_format_schema(request) + assert result == schema + + def test_returns_none_for_text_format(self): + request = ResponsesRequest( + model="test-model", + input="test", + text=ResponseTextConfig(format={"type": "text"}), + ) + result = _extract_response_format_schema(request) + assert result is None + + def test_returns_none_for_no_format(self): + request = ResponsesRequest( + model="test-model", + input="test", + ) + result = _extract_response_format_schema(request) + assert result is None + + def test_text_format_takes_precedence(self): + """text.format.json_schema is checked before structured_outputs.""" + text_schema = { + "type": "object", + "properties": {"a": {"type": "string"}}, + } + so_schema = { + "type": "object", + "properties": {"b": {"type": "string"}}, + } + request = ResponsesRequest( + model="test-model", + input="test", + text=_make_json_schema_text_config(text_schema), + structured_outputs=StructuredOutputsParams(json=so_schema), + ) + result = _extract_response_format_schema(request) + assert result == text_schema diff --git a/tests/entrypoints/openai/responses/test_sampling_params.py b/tests/entrypoints/openai/responses/test_sampling_params.py index 87910271dd75..7509489ca3c4 100644 --- a/tests/entrypoints/openai/responses/test_sampling_params.py +++ b/tests/entrypoints/openai/responses/test_sampling_params.py @@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self): assert sampling_params.structured_outputs is not None assert sampling_params.structured_outputs.grammar == "root ::= 'hello'" + def test_json_object_format_produces_structured_outputs(self): + """Test that text.format.type=json_object creates StructuredOutputsParams.""" + from openai.types.shared.response_format_json_object import ( + ResponseFormatJSONObject, + ) + + text_config = ResponseTextConfig() + text_config.format = ResponseFormatJSONObject(type="json_object") + request = ResponsesRequest( + model="test-model", + input="test input", + text=text_config, + ) + + sampling_params = request.to_sampling_params(default_max_tokens=1000) + + assert sampling_params.structured_outputs is not None + assert sampling_params.structured_outputs.json_object is True + def test_structured_outputs_and_json_schema_conflict(self): """Test that specifying both structured_outputs and json_schema raises.""" structured_outputs = StructuredOutputsParams(grammar="root ::= 'hello'") diff --git a/tests/entrypoints/openai/responses/test_structured_output.py b/tests/entrypoints/openai/responses/test_structured_output.py index db8b87768e44..1f155e15456e 100644 --- a/tests/entrypoints/openai/responses/test_structured_output.py +++ b/tests/entrypoints/openai/responses/test_structured_output.py @@ -6,6 +6,11 @@ import pytest from pydantic import BaseModel +from vllm.entrypoints.openai.responses.serving import ( + _constraint_to_content_format, +) +from vllm.sampling_params import StructuredOutputsParams + @pytest.mark.asyncio async def test_structured_output(client: openai.AsyncOpenAI): @@ -76,3 +81,67 @@ class CalendarEvent(BaseModel): assert len(participants) == 2 assert participants[0] == "Alice" assert participants[1] == "Bob" + + +class TestConstraintToContentFormat: + """Test _constraint_to_content_format helper.""" + + def test_json_schema_string_is_parsed(self): + """JSON schema passed as a string gets json.loads'd into a dict.""" + schema = {"type": "object", "properties": {"age": {"type": "integer"}}} + params = StructuredOutputsParams(json=json.dumps(schema)) + result = _constraint_to_content_format(params) + + assert result == {"type": "json_schema", "json_schema": schema} + + def test_json_schema_dict(self): + """JSON schema passed as a dict is used directly.""" + schema = {"type": "object", "properties": {"age": {"type": "integer"}}} + params = StructuredOutputsParams(json=schema) + result = _constraint_to_content_format(params) + + assert result == {"type": "json_schema", "json_schema": schema} + + def test_json_object(self): + """json_object maps to minimal JSON schema.""" + params = StructuredOutputsParams(json_object=True) + result = _constraint_to_content_format(params) + + assert result == { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + + def test_regex(self): + """Regex constraint is converted correctly.""" + params = StructuredOutputsParams(regex=r"\d+") + result = _constraint_to_content_format(params) + + assert result == {"type": "regex", "pattern": r"\d+"} + + def test_grammar(self): + """Grammar constraint is converted correctly.""" + params = StructuredOutputsParams(grammar="root ::= 'hello'") + result = _constraint_to_content_format(params) + + assert result == {"type": "grammar", "grammar": "root ::= 'hello'"} + + def test_choice(self): + """Choice constraint is converted correctly.""" + params = StructuredOutputsParams(choice=["yes", "no"]) + result = _constraint_to_content_format(params) + + assert result == { + "type": "or", + "elements": [ + {"type": "const_string", "value": "yes"}, + {"type": "const_string", "value": "no"}, + ], + } + + def test_structural_tag_only_returns_none(self): + """structural_tag is not a content constraint -- should return None.""" + params = StructuredOutputsParams(structural_tag='{"type": "structural_tag"}') + result = _constraint_to_content_format(params) + + assert result is None diff --git a/tests/entrypoints/openai/responses/test_tool_choice_harmony.py b/tests/entrypoints/openai/responses/test_tool_choice_harmony.py new file mode 100644 index 000000000000..f873aeb5db24 --- /dev/null +++ b/tests/entrypoints/openai/responses/test_tool_choice_harmony.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for tool_choice handling in the Harmony-based Responses API. + +These tests verify that: +- Developer instructions are preserved when tool_choice="none" (Bug 1) +- Builtin tool descriptions are suppressed when tool_choice="none" (Bug 2) +""" + +from __future__ import annotations + +from unittest.mock import Mock + +from openai_harmony import Role, ToolNamespaceConfig + +from vllm.entrypoints.openai.parser.harmony_utils import ( + get_developer_message, + get_system_message, +) + + +class TestToolChoiceNoneInstructions: + """Bug 1: Developer instructions must not be dropped when + tool_choice='none' causes tools to be hidden.""" + + def test_developer_message_with_instructions_no_tools(self): + """get_developer_message must include instructions even when + tools=None (the condition that arises from tool_choice='none' + with no custom tools).""" + dev_msg = get_developer_message( + instructions="Be helpful and concise", tools=None + ) + assert dev_msg.author.role == Role.DEVELOPER + rendered = str(dev_msg) + assert "Be helpful and concise" in rendered + + def test_developer_message_with_instructions_and_tools(self): + """Baseline: instructions + tools both appear in the developer + message when tools are visible.""" + tool = Mock() + tool.type = "function" + tool.name = "get_weather" + tool.description = "Get weather" + tool.parameters = {"type": "object", "properties": {}} + + dev_msg = get_developer_message(instructions="Be helpful", tools=[tool]) + rendered = str(dev_msg) + assert "Be helpful" in rendered + assert "get_weather" in rendered + + def test_developer_message_no_instructions_no_tools(self): + """When neither instructions nor tools are provided, the + developer message is still valid (just empty content).""" + dev_msg = get_developer_message(instructions=None, tools=None) + assert dev_msg.author.role == Role.DEVELOPER + + +class TestToolChoiceNoneSystemMessage: + """Bug 2: Builtin tool descriptions in the system message must be + suppressed when tool_choice='none'.""" + + def test_system_message_no_tool_descriptions(self): + """When all tool descriptions are None (as happens when + tools_visible=False), the system message must not contain + tool descriptions.""" + sys_msg = get_system_message( + browser_description=None, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + # tools should be None or empty when no descriptions are provided + assert not sys_msg.content[0].tools + + def test_system_message_with_browser_description(self): + """Baseline: when a ToolNamespaceConfig is provided, it appears + in the system message tools.""" + browser_ns = ToolNamespaceConfig.browser() + sys_msg = get_system_message( + browser_description=browser_ns, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + assert "browser" in sys_msg.content[0].tools + + def test_system_message_with_python_description(self): + """Python tool description appears in system message when provided.""" + python_ns = ToolNamespaceConfig.python() + sys_msg = get_system_message( + browser_description=None, + python_description=python_ns, + container_description=None, + with_custom_tools=False, + ) + assert sys_msg.author.role == Role.SYSTEM + assert "python" in sys_msg.content[0].tools + + def test_none_descriptions_mean_no_tools(self): + """Passing None for all tool descriptions (as happens when + tools_visible=False) must result in no tools in the system msg.""" + sys_msg = get_system_message( + browser_description=None, + python_description=None, + container_description=None, + with_custom_tools=False, + ) + assert not sys_msg.content[0].tools diff --git a/tests/reasoning/test_gptoss_reasoning_parser.py b/tests/reasoning/test_gptoss_reasoning_parser.py index 3b1327acb688..3b3671b71795 100644 --- a/tests/reasoning/test_gptoss_reasoning_parser.py +++ b/tests/reasoning/test_gptoss_reasoning_parser.py @@ -12,7 +12,9 @@ from vllm.reasoning.gptoss_reasoning_parser import ( GptOssReasoningParser, from_builtin_tool_to_tag, + from_function_tool_to_tag, no_func_reasoning_tag, + tag_with_builtin_funcs, ) REASONING_MODEL_NAME = "openai/gpt-oss-120b" @@ -280,3 +282,443 @@ def test_tag_format_consistency(self, reasoning_parser): assert tag["content"]["type"] == "any_text" assert tag["end"] == "<|end|>" assert tag["begin"].startswith("<|channel|>") + + # --- Fixtures for tool_choice / function_tools tests --- + + @pytest.fixture + def mock_tool_server_empty(self): + """Create a mock ToolServer with no tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(return_value=False) + return tool_server + + @pytest.fixture + def mock_tool_server_with_browser(self): + """Create a mock ToolServer with browser tool.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(side_effect=lambda tool: tool == "browser") + return tool_server + + @pytest.fixture + def mock_tool_server_with_all_tools(self): + """Create a mock ToolServer with all builtin tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock( + side_effect=lambda tool: tool in ["browser", "python", "container"] + ) + return tool_server + + # --- Tests from structured output PR --- + + def test_prepare_structured_tag_with_all_tools( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Test prepare_structured_tag with all builtin tools.""" + result = reasoning_parser.prepare_structured_tag( + None, mock_tool_server_with_all_tools + ) + parsed = json.loads(result) + + # Should have analysis tag + tags for all 3 tools (2 tags each) + assert len(parsed["format"]["tags"]) == 7 # 1 analysis + 6 tool tags + + # Check all tool tags are present + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + for tool in ["browser", "python", "container"]: + assert f"<|channel|>commentary to={tool}" in tag_begins + assert f"<|channel|>analysis to={tool}" in tag_begins + + def test_tag_with_builtin_funcs(self): + """Test tag_with_builtin_funcs function.""" + builtin_tools = ["browser", "python"] + result = tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tools) + + assert result["type"] == "structural_tag" + # Should have original analysis tag + 2 tags per tool + assert len(result["format"]["tags"]) == 5 # 1 + 2*2 + + # Should have added commentary trigger + assert "<|channel|>commentary to=" in result["format"]["triggers"] + assert "<|channel|>analysis" in result["format"]["triggers"] + + def test_tag_structure_invariants(self): + """Test that the basic tag structure follows expected format.""" + assert no_func_reasoning_tag["type"] == "structural_tag" + assert no_func_reasoning_tag["format"]["type"] == "triggered_tags" + assert no_func_reasoning_tag["format"]["stop_after_first"] is False + + # Verify analysis tag structure + analysis_tag = no_func_reasoning_tag["format"]["tags"][0] + assert analysis_tag["begin"] == "<|channel|>analysis<|message|>" + assert analysis_tag["content"]["type"] == "any_text" + assert analysis_tag["end"] == "<|end|>" + + def test_json_serialization_valid( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Test that all generated tags produce valid JSON.""" + # Test with no tool server + result1 = reasoning_parser.prepare_structured_tag(None, None) + json.loads(result1) # Should not raise + + # Test with empty tool server + empty_server = Mock(spec=ToolServer) + empty_server.has_tool = Mock(return_value=False) + result2 = reasoning_parser.prepare_structured_tag(None, empty_server) + json.loads(result2) # Should not raise + + # Test with tools + result3 = reasoning_parser.prepare_structured_tag( + None, mock_tool_server_with_all_tools + ) + json.loads(result3) # Should not raise + + @pytest.mark.parametrize("tool_name", ["browser", "python", "container"]) + def test_single_tool_integration(self, reasoning_parser, tool_name): + """Test integration with individual tools.""" + tool_server = Mock(spec=ToolServer) + tool_server.has_tool = Mock(side_effect=lambda tool: tool == tool_name) + + result = reasoning_parser.prepare_structured_tag(None, tool_server) + parsed = json.loads(result) + + # Should have 1 analysis + 2 tool-specific tags + assert len(parsed["format"]["tags"]) == 3 + + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + assert f"<|channel|>commentary to={tool_name}" in tag_begins + assert f"<|channel|>analysis to={tool_name}" in tag_begins + + # --- final_content_format tests --- + + def test_prepare_structured_tag_with_json_schema(self, reasoning_parser): + """Test that final channel tag has json_schema content constraint.""" + content_format = { + "type": "json_schema", + "json_schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + } + result = reasoning_parser.prepare_structured_tag( + None, None, final_content_format=content_format + ) + parsed = json.loads(result) + + # Should have analysis tag + final channel tag + assert len(parsed["format"]["tags"]) == 2 + + # Verify analysis tag is unchanged + assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>" + assert parsed["format"]["tags"][0]["content"]["type"] == "any_text" + + # Verify final channel tag has the json_schema content constraint + final_tag = parsed["format"]["tags"][1] + assert final_tag["begin"] == "<|channel|>final<|message|>" + assert final_tag["end"] == "<|end|>" + assert final_tag["content"] == content_format + + # Verify triggers include both analysis and final + assert "<|channel|>analysis" in parsed["format"]["triggers"] + assert "<|channel|>final" in parsed["format"]["triggers"] + + def test_prepare_structured_tag_original_tag_ignores_constraint( + self, reasoning_parser + ): + """When original_tag is provided, final_content_format is ignored.""" + original_tag = '{"custom": "tag"}' + content_format = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + result = reasoning_parser.prepare_structured_tag( + original_tag, None, final_content_format=content_format + ) + + # Should return the original tag unchanged + assert result == original_tag + + def test_prepare_structured_tag_with_tools_and_constraint( + self, reasoning_parser, mock_tool_server_with_browser + ): + """Test that tools and content constraint coexist in the tag.""" + content_format = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_browser, + final_content_format=content_format, + ) + parsed = json.loads(result) + + # Should have analysis + 2 browser tags + final channel tag = 4 + assert len(parsed["format"]["tags"]) == 4 + + tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]] + assert "<|channel|>analysis<|message|>" in tag_begins + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>analysis to=browser" in tag_begins + assert "<|channel|>final<|message|>" in tag_begins + + # Verify final tag has the constraint + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + assert final_tag["content"] == content_format + + # --- Function tool and tool_choice tests --- + + def test_function_tool_tags_on_both_channels(self): + """Verify from_function_tool_to_tag creates commentary + analysis.""" + tags = from_function_tool_to_tag("get_weather", None) + + assert len(tags) == 2 + assert ( + tags[0]["begin"] + == "<|channel|>commentary to=functions.get_weather<|message|>" + ) + assert ( + tags[1]["begin"] + == "<|channel|>analysis to=functions.get_weather<|message|>" + ) + assert tags[0]["end"] == "<|end|>" + assert tags[1]["end"] == "<|end|>" + # No parameters -> any_text + assert tags[0]["content"] == {"type": "any_text"} + assert tags[1]["content"] == {"type": "any_text"} + + def test_function_tool_json_schema_content(self): + """Verify JSON schema from tool parameters is used as content.""" + schema = { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + } + tags = from_function_tool_to_tag("get_weather", schema) + + expected_content = {"type": "json_schema", "json_schema": schema} + assert tags[0]["content"] == expected_content + assert tags[1]["content"] == expected_content + + def test_tool_choice_required_blocks_final(self, reasoning_parser): + """No final trigger/tag when tool_choice=required (no tools).""" + result = reasoning_parser.prepare_structured_tag( + None, None, tool_choice="required" + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert not any("final" in b for b in tag_begins) + assert "<|channel|>final" not in parsed["format"]["triggers"] + + def test_tool_choice_required_with_function_tools(self, reasoning_parser): + """Tool tags present but no final when tool_choice=required.""" + fn_tools = [ + {"name": "get_weather", "parameters": {"type": "object"}}, + ] + result = reasoning_parser.prepare_structured_tag( + None, None, tool_choice="required", function_tools=fn_tools + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Function tool tags present + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins + # No final + assert not any("final" in b for b in tag_begins) + assert "<|channel|>final" not in parsed["format"]["triggers"] + + def test_tool_choice_required_ignores_final_content_format(self, reasoning_parser): + """Final is blocked even when final_content_format is provided.""" + content_fmt = { + "type": "json_schema", + "json_schema": {"type": "object"}, + } + fn_tools = [{"name": "my_func"}] + result = reasoning_parser.prepare_structured_tag( + None, + None, + final_content_format=content_fmt, + tool_choice="required", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert not any("final" in b for b in tag_begins) + + def test_tool_choice_auto_with_tools_and_content_format(self, reasoning_parser): + """Tool tags + final with content constraint for auto.""" + schema = {"type": "object", "properties": {"x": {"type": "integer"}}} + content_fmt = {"type": "json_schema", "json_schema": schema} + fn_tools = [{"name": "compute", "parameters": schema}] + + result = reasoning_parser.prepare_structured_tag( + None, + None, + final_content_format=content_fmt, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Function tool tags + assert "<|channel|>commentary to=functions.compute<|message|>" in tag_begins + # Final tag with content constraint + assert "<|channel|>final<|message|>" in tag_begins + assert "<|channel|>final" in parsed["format"]["triggers"] + + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + assert final_tag["content"] == content_fmt + + def test_tool_choice_auto_with_tools_final_is_any_text(self, reasoning_parser): + """auto + function tools but no content format -> final allows free text.""" + fn_tools = [{"name": "get_weather", "parameters": {"type": "object"}}] + result = reasoning_parser.prepare_structured_tag( + None, + None, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + final_tag = next( + t + for t in parsed["format"]["tags"] + if t["begin"] == "<|channel|>final<|message|>" + ) + # No content format -> model can respond with any text + assert final_tag["content"] == {"type": "any_text"} + + def test_tool_choice_none_strips_tool_tags( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """No tool tags with tool_choice=none, analysis only.""" + fn_tools = [{"name": "get_weather"}] + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice="none", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only analysis tag, no tool tags + assert tag_begins == ["<|channel|>analysis<|message|>"] + assert parsed["format"]["triggers"] == ["<|channel|>analysis"] + + def test_mixed_builtin_and_function_tools( + self, reasoning_parser, mock_tool_server_with_browser + ): + """Both builtin and function tool tags coexist.""" + fn_tools = [{"name": "get_weather"}] + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_browser, + tool_choice="auto", + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Builtin tool tags + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>analysis to=browser" in tag_begins + # Function tool tags + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins + # Final tag (auto + function tools) + assert "<|channel|>final<|message|>" in tag_begins + # General commentary trigger covers both builtin and function + assert "<|channel|>commentary to=" in parsed["format"]["triggers"] + + def test_named_tool_choice(self, reasoning_parser): + """Only the named tool's tags present, final blocked.""" + fn_tools = [ + {"name": "get_weather", "parameters": {"type": "object"}}, + {"name": "get_stock", "parameters": {"type": "object"}}, + ] + result = reasoning_parser.prepare_structured_tag( + None, + None, + tool_choice={"type": "function", "name": "get_weather"}, + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only get_weather tags, not get_stock + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + assert "<|channel|>analysis to=functions.get_weather<|message|>" in tag_begins + assert not any("get_stock" in b for b in tag_begins) + # No final (named tool choice blocks final) + assert not any("final" in b for b in tag_begins) + + def test_named_tool_choice_excludes_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """Named function tool_choice must exclude builtin tool tags. + + With at_least_one=True, builtin channels (browser/python/container) + could satisfy the grammar constraint instead of the named function.""" + fn_tools = [{"name": "get_weather", "parameters": {"type": "object"}}] + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice={"type": "function", "name": "get_weather"}, + function_tools=fn_tools, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Named function tags present + assert "<|channel|>commentary to=functions.get_weather<|message|>" in tag_begins + # No builtin tags + assert not any("to=browser" in b for b in tag_begins) + assert not any("to=python" in b for b in tag_begins) + assert not any("to=container" in b for b in tag_begins) + + def test_tool_choice_none_excludes_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools + ): + """tool_choice='none' must suppress builtin tool tags even when + a tool_server with builtins is present.""" + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice="none", + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + # Only the base analysis tag — no builtin channels + assert tag_begins == ["<|channel|>analysis<|message|>"] + + @pytest.mark.parametrize("tool_choice", ["auto", "required", None]) + def test_tool_choice_auto_required_include_builtins( + self, reasoning_parser, mock_tool_server_with_all_tools, tool_choice + ): + """tool_choice='auto'/'required'/None should include builtin tool + tags when a tool_server has builtins.""" + result = reasoning_parser.prepare_structured_tag( + None, + mock_tool_server_with_all_tools, + tool_choice=tool_choice, + ) + parsed = json.loads(result) + + tag_begins = [t["begin"] for t in parsed["format"]["tags"]] + assert "<|channel|>commentary to=browser" in tag_begins + assert "<|channel|>commentary to=python" in tag_begins + assert "<|channel|>commentary to=container" in tag_begins diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 62a0192e7b7a..edd3f6d90362 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -14,6 +14,7 @@ from fastapi import Request from partial_json_parser.core.options import Allow +from vllm.config.utils import replace from vllm.engine.protocol import EngineClient from vllm.entrypoints.chat_utils import ( ChatTemplateContentFormatOption, @@ -61,6 +62,7 @@ get_streamable_parser_for_assistant, parse_chat_output, ) +from vllm.entrypoints.openai.responses.serving import _constraint_to_content_format from vllm.entrypoints.openai.utils import maybe_filter_parallel_tool_calls from vllm.entrypoints.utils import get_max_tokens, should_include_usage from vllm.inputs.data import ProcessorInputs @@ -70,7 +72,11 @@ from vllm.parser import ParserManager from vllm.reasoning import ReasoningParser from vllm.renderers import ChatParams -from vllm.sampling_params import BeamSearchParams, SamplingParams +from vllm.sampling_params import ( + BeamSearchParams, + SamplingParams, + StructuredOutputsParams, +) from vllm.tokenizers import TokenizerLike from vllm.tool_parsers import ToolParser from vllm.tool_parsers.mistral_tool_parser import MistralToolCall @@ -227,6 +233,33 @@ async def create_chat_completion( tokenizer, chat_template_kwargs=chat_template_kwargs, # type: ignore[call-arg] ) + + # Pre-compute function tools and tool_choice for structural tags + function_tools_for_parser: list[dict] | None = None + tool_choice_for_parser: str | dict | None = None + if self.use_harmony and reasoning_parser is not None: + if request.tools: + ft = [ + { + "name": t.function.name, + **( + {"parameters": t.function.parameters} + if t.function.parameters + else {} + ), + } + for t in request.tools + ] + if ft: + function_tools_for_parser = ft + + # Convert ChatCompletionNamedToolChoiceParam to dict format + tc = request.tool_choice + if isinstance(tc, ChatCompletionNamedToolChoiceParam): + tool_choice_for_parser = {"name": tc.function.name} + else: + tool_choice_for_parser = tc + result = await self.render_chat_request(request) if isinstance(result, ErrorResponse): return result @@ -281,6 +314,58 @@ async def create_chat_completion( self.default_sampling_params, ) + # Inject structural tags for Harmony models + if ( + self.use_harmony + and reasoning_parser is not None + and isinstance(sampling_params, SamplingParams) + ): + struct_out = sampling_params.structured_outputs + if isinstance(struct_out, StructuredOutputsParams): + if struct_out.all_non_structural_tag_constraints_none(): + sampling_params.structured_outputs = replace( + struct_out, + structural_tag=( + reasoning_parser.prepare_structured_tag( + struct_out.structural_tag, + None, # tool_server + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + ), + ) + else: + content_fmt = _constraint_to_content_format(struct_out) + if content_fmt is not None: + structural_tag = reasoning_parser.prepare_structured_tag( + None, + None, # tool_server + final_content_format=content_fmt, + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + if structural_tag is not None: + sampling_params.structured_outputs = replace( + struct_out, + json=None, + regex=None, + choice=None, + grammar=None, + json_object=None, + structural_tag=structural_tag, + ) + elif struct_out is None: + tag = reasoning_parser.prepare_structured_tag( + None, + None, # tool_server + tool_choice=tool_choice_for_parser, + function_tools=function_tools_for_parser, + ) + if tag is not None: + sampling_params.structured_outputs = StructuredOutputsParams( + structural_tag=tag # type: ignore[call-arg] + ) + self._log_inputs( sub_request_id, engine_prompt, diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index 9b4264456c51..e8202516985e 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import datetime +import json from collections.abc import Iterable, Sequence from typing import Literal @@ -150,6 +151,25 @@ def get_developer_message( return dev_msg +def inject_response_formats( + instructions: str | None, + schema: dict, + format_name: str = "structured_output", +) -> str: + """Append a Harmony cookbook ``# Response Formats`` section. + + Per the cookbook, structured output schemas should appear in the + developer message under a ``# Response Formats`` heading so the + model knows what format to produce. This complements grammar + enforcement via structural tags. + """ + schema_json = json.dumps(schema, separators=(",", ":")) + section = f"\n\n# Response Formats\n\n## {format_name}\n\n{schema_json}" + if instructions: + return instructions + section + return section.lstrip("\n") + + def get_user_message(content: str) -> Message: return Message.from_role_and_content(Role.USER, content) diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py index a5f62bdd8c39..831fb1077243 100644 --- a/vllm/entrypoints/openai/responses/protocol.py +++ b/vllm/entrypoints/openai/responses/protocol.py @@ -346,6 +346,10 @@ def to_sampling_params( # --follow-imports skip hides the class definition but also hides # multiple third party conflicts, so best of both evils ) + elif response_format.type == "json_object": + structured_outputs = StructuredOutputsParams( + json_object=True # type: ignore[call-arg] + ) stop = self.stop if self.stop else [] if isinstance(stop, str): diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 574282c4cdc6..3c061ed60899 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import asyncio +import json as json_mod import time import uuid from collections import deque @@ -66,6 +67,7 @@ get_system_message, get_user_message, has_custom_tools, + inject_response_formats, render_for_completion, ) from vllm.entrypoints.openai.responses.context import ( @@ -126,6 +128,52 @@ logger = init_logger(__name__) +def _extract_response_format_schema(request: ResponsesRequest) -> dict | None: + """Extract JSON schema from the request's structured output config.""" + if ( + request.text is not None + and request.text.format is not None + and request.text.format.type == "json_schema" + and request.text.format.schema_ is not None + ): + return request.text.format.schema_ + if ( + request.structured_outputs is not None + and request.structured_outputs.json is not None + ): + val = request.structured_outputs.json + if isinstance(val, str): + return json_mod.loads(val) + return val + return None + + +def _constraint_to_content_format( + params: StructuredOutputsParams, +) -> dict | None: + """Convert a StructuredOutputsParams constraint into an xgrammar + content format dict suitable for embedding in a structural tag.""" + if params.json is not None: + schema = ( + params.json + if isinstance(params.json, dict) + else json_mod.loads(params.json) + ) + return {"type": "json_schema", "json_schema": schema} + if params.json_object: + return {"type": "json_schema", "json_schema": {"type": "object"}} + if params.regex is not None: + return {"type": "regex", "pattern": params.regex} + if params.grammar is not None: + return {"type": "grammar", "grammar": params.grammar} + if params.choice is not None: + return { + "type": "or", + "elements": [{"type": "const_string", "value": c} for c in params.choice], + } + return None + + def _extract_allowed_tools_from_mcp_requests( tools: list[Tool], ) -> dict[str, list[str] | None]: @@ -463,21 +511,78 @@ async def create_responses( else: context = SimpleContext() + # Extract function tools for the reasoning parser + function_tools_for_parser = None + if request.tools: + ft = [ + { + "name": t.name, + **({"parameters": t.parameters} if t.parameters else {}), + } + for t in request.tools + if getattr(t, "type", None) == "function" + ] + if ft: + function_tools_for_parser = ft + if self.parser and self.parser.reasoning_parser_cls is not None: reasoning_parser = self.parser.reasoning_parser_cls(tokenizer) - if ( - isinstance( - struct_out := sampling_params.structured_outputs, - StructuredOutputsParams, - ) - and struct_out.all_non_structural_tag_constraints_none() - ): - sampling_params.structured_outputs = replace( - struct_out, - structural_tag=reasoning_parser.prepare_structured_tag( - struct_out.structural_tag, self.tool_server - ), + struct_out = sampling_params.structured_outputs + + if isinstance(struct_out, StructuredOutputsParams): + if struct_out.all_non_structural_tag_constraints_none(): + # No content constraint — just apply reasoning + # channel tags + tool_choice + function tools + sampling_params.structured_outputs = replace( + struct_out, + structural_tag=( + reasoning_parser.prepare_structured_tag( + struct_out.structural_tag, + self.tool_server, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, + ) + ), + ) + else: + # Content constraint present (json, regex, + # grammar, choice, json_object). Embed it in the + # final channel tag within the structural tag. + content_fmt = _constraint_to_content_format(struct_out) + if content_fmt is not None: + structural_tag = reasoning_parser.prepare_structured_tag( + None, + self.tool_server, + final_content_format=content_fmt, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, + ) + if structural_tag is not None: + # Clear content constraints, set + # structural_tag, but preserve options + # like disable_any_whitespace. + sampling_params.structured_outputs = replace( + struct_out, + json=None, + regex=None, + choice=None, + grammar=None, + json_object=None, + structural_tag=structural_tag, + ) + elif struct_out is None: + # No structured output requested, but still need + # reasoning channel tags + tool_choice + function tools + tag = reasoning_parser.prepare_structured_tag( + None, + self.tool_server, + tool_choice=request.tool_choice, + function_tools=function_tools_for_parser, ) + if tag is not None: + sampling_params.structured_outputs = StructuredOutputsParams( + structural_tag=tag # type: ignore[call-arg] + ) generator = self._generate_with_builtin_tools( request_id=request.request_id, engine_prompt=engine_prompt, @@ -705,11 +810,6 @@ def _make_request_with_harmony( request: ResponsesRequest, prev_response: ResponsesResponse | None, ): - if request.tool_choice != "auto": - raise NotImplementedError( - "Only 'auto' tool_choice is supported in response API with Harmony" - ) - messages = self._construct_input_messages_with_harmony(request, prev_response) prompt_token_ids = render_for_completion(messages) engine_prompt = token_inputs(prompt_token_ids) @@ -821,6 +921,7 @@ async def responses_full_generator( num_tool_output_tokens = 0 assert isinstance(context, (SimpleContext, HarmonyContext, ParsableContext)) + status = self._check_tool_choice_violation(request, output, status, context) num_prompt_tokens = context.num_prompt_tokens num_generated_tokens = context.num_output_tokens num_cached_tokens = context.num_cached_tokens @@ -1032,6 +1133,37 @@ def _make_response_output_items( ) ] + def _check_tool_choice_violation( + self, + request: ResponsesRequest, + output: list[ResponseOutputItem], + status: ResponseStatus, + context: ConversationContext, + ) -> ResponseStatus: + """Detect when tool_choice requires a function call but none was + produced. Returns ``"incomplete"`` if the constraint is violated, + otherwise returns *status* unchanged.""" + if request.tool_choice != "required" and not isinstance( + request.tool_choice, dict + ): + return status + has_function_call = any( + isinstance(item, ResponseFunctionToolCall) for item in output + ) + if not has_function_call: + logger.warning( + "tool_choice=%r but no function tool call in output " + "(output_items=%d, status=%s, finish_reason=%s, " + "output_tokens=%d). Grammar enforcement may have failed.", + request.tool_choice, + len(output), + status, + getattr(context, "finish_reason", None), + getattr(context, "num_output_tokens", -1), + ) + return "incomplete" + return status + def _make_response_output_items_with_harmony( self, context: HarmonyContext, @@ -1071,7 +1203,10 @@ def _extract_system_message_from_request( return system_msg def _construct_harmony_system_input_message( - self, request: ResponsesRequest, with_custom_tools: bool, tool_types: set[str] + self, + request: ResponsesRequest, + tools_visible: bool, + tool_types: set[str], ) -> OpenAIHarmonyMessage: model_identity = self._extract_system_message_from_request(request) @@ -1082,11 +1217,14 @@ def _construct_harmony_system_input_message( # Get filtered tool descriptions first. # If get_tool_description returns None (due to filtering), the tool is disabled. + # When tools_visible is False (e.g. tool_choice="none"), suppress all + # builtin tool descriptions so the model doesn't see them. browser_description = ( self.tool_server.get_tool_description( "browser", allowed_tools_map.get("web_search_preview") ) - if "web_search_preview" in tool_types + if tools_visible + and "web_search_preview" in tool_types and self.tool_server is not None and self.tool_server.has_tool("browser") else None @@ -1095,7 +1233,8 @@ def _construct_harmony_system_input_message( self.tool_server.get_tool_description( "python", allowed_tools_map.get("code_interpreter") ) - if "code_interpreter" in tool_types + if tools_visible + and "code_interpreter" in tool_types and self.tool_server is not None and self.tool_server.has_tool("python") else None @@ -1104,7 +1243,8 @@ def _construct_harmony_system_input_message( self.tool_server.get_tool_description( "container", allowed_tools_map.get("container") ) - if "container" in tool_types + if tools_visible + and "container" in tool_types and self.tool_server is not None and self.tool_server.has_tool("container") else None @@ -1117,7 +1257,7 @@ def _construct_harmony_system_input_message( python_description=python_description, container_description=container_description, instructions=request.instructions, - with_custom_tools=with_custom_tools, + with_custom_tools=tools_visible, ) return sys_msg @@ -1132,13 +1272,37 @@ def _construct_input_messages_with_harmony( tool_types = extract_tool_types(request.tools) with_custom_tools = has_custom_tools(tool_types) + # When tool_choice=none, suppress tool awareness in the + # prompt so the model doesn't attempt tool calls. The + # structural tag grammar already blocks tool channels, but + # omitting tools from the system/developer messages + # prevents the model from even reasoning about calling them. + tools_visible = with_custom_tools and request.tool_choice != "none" + sys_msg = self._construct_harmony_system_input_message( - request, with_custom_tools, tool_types + request, tools_visible, tool_types ) messages.append(sys_msg) - if with_custom_tools: + + # Determine if we need a developer message. + # Per Harmony cookbook: developer message holds instructions, + # function tools, AND response format schemas. + response_format_schema = _extract_response_format_schema(request) + needs_dev_msg = ( + tools_visible + or response_format_schema is not None + or request.instructions is not None + ) + + if needs_dev_msg: + dev_instructions = request.instructions + if response_format_schema is not None: + dev_instructions = inject_response_formats( + dev_instructions, response_format_schema + ) dev_msg = get_developer_message( - instructions=request.instructions, tools=request.tools + instructions=dev_instructions, + tools=request.tools if tools_visible else None, ) messages.append(dev_msg) messages += construct_harmony_previous_input_messages(request) @@ -1978,7 +2142,7 @@ def _increment_sequence_number_and_return( output=[], status="in_progress", usage=None, - ).model_dump() + ) yield _increment_sequence_number_and_return( ResponseCreatedEvent( type="response.created", diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py index d1c5acad8c72..26f437e6485c 100644 --- a/vllm/entrypoints/serve/render/serving.py +++ b/vllm/entrypoints/serve/render/serving.py @@ -245,8 +245,10 @@ async def render_chat( tool_parser=tool_parser, ) else: - # For GPT-OSS. - should_include_tools = tool_dicts is not None + # For GPT-OSS: always suppress tools when tool_choice="none" + should_include_tools = ( + tool_dicts is not None and request.tool_choice != "none" + ) conversation, engine_prompts = self._make_request_with_harmony( request, should_include_tools ) diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index 5271a307075e..8edc45bbb6d9 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -154,10 +154,24 @@ def prepare_structured_tag( self, original_tag: str | None, tool_server: ToolServer | None, + final_content_format: dict | None = None, + tool_choice: str | dict | None = None, + function_tools: list[dict] | None = None, ) -> str | None: """ - Instance method that is implemented for preparing the structured tag - Otherwise, None is returned + Instance method that is implemented for preparing the structured tag. + Otherwise, None is returned. + + Args: + original_tag: An existing structural tag string, if any. + tool_server: The tool server for builtin tool support. + final_content_format: Optional xgrammar content format dict + (e.g. json_schema, regex) to embed in the <|channel|>final + tag for constraining the model's final output region. + tool_choice: The tool_choice setting from the request + ("auto", "required", "none", or a named tool dict). + function_tools: List of function tool dicts with "name" and + optional "parameters" keys. """ return None @@ -298,7 +312,8 @@ def _decorator(obj: type[ReasoningParser]) -> type[ReasoningParser]: if isinstance(name, str): names = [name] elif is_list_of(name, str): - names = name + assert name is not None + names = list(name) else: names = [class_name] diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 89299d4b12b8..2d72930441bc 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import copy import json from collections.abc import Sequence -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from transformers import PreTrainedTokenizerBase @@ -52,8 +53,6 @@ def from_builtin_tool_to_tag(tool: str) -> list[dict]: def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) -> dict: - import copy - new_tag = copy.deepcopy(no_func_reasoning_tag) new_tag["format"]["triggers"].append("<|channel|>commentary to=") @@ -62,6 +61,41 @@ def tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list: list[str]) return new_tag +def from_function_tool_to_tag(name: str, parameters: dict | None) -> list[dict]: + content = ( + {"type": "json_schema", "json_schema": parameters} + if parameters + else {"type": "any_text"} + ) + return [ + { + "begin": f"<|channel|>commentary to=functions.{name}<|message|>", + "content": content, + "end": "<|end|>", + }, + { + "begin": f"<|channel|>analysis to=functions.{name}<|message|>", + "content": content, + "end": "<|end|>", + }, + ] + + +def tag_with_function_tools(base_tag: dict, function_tools: list[dict]) -> dict: + new_tag = copy.deepcopy(base_tag) + + # Add commentary trigger for function tools if not already covered + # by the general commentary trigger (added by builtin tools). + if "<|channel|>commentary to=" not in new_tag["format"]["triggers"]: + new_tag["format"]["triggers"].append("<|channel|>commentary to=functions.") + + for tool in function_tools: + new_tag["format"]["tags"].extend( + from_function_tool_to_tag(tool["name"], tool.get("parameters")) + ) + return new_tag + + class GptOssReasoningParser(ReasoningParser): """ Reasoning parser for GptOss model. @@ -158,30 +192,88 @@ def extract_reasoning( # This function prepares the structural tag to format reasoning output def prepare_structured_tag( - self, original_tag: str | None, tool_server: ToolServer | None + self, + original_tag: str | None, + tool_server: ToolServer | None, + final_content_format: dict | None = None, + tool_choice: str | dict | None = None, + function_tools: list[dict] | None = None, ) -> str | None: - if original_tag is None: - if tool_server is None: - return json.dumps(no_func_reasoning_tag) - else: - builtin_tool_list: list[str] = [] - if tool_server.has_tool("browser"): - builtin_tool_list.append("browser") - if tool_server.has_tool("python"): - builtin_tool_list.append("python") - if tool_server.has_tool("container"): - builtin_tool_list.append("container") - - if len(builtin_tool_list) > 0: - logger.info("Builtin_tool_list: %s", builtin_tool_list) - func_tag = json.dumps( - tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list) - ) - else: - logger.info("Builtin_tool_list is empty") - func_tag = json.dumps(no_func_reasoning_tag) - - return func_tag - else: + if original_tag is not None: # There is potential risk for appending the tag to the original tag return original_tag + + base_tag: dict[str, Any] = copy.deepcopy(no_func_reasoning_tag) + + # Add builtin tool tags unless tool_choice is "none" or a named + # function dict — named forcing should only allow the specific + # function, not builtin channels that could satisfy at_least_one. + is_named_function_choice = isinstance(tool_choice, dict) + if ( + tool_choice != "none" + and not is_named_function_choice + and tool_server is not None + ): + builtin_tool_list: list[str] = [] + if tool_server.has_tool("browser"): + builtin_tool_list.append("browser") + if tool_server.has_tool("python"): + builtin_tool_list.append("python") + if tool_server.has_tool("container"): + builtin_tool_list.append("container") + + if builtin_tool_list: + logger.info("Builtin_tool_list: %s", builtin_tool_list) + base_tag = tag_with_builtin_funcs(base_tag, builtin_tool_list) + else: + logger.info("Builtin_tool_list is empty") + + # Add function tool tags (unless tool_choice is "none") + effective_function_tools = None + if tool_choice != "none" and function_tools: + effective_function_tools = function_tools + # If named tool choice, filter to only the named tool + if isinstance(tool_choice, dict): + named = tool_choice.get("name") + effective_function_tools = [ + t for t in function_tools if t["name"] == named + ] + if effective_function_tools: + base_tag = tag_with_function_tools(base_tag, effective_function_tools) + + # Add final channel tag unless tool_choice blocks it + if tool_choice != "required" and not isinstance(tool_choice, dict): + has_function_tools = bool(effective_function_tools) + if has_function_tools or final_content_format: + final_content = ( + final_content_format + if final_content_format + else {"type": "any_text"} + ) + base_tag["format"]["tags"].append( + { + "begin": "<|channel|>final<|message|>", + "content": final_content, + "end": "<|end|>", + } + ) + base_tag["format"]["triggers"].append("<|channel|>final") + + # For tool_choice=required or named tool, force at least one triggered + # tag. This blocks <|channel|>final and EOS at the grammar level until + # the model has emitted at least one tool-call channel. + if tool_choice == "required" or isinstance(tool_choice, dict): + # Remove the pure analysis tag (no recipient) from the tag list so + # that triggered_tags_first only contains function-call tags. The + # analysis trigger is kept so analysis-to-functions tags remain + # reachable in triggered_tags_sub. This prevents the model from + # satisfying at_least_one with a pure reasoning channel instead of + # an actual tool call. + base_tag["format"]["tags"] = [ + t + for t in base_tag["format"]["tags"] + if t.get("begin") != "<|channel|>analysis<|message|>" + ] + base_tag["format"]["at_least_one"] = True + + return json.dumps(base_tag)