Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions tests/entrypoints/openai/parser/test_harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_encoding,
get_system_message,
has_custom_tools,
inject_response_formats,
parse_chat_input_to_harmony_message,
parse_chat_output,
)
Expand Down Expand Up @@ -928,3 +929,32 @@ def test_reasoning_with_empty_content_returns_none(self):
msg = response_input_to_harmony(item, prev_responses=[])

assert msg is None


class TestInjectResponseFormats:
    """Unit tests for ``inject_response_formats``.

    The helper appends (or creates) a ``# Response Formats`` section in the
    developer instructions containing a compact-JSON rendering of the schema.
    """

    def test_appends_to_existing_instructions(self):
        """Existing instructions are preserved and the section is appended."""
        out = inject_response_formats("You are helpful.", {"type": "object"})
        assert out.startswith("You are helpful.")
        assert "# Response Formats" in out
        assert '{"type":"object"}' in out

    def test_none_instructions_creates_section(self):
        """With no prior instructions, the section becomes the whole text."""
        out = inject_response_formats(None, {"type": "object"})
        assert out.startswith("# Response Formats")
        assert '{"type":"object"}' in out

    def test_custom_format_name(self):
        """The format_name argument controls the ``##`` sub-heading."""
        out = inject_response_formats(None, {"type": "object"}, format_name="order")
        assert "## order" in out

    def test_compact_json_no_spaces(self):
        """The schema is serialized compactly (no spaces after : or ,)."""
        nested = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        out = inject_response_formats(None, nested)
        assert '{"type":"object","properties":{"name":{"type":"string"}}}' in out

    def test_section_separated_by_blank_lines(self):
        """Headings are delimited by blank lines per the Harmony layout."""
        out = inject_response_formats("Instructions here.", {"type": "object"})
        assert "\n\n# Response Formats\n\n## structured_output\n\n" in out
23 changes: 15 additions & 8 deletions tests/entrypoints/openai/responses/test_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pytest
import pytest_asyncio
import requests
from openai import InternalServerError, NotFoundError, OpenAI
from openai import NotFoundError, OpenAI
from openai_harmony import Message

from tests.utils import RemoteOpenAIServer
Expand Down Expand Up @@ -697,15 +697,22 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_calling_required(client: OpenAI, model_name: str):
    """tool_choice='required' must force at least one function call."""
    tools = [GET_WEATHER_SCHEMA]

    # NOTE(review): retry_for_tool_call presumably retries until an output
    # item of expected_tool_type appears — confirm against tests.utils.
    response = await retry_for_tool_call(
        client,
        model=model_name,
        expected_tool_type="function_call",
        input="What's the weather like in Paris today?",
        tools=tools,
        tool_choice="required",
    )
    # Assert explicitly (even though the retry helper filters for it) so a
    # regression produces a clear failure message listing the output types.
    tool_calls = [item for item in response.output if item.type == "function_call"]
    assert tool_calls, (
        "tool_choice='required' should force a function call, "
        f"got: {[item.type for item in response.output]}"
    )


@pytest.mark.asyncio
Expand Down
96 changes: 96 additions & 0 deletions tests/entrypoints/openai/responses/test_response_formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

"""Tests for response format schema extraction and developer message injection.

These tests verify that structured output schemas are correctly extracted from
ResponsesRequest and injected into the Harmony developer message per the
Harmony cookbook specification.
"""

from openai.types.responses.response_format_text_json_schema_config import (
ResponseFormatTextJSONSchemaConfig,
)

from vllm.entrypoints.openai.responses.protocol import (
ResponsesRequest,
ResponseTextConfig,
)
from vllm.entrypoints.openai.responses.serving import (
_extract_response_format_schema,
)
from vllm.sampling_params import StructuredOutputsParams


def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig:
    """Build a ResponseTextConfig whose format is a json_schema config.

    The format is assigned after construction (rather than via the
    constructor) to mirror how callers mutate the config in practice.
    """
    config = ResponseTextConfig()
    config.format = ResponseFormatTextJSONSchemaConfig(
        type="json_schema",
        name="test_schema",
        schema=schema,
    )
    return config


class TestExtractResponseFormatSchema:
    """Tests for ``_extract_response_format_schema`` on ResponsesRequest."""

    def test_extracts_from_text_format_json_schema(self):
        """A text.format of type json_schema yields its schema dict."""
        expected = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_extracts_from_structured_outputs_json(self):
        """A structured_outputs.json constraint yields its schema dict."""
        expected = {
            "type": "object",
            "properties": {"id": {"type": "integer"}},
        }
        req = ResponsesRequest(
            model="test-model",
            input="test",
            structured_outputs=StructuredOutputsParams(json=expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_returns_none_for_text_format(self):
        """Plain-text format carries no schema, so extraction yields None."""
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=ResponseTextConfig(format={"type": "text"}),
        )
        assert _extract_response_format_schema(req) is None

    def test_returns_none_for_no_format(self):
        """A request with no format configuration yields None."""
        req = ResponsesRequest(model="test-model", input="test")
        assert _extract_response_format_schema(req) is None

    def test_text_format_takes_precedence(self):
        """text.format.json_schema is checked before structured_outputs."""
        from_text = {
            "type": "object",
            "properties": {"a": {"type": "string"}},
        }
        from_structured = {
            "type": "object",
            "properties": {"b": {"type": "string"}},
        }
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(from_text),
            structured_outputs=StructuredOutputsParams(json=from_structured),
        )
        assert _extract_response_format_schema(req) == from_text
19 changes: 19 additions & 0 deletions tests/entrypoints/openai/responses/test_sampling_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self):
assert sampling_params.structured_outputs is not None
assert sampling_params.structured_outputs.grammar == "root ::= 'hello'"

def test_json_object_format_produces_structured_outputs(self):
    """Test that text.format.type=json_object creates StructuredOutputsParams."""
    from openai.types.shared.response_format_json_object import (
        ResponseFormatJSONObject,
    )

    cfg = ResponseTextConfig()
    cfg.format = ResponseFormatJSONObject(type="json_object")
    req = ResponsesRequest(
        model="test-model",
        input="test input",
        text=cfg,
    )

    params = req.to_sampling_params(default_max_tokens=1000)

    structured = params.structured_outputs
    assert structured is not None
    assert structured.json_object is True

def test_structured_outputs_and_json_schema_conflict(self):
"""Test that specifying both structured_outputs and json_schema raises."""
structured_outputs = StructuredOutputsParams(grammar="root ::= 'hello'")
Expand Down
69 changes: 69 additions & 0 deletions tests/entrypoints/openai/responses/test_structured_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
import pytest
from pydantic import BaseModel

from vllm.entrypoints.openai.responses.serving import (
_constraint_to_content_format,
)
from vllm.sampling_params import StructuredOutputsParams


@pytest.mark.asyncio
async def test_structured_output(client: openai.AsyncOpenAI):
Expand Down Expand Up @@ -76,3 +81,67 @@ class CalendarEvent(BaseModel):
assert len(participants) == 2
assert participants[0] == "Alice"
assert participants[1] == "Bob"


class TestConstraintToContentFormat:
    """Test _constraint_to_content_format helper.

    NOTE(review): these tests use ``json.dumps`` — confirm this module has a
    top-level ``import json`` (the import block is not visible in this chunk).
    """

    def test_json_schema_string_is_parsed(self):
        """JSON schema passed as a string gets json.loads'd into a dict."""
        schema = {"type": "object", "properties": {"age": {"type": "integer"}}}
        out = _constraint_to_content_format(
            StructuredOutputsParams(json=json.dumps(schema))
        )
        assert out == {"type": "json_schema", "json_schema": schema}

    def test_json_schema_dict(self):
        """JSON schema passed as a dict is used directly."""
        schema = {"type": "object", "properties": {"age": {"type": "integer"}}}
        out = _constraint_to_content_format(StructuredOutputsParams(json=schema))
        assert out == {"type": "json_schema", "json_schema": schema}

    def test_json_object(self):
        """json_object maps to minimal JSON schema."""
        out = _constraint_to_content_format(
            StructuredOutputsParams(json_object=True)
        )
        assert out == {
            "type": "json_schema",
            "json_schema": {"type": "object"},
        }

    def test_regex(self):
        """Regex constraint is converted correctly."""
        out = _constraint_to_content_format(StructuredOutputsParams(regex=r"\d+"))
        assert out == {"type": "regex", "pattern": r"\d+"}

    def test_grammar(self):
        """Grammar constraint is converted correctly."""
        out = _constraint_to_content_format(
            StructuredOutputsParams(grammar="root ::= 'hello'")
        )
        assert out == {"type": "grammar", "grammar": "root ::= 'hello'"}

    def test_choice(self):
        """Choice constraint becomes an `or` of const_string elements."""
        out = _constraint_to_content_format(
            StructuredOutputsParams(choice=["yes", "no"])
        )
        assert out == {
            "type": "or",
            "elements": [
                {"type": "const_string", "value": "yes"},
                {"type": "const_string", "value": "no"},
            ],
        }

    def test_structural_tag_only_returns_none(self):
        """structural_tag is not a content constraint -- should return None."""
        out = _constraint_to_content_format(
            StructuredOutputsParams(structural_tag='{"type": "structural_tag"}')
        )
        assert out is None
110 changes: 110 additions & 0 deletions tests/entrypoints/openai/responses/test_tool_choice_harmony.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for tool_choice handling in the Harmony-based Responses API.

These tests verify that:
- Developer instructions are preserved when tool_choice="none" (Bug 1)
- Builtin tool descriptions are suppressed when tool_choice="none" (Bug 2)
"""

from __future__ import annotations

from unittest.mock import Mock

from openai_harmony import Role, ToolNamespaceConfig

from vllm.entrypoints.openai.parser.harmony_utils import (
get_developer_message,
get_system_message,
)


class TestToolChoiceNoneInstructions:
    """Bug 1: Developer instructions must not be dropped when
    tool_choice='none' causes tools to be hidden."""

    def test_developer_message_with_instructions_no_tools(self):
        """Instructions survive even when tools=None (the condition that
        arises from tool_choice='none' with no custom tools)."""
        msg = get_developer_message(
            instructions="Be helpful and concise", tools=None
        )
        assert msg.author.role == Role.DEVELOPER
        assert "Be helpful and concise" in str(msg)

    def test_developer_message_with_instructions_and_tools(self):
        """Baseline: both instructions and tool definitions are rendered
        when tools are visible."""
        fake_tool = Mock()
        fake_tool.type = "function"
        fake_tool.name = "get_weather"
        fake_tool.description = "Get weather"
        fake_tool.parameters = {"type": "object", "properties": {}}

        msg = get_developer_message(instructions="Be helpful", tools=[fake_tool])
        text = str(msg)
        assert "Be helpful" in text
        assert "get_weather" in text

    def test_developer_message_no_instructions_no_tools(self):
        """With neither instructions nor tools, the developer message is
        still produced (just with empty content)."""
        msg = get_developer_message(instructions=None, tools=None)
        assert msg.author.role == Role.DEVELOPER


class TestToolChoiceNoneSystemMessage:
    """Bug 2: Builtin tool descriptions in the system message must be
    suppressed when tool_choice='none'."""

    def test_system_message_no_tool_descriptions(self):
        """All-None descriptions (the tools_visible=False case) must leave
        the system message without tool descriptions."""
        msg = get_system_message(
            browser_description=None,
            python_description=None,
            container_description=None,
            with_custom_tools=False,
        )
        assert msg.author.role == Role.SYSTEM
        # Either None or an empty mapping is acceptable here.
        assert not msg.content[0].tools

    def test_system_message_with_browser_description(self):
        """Baseline: a provided ToolNamespaceConfig shows up under its
        namespace key in the system message tools."""
        msg = get_system_message(
            browser_description=ToolNamespaceConfig.browser(),
            python_description=None,
            container_description=None,
            with_custom_tools=False,
        )
        assert msg.author.role == Role.SYSTEM
        assert "browser" in msg.content[0].tools

    def test_system_message_with_python_description(self):
        """Python tool description appears in system message when provided."""
        msg = get_system_message(
            browser_description=None,
            python_description=ToolNamespaceConfig.python(),
            container_description=None,
            with_custom_tools=False,
        )
        assert msg.author.role == Role.SYSTEM
        assert "python" in msg.content[0].tools

    def test_none_descriptions_mean_no_tools(self):
        """Passing None for every builtin description (as happens when
        tools_visible=False) must yield no tools in the system message."""
        msg = get_system_message(
            browser_description=None,
            python_description=None,
            container_description=None,
            with_custom_tools=False,
        )
        assert not msg.content[0].tools
Loading
Loading