Skip to content
Open
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -239,5 +239,5 @@ vllm/grpc/vllm_engine_pb2.py
vllm/grpc/vllm_engine_pb2_grpc.py
vllm/grpc/vllm_engine_pb2.pyi

# Ignore generated cpu headers
# Ignore generated cpu headers
csrc/cpu/cpu_attn_dispatch_generated.h
100 changes: 100 additions & 0 deletions tests/entrypoints/openai/parser/test_harmony_utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from unittest.mock import patch

import pytest
from openai_harmony import Message, Role

from tests.entrypoints.openai.utils import verify_harmony_messages
from vllm.entrypoints.openai.parser.harmony_utils import (
auto_drop_analysis_messages,
get_developer_message,
get_encoding,
get_system_message,
has_custom_tools,
inject_response_formats,
parse_chat_input_to_harmony_message,
parse_chat_output,
)
Expand Down Expand Up @@ -928,3 +932,99 @@ def test_reasoning_with_empty_content_returns_none(self):
msg = response_input_to_harmony(item, prev_responses=[])

assert msg is None


class TestInjectResponseFormats:
    """Behavioral tests for ``inject_response_formats``."""

    def test_appends_to_existing_instructions(self):
        """Existing instructions are preserved and the schema section follows."""
        combined = inject_response_formats("You are helpful.", {"type": "object"})
        assert combined.startswith("You are helpful.")
        assert "# Response Formats" in combined
        assert '{"type":"object"}' in combined

    def test_none_instructions_creates_section(self):
        """With no prior instructions, output begins with the schema section."""
        combined = inject_response_formats(None, {"type": "object"})
        assert combined.startswith("# Response Formats")
        assert '{"type":"object"}' in combined

    def test_custom_format_name(self):
        """A caller-supplied format_name becomes the section sub-heading."""
        combined = inject_response_formats(None, {"type": "object"}, format_name="order")
        assert "## order" in combined

    def test_compact_json_no_spaces(self):
        """The schema is serialized compactly, without separator whitespace."""
        nested_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        combined = inject_response_formats(None, nested_schema)
        assert '{"type":"object","properties":{"name":{"type":"string"}}}' in combined

    def test_section_separated_by_blank_lines(self):
        """Header and format name are each set off by blank lines."""
        combined = inject_response_formats("Instructions here.", {"type": "object"})
        assert "\n\n# Response Formats\n\n## structured_output\n\n" in combined


class TestGetDeveloperMessageResponseFormats:
    """Tests for response_format_section parameter in get_developer_message."""

    ENV_VAR = (
        "vllm.entrypoints.openai.parser.harmony_utils"
        ".envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS"
    )

    def _extract_instructions_text(self, dev_msg: Message) -> str | None:
        """Return the first non-None ``instructions`` attribute found among
        the developer message's content items, or None if none carries one."""
        return next(
            (
                item.instructions
                for item in dev_msg.content
                if getattr(item, "instructions", None) is not None
            ),
            None,
        )

    def test_response_format_preserved_with_system_instructions(self):
        """When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is True,
        user instructions should be dropped but response format schema
        should still appear in the developer message."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, True):
            dev_msg = get_developer_message(
                instructions="Be concise.",
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "# Response Formats" in extracted
        # User instructions should NOT be present
        assert "Be concise." not in extracted

    def test_response_format_and_instructions_without_system_instructions(self):
        """When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is False,
        both instructions and response format schema should appear."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions="Be concise.",
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "Be concise." in extracted
        assert "# Response Formats" in extracted

    def test_response_format_only_no_instructions(self):
        """With instructions=None, only the response format section appears."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions=None,
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "# Response Formats" in extracted

    def test_backward_compat_no_response_format(self):
        """Without response_format_section, behavior matches the original."""
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions="Be concise.",
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "Be concise." in extracted
        assert "# Response Formats" not in extracted
90 changes: 90 additions & 0 deletions tests/entrypoints/openai/responses/test_response_formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

"""Tests for response format schema extraction and developer message injection.

These tests verify that structured output schemas are correctly extracted from
ResponsesRequest and injected into the Harmony developer message per the
Harmony cookbook specification.
"""

from openai.types.responses.response_format_text_json_schema_config import (
ResponseFormatTextJSONSchemaConfig,
)

from vllm.entrypoints.openai.responses.protocol import (
ResponsesRequest,
ResponseTextConfig,
)
from vllm.entrypoints.openai.responses.serving import (
_extract_response_format_schema,
)
from vllm.sampling_params import StructuredOutputsParams


def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig:
    """Build a ResponseTextConfig whose format is a json_schema config
    wrapping *schema* under the fixed name "test_schema"."""
    config = ResponseTextConfig()
    config.format = ResponseFormatTextJSONSchemaConfig(
        type="json_schema",
        name="test_schema",
        schema=schema,
    )
    return config


class TestExtractResponseFormatSchema:
    """Tests for ``_extract_response_format_schema``."""

    def test_extracts_from_text_format_json_schema(self):
        """A json_schema text format yields its schema dict."""
        expected = {"type": "object", "properties": {"name": {"type": "string"}}}
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_extracts_from_structured_outputs_json(self):
        """A structured_outputs json constraint yields its schema dict."""
        expected = {"type": "object", "properties": {"id": {"type": "integer"}}}
        req = ResponsesRequest(
            model="test-model",
            input="test",
            structured_outputs=StructuredOutputsParams(json=expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_returns_none_for_text_format(self):
        """A plain text format carries no schema."""
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=ResponseTextConfig(format={"type": "text"}),
        )
        assert _extract_response_format_schema(req) is None

    def test_returns_none_for_no_format(self):
        """A request without any format carries no schema."""
        req = ResponsesRequest(
            model="test-model",
            input="test",
        )
        assert _extract_response_format_schema(req) is None

    def test_text_format_takes_precedence(self):
        """text.format.json_schema is checked before structured_outputs."""
        text_schema = {
            "type": "object",
            "properties": {"a": {"type": "string"}},
        }
        so_schema = {
            "type": "object",
            "properties": {"b": {"type": "string"}},
        }
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(text_schema),
            structured_outputs=StructuredOutputsParams(json=so_schema),
        )
        assert _extract_response_format_schema(req) == text_schema
19 changes: 19 additions & 0 deletions tests/entrypoints/openai/responses/test_sampling_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self):
assert sampling_params.structured_outputs is not None
assert sampling_params.structured_outputs.grammar == "root ::= 'hello'"

def test_json_object_format_produces_structured_outputs(self):
    """Test that text.format.type=json_object creates StructuredOutputsParams."""
    from openai.types.shared.response_format_json_object import (
        ResponseFormatJSONObject,
    )

    cfg = ResponseTextConfig()
    cfg.format = ResponseFormatJSONObject(type="json_object")
    req = ResponsesRequest(
        model="test-model",
        input="test input",
        text=cfg,
    )

    params = req.to_sampling_params(default_max_tokens=1000)

    assert params.structured_outputs is not None
    assert params.structured_outputs.json_object is True

def test_structured_outputs_and_json_schema_conflict(self):
"""Test that specifying both structured_outputs and json_schema raises."""
structured_outputs = StructuredOutputsParams(grammar="root ::= 'hello'")
Expand Down
26 changes: 26 additions & 0 deletions tests/entrypoints/openai/responses/test_structured_output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for structured output helpers in the Responses API."""

import json

import openai
import pytest
from pydantic import BaseModel

from vllm.entrypoints.openai.responses.serving import (
_constraint_to_content_format,
)
from vllm.sampling_params import StructuredOutputsParams


@pytest.mark.asyncio
async def test_structured_output(client: openai.AsyncOpenAI):
Expand Down Expand Up @@ -76,3 +83,22 @@ class CalendarEvent(BaseModel):
assert len(participants) == 2
assert participants[0] == "Alice"
assert participants[1] == "Bob"


class TestConstraintToContentFormat:
    """Test _constraint_to_content_format helper."""

    def test_json_schema_string_is_parsed(self):
        """JSON schema passed as a string gets json.loads'd into a dict."""
        expected = {"type": "object", "properties": {"age": {"type": "integer"}}}
        outcome = _constraint_to_content_format(
            StructuredOutputsParams(json=json.dumps(expected))
        )

        assert outcome == {"type": "json_schema", "json_schema": expected}

    def test_structural_tag_only_returns_none(self):
        """structural_tag is not a content constraint — should return None."""
        tag_params = StructuredOutputsParams(structural_tag='{"type": "structural_tag"}')

        assert _constraint_to_content_format(tag_params) is None
Loading
Loading