Skip to content
Open
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -239,5 +239,5 @@ vllm/grpc/vllm_engine_pb2.py
vllm/grpc/vllm_engine_pb2_grpc.py
vllm/grpc/vllm_engine_pb2.pyi

# Ignore generated cpu headers
# Ignore generated cpu headers
csrc/cpu/cpu_attn_dispatch_generated.h
100 changes: 100 additions & 0 deletions tests/entrypoints/openai/parser/test_harmony_utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from unittest.mock import patch

import pytest
from openai_harmony import Message, Role

from tests.entrypoints.openai.utils import verify_harmony_messages
from vllm.entrypoints.openai.parser.harmony_utils import (
auto_drop_analysis_messages,
get_developer_message,
get_encoding,
get_system_message,
has_custom_tools,
inject_response_formats,
parse_chat_input_to_harmony_message,
parse_chat_output,
)
Expand Down Expand Up @@ -928,3 +932,99 @@ def test_reasoning_with_empty_content_returns_none(self):
msg = response_input_to_harmony(item, prev_responses=[])

assert msg is None


class TestInjectResponseFormats:
    """Behavioral tests for ``inject_response_formats``."""

    def test_appends_to_existing_instructions(self):
        """Existing instructions are preserved and the schema section follows."""
        combined = inject_response_formats("You are helpful.", {"type": "object"})
        assert combined.startswith("You are helpful.")
        assert "# Response Formats" in combined
        assert '{"type":"object"}' in combined

    def test_none_instructions_creates_section(self):
        """With no prior instructions, output begins with the schema section."""
        combined = inject_response_formats(None, {"type": "object"})
        assert combined.startswith("# Response Formats")
        assert '{"type":"object"}' in combined

    def test_custom_format_name(self):
        """A caller-supplied format_name becomes the section sub-heading."""
        combined = inject_response_formats(None, {"type": "object"}, format_name="order")
        assert "## order" in combined

    def test_compact_json_no_spaces(self):
        """The schema is serialized compactly, without separator whitespace."""
        nested_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        combined = inject_response_formats(None, nested_schema)
        assert '{"type":"object","properties":{"name":{"type":"string"}}}' in combined

    def test_section_separated_by_blank_lines(self):
        """Header and format name are each set off by blank lines."""
        combined = inject_response_formats("Instructions here.", {"type": "object"})
        assert "\n\n# Response Formats\n\n## structured_output\n\n" in combined


class TestGetDeveloperMessageResponseFormats:
    """Tests for response_format_section parameter in get_developer_message."""

    ENV_VAR = (
        "vllm.entrypoints.openai.parser.harmony_utils"
        ".envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS"
    )

    def _extract_instructions_text(self, dev_msg: Message) -> str | None:
        """Return the first non-None ``instructions`` attribute found among
        the developer message's content items, or None if none carries one."""
        return next(
            (
                item.instructions
                for item in dev_msg.content
                if getattr(item, "instructions", None) is not None
            ),
            None,
        )

    def test_response_format_preserved_with_system_instructions(self):
        """When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is True,
        user instructions should be dropped but response format schema
        should still appear in the developer message."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, True):
            dev_msg = get_developer_message(
                instructions="Be concise.",
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "# Response Formats" in extracted
        # User instructions should NOT be present
        assert "Be concise." not in extracted

    def test_response_format_and_instructions_without_system_instructions(self):
        """When VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS is False,
        both instructions and response format schema should appear."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions="Be concise.",
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "Be concise." in extracted
        assert "# Response Formats" in extracted

    def test_response_format_only_no_instructions(self):
        """With instructions=None, only the response format section appears."""
        fmt_section = "# Response Formats\n\n## structured_output\n\n{}"
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions=None,
                response_format_section=fmt_section,
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "# Response Formats" in extracted

    def test_backward_compat_no_response_format(self):
        """Without response_format_section, behavior matches the original."""
        with patch(self.ENV_VAR, False):
            dev_msg = get_developer_message(
                instructions="Be concise.",
            )
        extracted = self._extract_instructions_text(dev_msg)
        assert extracted is not None
        assert "Be concise." in extracted
        assert "# Response Formats" not in extracted
90 changes: 90 additions & 0 deletions tests/entrypoints/openai/responses/test_response_formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

"""Tests for response format schema extraction and developer message injection.

These tests verify that structured output schemas are correctly extracted from
ResponsesRequest and injected into the Harmony developer message per the
Harmony cookbook specification.
"""

from openai.types.responses.response_format_text_json_schema_config import (
ResponseFormatTextJSONSchemaConfig,
)

from vllm.entrypoints.openai.responses.protocol import (
ResponsesRequest,
ResponseTextConfig,
)
from vllm.entrypoints.openai.responses.serving import (
_extract_response_format_schema,
)
from vllm.sampling_params import StructuredOutputsParams


def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig:
    """Build a ResponseTextConfig whose format is a json_schema config
    wrapping *schema* under the fixed name "test_schema"."""
    config = ResponseTextConfig()
    config.format = ResponseFormatTextJSONSchemaConfig(
        type="json_schema",
        name="test_schema",
        schema=schema,
    )
    return config


class TestExtractResponseFormatSchema:
    """Tests for ``_extract_response_format_schema``."""

    def test_extracts_from_text_format_json_schema(self):
        """A json_schema text format yields its schema dict."""
        expected = {"type": "object", "properties": {"name": {"type": "string"}}}
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_extracts_from_structured_outputs_json(self):
        """A structured_outputs json constraint yields its schema dict."""
        expected = {"type": "object", "properties": {"id": {"type": "integer"}}}
        req = ResponsesRequest(
            model="test-model",
            input="test",
            structured_outputs=StructuredOutputsParams(json=expected),
        )
        assert _extract_response_format_schema(req) == expected

    def test_returns_none_for_text_format(self):
        """A plain text format carries no schema."""
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=ResponseTextConfig(format={"type": "text"}),
        )
        assert _extract_response_format_schema(req) is None

    def test_returns_none_for_no_format(self):
        """A request without any format carries no schema."""
        req = ResponsesRequest(
            model="test-model",
            input="test",
        )
        assert _extract_response_format_schema(req) is None

    def test_text_format_takes_precedence(self):
        """text.format.json_schema is checked before structured_outputs."""
        text_schema = {
            "type": "object",
            "properties": {"a": {"type": "string"}},
        }
        so_schema = {
            "type": "object",
            "properties": {"b": {"type": "string"}},
        }
        req = ResponsesRequest(
            model="test-model",
            input="test",
            text=_make_json_schema_text_config(text_schema),
            structured_outputs=StructuredOutputsParams(json=so_schema),
        )
        assert _extract_response_format_schema(req) == text_schema
19 changes: 19 additions & 0 deletions tests/entrypoints/openai/responses/test_sampling_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self):
assert sampling_params.structured_outputs is not None
assert sampling_params.structured_outputs.grammar == "root ::= 'hello'"

def test_json_object_format_produces_structured_outputs(self):
    """Test that text.format.type=json_object creates StructuredOutputsParams."""
    from openai.types.shared.response_format_json_object import (
        ResponseFormatJSONObject,
    )

    cfg = ResponseTextConfig()
    cfg.format = ResponseFormatJSONObject(type="json_object")
    req = ResponsesRequest(
        model="test-model",
        input="test input",
        text=cfg,
    )

    params = req.to_sampling_params(default_max_tokens=1000)

    assert params.structured_outputs is not None
    assert params.structured_outputs.json_object is True

def test_structured_outputs_and_json_schema_conflict(self):
"""Test that specifying both structured_outputs and json_schema raises."""
structured_outputs = StructuredOutputsParams(grammar="root ::= 'hello'")
Expand Down
26 changes: 26 additions & 0 deletions tests/entrypoints/openai/responses/test_structured_output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for structured output helpers in the Responses API."""

import json

import openai
import pytest
from pydantic import BaseModel

from vllm.entrypoints.openai.responses.serving import (
_constraint_to_content_format,
)
from vllm.sampling_params import StructuredOutputsParams


@pytest.mark.asyncio
async def test_structured_output(client: openai.AsyncOpenAI):
Expand Down Expand Up @@ -76,3 +83,22 @@ class CalendarEvent(BaseModel):
assert len(participants) == 2
assert participants[0] == "Alice"
assert participants[1] == "Bob"


class TestConstraintToContentFormat:
    """Test _constraint_to_content_format helper."""

    def test_json_schema_string_is_parsed(self):
        """JSON schema passed as a string gets json.loads'd into a dict."""
        expected = {"type": "object", "properties": {"age": {"type": "integer"}}}
        outcome = _constraint_to_content_format(
            StructuredOutputsParams(json=json.dumps(expected))
        )

        assert outcome == {"type": "json_schema", "json_schema": expected}

    def test_structural_tag_only_returns_none(self):
        """structural_tag is not a content constraint — should return None."""
        tag_params = StructuredOutputsParams(structural_tag='{"type": "structural_tag"}')

        assert _constraint_to_content_format(tag_params) is None
Loading
Loading