Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions tests/entrypoints/openai/test_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,25 @@ async def test_response_format_json_schema(client: openai.AsyncOpenAI):
assert loaded == {"result": 2}, loaded


@pytest.mark.asyncio
async def test_response_format_text(client: openai.AsyncOpenAI):
for _ in range(2):
resp = await client.chat.completions.create(
model=MODEL_NAME,
messages=[
{
"role": "user",
"content": "what is 1+1?",
}
],
max_completion_tokens=10,
response_format={"type": "text"},
)

content = resp.choices[0].message.content
assert content is not None


@pytest.mark.asyncio
async def test_extra_fields_allowed(client: openai.AsyncOpenAI):
resp = await client.chat.completions.create(
Expand Down
21 changes: 14 additions & 7 deletions vllm/entrypoints/openai/chat_completion/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
import json
import time
from dataclasses import replace
from typing import Annotated, Any, ClassVar, Literal

import torch
Expand Down Expand Up @@ -417,18 +418,15 @@ def to_sampling_params(

response_format = self.response_format
if response_format is not None:
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if self.structured_outputs is None:
self.structured_outputs = StructuredOutputsParams()
structured_outputs_kwargs = dict[str, Any]()

# Set structured output params for response format
if response_format.type == "json_object":
self.structured_outputs.json_object = True
structured_outputs_kwargs["json_object"] = True
elif response_format.type == "json_schema":
json_schema = response_format.json_schema
assert json_schema is not None
self.structured_outputs.json = json_schema.json_schema
structured_outputs_kwargs["json"] = json_schema.json_schema
elif response_format.type == "structural_tag":
structural_tag = response_format
assert structural_tag is not None and isinstance(
Expand All @@ -439,7 +437,16 @@ def to_sampling_params(
),
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj)

# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if len(structured_outputs_kwargs) > 0:
self.structured_outputs = (
StructuredOutputsParams(**structured_outputs_kwargs)
if self.structured_outputs is None
else replace(self.structured_outputs, **structured_outputs_kwargs)
)

extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:
Expand Down
21 changes: 14 additions & 7 deletions vllm/entrypoints/openai/completion/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
import json
import time
from dataclasses import replace
from typing import Annotated, Any, Literal

import torch
Expand Down Expand Up @@ -247,18 +248,15 @@ def to_sampling_params(

response_format = self.response_format
if response_format is not None:
# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if self.structured_outputs is None:
self.structured_outputs = StructuredOutputsParams()
structured_outputs_kwargs = dict[str, Any]()

# Set structured output params for response format
if response_format.type == "json_object":
self.structured_outputs.json_object = True
structured_outputs_kwargs["json_object"] = True
elif response_format.type == "json_schema":
json_schema = response_format.json_schema
assert json_schema is not None
self.structured_outputs.json = json_schema.json_schema
structured_outputs_kwargs["json"] = json_schema.json_schema
elif response_format.type == "structural_tag":
structural_tag = response_format
assert structural_tag is not None and isinstance(
Expand All @@ -269,7 +267,16 @@ def to_sampling_params(
),
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj)

# If structured outputs wasn't already enabled,
# we must enable it for these features to work
if len(structured_outputs_kwargs) > 0:
self.structured_outputs = (
StructuredOutputsParams(**structured_outputs_kwargs)
if self.structured_outputs is None
else replace(self.structured_outputs, **structured_outputs_kwargs)
)

extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:
Expand Down
23 changes: 13 additions & 10 deletions vllm/entrypoints/openai/responses/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections.abc import AsyncGenerator, AsyncIterator, Callable, Sequence
from contextlib import AsyncExitStack
from copy import copy
from dataclasses import dataclass
from dataclasses import dataclass, replace
from http import HTTPStatus
from typing import Final

Expand Down Expand Up @@ -467,15 +467,18 @@ async def create_responses(

if self.reasoning_parser is not None:
reasoning_parser = self.reasoning_parser(tokenizer)
if sampling_params.structured_outputs is None:
sampling_params.structured_outputs = StructuredOutputsParams()
struct_out = sampling_params.structured_outputs
if struct_out.all_non_structural_tag_constraints_none():
sampling_params.structured_outputs.structural_tag = (
reasoning_parser.prepare_structured_tag(
sampling_params.structured_outputs.structural_tag,
self.tool_server,
)
if (
isinstance(
struct_out := sampling_params.structured_outputs,
StructuredOutputsParams,
)
and struct_out.all_non_structural_tag_constraints_none()
):
sampling_params.structured_outputs = replace(
struct_out,
structural_tag=reasoning_parser.prepare_structured_tag(
struct_out.structural_tag, self.tool_server
),
)
generator = self._generate_with_builtin_tools(
request_id=request.request_id,
Expand Down
5 changes: 5 additions & 0 deletions vllm/sampling_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def __post_init__(self):
"You can only use one kind of structured outputs constraint "
f"but multiple are specified: {self.__dict__}"
)
if count < 1:
raise ValueError(
"You must use one kind of structured outputs constraint "
f"but none are specified: {self.__dict__}"
)
Comment on lines +70 to +74
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This new validation is a great improvement for ensuring StructuredOutputsParams is always in a valid state. However, it introduces a potential regression for deprecated parameters.

Specifically, the logic for handling deprecated guided_* parameters in vllm/entrypoints/openai/protocol.py (lines 794-806) can now raise this ValueError. If a user provides only guided_whitespace_pattern (which maps to whitespace_pattern), the code will attempt to create StructuredOutputsParams with only a non-constraint parameter. This will cause count to be 0 here, triggering this error.

While the problematic code is not in this diff, this change makes it faulty. To prevent this regression, the logic for handling deprecated parameters should be updated to only construct StructuredOutputsParams if at least one constraint parameter (e.g., guided_json, guided_regex) is provided.

For example, in ChatCompletionRequest.to_sampling_params in vllm/entrypoints/openai/protocol.py, the logic could be adjusted:

# ... inside to_sampling_params, after collecting kwargs from deprecated params
            kwargs = {k: v for k, v in kwargs.items() if v is not None}
            constraint_keys = {'json', 'regex', 'choice', 'grammar', 'structural_tag'}
            if any(k in constraint_keys for k in kwargs):
                self.structured_outputs = StructuredOutputsParams(**kwargs)

This would ensure backward compatibility for the deprecated parameters while upholding the new, stricter validation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK guided_whitespace_pattern must be given together with one of the structural constraints; otherwise the same "no structured output constraint specified" error is raised (so this is not a newly introduced regression but a pre-existing bug).

Though I'd welcome a better way to validate the structured outputs params.


def all_constraints_none(self) -> bool:
"""
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
# Set structured output params for tool calling
if json_schema_from_tool is not None:
if isinstance(request, ChatCompletionRequest):
request.structured_outputs = StructuredOutputsParams()
# tool_choice: "Forced Function" or "required" will override
# structured output json settings to make tool calling work correctly
request.structured_outputs.json = json_schema_from_tool
request.structured_outputs = StructuredOutputsParams(
json=json_schema_from_tool
)
request.response_format = None
if isinstance(request, ResponsesRequest):
request.text = ResponseTextConfig()
Expand Down