diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index ae94c149017e..0cc064cd8f12 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -671,6 +671,25 @@ async def test_response_format_json_schema(client: openai.AsyncOpenAI): assert loaded == {"result": 2}, loaded +@pytest.mark.asyncio +async def test_response_format_text(client: openai.AsyncOpenAI): + for _ in range(2): + resp = await client.chat.completions.create( + model=MODEL_NAME, + messages=[ + { + "role": "user", + "content": "what is 1+1?", + } + ], + max_completion_tokens=10, + response_format={"type": "text"}, + ) + + content = resp.choices[0].message.content + assert content is not None + + @pytest.mark.asyncio async def test_extra_fields_allowed(client: openai.AsyncOpenAI): resp = await client.chat.completions.create( diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 299069925bbd..a76dc73d9ba3 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -5,6 +5,7 @@ # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py import json import time +from dataclasses import replace from typing import Annotated, Any, ClassVar, Literal import torch @@ -417,18 +418,15 @@ def to_sampling_params( response_format = self.response_format if response_format is not None: - # If structured outputs wasn't already enabled, - # we must enable it for these features to work - if self.structured_outputs is None: - self.structured_outputs = StructuredOutputsParams() + structured_outputs_kwargs = dict[str, Any]() # Set structured output params for response format if response_format.type == "json_object": - self.structured_outputs.json_object = True + structured_outputs_kwargs["json_object"] = True elif response_format.type == "json_schema": json_schema = response_format.json_schema assert json_schema is not None - self.structured_outputs.json = json_schema.json_schema + structured_outputs_kwargs["json"] = json_schema.json_schema elif response_format.type == "structural_tag": structural_tag = response_format assert structural_tag is not None and isinstance( @@ -439,7 +437,16 @@ def to_sampling_params( ), ) s_tag_obj = structural_tag.model_dump(by_alias=True) - self.structured_outputs.structural_tag = json.dumps(s_tag_obj) + structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj) + + # If structured outputs wasn't already enabled, + # we must enable it for these features to work + if len(structured_outputs_kwargs) > 0: + self.structured_outputs = ( + StructuredOutputsParams(**structured_outputs_kwargs) + if self.structured_outputs is None + else replace(self.structured_outputs, **structured_outputs_kwargs) + ) extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {} if self.kv_transfer_params: diff --git a/vllm/entrypoints/openai/completion/protocol.py b/vllm/entrypoints/openai/completion/protocol.py index df432aea15f0..fc773c402ede 100644 --- a/vllm/entrypoints/openai/completion/protocol.py +++ b/vllm/entrypoints/openai/completion/protocol.py @@ -5,6 +5,7 @@ # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py import json import time +from dataclasses import replace from typing import Annotated, Any, Literal import torch @@ -247,18 +248,15 @@ def to_sampling_params( response_format = self.response_format if response_format is not None: - # If structured outputs wasn't already enabled, - # we must enable it for these features to work - if self.structured_outputs is None: - self.structured_outputs = StructuredOutputsParams() + structured_outputs_kwargs = dict[str, Any]() # Set structured output params for response format if response_format.type == "json_object": - self.structured_outputs.json_object = True + structured_outputs_kwargs["json_object"] = True elif response_format.type == "json_schema": json_schema = response_format.json_schema assert json_schema is not None - self.structured_outputs.json = json_schema.json_schema + structured_outputs_kwargs["json"] = json_schema.json_schema elif response_format.type == "structural_tag": structural_tag = response_format assert structural_tag is not None and isinstance( @@ -269,7 +267,16 @@ def to_sampling_params( ), ) s_tag_obj = structural_tag.model_dump(by_alias=True) - self.structured_outputs.structural_tag = json.dumps(s_tag_obj) + structured_outputs_kwargs["structural_tag"] = json.dumps(s_tag_obj) + + # If structured outputs wasn't already enabled, + # we must enable it for these features to work + if len(structured_outputs_kwargs) > 0: + self.structured_outputs = ( + StructuredOutputsParams(**structured_outputs_kwargs) + if self.structured_outputs is None + else replace(self.structured_outputs, **structured_outputs_kwargs) + ) extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {} if self.kv_transfer_params: diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 2e5c0baa92c2..d58f9963c1cc 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator, Callable, Sequence from contextlib import AsyncExitStack from copy import copy -from dataclasses import dataclass +from dataclasses import dataclass, replace from http import HTTPStatus from typing import Final @@ -467,15 +467,18 @@ async def create_responses( if self.reasoning_parser is not None: reasoning_parser = self.reasoning_parser(tokenizer) - if sampling_params.structured_outputs is None: - sampling_params.structured_outputs = StructuredOutputsParams() - struct_out = sampling_params.structured_outputs - if struct_out.all_non_structural_tag_constraints_none(): - sampling_params.structured_outputs.structural_tag = ( - reasoning_parser.prepare_structured_tag( - sampling_params.structured_outputs.structural_tag, - self.tool_server, - ) + if ( + isinstance( + struct_out := sampling_params.structured_outputs, + StructuredOutputsParams, + ) + and struct_out.all_non_structural_tag_constraints_none() + ): + sampling_params.structured_outputs = replace( + struct_out, + structural_tag=reasoning_parser.prepare_structured_tag( + struct_out.structural_tag, self.tool_server + ), ) generator = self._generate_with_builtin_tools( request_id=request.request_id, diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 00542830a5d4..1d097852e194 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -67,6 +67,11 @@ def __post_init__(self): "You can only use one kind of structured outputs constraint " f"but multiple are specified: {self.__dict__}" ) + if count < 1: + raise ValueError( + "You must use one kind of structured outputs constraint " + f"but none are specified: {self.__dict__}" + ) def all_constraints_none(self) -> bool: """ diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 5416027edf18..75cffd3297f6 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -65,10 +65,11 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques # Set structured output params for tool calling if json_schema_from_tool is not None: if isinstance(request, ChatCompletionRequest): - request.structured_outputs = StructuredOutputsParams() # tool_choice: "Forced Function" or "required" will override # structured output json settings to make tool calling work correctly - request.structured_outputs.json = json_schema_from_tool + request.structured_outputs = StructuredOutputsParams( + json=json_schema_from_tool + ) request.response_format = None if isinstance(request, ResponsesRequest): request.text = ResponseTextConfig()