From a38e60cdc1f41850dffa46bcc91445fbc616c0d3 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 03:35:57 +0000 Subject: [PATCH 01/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../openai_responses_client_with_tools.py | 79 ++++++++ .../entrypoints/openai/responses/conftest.py | 7 +- .../openai/responses/test_function_call.py | 185 ++++++++++++++++++ vllm/entrypoints/openai/protocol.py | 120 ++++++++++-- vllm/entrypoints/openai/serving_engine.py | 14 +- vllm/entrypoints/openai/serving_responses.py | 154 +++++++++++++-- 6 files changed, 522 insertions(+), 37 deletions(-) create mode 100644 examples/online_serving/openai_responses_client_with_tools.py create mode 100644 tests/v1/entrypoints/openai/responses/test_function_call.py diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py new file mode 100644 index 000000000000..fb0cb90d9a8c --- /dev/null +++ b/examples/online_serving/openai_responses_client_with_tools.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Set up this example by starting a vLLM OpenAI-compatible server with tool call +options enabled. +Reasoning models can be used through the Responses API as seen here +https://platform.openai.com/docs/api-reference/responses +For example: +vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \ + --guided-decoding-backend xgrammar \ + --enable-auto-tool-choice --tool-call-parser hermes +""" + +import json + +from openai import OpenAI + + +def get_weather(latitude: float, longitude: float) -> str: + """ + Mock function to simulate getting weather data. + In a real application, this would call an external weather API. + """ + return f"Current temperature at ({latitude}, {longitude}) is 20°C." + + +tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get current temperature for provided coordinates in celsius.", + "parameters": { + "type": "object", + "properties": { + "latitude": {"type": "number"}, + "longitude": {"type": "number"}, + }, + "required": ["latitude", "longitude"], + "additionalProperties": False, + }, + "strict": True, + } +] + +input_messages = [ + {"role": "user", "content": "What's the weather like in Paris today?"} +] + + +def main(): + base_url = "http://0.0.0.0:8000/v1" + model = "Qwen/Qwen3-1.7B" + client = OpenAI(base_url=base_url, api_key="empty") + response = client.responses.create( + model=model, input=input_messages, tools=tools, tool_choice="required" + ) + tool_call = response.output[0] + args = json.loads(tool_call.arguments) + + result = get_weather(args["latitude"], args["longitude"]) + + input_messages.append(tool_call) # append model's function call message + input_messages.append( + { # append result message + "type": "function_call_output", + "call_id": tool_call.call_id, + "output": str(result), + } + ) + response_2 = client.responses.create( + model=model, + input=input_messages, + tools=tools, + ) + print(response_2.output_text) + + +if __name__ == "__main__": + main() diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py index 032ed42f43d1..22730dbae067 100644 --- a/tests/v1/entrypoints/openai/responses/conftest.py +++ b/tests/v1/entrypoints/openai/responses/conftest.py @@ -15,8 +15,13 @@ def default_server_args(): "--max-model-len", "8192", "--enforce-eager", # For faster startup. + "--enable-auto-tool-choice", + "--guided-decoding-backend", + "xgrammar", + "--tool-call-parser", + "hermes", "--reasoning-parser", - "deepseek_r1", + "qwen3", ] diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py new file mode 100644 index 000000000000..4e4e847d1663 --- /dev/null +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +import openai # use the official client for correctness check +import pytest + +MODEL_NAME = "Qwen/Qwen3-0.6B" +tools = [ + { + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to find the weather for, e.g. 'Vienna'", + "default": "Vienna", + }, + "country": { + "type": "string", + "description": "The country that the city is in, e.g. 'Austria'", + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"], + }, + "options": { + "$ref": "#/$defs/WeatherOptions", + "description": "Optional parameters for weather query", + }, + }, + "required": ["country", "unit"], + "$defs": { + "WeatherOptions": { + "title": "WeatherOptions", + "type": "object", + "additionalProperties": False, + "properties": { + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "default": "celsius", + "description": "Temperature unit", + "title": "Temperature Unit", + }, + "include_forecast": { + "type": "boolean", + "default": False, + "description": "Whether to include a 24-hour forecast", + "title": "Include Forecast", + }, + "language": { + "type": "string", + "default": "zh-CN", + "description": "Language of the response", + "title": "Language", + "enum": ["zh-CN", "en-US", "ja-JP"], + }, + }, + }, + }, + }, + }, + { + "type": "function", + "name": "get_forecast", + "description": "Get the weather forecast for a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to get the forecast for, e.g. 'Vienna'", + "default": "Vienna", + }, + "country": { + "type": "string", + "description": "The country that the city is in, e.g. 'Austria'", + }, + "days": { + "type": "integer", + "description": "Number of days to get the forecast for (1-7)", + }, + "unit": { + "type": "string", + "description": "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["country", "days", "unit"], + }, + }, +] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +async def test_function_tool_use( + client: openai.AsyncOpenAI, model_name: str, tool_choice: str +): + prompt = [ + { + "role": "user", + "content": "Can you tell me what the current weather is in Berlin and the " + "forecast for the next 5 days, in fahrenheit?", + }, + ] + response = await client.responses.create( + model=model_name, + input=prompt, + tools=tools, + tool_choice=tool_choice, + ) + + assert len(response.output) >= 1 + tool_call = response.output[0] + + assert tool_call.type == "function_call" + assert json.loads(tool_call.arguments) is not None + + +@pytest.mark.asyncio +async def test_named_tool_use(client: openai.AsyncOpenAI): + def get_weather(latitude: float, longitude: float) -> str: + """ + Mock function to simulate getting weather data. + In a real application, this would call an external weather API. + """ + return f"Current temperature at ({latitude}, {longitude}) is 20°C." + + tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get current temperature for provided coordinates in celsius.", + "parameters": { + "type": "object", + "properties": { + "latitude": {"type": "number"}, + "longitude": {"type": "number"}, + }, + "required": ["latitude", "longitude"], + "additionalProperties": False, + }, + "strict": True, + } + ] + + input_messages = [ + {"role": "user", "content": "What's the weather like in Paris today?"} + ] + + response = await client.responses.create( + model=MODEL_NAME, + input=input_messages, + tools=tools, + tool_choice={"type": "function", "name": "get_weather"}, + ) + assert len(response.output) == 1 + tool_call = response.output[0] + assert tool_call.type == "function_call" + assert tool_call.name == "get_weather" + args = json.loads(tool_call.arguments) + assert args["latitude"] is not None + assert args["longitude"] is not None + # call the tool + result = get_weather(args["latitude"], args["longitude"]) + input_messages.append(tool_call) # append model's function call message + input_messages.append( + { # append result message + "type": "function_call_output", + "call_id": tool_call.call_id, + "output": str(result), + } + ) + # create a new response with the tool call result + response_2 = await client.responses.create(model=MODEL_NAME, input=input_messages) + # check the output + assert len(response_2.output_text) > 0 diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 33256de6dd47..2a762c557b2f 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -65,7 +65,7 @@ from openai.types.responses.response import IncompleteDetails, ToolChoice -from openai.types.responses.tool import Tool +from openai.types.responses.tool import FunctionTool, Tool from openai.types.shared import Metadata, Reasoning from pydantic import ( BaseModel, @@ -304,6 +304,88 @@ def get_logits_processors( return None +def get_json_schema_from_tool( + tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam, + tools: list[Tool | ChatCompletionToolsParam] | None, +) -> str | dict | BaseModel | None: + if tool_choice in ("none", None) or tools is None: + return None + if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice): + tool_name = tool_choice.name + tool_map = {tool.name: tool for tool in tools if isinstance(tool, Tool)} + if tool_name not in tool_map: + raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") + return tool_map[tool_name].parameters + + if (not isinstance(tool_choice, str)) and isinstance( + tool_choice, ChatCompletionNamedToolChoiceParam + ): + tool_name = tool_choice.function.name + tool_map = { + tool.function.name: tool + for tool in tools + if isinstance(tool, ChatCompletionToolsParam) + } + if tool_name not in tool_map: + raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") + return tool_map[tool_name].function.parameters + + if tool_choice == "required": + + def extract_tool_info( + tool: Tool | ChatCompletionToolsParam, + ) -> tuple[str, dict[str, Any] | None]: + if isinstance(tool, FunctionTool): + return tool.name, tool.parameters + elif isinstance(tool, ChatCompletionToolsParam): + return tool.function.name, tool.function.parameters + else: + raise TypeError(f"Unsupported tool type: {type(tool)}") + + def get_tool_schema(tool: Tool | ChatCompletionToolsParam) -> dict: + name, params = extract_tool_info(tool) + params = params if params else {"type": "object", "properties": {}} + return { + "properties": { + "name": {"type": "string", "enum": [name]}, + "parameters": params, + }, + "required": ["name", "parameters"], + } + + def get_tool_schema_defs( + tools: list[Tool | ChatCompletionToolsParam], + ) -> dict: + all_defs: dict[str, dict[str, Any]] = {} + for tool in tools: + _, params = extract_tool_info(tool) + if params is None: + continue + defs = params.pop("$defs", {}) + for def_name, def_schema in defs.items(): + if def_name in all_defs and all_defs[def_name] != def_schema: + raise ValueError( + f"Tool definition '{def_name}' has multiple schemas, which is not supported." + ) + all_defs[def_name] = def_schema + return all_defs + + json_schema = { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "anyOf": [get_tool_schema(tool) for tool in tools], + }, + } + json_schema_defs = get_tool_schema_defs(tools) + if json_schema_defs: + json_schema["$defs"] = json_schema_defs + return json_schema + + return None + + ResponseInputOutputItem: TypeAlias = ( ResponseInputItemParam | ResponseReasoningItem | ResponseFunctionToolCall ) @@ -423,18 +505,7 @@ def to_sampling_params( stop_token_ids = default_sampling_params.get("stop_token_ids") # Structured output - structured_outputs = None - if self.text is not None and self.text.format is not None: - response_format = self.text.format - if ( - response_format.type == "json_schema" - and response_format.schema_ is not None - ): - structured_outputs = StructuredOutputsParams( - json=response_format.schema_ - ) - elif response_format.type == "json_object": - raise NotImplementedError("json_object is not supported") + structured_outputs = self._get_structured_outputs() # TODO: add more parameters return SamplingParams.from_optional( @@ -449,6 +520,29 @@ def to_sampling_params( structured_outputs=structured_outputs, ) + def _get_structured_outputs(self) -> StructuredOutputsParams | None: + # Structured output + structured_outputs = None + if self.text is not None and self.text.format is not None: + response_format = self.text.format + if ( + response_format.type == "json_schema" + and response_format.schema_ is not None + ): + structured_outputs = StructuredOutputsParams( + json=response_format.schema_ + ) + elif response_format.type == "json_object": + raise NotImplementedError("json_object is not supported") + # Function call + elif not (self.tool_choice == "none" or self.tools is None): + structured_outputs = StructuredOutputsParams( + json=get_json_schema_from_tool( + tools=self.tools, tool_choice=self.tool_choice + ) + ) + return structured_outputs + def is_include_output_logprobs(self) -> bool: """Check if the request includes output logprobs.""" if self.include is None: diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index bafc0e2c372f..ddbeeba99447 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1098,13 +1098,17 @@ async def _preprocess_chat( ) if should_parse_tools: - if not isinstance(request, ChatCompletionRequest): - msg = "Tool usage is only supported for Chat Completions API" + if not isinstance(request, ChatCompletionRequest | ResponsesRequest): + msg = ( + "Tool usage is only supported for Chat Completions API " + "or Responses API requests." + ) raise NotImplementedError(msg) - request = tool_parser(tokenizer).adjust_request( # type: ignore - request=request - ) + if isinstance(request, ChatCompletionRequest): + request = tool_parser(tokenizer).adjust_request( # type: ignore + request=request + ) if tokenizer is None: assert isinstance(request_prompt, str), ( diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 2ee8de5fba07..0905cf0ab413 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -14,6 +14,14 @@ import jinja2 from fastapi import Request +from openai.types.chat import ( + ChatCompletionAssistantMessageParam, + ChatCompletionMessageToolCallParam, + ChatCompletionToolMessageParam, +) +from openai.types.chat.chat_completion_message_tool_call_param import ( + Function as FunctionCallTool, +) from openai.types.responses import ( ResponseCodeInterpreterCallCodeDeltaEvent, ResponseCodeInterpreterCallCodeDoneEvent, @@ -41,6 +49,7 @@ ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent, + ToolChoiceFunction, response_function_web_search, response_text_delta_event, ) @@ -50,6 +59,7 @@ ) from openai.types.responses.tool import Tool from openai_harmony import Message as OpenAIHarmonyMessage +from pydantic import TypeAdapter from vllm import envs from vllm.engine.protocol import EngineClient @@ -79,12 +89,15 @@ from vllm.entrypoints.openai.protocol import ( DeltaMessage, ErrorResponse, + FunctionCall, + FunctionDefinition, InputTokensDetails, OutputTokensDetails, RequestResponseMetadata, ResponseCompletedEvent, ResponseCreatedEvent, ResponseInProgressEvent, + ResponseInputOutputItem, ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent, ResponsesRequest, @@ -198,14 +211,10 @@ def __init__( ) # set up tool use - self.enable_auto_tools: bool = enable_auto_tools - if self.enable_auto_tools: - logger.info( - '"auto" tool choice has been enabled please note that while' - " the parallel_tool_calls client option is preset for " - "compatibility reasons, it will be ignored." - ) - + self.tool_parser = self._get_tool_parser( + tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools + ) + self.exclude_tools_when_tool_choice_none = False # HACK(woosuk): This is a hack. We should use a better store. # FIXME: If enable_store=True, this may cause a memory leak since we # never remove responses from the store. @@ -511,16 +520,20 @@ async def _make_request( prev_response: ResponsesResponse | None, tokenizer: AnyTokenizer, ): - if len(request.tools) > 0: - raise NotImplementedError( - "Tool use is not supported in Responses API without Harmony" - ) + if request.tools is None or ( + request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none + ): + tool_dicts = None + else: + tool_dicts = [tool.model_dump() for tool in request.tools] # Construct the input messages. messages = self._construct_input_messages(request, prev_response) _, request_prompts, engine_prompts = await self._preprocess_chat( request, tokenizer, messages, + tool_dicts=tool_dicts, + tool_parser=self.tool_parser, chat_template=self.chat_template, chat_template_content_format=self.chat_template_content_format, ) @@ -802,7 +815,8 @@ def _make_response_output_items( delta=False, ) - output = [] + reasoning_item = None + message_item = None if reasoning_content: reasoning_item = ResponseReasoningItem( id=f"rs_{random_uuid()}", @@ -815,7 +829,6 @@ def _make_response_output_items( ], status=None, # NOTE: Only the last output item has status. ) - output.append(reasoning_item) if content: output_text = ResponseOutputText( text=content, @@ -832,15 +845,119 @@ def _make_response_output_items( else None ), ) - message = ResponseOutputMessage( + message_item = ResponseOutputMessage( id=f"msg_{random_uuid()}", content=[output_text], role="assistant", status="completed", type="message", ) - output.append(message) - return output + outputs = [] + function_calls = self._extract_tool_calls(request, tokenizer, content=content) + if function_calls: + outputs.extend( + [ + ResponseFunctionToolCall( + id=f"fc_{random_uuid()}", + call_id=f"call_{random_uuid()}", + type="function_call", + status="completed", + name=tool_call.name, + arguments=tool_call.arguments, + ) + for tool_call in function_calls + ] + ) + else: + if reasoning_item: + outputs.append(reasoning_item) + if message_item: + outputs.append(message_item) + return outputs + + def _extract_tool_calls( + self, + request: ResponsesRequest, + tokenizer: AnyTokenizer, + content: str | None = None, + ) -> list[FunctionCall] | None: + function_calls = list[FunctionCall]() + if not self.enable_auto_tools or not self.tool_parser: + # Tools are not enabled + return None + elif request.tool_choice is None: + # No tool calls. + return None + elif request.tool_choice and isinstance( + request.tool_choice, ToolChoiceFunction + ): + # Forced Function Call + function_calls.append( + FunctionCall(name=request.tool_choice.name, arguments=content) + ) + elif request.tool_choice == "required": + assert content is not None + tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content) + function_calls.extend( + [ + FunctionCall( + name=tool_call.name, + arguments=json.dumps(tool_call.parameters, ensure_ascii=False), + ) + for tool_call in tool_calls + ] + ) + elif request.tool_choice == "auto" or request.tool_choice == "none": + try: + tool_parser = self.tool_parser(tokenizer) + except RuntimeError as e: + logger.exception("Error in tool parser creation.") + raise e + tool_call_info = tool_parser.extract_tool_calls( + content if content is not None else "", request=request + ) + if tool_call_info is not None and tool_call_info.tools_called: + # extract_tool_calls() returns a list of tool calls. + function_calls.extend( + FunctionCall( + name=tool_call.function.name, + arguments=tool_call.function.arguments, + ) + for tool_call in tool_call_info.tool_calls + ) + else: + # No tool calls. + return None + else: + raise ValueError(f"Invalid tool_choice: {request.tool_choice}") + return function_calls + + def _parse_chat_tool_call( + self, item: ResponseInputOutputItem + ) -> ChatCompletionMessageParam: + if item.get("type") == "function_call": + # Append the function call as a tool call. + return ChatCompletionAssistantMessageParam( + role="assistant", + tool_calls=[ + ChatCompletionMessageToolCallParam( + id=item.get("call_id"), + function=FunctionCallTool( + name=item.get("name"), + arguments=item.get("arguments"), + ), + type="function", + ) + ], + ) + elif item.get("type") == "function_call_output": + # Append the function call output as a tool message. + return ChatCompletionToolMessageParam( + role="tool", + content=item.get("output"), + tool_call_id=item.get("call_id"), + ) + return item # type: ignore def _make_response_output_items_with_harmony( self, @@ -893,7 +1010,8 @@ def _construct_input_messages( if isinstance(request.input, str): messages.append({"role": "user", "content": request.input}) else: - messages.extend(request.input) # type: ignore + for item in request.input: + messages.append(self._parse_chat_tool_call(item)) return messages def _construct_harmony_system_input_message( From 878f10459c2492aa1cd8e940101fa00363ed1b13 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 08:17:51 +0000 Subject: [PATCH 02/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 2 +- vllm/entrypoints/openai/serving_responses.py | 2 +- vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 2a762c557b2f..236867cbcd7d 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -307,7 +307,7 @@ def get_logits_processors( def get_json_schema_from_tool( tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam, tools: list[Tool | ChatCompletionToolsParam] | None, -) -> str | dict | BaseModel | None: +) -> str | dict | None: if tool_choice in ("none", None) or tools is None: return None if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice): diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 0905cf0ab413..536497b7d22a 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -209,7 +209,7 @@ def __init__( self.default_sampling_params["stop_token_ids"].extend( get_stop_tokens_for_assistant_actions() ) - + self.enable_auto_tools = enable_auto_tools # set up tool use self.tool_parser = self._get_tool_parser( tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py index 8d520f5bf8ef..b647098efb41 100644 --- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py @@ -10,6 +10,7 @@ ChatCompletionRequest, DeltaMessage, ExtractedToolCallInformation, + ResponsesRequest, ) from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools from vllm.logger import init_logger @@ -64,7 +65,7 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques return request def extract_tool_calls( - self, model_output: str, request: ChatCompletionRequest + self, model_output: str, request: ChatCompletionRequest | ResponsesRequest ) -> ExtractedToolCallInformation: """ Static method that should be implemented for extracting tool calls from From c11eb9d957fbac78161ff040697fdfc20d84f669 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 08:31:22 +0000 Subject: [PATCH 03/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 3 ++- vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 536497b7d22a..397449affe37 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -914,7 +914,8 @@ def _extract_tool_calls( logger.exception("Error in tool parser creation.") raise e tool_call_info = tool_parser.extract_tool_calls( - content if content is not None else "", request=request + content if content is not None else "", + request=request, # type: ignore ) if tool_call_info is not None and tool_call_info.tools_called: # extract_tool_calls() returns a list of tool calls. diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py index b647098efb41..8d520f5bf8ef 100644 --- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py @@ -10,7 +10,6 @@ ChatCompletionRequest, DeltaMessage, ExtractedToolCallInformation, - ResponsesRequest, ) from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools from vllm.logger import init_logger @@ -65,7 +64,7 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques return request def extract_tool_calls( - self, model_output: str, request: ChatCompletionRequest | ResponsesRequest + self, model_output: str, request: ChatCompletionRequest ) -> ExtractedToolCallInformation: """ Static method that should be implemented for extracting tool calls from From ecd5942c437189a4dbe4989cbbbd3405efba6b66 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 08:35:15 +0000 Subject: [PATCH 04/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/openai/responses/test_function_call.py | 4 +++- vllm/entrypoints/openai/protocol.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py index 4e4e847d1663..5ea300772804 100644 --- a/tests/v1/entrypoints/openai/responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -138,7 +138,9 @@ def get_weather(latitude: float, longitude: float) -> str: { "type": "function", "name": "get_weather", - "description": "Get current temperature for provided coordinates in celsius.", + "description": ( + "Get current temperature for provided coordinates in celsius." + ), "parameters": { "type": "object", "properties": { diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 236867cbcd7d..db87d7a81686 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -365,7 +365,8 @@ def get_tool_schema_defs( for def_name, def_schema in defs.items(): if def_name in all_defs and all_defs[def_name] != def_schema: raise ValueError( - f"Tool definition '{def_name}' has multiple schemas, which is not supported." + f"Tool definition '{def_name}' has multiple schemas, " + "which is not supported." ) all_defs[def_name] = def_schema return all_defs From b03d0b26ec4d911150e45c3bed06940cdf8551fe Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 10:03:54 +0000 Subject: [PATCH 05/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../openai_responses_client_with_tools.py | 2 +- .../entrypoints/openai/responses/conftest.py | 2 +- .../openai/responses/test_function_call.py | 2 +- vllm/entrypoints/openai/protocol.py | 20 +++++++++++-------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py index fb0cb90d9a8c..a985699ec162 100644 --- a/examples/online_serving/openai_responses_client_with_tools.py +++ b/examples/online_serving/openai_responses_client_with_tools.py @@ -7,7 +7,7 @@ https://platform.openai.com/docs/api-reference/responses For example: vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \ - --guided-decoding-backend xgrammar \ + --structured-outputs-config.backend xgrammar \ --enable-auto-tool-choice --tool-call-parser hermes """ diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py index 22730dbae067..8081e5fa1d83 100644 --- a/tests/v1/entrypoints/openai/responses/conftest.py +++ b/tests/v1/entrypoints/openai/responses/conftest.py @@ -16,7 +16,7 @@ def default_server_args(): "8192", "--enforce-eager", # For faster startup. "--enable-auto-tool-choice", - "--guided-decoding-backend", + "--structured-outputs-config.backend", "xgrammar", "--tool-call-parser", "hermes", diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py index 5ea300772804..f964d45a60c6 100644 --- a/tests/v1/entrypoints/openai/responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -100,7 +100,7 @@ @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +@pytest.mark.parametrize("tool_choice", ["auto",]) async def test_function_tool_use( client: openai.AsyncOpenAI, model_name: str, tool_choice: str ): diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index db87d7a81686..65c49e8705f7 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -16,6 +16,7 @@ ) from openai.types.chat.chat_completion_message import Annotation as OpenAIAnnotation from openai.types.responses import ( + FunctionTool, ResponseCodeInterpreterCallCodeDeltaEvent, ResponseCodeInterpreterCallCodeDoneEvent, ResponseCodeInterpreterCallCompletedEvent, @@ -36,6 +37,7 @@ ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent, + ToolChoiceFunction, ) from openai.types.responses import ( ResponseCompletedEvent as OpenAIResponseCompletedEvent, @@ -65,7 +67,7 @@ from openai.types.responses.response import IncompleteDetails, ToolChoice -from openai.types.responses.tool import FunctionTool, Tool +from openai.types.responses.tool import Tool from openai.types.shared import Metadata, Reasoning from pydantic import ( BaseModel, @@ -306,13 +308,15 @@ def get_logits_processors( def get_json_schema_from_tool( tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam, - tools: list[Tool | ChatCompletionToolsParam] | None, + tools: list[FunctionTool | ChatCompletionToolsParam] | None, ) -> str | dict | None: if tool_choice in ("none", None) or tools is None: return None - if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice): + if (not isinstance(tool_choice, str)) and isinstance( + tool_choice, ToolChoiceFunction + ): tool_name = tool_choice.name - tool_map = {tool.name: tool for tool in tools if isinstance(tool, Tool)} + tool_map = {tool.name: tool for tool in tools if isinstance(tool, FunctionTool)} if tool_name not in tool_map: raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") return tool_map[tool_name].parameters @@ -537,11 +541,11 @@ def _get_structured_outputs(self) -> StructuredOutputsParams | None: raise NotImplementedError("json_object is not supported") # Function call elif not (self.tool_choice == "none" or self.tools is None): - structured_outputs = StructuredOutputsParams( - json=get_json_schema_from_tool( - tools=self.tools, tool_choice=self.tool_choice - ) + json_schema = get_json_schema_from_tool( + tools=self.tools, tool_choice=self.tool_choice ) + if json_schema is not None: + structured_outputs = StructuredOutputsParams(json=json_schema) return structured_outputs def is_include_output_logprobs(self) -> bool: From 577c191a4fec282a4f05f503464bd9659f9e147f Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 10:05:10 +0000 Subject: [PATCH 06/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 65c49e8705f7..d78fd97c4556 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -307,7 +307,7 @@ def get_logits_processors( def get_json_schema_from_tool( - tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam, + tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam, tools: list[FunctionTool | ChatCompletionToolsParam] | None, ) -> str | dict | None: if tool_choice in ("none", None) or tools is None: From 98af9c9803b9fd5e1755abb2b702e0b8eae486b9 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 15 Oct 2025 10:15:13 +0000 Subject: [PATCH 07/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- tests/v1/entrypoints/openai/responses/test_function_call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py index f964d45a60c6..5ea300772804 100644 --- a/tests/v1/entrypoints/openai/responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -100,7 +100,7 @@ @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -@pytest.mark.parametrize("tool_choice", ["auto",]) +@pytest.mark.parametrize("tool_choice", ["auto", "required"]) async def test_function_tool_use( client: openai.AsyncOpenAI, model_name: str, tool_choice: str ): From 2f13d9903ca558df523f283fb085716b2ed9042f Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 21 Oct 2025 07:46:32 +0000 Subject: [PATCH 08/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../openai_responses_client_with_tools.py | 10 +++++-- .../openai/responses/test_function_call.py | 12 +++++--- vllm/entrypoints/openai/serving_responses.py | 28 +++++++++++-------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py index a985699ec162..276010197b5a 100644 --- a/examples/online_serving/openai_responses_client_with_tools.py +++ b/examples/online_serving/openai_responses_client_with_tools.py @@ -14,6 +14,7 @@ import json from openai import OpenAI +from utils import get_first_model def get_weather(latitude: float, longitude: float) -> str: @@ -49,14 +50,17 @@ def get_weather(latitude: float, longitude: float) -> str: def main(): base_url = "http://0.0.0.0:8000/v1" - model = "Qwen/Qwen3-1.7B" client = OpenAI(base_url=base_url, api_key="empty") + model = get_first_model(client) response = client.responses.create( model=model, input=input_messages, tools=tools, tool_choice="required" ) - tool_call = response.output[0] - args = json.loads(tool_call.arguments) + for out in response.output: + if out.type == "function_call": + print("Function call:", out.name, out.arguments) + tool_call = out + args = json.loads(tool_call.arguments) result = get_weather(args["latitude"], args["longitude"]) input_messages.append(tool_call) # append model's function call message diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py index 5ea300772804..28f4b01d3e12 100644 --- a/tests/v1/entrypoints/openai/responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -119,8 +119,10 @@ async def test_function_tool_use( ) assert len(response.output) >= 1 - tool_call = response.output[0] - + tool_call = None + for out in response.output: + if out.type == "function_call": + tool_call = out assert tool_call.type == "function_call" assert json.loads(tool_call.arguments) is not None @@ -164,8 +166,10 @@ def get_weather(latitude: float, longitude: float) -> str: tools=tools, tool_choice={"type": "function", "name": "get_weather"}, ) - assert len(response.output) == 1 - tool_call = response.output[0] + assert len(response.output) >= 1 + for out in response.output: + if out.type == "function_call": + tool_call = out assert tool_call.type == "function_call" assert tool_call.name == "get_weather" args = json.loads(tool_call.arguments) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 397449affe37..bb8f3c0c24e7 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -829,6 +829,9 @@ def _make_response_output_items( ], status=None, # NOTE: Only the last output item has status. ) + function_calls, content = self._extract_tool_calls( + request, tokenizer, content=content + ) if content: output_text = ResponseOutputText( text=content, @@ -853,7 +856,11 @@ def _make_response_output_items( type="message", ) outputs = [] - function_calls = self._extract_tool_calls(request, tokenizer, content=content) + + if reasoning_item: + outputs.append(reasoning_item) + if message_item: + outputs.append(message_item) if function_calls: outputs.extend( [ @@ -868,11 +875,6 @@ def _make_response_output_items( for tool_call in function_calls ] ) - else: - if reasoning_item: - outputs.append(reasoning_item) - if message_item: - outputs.append(message_item) return outputs def _extract_tool_calls( @@ -880,14 +882,15 @@ def _extract_tool_calls( request: ResponsesRequest, tokenizer: AnyTokenizer, content: str | None = None, - ) -> list[FunctionCall] | None: + ) -> tuple[list[FunctionCall], str | None] | None: function_calls = list[FunctionCall]() + if not self.enable_auto_tools or not self.tool_parser: # Tools are not enabled - return None + return None, content elif request.tool_choice is None: # No tool calls. - return None + return None, content elif request.tool_choice and isinstance( request.tool_choice, ToolChoiceFunction ): @@ -895,6 +898,7 @@ def _extract_tool_calls( function_calls.append( FunctionCall(name=request.tool_choice.name, arguments=content) ) + content = "" # Clear content since tool is called. elif request.tool_choice == "required": assert content is not None tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content) @@ -907,6 +911,7 @@ def _extract_tool_calls( for tool_call in tool_calls ] ) + content = "" # Clear content since tool is called. elif request.tool_choice == "auto" or request.tool_choice == "none": try: tool_parser = self.tool_parser(tokenizer) @@ -926,12 +931,13 @@ def _extract_tool_calls( ) for tool_call in tool_call_info.tool_calls ) + content = tool_call_info.content else: # No tool calls. - return None + return None, content else: raise ValueError(f"Invalid tool_choice: {request.tool_choice}") - return function_calls + return function_calls, content def _parse_chat_tool_call( self, item: ResponseInputOutputItem From 93e9db5fcd9c16587fcd6835f1da5e31dfdabbd6 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 21 Oct 2025 07:49:12 +0000 Subject: [PATCH 09/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../v1/entrypoints/openai/responses/test_function_call.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py index 28f4b01d3e12..8ecbc8d2704e 100644 --- a/tests/v1/entrypoints/openai/responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -120,11 +120,17 @@ async def test_function_tool_use( assert len(response.output) >= 1 tool_call = None + reasoning = None for out in response.output: if out.type == "function_call": tool_call = out + if out.type == "reasoning": + reasoning = out + assert tool_call is not None assert tool_call.type == "function_call" assert json.loads(tool_call.arguments) is not None + assert reasoning is not None + assert reasoning.type == "reasoning" @pytest.mark.asyncio @@ -170,6 +176,7 @@ def get_weather(latitude: float, longitude: float) -> str: for out in response.output: if out.type == "function_call": tool_call = out + assert tool_call is not None assert tool_call.type == "function_call" assert tool_call.name == "get_weather" args = json.loads(tool_call.arguments) From eebab9c360a572346372e0b76318a96eb56c0df3 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 21 Oct 2025 07:53:13 +0000 Subject: [PATCH 10/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index bb8f3c0c24e7..87d1b0901342 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -882,7 +882,7 @@ def _extract_tool_calls( request: ResponsesRequest, tokenizer: AnyTokenizer, content: str | None = None, - ) -> tuple[list[FunctionCall], str | None] | None: + ) -> tuple[list[FunctionCall] | None, str | None]: function_calls = list[FunctionCall]() if not self.enable_auto_tools or not self.tool_parser: @@ -898,7 +898,7 @@ def _extract_tool_calls( function_calls.append( FunctionCall(name=request.tool_choice.name, arguments=content) ) - content = "" # Clear content since tool is called. + content = None # Clear content since tool is called. elif request.tool_choice == "required": assert content is not None tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content) @@ -911,7 +911,7 @@ def _extract_tool_calls( for tool_call in tool_calls ] ) - content = "" # Clear content since tool is called. + content = None # Clear content since tool is called. elif request.tool_choice == "auto" or request.tool_choice == "none": try: tool_parser = self.tool_parser(tokenizer) From 842f7e1764dde666487c5009c8fe4280fa4583d2 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 04:07:14 +0000 Subject: [PATCH 11/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 87d1b0901342..e7d8e7b812ae 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -939,7 +939,7 @@ def _extract_tool_calls( raise ValueError(f"Invalid tool_choice: {request.tool_choice}") return function_calls, content - def _parse_chat_tool_call( + def _construct_chat_message_with_tool_call( self, item: ResponseInputOutputItem ) -> ChatCompletionMessageParam: if item.get("type") == "function_call": @@ -1018,7 +1018,7 @@ def _construct_input_messages( messages.append({"role": "user", "content": request.input}) else: for item in request.input: - messages.append(self._parse_chat_tool_call(item)) + messages.append(self._construct_chat_message_with_tool_call(item)) return messages def _construct_harmony_system_input_message( From 839aaadb83f6c707f8ab4366affe41f08415a7ab Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 05:57:02 +0000 Subject: [PATCH 12/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 112 +++++++++++++++------------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index d78fd97c4556..bf9a63da1ada 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -306,6 +306,65 @@ def get_logits_processors( return None +def _extract_tool_info( + tool: Tool | ChatCompletionToolsParam, +) -> tuple[str, dict[str, Any] | None]: + if isinstance(tool, FunctionTool): + return tool.name, tool.parameters + elif isinstance(tool, ChatCompletionToolsParam): + return tool.function.name, tool.function.parameters + else: + raise TypeError(f"Unsupported tool type: {type(tool)}") + + +def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict: + name, params = _extract_tool_info(tool) + params = params if params else {"type": "object", "properties": {}} + return { + "properties": { + "name": {"type": "string", "enum": [name]}, + "parameters": params, + }, + "required": ["name", "parameters"], + } + + +def _get_tool_schema_defs( + tools: list[Tool | ChatCompletionToolsParam], +) -> dict: + all_defs: dict[str, dict[str, Any]] = {} + for tool in tools: + _, params = _get_tool_schema_from_tool(tool) + if params is None: + continue + defs = params.pop("$defs", {}) + for def_name, def_schema in defs.items(): + if def_name in all_defs and all_defs[def_name] != def_schema: + raise ValueError( + f"Tool definition '{def_name}' has multiple schemas, " + "which is not supported." + ) + all_defs[def_name] = def_schema + return all_defs + + +def _get_json_schema_from_choice_required( + tools: list[Tool | ChatCompletionToolsParam], +) -> dict: + json_schema = { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "anyOf": [_get_tool_schema_from_tool(tool) for tool in tools], + }, + } + json_schema_defs = _get_tool_schema_defs(tools) + if json_schema_defs: + json_schema["$defs"] = json_schema_defs + return json_schema + + def get_json_schema_from_tool( tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam, tools: list[FunctionTool | ChatCompletionToolsParam] | None, @@ -335,58 +394,7 @@ def get_json_schema_from_tool( return tool_map[tool_name].function.parameters if tool_choice == "required": - - def extract_tool_info( - tool: Tool | ChatCompletionToolsParam, - ) -> tuple[str, dict[str, Any] | None]: - if isinstance(tool, FunctionTool): - return tool.name, tool.parameters - elif isinstance(tool, ChatCompletionToolsParam): - return tool.function.name, tool.function.parameters - else: - raise TypeError(f"Unsupported tool type: {type(tool)}") - - def get_tool_schema(tool: Tool | ChatCompletionToolsParam) -> dict: - name, params = extract_tool_info(tool) - params = params if params else {"type": "object", "properties": {}} - return { - "properties": { - "name": {"type": "string", "enum": [name]}, - "parameters": params, - }, - "required": ["name", "parameters"], - } - - def get_tool_schema_defs( - tools: list[Tool | ChatCompletionToolsParam], - ) -> dict: - all_defs: dict[str, dict[str, Any]] = {} - for tool in tools: - _, params = extract_tool_info(tool) - if params is None: - continue - defs = params.pop("$defs", {}) - for def_name, def_schema in defs.items(): - if def_name in all_defs and all_defs[def_name] != def_schema: - raise ValueError( - f"Tool definition '{def_name}' has multiple schemas, " - "which is not supported." - ) - all_defs[def_name] = def_schema - return all_defs - - json_schema = { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "anyOf": [get_tool_schema(tool) for tool in tools], - }, - } - json_schema_defs = get_tool_schema_defs(tools) - if json_schema_defs: - json_schema["$defs"] = json_schema_defs - return json_schema + return _get_json_schema_from_choice_required(tools) return None From eb8fa5b4b3bc80fd0c7aebd46aae762df76c27bb Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 06:08:09 +0000 Subject: [PATCH 13/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index bf9a63da1ada..4cc1949808b3 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -348,7 +348,7 @@ def _get_tool_schema_defs( return all_defs -def _get_json_schema_from_choice_required( +def _get_json_schema_from_tools( tools: list[Tool | ChatCompletionToolsParam], ) -> dict: json_schema = { @@ -394,7 +394,7 @@ def get_json_schema_from_tool( return tool_map[tool_name].function.parameters if tool_choice == "required": - return _get_json_schema_from_choice_required(tools) + return _get_json_schema_from_tools(tools) return None From 3d91f5355981ac21b83d39bef436c2176b0564d4 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 06:29:20 +0000 Subject: [PATCH 14/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 4cc1949808b3..e0c4ca63d2d5 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -369,8 +369,10 @@ def get_json_schema_from_tool( tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam, tools: list[FunctionTool | ChatCompletionToolsParam] | None, ) -> str | dict | None: + # tool_choice: "none" if tool_choice in ("none", None) or tools is None: return None + # tool_choice: Forced Function (Responses) if (not isinstance(tool_choice, str)) and isinstance( tool_choice, ToolChoiceFunction ): @@ -379,7 +381,7 @@ def get_json_schema_from_tool( if tool_name not in tool_map: raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") return tool_map[tool_name].parameters - + # tool_choice: Forced Function (ChatCompletion) if (not isinstance(tool_choice, str)) and isinstance( tool_choice, ChatCompletionNamedToolChoiceParam ): @@ -392,10 +394,10 @@ def get_json_schema_from_tool( if tool_name not in tool_map: raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") return tool_map[tool_name].function.parameters - + # tool_choice: "required" if tool_choice == "required": return _get_json_schema_from_tools(tools) - + # tool_choice: "auto" return None From 157ab86b6f7b96da075f427bb11017382b01088a Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 06:32:12 +0000 Subject: [PATCH 15/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 25 ++++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index e7d8e7b812ae..4e30de747b58 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -862,19 +862,18 @@ def _make_response_output_items( if message_item: outputs.append(message_item) if function_calls: - outputs.extend( - [ - ResponseFunctionToolCall( - id=f"fc_{random_uuid()}", - call_id=f"call_{random_uuid()}", - type="function_call", - status="completed", - name=tool_call.name, - arguments=tool_call.arguments, - ) - for tool_call in function_calls - ] - ) + tool_call_items = [ + ResponseFunctionToolCall( + id=f"fc_{random_uuid()}", + call_id=f"call_{random_uuid()}", + type="function_call", + status="completed", + name=tool_call.name, + arguments=tool_call.arguments, + ) + for tool_call in function_calls + ] + outputs.extend(tool_call_items) return outputs def _extract_tool_calls( From 2ddd919e715ebc5e3db949e786bd1bf08cf54051 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 22 Oct 2025 06:41:24 +0000 Subject: [PATCH 16/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 4e30de747b58..459a9981b9c6 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -829,7 +829,7 @@ def _make_response_output_items( ], status=None, # NOTE: Only the last output item has status. ) - function_calls, content = self._extract_tool_calls( + function_calls, content = self._parse_tool_calls_from_content( request, tokenizer, content=content ) if content: @@ -876,7 +876,7 @@ def _make_response_output_items( outputs.extend(tool_call_items) return outputs - def _extract_tool_calls( + def _parse_tool_calls_from_content( self, request: ResponsesRequest, tokenizer: AnyTokenizer, From ce784fe5be2983a1ad7234cd3413cd8ae29365f2 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 24 Oct 2025 14:19:09 +0000 Subject: [PATCH 17/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 95 ----------------------------- 1 file changed, 95 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index e0c4ca63d2d5..7515b3ab1048 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -306,101 +306,6 @@ def get_logits_processors( return None -def _extract_tool_info( - tool: Tool | ChatCompletionToolsParam, -) -> tuple[str, dict[str, Any] | None]: - if isinstance(tool, FunctionTool): - return tool.name, tool.parameters - elif isinstance(tool, ChatCompletionToolsParam): - return tool.function.name, tool.function.parameters - else: - raise TypeError(f"Unsupported tool type: {type(tool)}") - - -def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict: - name, params = _extract_tool_info(tool) - params = params if params else {"type": "object", "properties": {}} - return { - "properties": { - "name": {"type": "string", "enum": [name]}, - "parameters": params, - }, - "required": ["name", "parameters"], - } - - -def _get_tool_schema_defs( - tools: list[Tool | ChatCompletionToolsParam], -) -> dict: - all_defs: dict[str, dict[str, Any]] = {} - for tool in tools: - _, params = _get_tool_schema_from_tool(tool) - if params is None: - continue - defs = params.pop("$defs", {}) - for def_name, def_schema in defs.items(): - if def_name in all_defs and all_defs[def_name] != def_schema: - raise ValueError( - f"Tool definition '{def_name}' has multiple schemas, " - "which is not supported." - ) - all_defs[def_name] = def_schema - return all_defs - - -def _get_json_schema_from_tools( - tools: list[Tool | ChatCompletionToolsParam], -) -> dict: - json_schema = { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "anyOf": [_get_tool_schema_from_tool(tool) for tool in tools], - }, - } - json_schema_defs = _get_tool_schema_defs(tools) - if json_schema_defs: - json_schema["$defs"] = json_schema_defs - return json_schema - - -def get_json_schema_from_tool( - tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam, - tools: list[FunctionTool | ChatCompletionToolsParam] | None, -) -> str | dict | None: - # tool_choice: "none" - if tool_choice in ("none", None) or tools is None: - return None - # tool_choice: Forced Function (Responses) - if (not isinstance(tool_choice, str)) and isinstance( - tool_choice, ToolChoiceFunction - ): - tool_name = tool_choice.name - tool_map = {tool.name: tool for tool in tools if isinstance(tool, FunctionTool)} - if tool_name not in tool_map: - raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") - return tool_map[tool_name].parameters - # tool_choice: Forced Function (ChatCompletion) - if (not isinstance(tool_choice, str)) and isinstance( - tool_choice, ChatCompletionNamedToolChoiceParam - ): - tool_name = tool_choice.function.name - tool_map = { - tool.function.name: tool - for tool in tools - if isinstance(tool, ChatCompletionToolsParam) - } - if tool_name not in tool_map: - raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.") - return tool_map[tool_name].function.parameters - # tool_choice: "required" - if tool_choice == "required": - return _get_json_schema_from_tools(tools) - # tool_choice: "auto" - return None - - ResponseInputOutputItem: TypeAlias = ( ResponseInputItemParam | ResponseReasoningItem | ResponseFunctionToolCall ) From 067cd66c4aa26732ff313a7301cec7519b016d02 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 24 Oct 2025 14:23:10 +0000 Subject: [PATCH 18/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 38 +++++++++-------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 7515b3ab1048..33256de6dd47 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -16,7 +16,6 @@ ) from openai.types.chat.chat_completion_message import Annotation as OpenAIAnnotation from openai.types.responses import ( - FunctionTool, ResponseCodeInterpreterCallCodeDeltaEvent, ResponseCodeInterpreterCallCodeDoneEvent, ResponseCodeInterpreterCallCompletedEvent, @@ -37,7 +36,6 @@ ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent, - ToolChoiceFunction, ) from openai.types.responses import ( ResponseCompletedEvent as OpenAIResponseCompletedEvent, @@ -425,7 +423,18 @@ def to_sampling_params( stop_token_ids = default_sampling_params.get("stop_token_ids") # Structured output - structured_outputs = self._get_structured_outputs() + structured_outputs = None + if self.text is not None and self.text.format is not None: + response_format = self.text.format + if ( + response_format.type == "json_schema" + and response_format.schema_ is not None + ): + structured_outputs = StructuredOutputsParams( + json=response_format.schema_ + ) + elif response_format.type == "json_object": + raise NotImplementedError("json_object is not supported") # TODO: add more parameters return SamplingParams.from_optional( @@ -440,29 +449,6 @@ def to_sampling_params( structured_outputs=structured_outputs, ) - def _get_structured_outputs(self) -> StructuredOutputsParams | None: - # Structured output - structured_outputs = None - if self.text is not None and self.text.format is not None: - response_format = self.text.format - if ( - response_format.type == "json_schema" - and response_format.schema_ is not None - ): - structured_outputs = StructuredOutputsParams( - json=response_format.schema_ - ) - elif response_format.type == "json_object": - raise NotImplementedError("json_object is not supported") - # Function call - elif not (self.tool_choice == "none" or self.tools is None): - json_schema = get_json_schema_from_tool( - tools=self.tools, tool_choice=self.tool_choice - ) - if json_schema is not None: - structured_outputs = StructuredOutputsParams(json=json_schema) - return structured_outputs - def is_include_output_logprobs(self) -> bool: """Check if the request includes output logprobs.""" if self.include is None: From f44848a19a8349878c52d5dd62fe60b8931a962c Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 24 Oct 2025 15:02:34 +0000 Subject: [PATCH 19/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_engine.py | 8 +++---- vllm/entrypoints/openai/serving_responses.py | 8 +++---- .../tool_parsers/abstract_tool_parser.py | 24 +++++++++++++++---- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index ddbeeba99447..dc9e71651914 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1104,11 +1104,9 @@ async def _preprocess_chat( "or Responses API requests." ) raise NotImplementedError(msg) - - if isinstance(request, ChatCompletionRequest): - request = tool_parser(tokenizer).adjust_request( # type: ignore - request=request - ) + request = tool_parser(tokenizer).adjust_request( # type: ignore + request=request + ) if tokenizer is None: assert isinstance(request_prompt, str), ( diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 459a9981b9c6..62d86d790d28 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -941,16 +941,16 @@ def _parse_tool_calls_from_content( def _construct_chat_message_with_tool_call( self, item: ResponseInputOutputItem ) -> ChatCompletionMessageParam: - if item.get("type") == "function_call": + if isinstance(item, ResponseFunctionToolCall): # Append the function call as a tool call. return ChatCompletionAssistantMessageParam( role="assistant", tool_calls=[ ChatCompletionMessageToolCallParam( - id=item.get("call_id"), + id=item.call_id, function=FunctionCallTool( - name=item.get("name"), - arguments=item.get("arguments"), + name=item.name, + arguments=item.arguments, ), type="function", ) diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py index 8d520f5bf8ef..d26d7a139b90 100644 --- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py @@ -6,10 +6,16 @@ from collections.abc import Callable, Sequence from functools import cached_property +from openai.types.responses.response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig, +) + from vllm.entrypoints.openai.protocol import ( ChatCompletionRequest, DeltaMessage, ExtractedToolCallInformation, + ResponsesRequest, + ResponseTextConfig, ) from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools from vllm.logger import init_logger @@ -56,11 +62,21 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques ) # Set structured output params for tool calling if json_schema_from_tool is not None: - if request.structured_outputs is None: + if isinstance(request, ChatCompletionRequest): request.structured_outputs = StructuredOutputsParams() - # tool_choice: "Forced Function" or "required" will override - # structured output json settings to make tool calling work correctly - request.structured_outputs.json = json_schema_from_tool + # tool_choice: "Forced Function" or "required" will override + # structured output json settings to make tool calling work correctly + request.structured_outputs.json = json_schema_from_tool + if isinstance(request, ResponsesRequest): + request.text = ResponseTextConfig() + request.text.format = ResponseFormatTextJSONSchemaConfig( + name="tool_calling_response", + schema=json_schema_from_tool, + type="json_schema", + description="Response format for tool calling", + strict=True, + ) + return request def extract_tool_calls( From 5b8356c9f2dc7b054202745b1525f07ed021fd71 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 24 Oct 2025 15:06:13 +0000 Subject: [PATCH 20/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index dc9e71651914..4c8d3f2c99a1 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1104,8 +1104,8 @@ async def _preprocess_chat( "or Responses API requests." ) raise NotImplementedError(msg) - request = tool_parser(tokenizer).adjust_request( # type: ignore - request=request + request = tool_parser(tokenizer).adjust_request( + request=request # type: ignore ) if tokenizer is None: From 3ab6b2be11948242dab858a9258f8661b1638f7e Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Fri, 24 Oct 2025 15:10:13 +0000 Subject: [PATCH 21/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_engine.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 4c8d3f2c99a1..8ce4ff574699 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1104,9 +1104,7 @@ async def _preprocess_chat( "or Responses API requests." ) raise NotImplementedError(msg) - request = tool_parser(tokenizer).adjust_request( - request=request # type: ignore - ) + request = tool_parser(tokenizer).adjust_request(request=request) # type: ignore if tokenizer is None: assert isinstance(request_prompt, str), ( From f2b7ddeaec1a794152839aacc46f52fbff588b57 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 28 Oct 2025 03:42:04 +0000 Subject: [PATCH 22/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../openai/{responses => serving_responses}/__init__.py | 0 .../openai/{responses => serving_responses}/conftest.py | 0 .../openai/{responses => serving_responses}/test_basic.py | 0 .../openai/{responses => serving_responses}/test_function_call.py | 0 .../openai/{responses => serving_responses}/test_image.py | 0 .../openai/{responses => serving_responses}/test_stateful.py | 0 .../{responses => serving_responses}/test_structured_output.py | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename tests/v1/entrypoints/openai/{responses => serving_responses}/__init__.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/conftest.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_basic.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_function_call.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_image.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_stateful.py (100%) rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_structured_output.py (100%) diff --git a/tests/v1/entrypoints/openai/responses/__init__.py b/tests/v1/entrypoints/openai/serving_responses/__init__.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/__init__.py rename to tests/v1/entrypoints/openai/serving_responses/__init__.py diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/serving_responses/conftest.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/conftest.py rename to tests/v1/entrypoints/openai/serving_responses/conftest.py diff --git a/tests/v1/entrypoints/openai/responses/test_basic.py b/tests/v1/entrypoints/openai/serving_responses/test_basic.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/test_basic.py rename to tests/v1/entrypoints/openai/serving_responses/test_basic.py diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/test_function_call.py rename to tests/v1/entrypoints/openai/serving_responses/test_function_call.py diff --git a/tests/v1/entrypoints/openai/responses/test_image.py b/tests/v1/entrypoints/openai/serving_responses/test_image.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/test_image.py rename to tests/v1/entrypoints/openai/serving_responses/test_image.py diff --git a/tests/v1/entrypoints/openai/responses/test_stateful.py b/tests/v1/entrypoints/openai/serving_responses/test_stateful.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/test_stateful.py rename to tests/v1/entrypoints/openai/serving_responses/test_stateful.py diff --git a/tests/v1/entrypoints/openai/responses/test_structured_output.py b/tests/v1/entrypoints/openai/serving_responses/test_structured_output.py similarity index 100% rename from tests/v1/entrypoints/openai/responses/test_structured_output.py rename to tests/v1/entrypoints/openai/serving_responses/test_structured_output.py From 06e802e826ee5e7209f02197da4a881807728a24 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 5 Nov 2025 08:20:27 +0000 Subject: [PATCH 23/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 105 +------------------ vllm/entrypoints/responses_utils.py | 43 ++++++++ 2 files changed, 45 insertions(+), 103 deletions(-) create mode 100644 vllm/entrypoints/responses_utils.py diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 62d86d790d28..f4aa3e15d59e 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -14,14 +14,6 @@ import jinja2 from fastapi import Request -from openai.types.chat import ( - ChatCompletionAssistantMessageParam, - ChatCompletionMessageToolCallParam, - ChatCompletionToolMessageParam, -) -from openai.types.chat.chat_completion_message_tool_call_param import ( - Function as FunctionCallTool, -) from openai.types.responses import ( ResponseCodeInterpreterCallCodeDeltaEvent, ResponseCodeInterpreterCallCodeDoneEvent, @@ -49,7 +41,6 @@ ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent, - ToolChoiceFunction, response_function_web_search, response_text_delta_event, ) @@ -59,7 +50,6 @@ ) from openai.types.responses.tool import Tool from openai_harmony import Message as OpenAIHarmonyMessage -from pydantic import TypeAdapter from vllm import envs from vllm.engine.protocol import EngineClient @@ -89,15 +79,12 @@ from vllm.entrypoints.openai.protocol import ( DeltaMessage, ErrorResponse, - FunctionCall, - FunctionDefinition, InputTokensDetails, OutputTokensDetails, RequestResponseMetadata, ResponseCompletedEvent, ResponseCreatedEvent, ResponseInProgressEvent, - ResponseInputOutputItem, ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent, ResponsesRequest, @@ -107,6 +94,7 @@ ) from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.serving_models import OpenAIServingModels +from vllm.entrypoints.responses_utils import construct_chat_message_with_tool_call from vllm.entrypoints.tool_server import ToolServer from vllm.inputs.data import TokensPrompt as EngineTokensPrompt from vllm.logger import init_logger @@ -876,95 +864,6 @@ def _make_response_output_items( outputs.extend(tool_call_items) return outputs - def _parse_tool_calls_from_content( - self, - request: ResponsesRequest, - tokenizer: AnyTokenizer, - content: str | None = None, - ) -> tuple[list[FunctionCall] | None, str | None]: - function_calls = list[FunctionCall]() - - if not self.enable_auto_tools or not self.tool_parser: - # Tools are not enabled - return None, content - elif request.tool_choice is None: - # No tool calls. - return None, content - elif request.tool_choice and isinstance( - request.tool_choice, ToolChoiceFunction - ): - # Forced Function Call - function_calls.append( - FunctionCall(name=request.tool_choice.name, arguments=content) - ) - content = None # Clear content since tool is called. - elif request.tool_choice == "required": - assert content is not None - tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content) - function_calls.extend( - [ - FunctionCall( - name=tool_call.name, - arguments=json.dumps(tool_call.parameters, ensure_ascii=False), - ) - for tool_call in tool_calls - ] - ) - content = None # Clear content since tool is called. - elif request.tool_choice == "auto" or request.tool_choice == "none": - try: - tool_parser = self.tool_parser(tokenizer) - except RuntimeError as e: - logger.exception("Error in tool parser creation.") - raise e - tool_call_info = tool_parser.extract_tool_calls( - content if content is not None else "", - request=request, # type: ignore - ) - if tool_call_info is not None and tool_call_info.tools_called: - # extract_tool_calls() returns a list of tool calls. - function_calls.extend( - FunctionCall( - name=tool_call.function.name, - arguments=tool_call.function.arguments, - ) - for tool_call in tool_call_info.tool_calls - ) - content = tool_call_info.content - else: - # No tool calls. - return None, content - else: - raise ValueError(f"Invalid tool_choice: {request.tool_choice}") - return function_calls, content - - def _construct_chat_message_with_tool_call( - self, item: ResponseInputOutputItem - ) -> ChatCompletionMessageParam: - if isinstance(item, ResponseFunctionToolCall): - # Append the function call as a tool call. - return ChatCompletionAssistantMessageParam( - role="assistant", - tool_calls=[ - ChatCompletionMessageToolCallParam( - id=item.call_id, - function=FunctionCallTool( - name=item.name, - arguments=item.arguments, - ), - type="function", - ) - ], - ) - elif item.get("type") == "function_call_output": - # Append the function call output as a tool message. - return ChatCompletionToolMessageParam( - role="tool", - content=item.get("output"), - tool_call_id=item.get("call_id"), - ) - return item # type: ignore - def _make_response_output_items_with_harmony( self, context: HarmonyContext, @@ -1017,7 +916,7 @@ def _construct_input_messages( messages.append({"role": "user", "content": request.input}) else: for item in request.input: - messages.append(self._construct_chat_message_with_tool_call(item)) + messages.append(construct_chat_message_with_tool_call(item)) return messages def _construct_harmony_system_input_message( diff --git a/vllm/entrypoints/responses_utils.py b/vllm/entrypoints/responses_utils.py new file mode 100644 index 000000000000..7e631645db12 --- /dev/null +++ b/vllm/entrypoints/responses_utils.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from openai.types.chat.chat_completion_message_tool_call_param import ( + Function as FunctionCallTool, +) +from openai.types.responses import ResponseFunctionToolCall + +from vllm.entrypoints.openai.protocol import ( + ChatCompletionAssistantMessageParam, + ChatCompletionMessageParam, + ChatCompletionMessageToolCallParam, + ChatCompletionToolMessageParam, + ResponseInputOutputItem, +) + + +def construct_chat_message_with_tool_call( + item: ResponseInputOutputItem, +) -> ChatCompletionMessageParam: + if isinstance(item, ResponseFunctionToolCall): + # Append the function call as a tool call. + return ChatCompletionAssistantMessageParam( + role="assistant", + tool_calls=[ + ChatCompletionMessageToolCallParam( + id=item.call_id, + function=FunctionCallTool( + name=item.name, + arguments=item.arguments, + ), + type="function", + ) + ], + ) + elif item.get("type") == "function_call_output": + # Append the function call output as a tool message. + return ChatCompletionToolMessageParam( + role="tool", + content=item.get("output"), + tool_call_id=item.get("call_id"), + ) + return item # type: ignore From 13a2749af4d3563d6da0748ed8a0334a7b8bdd37 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Wed, 5 Nov 2025 08:26:57 +0000 Subject: [PATCH 24/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 12 ++++++++---- vllm/entrypoints/responses_utils.py | 8 +++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f4aa3e15d59e..7580d2da2d2b 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -817,8 +817,12 @@ def _make_response_output_items( ], status=None, # NOTE: Only the last output item has status. ) - function_calls, content = self._parse_tool_calls_from_content( - request, tokenizer, content=content + tool_calls, content = self._parse_tool_calls_from_content( + request=request, + tokenizer=tokenizer, + content=content, + enable_auto_tools=self.enable_auto_tools, + tool_parser_cls=self.tool_parser, ) if content: output_text = ResponseOutputText( @@ -849,7 +853,7 @@ def _make_response_output_items( outputs.append(reasoning_item) if message_item: outputs.append(message_item) - if function_calls: + if tool_calls: tool_call_items = [ ResponseFunctionToolCall( id=f"fc_{random_uuid()}", @@ -859,7 +863,7 @@ def _make_response_output_items( name=tool_call.name, arguments=tool_call.arguments, ) - for tool_call in function_calls + for tool_call in tool_calls ] outputs.extend(tool_call_items) return outputs diff --git a/vllm/entrypoints/responses_utils.py b/vllm/entrypoints/responses_utils.py index 7e631645db12..6eb7c0b70a67 100644 --- a/vllm/entrypoints/responses_utils.py +++ b/vllm/entrypoints/responses_utils.py @@ -1,16 +1,18 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from openai.types.chat import ( + ChatCompletionAssistantMessageParam, + ChatCompletionMessageToolCallParam, + ChatCompletionToolMessageParam, +) from openai.types.chat.chat_completion_message_tool_call_param import ( Function as FunctionCallTool, ) from openai.types.responses import ResponseFunctionToolCall from vllm.entrypoints.openai.protocol import ( - ChatCompletionAssistantMessageParam, ChatCompletionMessageParam, - ChatCompletionMessageToolCallParam, - ChatCompletionToolMessageParam, ResponseInputOutputItem, ) From 56f400b69bd40fa3e4a2768670c7ec93c07b9561 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 6 Nov 2025 08:36:28 +0000 Subject: [PATCH 25/25] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../entrypoints/openai/serving_responses/test_function_call.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/openai/serving_responses/test_function_call.py b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py index 8ecbc8d2704e..cf57956a9dea 100644 --- a/tests/v1/entrypoints/openai/serving_responses/test_function_call.py +++ b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py @@ -6,7 +6,7 @@ import openai # use the official client for correctness check import pytest -MODEL_NAME = "Qwen/Qwen3-0.6B" +MODEL_NAME = "Qwen/Qwen3-1.7B" tools = [ { "type": "function",