From a38e60cdc1f41850dffa46bcc91445fbc616c0d3 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 03:35:57 +0000
Subject: [PATCH 01/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../openai_responses_client_with_tools.py     |  79 ++++++++
 .../entrypoints/openai/responses/conftest.py  |   7 +-
 .../openai/responses/test_function_call.py    | 185 ++++++++++++++++++
 vllm/entrypoints/openai/protocol.py           | 120 ++++++++++--
 vllm/entrypoints/openai/serving_engine.py     |  14 +-
 vllm/entrypoints/openai/serving_responses.py  | 154 +++++++++++++--
 6 files changed, 522 insertions(+), 37 deletions(-)
 create mode 100644 examples/online_serving/openai_responses_client_with_tools.py
 create mode 100644 tests/v1/entrypoints/openai/responses/test_function_call.py

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
new file mode 100644
index 000000000000..fb0cb90d9a8c
--- /dev/null
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Set up this example by starting a vLLM OpenAI-compatible server with tool call
+options enabled.
+Reasoning models can be used through the Responses API, as documented at:
+https://platform.openai.com/docs/api-reference/responses
+For example:
+vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
+    --guided-decoding-backend xgrammar \
+    --enable-auto-tool-choice --tool-call-parser hermes
+"""
+
+import json
+
+from openai import OpenAI
+
+
+def get_weather(latitude: float, longitude: float) -> str:
+    """
+    Mock function to simulate getting weather data.
+    In a real application, this would call an external weather API.
+    """
+    return f"Current temperature at ({latitude}, {longitude}) is 20°C."
+
+
+tools = [
+    {
+        "type": "function",
+        "name": "get_weather",
+        "description": "Get current temperature for provided coordinates in celsius.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "latitude": {"type": "number"},
+                "longitude": {"type": "number"},
+            },
+            "required": ["latitude", "longitude"],
+            "additionalProperties": False,
+        },
+        "strict": True,
+    }
+]
+
+input_messages = [
+    {"role": "user", "content": "What's the weather like in Paris today?"}
+]
+
+
+def main():
+    base_url = "http://0.0.0.0:8000/v1"
+    model = "Qwen/Qwen3-1.7B"
+    client = OpenAI(base_url=base_url, api_key="empty")
+    response = client.responses.create(
+        model=model, input=input_messages, tools=tools, tool_choice="required"
+    )
+    tool_call = response.output[0]
+    args = json.loads(tool_call.arguments)
+
+    result = get_weather(args["latitude"], args["longitude"])
+
+    input_messages.append(tool_call)  # append model's function call message
+    input_messages.append(
+        {  # append result message
+            "type": "function_call_output",
+            "call_id": tool_call.call_id,
+            "output": str(result),
+        }
+    )
+    response_2 = client.responses.create(
+        model=model,
+        input=input_messages,
+        tools=tools,
+    )
+    print(response_2.output_text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py
index 032ed42f43d1..22730dbae067 100644
--- a/tests/v1/entrypoints/openai/responses/conftest.py
+++ b/tests/v1/entrypoints/openai/responses/conftest.py
@@ -15,8 +15,13 @@ def default_server_args():
         "--max-model-len",
         "8192",
         "--enforce-eager",  # For faster startup.
+        "--enable-auto-tool-choice",
+        "--guided-decoding-backend",
+        "xgrammar",
+        "--tool-call-parser",
+        "hermes",
         "--reasoning-parser",
-        "deepseek_r1",
+        "qwen3",
     ]
 
 
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
new file mode 100644
index 000000000000..4e4e847d1663
--- /dev/null
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+
+import openai  # use the official client for correctness check
+import pytest
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+tools = [
+    {
+        "type": "function",
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description": "The city to find the weather for, e.g. 'Vienna'",
+                    "default": "Vienna",
+                },
+                "country": {
+                    "type": "string",
+                    "description": "The country that the city is in, e.g. 'Austria'",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to fetch the temperature in",
+                    "enum": ["celsius", "fahrenheit"],
+                },
+                "options": {
+                    "$ref": "#/$defs/WeatherOptions",
+                    "description": "Optional parameters for weather query",
+                },
+            },
+            "required": ["country", "unit"],
+            "$defs": {
+                "WeatherOptions": {
+                    "title": "WeatherOptions",
+                    "type": "object",
+                    "additionalProperties": False,
+                    "properties": {
+                        "unit": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "default": "celsius",
+                            "description": "Temperature unit",
+                            "title": "Temperature Unit",
+                        },
+                        "include_forecast": {
+                            "type": "boolean",
+                            "default": False,
+                            "description": "Whether to include a 24-hour forecast",
+                            "title": "Include Forecast",
+                        },
+                        "language": {
+                            "type": "string",
+                            "default": "zh-CN",
+                            "description": "Language of the response",
+                            "title": "Language",
+                            "enum": ["zh-CN", "en-US", "ja-JP"],
+                        },
+                    },
+                },
+            },
+        },
+    },
+    {
+        "type": "function",
+        "name": "get_forecast",
+        "description": "Get the weather forecast for a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description": "The city to get the forecast for, e.g. 'Vienna'",
+                    "default": "Vienna",
+                },
+                "country": {
+                    "type": "string",
+                    "description": "The country that the city is in, e.g. 'Austria'",
+                },
+                "days": {
+                    "type": "integer",
+                    "description": "Number of days to get the forecast for (1-7)",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to fetch the temperature in",
+                    "enum": ["celsius", "fahrenheit"],
+                },
+            },
+            "required": ["country", "days", "unit"],
+        },
+    },
+]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+async def test_function_tool_use(
+    client: openai.AsyncOpenAI, model_name: str, tool_choice: str
+):
+    prompt = [
+        {
+            "role": "user",
+            "content": "Can you tell me what the current weather is in Berlin and the "
+            "forecast for the next 5 days, in fahrenheit?",
+        },
+    ]
+    response = await client.responses.create(
+        model=model_name,
+        input=prompt,
+        tools=tools,
+        tool_choice=tool_choice,
+    )
+
+    assert len(response.output) >= 1
+    tool_call = response.output[0]
+
+    assert tool_call.type == "function_call"
+    assert json.loads(tool_call.arguments) is not None
+
+
+@pytest.mark.asyncio
+async def test_named_tool_use(client: openai.AsyncOpenAI):
+    def get_weather(latitude: float, longitude: float) -> str:
+        """
+        Mock function to simulate getting weather data.
+        In a real application, this would call an external weather API.
+        """
+        return f"Current temperature at ({latitude}, {longitude}) is 20°C."
+
+    tools = [
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get current temperature for provided coordinates in celsius.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "latitude": {"type": "number"},
+                    "longitude": {"type": "number"},
+                },
+                "required": ["latitude", "longitude"],
+                "additionalProperties": False,
+            },
+            "strict": True,
+        }
+    ]
+
+    input_messages = [
+        {"role": "user", "content": "What's the weather like in Paris today?"}
+    ]
+
+    response = await client.responses.create(
+        model=MODEL_NAME,
+        input=input_messages,
+        tools=tools,
+        tool_choice={"type": "function", "name": "get_weather"},
+    )
+    assert len(response.output) == 1
+    tool_call = response.output[0]
+    assert tool_call.type == "function_call"
+    assert tool_call.name == "get_weather"
+    args = json.loads(tool_call.arguments)
+    assert args["latitude"] is not None
+    assert args["longitude"] is not None
+    # call the tool
+    result = get_weather(args["latitude"], args["longitude"])
+    input_messages.append(tool_call)  # append model's function call message
+    input_messages.append(
+        {  # append result message
+            "type": "function_call_output",
+            "call_id": tool_call.call_id,
+            "output": str(result),
+        }
+    )
+    # create a new response with the tool call result
+    response_2 = await client.responses.create(model=MODEL_NAME, input=input_messages)
+    # check the output
+    assert len(response_2.output_text) > 0
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 33256de6dd47..2a762c557b2f 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -65,7 +65,7 @@
 
 
 from openai.types.responses.response import IncompleteDetails, ToolChoice
-from openai.types.responses.tool import Tool
+from openai.types.responses.tool import FunctionTool, Tool
 from openai.types.shared import Metadata, Reasoning
 from pydantic import (
     BaseModel,
@@ -304,6 +304,88 @@ def get_logits_processors(
     return None
 
 
+def get_json_schema_from_tool(
+    tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam,
+    tools: list[Tool | ChatCompletionToolsParam] | None,
+) -> str | dict | BaseModel | None:
+    if tool_choice in ("none", None) or tools is None:
+        return None
+    if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice):
+        tool_name = tool_choice.name
+        tool_map = {tool.name: tool for tool in tools if isinstance(tool, Tool)}
+        if tool_name not in tool_map:
+            raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
+        return tool_map[tool_name].parameters
+
+    if (not isinstance(tool_choice, str)) and isinstance(
+        tool_choice, ChatCompletionNamedToolChoiceParam
+    ):
+        tool_name = tool_choice.function.name
+        tool_map = {
+            tool.function.name: tool
+            for tool in tools
+            if isinstance(tool, ChatCompletionToolsParam)
+        }
+        if tool_name not in tool_map:
+            raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
+        return tool_map[tool_name].function.parameters
+
+    if tool_choice == "required":
+
+        def extract_tool_info(
+            tool: Tool | ChatCompletionToolsParam,
+        ) -> tuple[str, dict[str, Any] | None]:
+            if isinstance(tool, FunctionTool):
+                return tool.name, tool.parameters
+            elif isinstance(tool, ChatCompletionToolsParam):
+                return tool.function.name, tool.function.parameters
+            else:
+                raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+        def get_tool_schema(tool: Tool | ChatCompletionToolsParam) -> dict:
+            name, params = extract_tool_info(tool)
+            params = params if params else {"type": "object", "properties": {}}
+            return {
+                "properties": {
+                    "name": {"type": "string", "enum": [name]},
+                    "parameters": params,
+                },
+                "required": ["name", "parameters"],
+            }
+
+        def get_tool_schema_defs(
+            tools: list[Tool | ChatCompletionToolsParam],
+        ) -> dict:
+            all_defs: dict[str, dict[str, Any]] = {}
+            for tool in tools:
+                _, params = extract_tool_info(tool)
+                if params is None:
+                    continue
+                defs = params.pop("$defs", {})
+                for def_name, def_schema in defs.items():
+                    if def_name in all_defs and all_defs[def_name] != def_schema:
+                        raise ValueError(
+                            f"Tool definition '{def_name}' has multiple schemas, which is not supported."
+                        )
+                    all_defs[def_name] = def_schema
+            return all_defs
+
+        json_schema = {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "type": "object",
+                "anyOf": [get_tool_schema(tool) for tool in tools],
+            },
+        }
+        json_schema_defs = get_tool_schema_defs(tools)
+        if json_schema_defs:
+            json_schema["$defs"] = json_schema_defs
+        return json_schema
+
+    return None
+
+
 ResponseInputOutputItem: TypeAlias = (
     ResponseInputItemParam | ResponseReasoningItem | ResponseFunctionToolCall
 )
@@ -423,18 +505,7 @@ def to_sampling_params(
         stop_token_ids = default_sampling_params.get("stop_token_ids")
 
         # Structured output
-        structured_outputs = None
-        if self.text is not None and self.text.format is not None:
-            response_format = self.text.format
-            if (
-                response_format.type == "json_schema"
-                and response_format.schema_ is not None
-            ):
-                structured_outputs = StructuredOutputsParams(
-                    json=response_format.schema_
-                )
-            elif response_format.type == "json_object":
-                raise NotImplementedError("json_object is not supported")
+        structured_outputs = self._get_structured_outputs()
 
         # TODO: add more parameters
         return SamplingParams.from_optional(
@@ -449,6 +520,29 @@ def to_sampling_params(
             structured_outputs=structured_outputs,
         )
 
+    def _get_structured_outputs(self) -> StructuredOutputsParams | None:
+        # Structured output
+        structured_outputs = None
+        if self.text is not None and self.text.format is not None:
+            response_format = self.text.format
+            if (
+                response_format.type == "json_schema"
+                and response_format.schema_ is not None
+            ):
+                structured_outputs = StructuredOutputsParams(
+                    json=response_format.schema_
+                )
+            elif response_format.type == "json_object":
+                raise NotImplementedError("json_object is not supported")
+        # Function call
+        elif not (self.tool_choice == "none" or self.tools is None):
+            structured_outputs = StructuredOutputsParams(
+                json=get_json_schema_from_tool(
+                    tools=self.tools, tool_choice=self.tool_choice
+                )
+            )
+        return structured_outputs
+
     def is_include_output_logprobs(self) -> bool:
         """Check if the request includes output logprobs."""
         if self.include is None:
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index bafc0e2c372f..ddbeeba99447 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1098,13 +1098,17 @@ async def _preprocess_chat(
         )
 
         if should_parse_tools:
-            if not isinstance(request, ChatCompletionRequest):
-                msg = "Tool usage is only supported for Chat Completions API"
+            if not isinstance(request, ChatCompletionRequest | ResponsesRequest):
+                msg = (
+                    "Tool usage is only supported for Chat Completions API "
+                    "or Responses API requests."
+                )
                 raise NotImplementedError(msg)
 
-            request = tool_parser(tokenizer).adjust_request(  # type: ignore
-                request=request
-            )
+            if isinstance(request, ChatCompletionRequest):
+                request = tool_parser(tokenizer).adjust_request(  # type: ignore
+                    request=request
+                )
 
         if tokenizer is None:
             assert isinstance(request_prompt, str), (
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 2ee8de5fba07..0905cf0ab413 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -14,6 +14,14 @@
 
 import jinja2
 from fastapi import Request
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageToolCallParam,
+    ChatCompletionToolMessageParam,
+)
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    Function as FunctionCallTool,
+)
 from openai.types.responses import (
     ResponseCodeInterpreterCallCodeDeltaEvent,
     ResponseCodeInterpreterCallCodeDoneEvent,
@@ -41,6 +49,7 @@
     ResponseWebSearchCallCompletedEvent,
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
+    ToolChoiceFunction,
     response_function_web_search,
     response_text_delta_event,
 )
@@ -50,6 +59,7 @@
 )
 from openai.types.responses.tool import Tool
 from openai_harmony import Message as OpenAIHarmonyMessage
+from pydantic import TypeAdapter
 
 from vllm import envs
 from vllm.engine.protocol import EngineClient
@@ -79,12 +89,15 @@
 from vllm.entrypoints.openai.protocol import (
     DeltaMessage,
     ErrorResponse,
+    FunctionCall,
+    FunctionDefinition,
     InputTokensDetails,
     OutputTokensDetails,
     RequestResponseMetadata,
     ResponseCompletedEvent,
     ResponseCreatedEvent,
     ResponseInProgressEvent,
+    ResponseInputOutputItem,
     ResponseReasoningPartAddedEvent,
     ResponseReasoningPartDoneEvent,
     ResponsesRequest,
@@ -198,14 +211,10 @@ def __init__(
             )
 
         # set up tool use
-        self.enable_auto_tools: bool = enable_auto_tools
-        if self.enable_auto_tools:
-            logger.info(
-                '"auto" tool choice has been enabled please note that while'
-                " the parallel_tool_calls client option is preset for "
-                "compatibility reasons, it will be ignored."
-            )
-
+        self.tool_parser = self._get_tool_parser(
+            tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
+        )
+        self.exclude_tools_when_tool_choice_none = False
         # HACK(woosuk): This is a hack. We should use a better store.
         # FIXME: If enable_store=True, this may cause a memory leak since we
         # never remove responses from the store.
@@ -511,16 +520,20 @@ async def _make_request(
         prev_response: ResponsesResponse | None,
         tokenizer: AnyTokenizer,
     ):
-        if len(request.tools) > 0:
-            raise NotImplementedError(
-                "Tool use is not supported in Responses API without Harmony"
-            )
+        if request.tools is None or (
+            request.tool_choice == "none" and self.exclude_tools_when_tool_choice_none
+        ):
+            tool_dicts = None
+        else:
+            tool_dicts = [tool.model_dump() for tool in request.tools]
         # Construct the input messages.
         messages = self._construct_input_messages(request, prev_response)
         _, request_prompts, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
             messages,
+            tool_dicts=tool_dicts,
+            tool_parser=self.tool_parser,
             chat_template=self.chat_template,
             chat_template_content_format=self.chat_template_content_format,
         )
@@ -802,7 +815,8 @@ def _make_response_output_items(
                     delta=False,
                 )
 
-        output = []
+        reasoning_item = None
+        message_item = None
         if reasoning_content:
             reasoning_item = ResponseReasoningItem(
                 id=f"rs_{random_uuid()}",
@@ -815,7 +829,6 @@ def _make_response_output_items(
                 ],
                 status=None,  # NOTE: Only the last output item has status.
             )
-            output.append(reasoning_item)
         if content:
             output_text = ResponseOutputText(
                 text=content,
@@ -832,15 +845,119 @@ def _make_response_output_items(
                     else None
                 ),
             )
-            message = ResponseOutputMessage(
+            message_item = ResponseOutputMessage(
                 id=f"msg_{random_uuid()}",
                 content=[output_text],
                 role="assistant",
                 status="completed",
                 type="message",
             )
-            output.append(message)
-        return output
+        outputs = []
+        function_calls = self._extract_tool_calls(request, tokenizer, content=content)
+        if function_calls:
+            outputs.extend(
+                [
+                    ResponseFunctionToolCall(
+                        id=f"fc_{random_uuid()}",
+                        call_id=f"call_{random_uuid()}",
+                        type="function_call",
+                        status="completed",
+                        name=tool_call.name,
+                        arguments=tool_call.arguments,
+                    )
+                    for tool_call in function_calls
+                ]
+            )
+        else:
+            if reasoning_item:
+                outputs.append(reasoning_item)
+            if message_item:
+                outputs.append(message_item)
+        return outputs
+
+    def _extract_tool_calls(
+        self,
+        request: ResponsesRequest,
+        tokenizer: AnyTokenizer,
+        content: str | None = None,
+    ) -> list[FunctionCall] | None:
+        function_calls = list[FunctionCall]()
+        if not self.enable_auto_tools or not self.tool_parser:
+            # Tools are not enabled
+            return None
+        elif request.tool_choice is None:
+            # No tool calls.
+            return None
+        elif request.tool_choice and isinstance(
+            request.tool_choice, ToolChoiceFunction
+        ):
+            # Forced Function Call
+            function_calls.append(
+                FunctionCall(name=request.tool_choice.name, arguments=content)
+            )
+        elif request.tool_choice == "required":
+            assert content is not None
+            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
+            function_calls.extend(
+                [
+                    FunctionCall(
+                        name=tool_call.name,
+                        arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
+                    )
+                    for tool_call in tool_calls
+                ]
+            )
+        elif request.tool_choice == "auto" or request.tool_choice == "none":
+            try:
+                tool_parser = self.tool_parser(tokenizer)
+            except RuntimeError as e:
+                logger.exception("Error in tool parser creation.")
+                raise e
+            tool_call_info = tool_parser.extract_tool_calls(
+                content if content is not None else "", request=request
+            )
+            if tool_call_info is not None and tool_call_info.tools_called:
+                # extract_tool_calls() returns a list of tool calls.
+                function_calls.extend(
+                    FunctionCall(
+                        name=tool_call.function.name,
+                        arguments=tool_call.function.arguments,
+                    )
+                    for tool_call in tool_call_info.tool_calls
+                )
+            else:
+                # No tool calls.
+                return None
+        else:
+            raise ValueError(f"Invalid tool_choice: {request.tool_choice}")
+        return function_calls
+
+    def _parse_chat_tool_call(
+        self, item: ResponseInputOutputItem
+    ) -> ChatCompletionMessageParam:
+        if item.get("type") == "function_call":
+            # Append the function call as a tool call.
+            return ChatCompletionAssistantMessageParam(
+                role="assistant",
+                tool_calls=[
+                    ChatCompletionMessageToolCallParam(
+                        id=item.get("call_id"),
+                        function=FunctionCallTool(
+                            name=item.get("name"),
+                            arguments=item.get("arguments"),
+                        ),
+                        type="function",
+                    )
+                ],
+            )
+        elif item.get("type") == "function_call_output":
+            # Append the function call output as a tool message.
+            return ChatCompletionToolMessageParam(
+                role="tool",
+                content=item.get("output"),
+                tool_call_id=item.get("call_id"),
+            )
+        return item  # type: ignore
 
     def _make_response_output_items_with_harmony(
         self,
@@ -893,7 +1010,8 @@ def _construct_input_messages(
         if isinstance(request.input, str):
             messages.append({"role": "user", "content": request.input})
         else:
-            messages.extend(request.input)  # type: ignore
+            for item in request.input:
+                messages.append(self._parse_chat_tool_call(item))
         return messages
 
     def _construct_harmony_system_input_message(

From 878f10459c2492aa1cd8e940101fa00363ed1b13 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 08:17:51 +0000
Subject: [PATCH 02/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py                          | 2 +-
 vllm/entrypoints/openai/serving_responses.py                 | 2 +-
 vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 2a762c557b2f..236867cbcd7d 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -307,7 +307,7 @@ def get_logits_processors(
 def get_json_schema_from_tool(
     tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam,
     tools: list[Tool | ChatCompletionToolsParam] | None,
-) -> str | dict | BaseModel | None:
+) -> str | dict | None:
     if tool_choice in ("none", None) or tools is None:
         return None
     if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice):
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 0905cf0ab413..536497b7d22a 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -209,7 +209,7 @@ def __init__(
             self.default_sampling_params["stop_token_ids"].extend(
                 get_stop_tokens_for_assistant_actions()
             )
-
+        self.enable_auto_tools = enable_auto_tools
         # set up tool use
         self.tool_parser = self._get_tool_parser(
             tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index 8d520f5bf8ef..b647098efb41 100644
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -10,6 +10,7 @@
     ChatCompletionRequest,
     DeltaMessage,
     ExtractedToolCallInformation,
+    ResponsesRequest,
 )
 from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools
 from vllm.logger import init_logger
@@ -64,7 +65,7 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
         return request
 
     def extract_tool_calls(
-        self, model_output: str, request: ChatCompletionRequest
+        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
     ) -> ExtractedToolCallInformation:
         """
         Static method that should be implemented for extracting tool calls from

From c11eb9d957fbac78161ff040697fdfc20d84f669 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 08:31:22 +0000
Subject: [PATCH 03/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py                 | 3 ++-
 vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 536497b7d22a..397449affe37 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -914,7 +914,8 @@ def _extract_tool_calls(
                 logger.exception("Error in tool parser creation.")
                 raise e
             tool_call_info = tool_parser.extract_tool_calls(
-                content if content is not None else "", request=request
+                content if content is not None else "",
+                request=request,  # type: ignore
             )
             if tool_call_info is not None and tool_call_info.tools_called:
                 # extract_tool_calls() returns a list of tool calls.
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index b647098efb41..8d520f5bf8ef 100644
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -10,7 +10,6 @@
     ChatCompletionRequest,
     DeltaMessage,
     ExtractedToolCallInformation,
-    ResponsesRequest,
 )
 from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools
 from vllm.logger import init_logger
@@ -65,7 +64,7 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
         return request
 
     def extract_tool_calls(
-        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
+        self, model_output: str, request: ChatCompletionRequest
     ) -> ExtractedToolCallInformation:
         """
         Static method that should be implemented for extracting tool calls from

From ecd5942c437189a4dbe4989cbbbd3405efba6b66 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 08:35:15 +0000
Subject: [PATCH 04/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 tests/v1/entrypoints/openai/responses/test_function_call.py | 4 +++-
 vllm/entrypoints/openai/protocol.py                         | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index 4e4e847d1663..5ea300772804 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -138,7 +138,9 @@ def get_weather(latitude: float, longitude: float) -> str:
         {
             "type": "function",
             "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",
+            "description": (
+                "Get current temperature for provided coordinates in celsius."
+            ),
             "parameters": {
                 "type": "object",
                 "properties": {
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 236867cbcd7d..db87d7a81686 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -365,7 +365,8 @@ def get_tool_schema_defs(
                 for def_name, def_schema in defs.items():
                     if def_name in all_defs and all_defs[def_name] != def_schema:
                         raise ValueError(
-                            f"Tool definition '{def_name}' has multiple schemas, which is not supported."
+                            f"Tool definition '{def_name}' has multiple schemas, "
+                            "which is not supported."
                         )
                     all_defs[def_name] = def_schema
             return all_defs

From b03d0b26ec4d911150e45c3bed06940cdf8551fe Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 10:03:54 +0000
Subject: [PATCH 05/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../openai_responses_client_with_tools.py     |  2 +-
 .../entrypoints/openai/responses/conftest.py  |  2 +-
 .../openai/responses/test_function_call.py    |  2 +-
 vllm/entrypoints/openai/protocol.py           | 20 +++++++++++--------
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
index fb0cb90d9a8c..a985699ec162 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -7,7 +7,7 @@
 https://platform.openai.com/docs/api-reference/responses
 For example:
 vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
-   --guided-decoding-backend xgrammar \
+      --structured-outputs-config.backend xgrammar \
       --enable-auto-tool-choice --tool-call-parser hermes
 """
 
diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py
index 22730dbae067..8081e5fa1d83 100644
--- a/tests/v1/entrypoints/openai/responses/conftest.py
+++ b/tests/v1/entrypoints/openai/responses/conftest.py
@@ -16,7 +16,7 @@ def default_server_args():
         "8192",
         "--enforce-eager",  # For faster startup.
         "--enable-auto-tool-choice",
-        "--guided-decoding-backend",
+        "--structured-outputs-config.backend",
         "xgrammar",
         "--tool-call-parser",
         "hermes",
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index 5ea300772804..f964d45a60c6 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -100,7 +100,7 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+@pytest.mark.parametrize("tool_choice", ["auto",])
 async def test_function_tool_use(
     client: openai.AsyncOpenAI, model_name: str, tool_choice: str
 ):
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index db87d7a81686..65c49e8705f7 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -16,6 +16,7 @@
 )
 from openai.types.chat.chat_completion_message import Annotation as OpenAIAnnotation
 from openai.types.responses import (
+    FunctionTool,
     ResponseCodeInterpreterCallCodeDeltaEvent,
     ResponseCodeInterpreterCallCodeDoneEvent,
     ResponseCodeInterpreterCallCompletedEvent,
@@ -36,6 +37,7 @@
     ResponseWebSearchCallCompletedEvent,
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
+    ToolChoiceFunction,
 )
 from openai.types.responses import (
     ResponseCompletedEvent as OpenAIResponseCompletedEvent,
@@ -65,7 +67,7 @@
 
 
 from openai.types.responses.response import IncompleteDetails, ToolChoice
-from openai.types.responses.tool import FunctionTool, Tool
+from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
 from pydantic import (
     BaseModel,
@@ -306,13 +308,15 @@ def get_logits_processors(
 
 def get_json_schema_from_tool(
     tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam,
-    tools: list[Tool | ChatCompletionToolsParam] | None,
+    tools: list[FunctionTool | ChatCompletionToolsParam] | None,
 ) -> str | dict | None:
     if tool_choice in ("none", None) or tools is None:
         return None
-    if (not isinstance(tool_choice, str)) and isinstance(tool_choice, ToolChoice):
+    if (not isinstance(tool_choice, str)) and isinstance(
+        tool_choice, ToolChoiceFunction
+    ):
         tool_name = tool_choice.name
-        tool_map = {tool.name: tool for tool in tools if isinstance(tool, Tool)}
+        tool_map = {tool.name: tool for tool in tools if isinstance(tool, FunctionTool)}
         if tool_name not in tool_map:
             raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
         return tool_map[tool_name].parameters
@@ -537,11 +541,11 @@ def _get_structured_outputs(self) -> StructuredOutputsParams | None:
                 raise NotImplementedError("json_object is not supported")
         # Function call
         elif not (self.tool_choice == "none" or self.tools is None):
-            structured_outputs = StructuredOutputsParams(
-                json=get_json_schema_from_tool(
-                    tools=self.tools, tool_choice=self.tool_choice
-                )
+            json_schema = get_json_schema_from_tool(
+                tools=self.tools, tool_choice=self.tool_choice
             )
+            if json_schema is not None:
+                structured_outputs = StructuredOutputsParams(json=json_schema)
         return structured_outputs
 
     def is_include_output_logprobs(self) -> bool:

From 577c191a4fec282a4f05f503464bd9659f9e147f Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 10:05:10 +0000
Subject: [PATCH 06/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 65c49e8705f7..d78fd97c4556 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -307,7 +307,7 @@ def get_logits_processors(
 
 
 def get_json_schema_from_tool(
-    tool_choice: str | ToolChoice | ChatCompletionNamedToolChoiceParam,
+    tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam,
     tools: list[FunctionTool | ChatCompletionToolsParam] | None,
 ) -> str | dict | None:
     if tool_choice in ("none", None) or tools is None:

From 98af9c9803b9fd5e1755abb2b702e0b8eae486b9 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 15 Oct 2025 10:15:13 +0000
Subject: [PATCH 07/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 tests/v1/entrypoints/openai/responses/test_function_call.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index f964d45a60c6..5ea300772804 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -100,7 +100,7 @@
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("tool_choice", ["auto",])
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
 async def test_function_tool_use(
     client: openai.AsyncOpenAI, model_name: str, tool_choice: str
 ):

From 2f13d9903ca558df523f283fb085716b2ed9042f Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 21 Oct 2025 07:46:32 +0000
Subject: [PATCH 08/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../openai_responses_client_with_tools.py     | 10 +++++--
 .../openai/responses/test_function_call.py    | 12 +++++---
 vllm/entrypoints/openai/serving_responses.py  | 28 +++++++++++--------
 3 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
index a985699ec162..276010197b5a 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -14,6 +14,7 @@
 import json
 
 from openai import OpenAI
+from utils import get_first_model
 
 
 def get_weather(latitude: float, longitude: float) -> str:
@@ -49,14 +50,17 @@ def get_weather(latitude: float, longitude: float) -> str:
 
 def main():
     base_url = "http://0.0.0.0:8000/v1"
-    model = "Qwen/Qwen3-1.7B"
     client = OpenAI(base_url=base_url, api_key="empty")
+    model = get_first_model(client)
     response = client.responses.create(
         model=model, input=input_messages, tools=tools, tool_choice="required"
     )
-    tool_call = response.output[0]
-    args = json.loads(tool_call.arguments)
 
+    for out in response.output:
+        if out.type == "function_call":
+            print("Function call:", out.name, out.arguments)
+            tool_call = out
+    args = json.loads(tool_call.arguments)
     result = get_weather(args["latitude"], args["longitude"])
 
     input_messages.append(tool_call)  # append model's function call message
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index 5ea300772804..28f4b01d3e12 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -119,8 +119,10 @@ async def test_function_tool_use(
     )
 
     assert len(response.output) >= 1
-    tool_call = response.output[0]
-
+    tool_call = None
+    for out in response.output:
+        if out.type == "function_call":
+            tool_call = out
     assert tool_call.type == "function_call"
     assert json.loads(tool_call.arguments) is not None
 
@@ -164,8 +166,10 @@ def get_weather(latitude: float, longitude: float) -> str:
         tools=tools,
         tool_choice={"type": "function", "name": "get_weather"},
     )
-    assert len(response.output) == 1
-    tool_call = response.output[0]
+    assert len(response.output) >= 1
+    for out in response.output:
+        if out.type == "function_call":
+            tool_call = out
     assert tool_call.type == "function_call"
     assert tool_call.name == "get_weather"
     args = json.loads(tool_call.arguments)
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 397449affe37..bb8f3c0c24e7 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -829,6 +829,9 @@ def _make_response_output_items(
                 ],
                 status=None,  # NOTE: Only the last output item has status.
             )
+        function_calls, content = self._extract_tool_calls(
+            request, tokenizer, content=content
+        )
         if content:
             output_text = ResponseOutputText(
                 text=content,
@@ -853,7 +856,11 @@ def _make_response_output_items(
                 type="message",
             )
         outputs = []
-        function_calls = self._extract_tool_calls(request, tokenizer, content=content)
+
+        if reasoning_item:
+            outputs.append(reasoning_item)
+        if message_item:
+            outputs.append(message_item)
         if function_calls:
             outputs.extend(
                 [
@@ -868,11 +875,6 @@ def _make_response_output_items(
                     for tool_call in function_calls
                 ]
             )
-        else:
-            if reasoning_item:
-                outputs.append(reasoning_item)
-            if message_item:
-                outputs.append(message_item)
         return outputs
 
     def _extract_tool_calls(
@@ -880,14 +882,15 @@ def _extract_tool_calls(
         request: ResponsesRequest,
         tokenizer: AnyTokenizer,
         content: str | None = None,
-    ) -> list[FunctionCall] | None:
+    ) -> tuple[list[FunctionCall], str | None] | None:
         function_calls = list[FunctionCall]()
+
         if not self.enable_auto_tools or not self.tool_parser:
             # Tools are not enabled
-            return None
+            return None, content
         elif request.tool_choice is None:
             # No tool calls.
-            return None
+            return None, content
         elif request.tool_choice and isinstance(
             request.tool_choice, ToolChoiceFunction
         ):
@@ -895,6 +898,7 @@ def _extract_tool_calls(
             function_calls.append(
                 FunctionCall(name=request.tool_choice.name, arguments=content)
             )
+            content = ""  # Clear content since tool is called.
         elif request.tool_choice == "required":
             assert content is not None
             tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
@@ -907,6 +911,7 @@ def _extract_tool_calls(
                     for tool_call in tool_calls
                 ]
             )
+            content = ""  # Clear content since tool is called.
         elif request.tool_choice == "auto" or request.tool_choice == "none":
             try:
                 tool_parser = self.tool_parser(tokenizer)
@@ -926,12 +931,13 @@ def _extract_tool_calls(
                     )
                     for tool_call in tool_call_info.tool_calls
                 )
+                content = tool_call_info.content
             else:
                 # No tool calls.
-                return None
+                return None, content
         else:
             raise ValueError(f"Invalid tool_choice: {request.tool_choice}")
-        return function_calls
+        return function_calls, content
 
     def _parse_chat_tool_call(
         self, item: ResponseInputOutputItem

From 93e9db5fcd9c16587fcd6835f1da5e31dfdabbd6 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 21 Oct 2025 07:49:12 +0000
Subject: [PATCH 09/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../v1/entrypoints/openai/responses/test_function_call.py  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index 28f4b01d3e12..8ecbc8d2704e 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -120,11 +120,17 @@ async def test_function_tool_use(
 
     assert len(response.output) >= 1
     tool_call = None
+    reasoning = None
     for out in response.output:
         if out.type == "function_call":
             tool_call = out
+        if out.type == "reasoning":
+            reasoning = out
+    assert tool_call is not None
     assert tool_call.type == "function_call"
     assert json.loads(tool_call.arguments) is not None
+    assert reasoning is not None
+    assert reasoning.type == "reasoning"
 
 
 @pytest.mark.asyncio
@@ -170,6 +176,7 @@ def get_weather(latitude: float, longitude: float) -> str:
     for out in response.output:
         if out.type == "function_call":
             tool_call = out
+    assert tool_call is not None
     assert tool_call.type == "function_call"
     assert tool_call.name == "get_weather"
     args = json.loads(tool_call.arguments)

From eebab9c360a572346372e0b76318a96eb56c0df3 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 21 Oct 2025 07:53:13 +0000
Subject: [PATCH 10/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index bb8f3c0c24e7..87d1b0901342 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -882,7 +882,7 @@ def _extract_tool_calls(
         request: ResponsesRequest,
         tokenizer: AnyTokenizer,
         content: str | None = None,
-    ) -> tuple[list[FunctionCall], str | None] | None:
+    ) -> tuple[list[FunctionCall] | None, str | None]:
         function_calls = list[FunctionCall]()
 
         if not self.enable_auto_tools or not self.tool_parser:
@@ -898,7 +898,7 @@ def _extract_tool_calls(
             function_calls.append(
                 FunctionCall(name=request.tool_choice.name, arguments=content)
             )
-            content = ""  # Clear content since tool is called.
+            content = None  # Clear content since tool is called.
         elif request.tool_choice == "required":
             assert content is not None
             tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
@@ -911,7 +911,7 @@ def _extract_tool_calls(
                     for tool_call in tool_calls
                 ]
             )
-            content = ""  # Clear content since tool is called.
+            content = None  # Clear content since tool is called.
         elif request.tool_choice == "auto" or request.tool_choice == "none":
             try:
                 tool_parser = self.tool_parser(tokenizer)

From 842f7e1764dde666487c5009c8fe4280fa4583d2 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 04:07:14 +0000
Subject: [PATCH 11/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 87d1b0901342..e7d8e7b812ae 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -939,7 +939,7 @@ def _extract_tool_calls(
             raise ValueError(f"Invalid tool_choice: {request.tool_choice}")
         return function_calls, content
 
-    def _parse_chat_tool_call(
+    def _construct_chat_message_with_tool_call(
         self, item: ResponseInputOutputItem
     ) -> ChatCompletionMessageParam:
         if item.get("type") == "function_call":
@@ -1018,7 +1018,7 @@ def _construct_input_messages(
             messages.append({"role": "user", "content": request.input})
         else:
             for item in request.input:
-                messages.append(self._parse_chat_tool_call(item))
+                messages.append(self._construct_chat_message_with_tool_call(item))
         return messages
 
     def _construct_harmony_system_input_message(

From 839aaadb83f6c707f8ab4366affe41f08415a7ab Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 05:57:02 +0000
Subject: [PATCH 12/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 112 +++++++++++++++-------------
 1 file changed, 60 insertions(+), 52 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index d78fd97c4556..bf9a63da1ada 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -306,6 +306,65 @@ def get_logits_processors(
     return None
 
 
+def _extract_tool_info(
+    tool: Tool | ChatCompletionToolsParam,
+) -> tuple[str, dict[str, Any] | None]:
+    if isinstance(tool, FunctionTool):
+        return tool.name, tool.parameters
+    elif isinstance(tool, ChatCompletionToolsParam):
+        return tool.function.name, tool.function.parameters
+    else:
+        raise TypeError(f"Unsupported tool type: {type(tool)}")
+
+
+def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict:
+    name, params = _extract_tool_info(tool)
+    params = params if params else {"type": "object", "properties": {}}
+    return {
+        "properties": {
+            "name": {"type": "string", "enum": [name]},
+            "parameters": params,
+        },
+        "required": ["name", "parameters"],
+    }
+
+
+def _get_tool_schema_defs(
+    tools: list[Tool | ChatCompletionToolsParam],
+) -> dict:
+    all_defs: dict[str, dict[str, Any]] = {}
+    for tool in tools:
+        _, params = _get_tool_schema_from_tool(tool)
+        if params is None:
+            continue
+        defs = params.pop("$defs", {})
+        for def_name, def_schema in defs.items():
+            if def_name in all_defs and all_defs[def_name] != def_schema:
+                raise ValueError(
+                    f"Tool definition '{def_name}' has multiple schemas, "
+                    "which is not supported."
+                )
+            all_defs[def_name] = def_schema
+    return all_defs
+
+
+def _get_json_schema_from_choice_required(
+    tools: list[Tool | ChatCompletionToolsParam],
+) -> dict:
+    json_schema = {
+        "type": "array",
+        "minItems": 1,
+        "items": {
+            "type": "object",
+            "anyOf": [_get_tool_schema_from_tool(tool) for tool in tools],
+        },
+    }
+    json_schema_defs = _get_tool_schema_defs(tools)
+    if json_schema_defs:
+        json_schema["$defs"] = json_schema_defs
+    return json_schema
+
+
 def get_json_schema_from_tool(
     tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam,
     tools: list[FunctionTool | ChatCompletionToolsParam] | None,
@@ -335,58 +394,7 @@ def get_json_schema_from_tool(
         return tool_map[tool_name].function.parameters
 
     if tool_choice == "required":
-
-        def extract_tool_info(
-            tool: Tool | ChatCompletionToolsParam,
-        ) -> tuple[str, dict[str, Any] | None]:
-            if isinstance(tool, FunctionTool):
-                return tool.name, tool.parameters
-            elif isinstance(tool, ChatCompletionToolsParam):
-                return tool.function.name, tool.function.parameters
-            else:
-                raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-        def get_tool_schema(tool: Tool | ChatCompletionToolsParam) -> dict:
-            name, params = extract_tool_info(tool)
-            params = params if params else {"type": "object", "properties": {}}
-            return {
-                "properties": {
-                    "name": {"type": "string", "enum": [name]},
-                    "parameters": params,
-                },
-                "required": ["name", "parameters"],
-            }
-
-        def get_tool_schema_defs(
-            tools: list[Tool | ChatCompletionToolsParam],
-        ) -> dict:
-            all_defs: dict[str, dict[str, Any]] = {}
-            for tool in tools:
-                _, params = extract_tool_info(tool)
-                if params is None:
-                    continue
-                defs = params.pop("$defs", {})
-                for def_name, def_schema in defs.items():
-                    if def_name in all_defs and all_defs[def_name] != def_schema:
-                        raise ValueError(
-                            f"Tool definition '{def_name}' has multiple schemas, "
-                            "which is not supported."
-                        )
-                    all_defs[def_name] = def_schema
-            return all_defs
-
-        json_schema = {
-            "type": "array",
-            "minItems": 1,
-            "items": {
-                "type": "object",
-                "anyOf": [get_tool_schema(tool) for tool in tools],
-            },
-        }
-        json_schema_defs = get_tool_schema_defs(tools)
-        if json_schema_defs:
-            json_schema["$defs"] = json_schema_defs
-        return json_schema
+        return _get_json_schema_from_choice_required(tools)
 
     return None
 

From eb8fa5b4b3bc80fd0c7aebd46aae762df76c27bb Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 06:08:09 +0000
Subject: [PATCH 13/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index bf9a63da1ada..4cc1949808b3 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -348,7 +348,7 @@ def _get_tool_schema_defs(
     return all_defs
 
 
-def _get_json_schema_from_choice_required(
+def _get_json_schema_from_tools(
     tools: list[Tool | ChatCompletionToolsParam],
 ) -> dict:
     json_schema = {
@@ -394,7 +394,7 @@ def get_json_schema_from_tool(
         return tool_map[tool_name].function.parameters
 
     if tool_choice == "required":
-        return _get_json_schema_from_choice_required(tools)
+        return _get_json_schema_from_tools(tools)
 
     return None
 

From 3d91f5355981ac21b83d39bef436c2176b0564d4 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 06:29:20 +0000
Subject: [PATCH 14/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 4cc1949808b3..e0c4ca63d2d5 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -369,8 +369,10 @@ def get_json_schema_from_tool(
     tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam,
     tools: list[FunctionTool | ChatCompletionToolsParam] | None,
 ) -> str | dict | None:
+    # tool_choice: "none"
     if tool_choice in ("none", None) or tools is None:
         return None
+    # tool_choice: Forced Function (Responses)
     if (not isinstance(tool_choice, str)) and isinstance(
         tool_choice, ToolChoiceFunction
     ):
@@ -379,7 +381,7 @@ def get_json_schema_from_tool(
         if tool_name not in tool_map:
             raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
         return tool_map[tool_name].parameters
-
+    # tool_choice: Forced Function (ChatCompletion)
     if (not isinstance(tool_choice, str)) and isinstance(
         tool_choice, ChatCompletionNamedToolChoiceParam
     ):
@@ -392,10 +394,10 @@ def get_json_schema_from_tool(
         if tool_name not in tool_map:
             raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
         return tool_map[tool_name].function.parameters
-
+    # tool_choice: "required"
     if tool_choice == "required":
         return _get_json_schema_from_tools(tools)
-
+    # tool_choice: "auto"
     return None
 
 

From 157ab86b6f7b96da075f427bb11017382b01088a Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 06:32:12 +0000
Subject: [PATCH 15/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 25 ++++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index e7d8e7b812ae..4e30de747b58 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -862,19 +862,18 @@ def _make_response_output_items(
         if message_item:
             outputs.append(message_item)
         if function_calls:
-            outputs.extend(
-                [
-                    ResponseFunctionToolCall(
-                        id=f"fc_{random_uuid()}",
-                        call_id=f"call_{random_uuid()}",
-                        type="function_call",
-                        status="completed",
-                        name=tool_call.name,
-                        arguments=tool_call.arguments,
-                    )
-                    for tool_call in function_calls
-                ]
-            )
+            tool_call_items = [
+                ResponseFunctionToolCall(
+                    id=f"fc_{random_uuid()}",
+                    call_id=f"call_{random_uuid()}",
+                    type="function_call",
+                    status="completed",
+                    name=tool_call.name,
+                    arguments=tool_call.arguments,
+                )
+                for tool_call in function_calls
+            ]
+            outputs.extend(tool_call_items)
         return outputs
 
     def _extract_tool_calls(

From 2ddd919e715ebc5e3db949e786bd1bf08cf54051 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 22 Oct 2025 06:41:24 +0000
Subject: [PATCH 16/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 4e30de747b58..459a9981b9c6 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -829,7 +829,7 @@ def _make_response_output_items(
                 ],
                 status=None,  # NOTE: Only the last output item has status.
             )
-        function_calls, content = self._extract_tool_calls(
+        function_calls, content = self._parse_tool_calls_from_content(
             request, tokenizer, content=content
         )
         if content:
@@ -876,7 +876,7 @@ def _make_response_output_items(
             outputs.extend(tool_call_items)
         return outputs
 
-    def _extract_tool_calls(
+    def _parse_tool_calls_from_content(
         self,
         request: ResponsesRequest,
         tokenizer: AnyTokenizer,

From ce784fe5be2983a1ad7234cd3413cd8ae29365f2 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Fri, 24 Oct 2025 14:19:09 +0000
Subject: [PATCH 17/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 95 -----------------------------
 1 file changed, 95 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index e0c4ca63d2d5..7515b3ab1048 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -306,101 +306,6 @@ def get_logits_processors(
     return None
 
 
-def _extract_tool_info(
-    tool: Tool | ChatCompletionToolsParam,
-) -> tuple[str, dict[str, Any] | None]:
-    if isinstance(tool, FunctionTool):
-        return tool.name, tool.parameters
-    elif isinstance(tool, ChatCompletionToolsParam):
-        return tool.function.name, tool.function.parameters
-    else:
-        raise TypeError(f"Unsupported tool type: {type(tool)}")
-
-
-def _get_tool_schema_from_tool(tool: Tool | ChatCompletionToolsParam) -> dict:
-    name, params = _extract_tool_info(tool)
-    params = params if params else {"type": "object", "properties": {}}
-    return {
-        "properties": {
-            "name": {"type": "string", "enum": [name]},
-            "parameters": params,
-        },
-        "required": ["name", "parameters"],
-    }
-
-
-def _get_tool_schema_defs(
-    tools: list[Tool | ChatCompletionToolsParam],
-) -> dict:
-    all_defs: dict[str, dict[str, Any]] = {}
-    for tool in tools:
-        _, params = _get_tool_schema_from_tool(tool)
-        if params is None:
-            continue
-        defs = params.pop("$defs", {})
-        for def_name, def_schema in defs.items():
-            if def_name in all_defs and all_defs[def_name] != def_schema:
-                raise ValueError(
-                    f"Tool definition '{def_name}' has multiple schemas, "
-                    "which is not supported."
-                )
-            all_defs[def_name] = def_schema
-    return all_defs
-
-
-def _get_json_schema_from_tools(
-    tools: list[Tool | ChatCompletionToolsParam],
-) -> dict:
-    json_schema = {
-        "type": "array",
-        "minItems": 1,
-        "items": {
-            "type": "object",
-            "anyOf": [_get_tool_schema_from_tool(tool) for tool in tools],
-        },
-    }
-    json_schema_defs = _get_tool_schema_defs(tools)
-    if json_schema_defs:
-        json_schema["$defs"] = json_schema_defs
-    return json_schema
-
-
-def get_json_schema_from_tool(
-    tool_choice: str | ToolChoiceFunction | ChatCompletionNamedToolChoiceParam,
-    tools: list[FunctionTool | ChatCompletionToolsParam] | None,
-) -> str | dict | None:
-    # tool_choice: "none"
-    if tool_choice in ("none", None) or tools is None:
-        return None
-    # tool_choice: Forced Function (Responses)
-    if (not isinstance(tool_choice, str)) and isinstance(
-        tool_choice, ToolChoiceFunction
-    ):
-        tool_name = tool_choice.name
-        tool_map = {tool.name: tool for tool in tools if isinstance(tool, FunctionTool)}
-        if tool_name not in tool_map:
-            raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
-        return tool_map[tool_name].parameters
-    # tool_choice: Forced Function (ChatCompletion)
-    if (not isinstance(tool_choice, str)) and isinstance(
-        tool_choice, ChatCompletionNamedToolChoiceParam
-    ):
-        tool_name = tool_choice.function.name
-        tool_map = {
-            tool.function.name: tool
-            for tool in tools
-            if isinstance(tool, ChatCompletionToolsParam)
-        }
-        if tool_name not in tool_map:
-            raise ValueError(f"Tool '{tool_name}' has not been passed in `tools`.")
-        return tool_map[tool_name].function.parameters
-    # tool_choice: "required"
-    if tool_choice == "required":
-        return _get_json_schema_from_tools(tools)
-    # tool_choice: "auto"
-    return None
-
-
 ResponseInputOutputItem: TypeAlias = (
     ResponseInputItemParam | ResponseReasoningItem | ResponseFunctionToolCall
 )

From 067cd66c4aa26732ff313a7301cec7519b016d02 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Fri, 24 Oct 2025 14:23:10 +0000
Subject: [PATCH 18/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/protocol.py | 38 +++++++++--------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 7515b3ab1048..33256de6dd47 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -16,7 +16,6 @@
 )
 from openai.types.chat.chat_completion_message import Annotation as OpenAIAnnotation
 from openai.types.responses import (
-    FunctionTool,
     ResponseCodeInterpreterCallCodeDeltaEvent,
     ResponseCodeInterpreterCallCodeDoneEvent,
     ResponseCodeInterpreterCallCompletedEvent,
@@ -37,7 +36,6 @@
     ResponseWebSearchCallCompletedEvent,
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
-    ToolChoiceFunction,
 )
 from openai.types.responses import (
     ResponseCompletedEvent as OpenAIResponseCompletedEvent,
@@ -425,7 +423,18 @@ def to_sampling_params(
         stop_token_ids = default_sampling_params.get("stop_token_ids")
 
         # Structured output
-        structured_outputs = self._get_structured_outputs()
+        structured_outputs = None
+        if self.text is not None and self.text.format is not None:
+            response_format = self.text.format
+            if (
+                response_format.type == "json_schema"
+                and response_format.schema_ is not None
+            ):
+                structured_outputs = StructuredOutputsParams(
+                    json=response_format.schema_
+                )
+            elif response_format.type == "json_object":
+                raise NotImplementedError("json_object is not supported")
 
         # TODO: add more parameters
         return SamplingParams.from_optional(
@@ -440,29 +449,6 @@ def to_sampling_params(
             structured_outputs=structured_outputs,
         )
 
-    def _get_structured_outputs(self) -> StructuredOutputsParams | None:
-        # Structured output
-        structured_outputs = None
-        if self.text is not None and self.text.format is not None:
-            response_format = self.text.format
-            if (
-                response_format.type == "json_schema"
-                and response_format.schema_ is not None
-            ):
-                structured_outputs = StructuredOutputsParams(
-                    json=response_format.schema_
-                )
-            elif response_format.type == "json_object":
-                raise NotImplementedError("json_object is not supported")
-        # Function call
-        elif not (self.tool_choice == "none" or self.tools is None):
-            json_schema = get_json_schema_from_tool(
-                tools=self.tools, tool_choice=self.tool_choice
-            )
-            if json_schema is not None:
-                structured_outputs = StructuredOutputsParams(json=json_schema)
-        return structured_outputs
-
     def is_include_output_logprobs(self) -> bool:
         """Check if the request includes output logprobs."""
         if self.include is None:

From f44848a19a8349878c52d5dd62fe60b8931a962c Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Fri, 24 Oct 2025 15:02:34 +0000
Subject: [PATCH 19/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_engine.py     |  8 +++----
 vllm/entrypoints/openai/serving_responses.py  |  8 +++----
 .../tool_parsers/abstract_tool_parser.py      | 24 +++++++++++++++----
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index ddbeeba99447..dc9e71651914 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1104,11 +1104,9 @@ async def _preprocess_chat(
                     "or Responses API requests."
                 )
                 raise NotImplementedError(msg)
-
-            if isinstance(request, ChatCompletionRequest):
-                request = tool_parser(tokenizer).adjust_request(  # type: ignore
-                    request=request
-                )
+            request = tool_parser(tokenizer).adjust_request(  # type: ignore
+                request=request
+            )
 
         if tokenizer is None:
             assert isinstance(request_prompt, str), (
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 459a9981b9c6..62d86d790d28 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -941,16 +941,16 @@ def _parse_tool_calls_from_content(
     def _construct_chat_message_with_tool_call(
         self, item: ResponseInputOutputItem
     ) -> ChatCompletionMessageParam:
-        if item.get("type") == "function_call":
+        if isinstance(item, ResponseFunctionToolCall):
             # Append the function call as a tool call.
             return ChatCompletionAssistantMessageParam(
                 role="assistant",
                 tool_calls=[
                     ChatCompletionMessageToolCallParam(
-                        id=item.get("call_id"),
+                        id=item.call_id,
                         function=FunctionCallTool(
-                            name=item.get("name"),
-                            arguments=item.get("arguments"),
+                            name=item.name,
+                            arguments=item.arguments,
                         ),
                         type="function",
                     )
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index 8d520f5bf8ef..d26d7a139b90 100644
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -6,10 +6,16 @@
 from collections.abc import Callable, Sequence
 from functools import cached_property
 
+from openai.types.responses.response_format_text_json_schema_config import (
+    ResponseFormatTextJSONSchemaConfig,
+)
+
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
     DeltaMessage,
     ExtractedToolCallInformation,
+    ResponsesRequest,
+    ResponseTextConfig,
 )
 from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools
 from vllm.logger import init_logger
@@ -56,11 +62,21 @@ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionReques
         )
         # Set structured output params for tool calling
         if json_schema_from_tool is not None:
-            if request.structured_outputs is None:
+            if isinstance(request, ChatCompletionRequest):
                 request.structured_outputs = StructuredOutputsParams()
-            # tool_choice: "Forced Function" or "required" will override
-            # structured output json settings to make tool calling work correctly
-            request.structured_outputs.json = json_schema_from_tool
+                # tool_choice: "Forced Function" or "required" will override
+                # structured output json settings to make tool calling work correctly
+                request.structured_outputs.json = json_schema_from_tool
+            if isinstance(request, ResponsesRequest):
+                request.text = ResponseTextConfig()
+                request.text.format = ResponseFormatTextJSONSchemaConfig(
+                    name="tool_calling_response",
+                    schema=json_schema_from_tool,
+                    type="json_schema",
+                    description="Response format for tool calling",
+                    strict=True,
+                )
+
         return request
 
     def extract_tool_calls(

From 5b8356c9f2dc7b054202745b1525f07ed021fd71 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Fri, 24 Oct 2025 15:06:13 +0000
Subject: [PATCH 20/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index dc9e71651914..4c8d3f2c99a1 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1104,8 +1104,8 @@ async def _preprocess_chat(
                     "or Responses API requests."
                 )
                 raise NotImplementedError(msg)
-            request = tool_parser(tokenizer).adjust_request(  # type: ignore
-                request=request
+            request = tool_parser(tokenizer).adjust_request(
+                request=request  # type: ignore
             )
 
         if tokenizer is None:

From 3ab6b2be11948242dab858a9258f8661b1638f7e Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Fri, 24 Oct 2025 15:10:13 +0000
Subject: [PATCH 21/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_engine.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 4c8d3f2c99a1..8ce4ff574699 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1104,9 +1104,7 @@ async def _preprocess_chat(
                     "or Responses API requests."
                 )
                 raise NotImplementedError(msg)
-            request = tool_parser(tokenizer).adjust_request(
-                request=request  # type: ignore
-            )
+            request = tool_parser(tokenizer).adjust_request(request=request)  # type: ignore
 
         if tokenizer is None:
             assert isinstance(request_prompt, str), (

From f2b7ddeaec1a794152839aacc46f52fbff588b57 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 28 Oct 2025 03:42:04 +0000
Subject: [PATCH 22/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../openai/{responses => serving_responses}/__init__.py           | 0
 .../openai/{responses => serving_responses}/conftest.py           | 0
 .../openai/{responses => serving_responses}/test_basic.py         | 0
 .../openai/{responses => serving_responses}/test_function_call.py | 0
 .../openai/{responses => serving_responses}/test_image.py         | 0
 .../openai/{responses => serving_responses}/test_stateful.py      | 0
 .../{responses => serving_responses}/test_structured_output.py    | 0
 7 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/__init__.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/conftest.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_basic.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_function_call.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_image.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_stateful.py (100%)
 rename tests/v1/entrypoints/openai/{responses => serving_responses}/test_structured_output.py (100%)

diff --git a/tests/v1/entrypoints/openai/responses/__init__.py b/tests/v1/entrypoints/openai/serving_responses/__init__.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/__init__.py
rename to tests/v1/entrypoints/openai/serving_responses/__init__.py
diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/serving_responses/conftest.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/conftest.py
rename to tests/v1/entrypoints/openai/serving_responses/conftest.py
diff --git a/tests/v1/entrypoints/openai/responses/test_basic.py b/tests/v1/entrypoints/openai/serving_responses/test_basic.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/test_basic.py
rename to tests/v1/entrypoints/openai/serving_responses/test_basic.py
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/test_function_call.py
rename to tests/v1/entrypoints/openai/serving_responses/test_function_call.py
diff --git a/tests/v1/entrypoints/openai/responses/test_image.py b/tests/v1/entrypoints/openai/serving_responses/test_image.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/test_image.py
rename to tests/v1/entrypoints/openai/serving_responses/test_image.py
diff --git a/tests/v1/entrypoints/openai/responses/test_stateful.py b/tests/v1/entrypoints/openai/serving_responses/test_stateful.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/test_stateful.py
rename to tests/v1/entrypoints/openai/serving_responses/test_stateful.py
diff --git a/tests/v1/entrypoints/openai/responses/test_structured_output.py b/tests/v1/entrypoints/openai/serving_responses/test_structured_output.py
similarity index 100%
rename from tests/v1/entrypoints/openai/responses/test_structured_output.py
rename to tests/v1/entrypoints/openai/serving_responses/test_structured_output.py

From 06e802e826ee5e7209f02197da4a881807728a24 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 5 Nov 2025 08:20:27 +0000
Subject: [PATCH 23/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 105 +------------------
 vllm/entrypoints/responses_utils.py          |  43 ++++++++
 2 files changed, 45 insertions(+), 103 deletions(-)
 create mode 100644 vllm/entrypoints/responses_utils.py

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 62d86d790d28..f4aa3e15d59e 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -14,14 +14,6 @@
 
 import jinja2
 from fastapi import Request
-from openai.types.chat import (
-    ChatCompletionAssistantMessageParam,
-    ChatCompletionMessageToolCallParam,
-    ChatCompletionToolMessageParam,
-)
-from openai.types.chat.chat_completion_message_tool_call_param import (
-    Function as FunctionCallTool,
-)
 from openai.types.responses import (
     ResponseCodeInterpreterCallCodeDeltaEvent,
     ResponseCodeInterpreterCallCodeDoneEvent,
@@ -49,7 +41,6 @@
     ResponseWebSearchCallCompletedEvent,
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
-    ToolChoiceFunction,
     response_function_web_search,
     response_text_delta_event,
 )
@@ -59,7 +50,6 @@
 )
 from openai.types.responses.tool import Tool
 from openai_harmony import Message as OpenAIHarmonyMessage
-from pydantic import TypeAdapter
 
 from vllm import envs
 from vllm.engine.protocol import EngineClient
@@ -89,15 +79,12 @@
 from vllm.entrypoints.openai.protocol import (
     DeltaMessage,
     ErrorResponse,
-    FunctionCall,
-    FunctionDefinition,
     InputTokensDetails,
     OutputTokensDetails,
     RequestResponseMetadata,
     ResponseCompletedEvent,
     ResponseCreatedEvent,
     ResponseInProgressEvent,
-    ResponseInputOutputItem,
     ResponseReasoningPartAddedEvent,
     ResponseReasoningPartDoneEvent,
     ResponsesRequest,
@@ -107,6 +94,7 @@
 )
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
+from vllm.entrypoints.responses_utils import construct_chat_message_with_tool_call
 from vllm.entrypoints.tool_server import ToolServer
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.logger import init_logger
@@ -876,95 +864,6 @@ def _make_response_output_items(
             outputs.extend(tool_call_items)
         return outputs
 
-    def _parse_tool_calls_from_content(
-        self,
-        request: ResponsesRequest,
-        tokenizer: AnyTokenizer,
-        content: str | None = None,
-    ) -> tuple[list[FunctionCall] | None, str | None]:
-        function_calls = list[FunctionCall]()
-
-        if not self.enable_auto_tools or not self.tool_parser:
-            # Tools are not enabled
-            return None, content
-        elif request.tool_choice is None:
-            # No tool calls.
-            return None, content
-        elif request.tool_choice and isinstance(
-            request.tool_choice, ToolChoiceFunction
-        ):
-            # Forced Function Call
-            function_calls.append(
-                FunctionCall(name=request.tool_choice.name, arguments=content)
-            )
-            content = None  # Clear content since tool is called.
-        elif request.tool_choice == "required":
-            assert content is not None
-            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
-            function_calls.extend(
-                [
-                    FunctionCall(
-                        name=tool_call.name,
-                        arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
-                    )
-                    for tool_call in tool_calls
-                ]
-            )
-            content = None  # Clear content since tool is called.
-        elif request.tool_choice == "auto" or request.tool_choice == "none":
-            try:
-                tool_parser = self.tool_parser(tokenizer)
-            except RuntimeError as e:
-                logger.exception("Error in tool parser creation.")
-                raise e
-            tool_call_info = tool_parser.extract_tool_calls(
-                content if content is not None else "",
-                request=request,  # type: ignore
-            )
-            if tool_call_info is not None and tool_call_info.tools_called:
-                # extract_tool_calls() returns a list of tool calls.
-                function_calls.extend(
-                    FunctionCall(
-                        name=tool_call.function.name,
-                        arguments=tool_call.function.arguments,
-                    )
-                    for tool_call in tool_call_info.tool_calls
-                )
-                content = tool_call_info.content
-            else:
-                # No tool calls.
-                return None, content
-        else:
-            raise ValueError(f"Invalid tool_choice: {request.tool_choice}")
-        return function_calls, content
-
-    def _construct_chat_message_with_tool_call(
-        self, item: ResponseInputOutputItem
-    ) -> ChatCompletionMessageParam:
-        if isinstance(item, ResponseFunctionToolCall):
-            # Append the function call as a tool call.
-            return ChatCompletionAssistantMessageParam(
-                role="assistant",
-                tool_calls=[
-                    ChatCompletionMessageToolCallParam(
-                        id=item.call_id,
-                        function=FunctionCallTool(
-                            name=item.name,
-                            arguments=item.arguments,
-                        ),
-                        type="function",
-                    )
-                ],
-            )
-        elif item.get("type") == "function_call_output":
-            # Append the function call output as a tool message.
-            return ChatCompletionToolMessageParam(
-                role="tool",
-                content=item.get("output"),
-                tool_call_id=item.get("call_id"),
-            )
-        return item  # type: ignore
-
     def _make_response_output_items_with_harmony(
         self,
         context: HarmonyContext,
@@ -1017,7 +916,7 @@ def _construct_input_messages(
             messages.append({"role": "user", "content": request.input})
         else:
             for item in request.input:
-                messages.append(self._construct_chat_message_with_tool_call(item))
+                messages.append(construct_chat_message_with_tool_call(item))
         return messages
 
     def _construct_harmony_system_input_message(
diff --git a/vllm/entrypoints/responses_utils.py b/vllm/entrypoints/responses_utils.py
new file mode 100644
index 000000000000..7e631645db12
--- /dev/null
+++ b/vllm/entrypoints/responses_utils.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    Function as FunctionCallTool,
+)
+from openai.types.responses import ResponseFunctionToolCall
+
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageParam,
+    ChatCompletionMessageToolCallParam,
+    ChatCompletionToolMessageParam,
+    ResponseInputOutputItem,
+)
+
+
+def construct_chat_message_with_tool_call(
+    item: ResponseInputOutputItem,
+) -> ChatCompletionMessageParam:
+    """Map a Responses API input item to its chat-completion message form."""
+    if isinstance(item, ResponseFunctionToolCall):
+        # A function call is replayed as an assistant message with one tool call.
+        return ChatCompletionAssistantMessageParam(
+            role="assistant",
+            tool_calls=[
+                ChatCompletionMessageToolCallParam(
+                    id=item.call_id,
+                    function=FunctionCallTool(name=item.name, arguments=item.arguments),
+                    type="function",
+                )
+            ],
+        )
+    # Only dict items expose `.get()`; model instances such as
+    # `ResponseReasoningItem` would raise AttributeError, so they pass through.
+    if isinstance(item, dict) and item.get("type") == "function_call_output":
+        # A function call output is replayed as a tool message.
+        return ChatCompletionToolMessageParam(
+            role="tool",
+            content=item.get("output"),
+            tool_call_id=item.get("call_id"),
+        )
+    return item  # type: ignore

From 13a2749af4d3563d6da0748ed8a0334a7b8bdd37 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Wed, 5 Nov 2025 08:26:57 +0000
Subject: [PATCH 24/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/serving_responses.py | 12 ++++++++----
 vllm/entrypoints/responses_utils.py          |  8 +++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index f4aa3e15d59e..7580d2da2d2b 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -817,8 +817,12 @@ def _make_response_output_items(
                 ],
                 status=None,  # NOTE: Only the last output item has status.
             )
-        function_calls, content = self._parse_tool_calls_from_content(
-            request, tokenizer, content=content
+        tool_calls, content = self._parse_tool_calls_from_content(
+            request=request,
+            tokenizer=tokenizer,
+            content=content,
+            enable_auto_tools=self.enable_auto_tools,
+            tool_parser_cls=self.tool_parser,
         )
         if content:
             output_text = ResponseOutputText(
@@ -849,7 +853,7 @@ def _make_response_output_items(
             outputs.append(reasoning_item)
         if message_item:
             outputs.append(message_item)
-        if function_calls:
+        if tool_calls:
             tool_call_items = [
                 ResponseFunctionToolCall(
                     id=f"fc_{random_uuid()}",
@@ -859,7 +863,7 @@ def _make_response_output_items(
                     name=tool_call.name,
                     arguments=tool_call.arguments,
                 )
-                for tool_call in function_calls
+                for tool_call in tool_calls
             ]
             outputs.extend(tool_call_items)
         return outputs
diff --git a/vllm/entrypoints/responses_utils.py b/vllm/entrypoints/responses_utils.py
index 7e631645db12..6eb7c0b70a67 100644
--- a/vllm/entrypoints/responses_utils.py
+++ b/vllm/entrypoints/responses_utils.py
@@ -1,16 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageToolCallParam,
+    ChatCompletionToolMessageParam,
+)
 from openai.types.chat.chat_completion_message_tool_call_param import (
     Function as FunctionCallTool,
 )
 from openai.types.responses import ResponseFunctionToolCall
 
 from vllm.entrypoints.openai.protocol import (
-    ChatCompletionAssistantMessageParam,
     ChatCompletionMessageParam,
-    ChatCompletionMessageToolCallParam,
-    ChatCompletionToolMessageParam,
     ResponseInputOutputItem,
 )
 

From 56f400b69bd40fa3e4a2768670c7ec93c07b9561 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Thu, 6 Nov 2025 08:36:28 +0000
Subject: [PATCH 25/25] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../entrypoints/openai/serving_responses/test_function_call.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/entrypoints/openai/serving_responses/test_function_call.py b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py
index 8ecbc8d2704e..cf57956a9dea 100644
--- a/tests/v1/entrypoints/openai/serving_responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/serving_responses/test_function_call.py
@@ -6,7 +6,7 @@
 import openai  # use the official client for correctness check
 import pytest
 
-MODEL_NAME = "Qwen/Qwen3-0.6B"
+MODEL_NAME = "Qwen/Qwen3-1.7B"
 tools = [
     {
         "type": "function",