Merged

23 commits
327a154  feat: harmony tool support for /chat/completions (aarnphm, Aug 6, 2025)
f03ea94  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Aug 25, 2025)
d093940  chore: add test cases for serving chat (aarnphm, Aug 25, 2025)
72f83a0  chore: update interfaces (aarnphm, Aug 25, 2025)
a891510  fix: linter issue (aarnphm, Aug 25, 2025)
3d029bd  fix: correct types and lint (aarnphm, Aug 25, 2025)
bd57e40  fix: tests (aarnphm, Aug 25, 2025)
7b3dce2  Merge branch 'main' into feat/gpt-oss-fc (simon-mo, Aug 26, 2025)
a806f00  fix: address comments (aarnphm, Sep 2, 2025)
01d3ce8  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Sep 2, 2025)
12f18c3  chore: enable tests for gpt-oss e2e (aarnphm, Sep 2, 2025)
62bbad3  revert: remove interfaces change and keep gpt-oss separate (aarnphm, Sep 2, 2025)
629f974  chore: update test chat (aarnphm, Sep 2, 2025)
503f98f  chore: cleanup separate path for reasoning contents (aarnphm, Sep 2, 2025)
c871dd0  chore: final styling (aarnphm, Sep 3, 2025)
6c4f42b  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Sep 3, 2025)
ad75bee  fix: import issue (aarnphm, Sep 3, 2025)
28764fe  fix: correct tests and remove reasoning content (aarnphm, Sep 3, 2025)
487ec94  fix: tests (aarnphm, Sep 3, 2025)
f6aa5d5  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Sep 3, 2025)
f0ee88a  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Sep 4, 2025)
9588a55  fix: correct fixed names (aarnphm, Sep 4, 2025)
d9b3d7f  merge: branch 'main' of github.com:vllm-project/vllm into feat/gpt-os… (aarnphm, Sep 4, 2025)
tests/entrypoints/openai/test_serving_chat.py (78 changes: 77 additions & 1 deletion)
@@ -1,13 +1,16 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+from __future__ import annotations

import asyncio
+from contextlib import suppress
from dataclasses import dataclass, field
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
from unittest.mock import MagicMock

import pytest
+import pytest_asyncio

from vllm.config import MultiModalConfig
from vllm.engine.multiprocessing.client import MQLLMEngineClient
@@ -17,6 +20,79 @@
OpenAIServingModels)
from vllm.transformers_utils.tokenizer import get_tokenizer

from ...utils import RemoteOpenAIServer

if TYPE_CHECKING:
    from openai import AsyncOpenAI

GPT_OSS_MODEL_NAME = "openai/gpt-oss-20b"


@pytest.fixture(scope="module")
def gptoss_server():
args = ["--enforce-eager"]
with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, args) as remote_server:
yield remote_server


@pytest_asyncio.fixture
async def gptoss_client(gptoss_server):
async with gptoss_server.get_async_client() as async_client:
yield async_client


@pytest.mark.asyncio
@pytest.mark.skip(reason="gpt-oss can't run on CI yet.")
async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: AsyncOpenAI):
tools = [{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"state": {
"type": "string"
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
},
},
"required": ["city", "state", "unit"],
},
},
}]

messages = [
{
"role": "user",
"content": "What is the weather in Dallas, TX?"
},
]

stream = await gptoss_client.chat.completions.create(
model=GPT_OSS_MODEL_NAME, messages=messages, tools=tools, stream=True)

name = None
args_buf = ""
async for chunk in stream:
delta = chunk.choices[0].delta
if delta.tool_calls:
tc = delta.tool_calls[0]
if tc.function and tc.function.name:
name = tc.function.name
if tc.function and tc.function.arguments:
args_buf += tc.function.arguments

assert name is not None
assert len(args_buf) > 0


MODEL_NAME = "openai-community/gpt2"
CHAT_TEMPLATE = "Dummy chat template for testing {}"
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
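Because the test above is skipped on CI, the streaming accumulation pattern it exercises is easiest to try by hand. Below is a minimal standalone sketch of the same loop, assuming a vLLM OpenAI-compatible server is already serving openai/gpt-oss-20b; the base URL, API key, and trimmed tool schema are illustrative placeholders, not part of this PR.

# Standalone sketch of the delta-accumulation loop from the test above.
# Assumes a local vLLM server; base_url and api_key are placeholders.
import asyncio

from openai import AsyncOpenAI

TOOLS = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]


async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.chat.completions.create(
        model="openai/gpt-oss-20b",
        messages=[{"role": "user", "content": "What is the weather in Dallas, TX?"}],
        tools=TOOLS,
        stream=True,
    )
    name, args_buf = None, ""
    async for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.tool_calls:
            tc = delta.tool_calls[0]
            # The function name arrives once; the arguments arrive as JSON
            # fragments that must be concatenated in order.
            if tc.function and tc.function.name:
                name = tc.function.name
            if tc.function and tc.function.arguments:
                args_buf += tc.function.arguments
    print(name, args_buf)


if __name__ == "__main__":
    asyncio.run(main())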
tests/tool_use/test_openai_tool_parser.py (132 changes: 132 additions & 0 deletions)
@@ -0,0 +1,132 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json

import pytest
from openai_harmony import (HarmonyEncodingName, Message, Role,
load_harmony_encoding)

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import OpenAIToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer

MODEL = "gpt2"


@pytest.fixture(scope="module")
def openai_tokenizer():
# The parser does not use the tokenizer, but the constructor requires it.
return get_tokenizer(MODEL)


@pytest.fixture
def openai_tool_parser(openai_tokenizer):
return OpenAIToolParser(openai_tokenizer)


@pytest.fixture(scope="module")
def harmony_encoding():
return load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)


def assert_tool_calls(actual_tool_calls: list[ToolCall],
expected_tool_calls: list[ToolCall]):
assert len(actual_tool_calls) == len(expected_tool_calls)

for actual_tool_call, expected_tool_call in zip(actual_tool_calls,
expected_tool_calls):
assert isinstance(actual_tool_call.id, str)
assert len(actual_tool_call.id) > 16 # Default from protocol.py
assert actual_tool_call.type == "function"
assert actual_tool_call.function == expected_tool_call.function


def test_extract_tool_calls_no_tools(openai_tool_parser, harmony_encoding):
msg = Message.from_role_and_content(Role.ASSISTANT,
"This is a test").with_channel("final")
stop_token = harmony_encoding.token_from_string("<|return|>")
token_ids = harmony_encoding.render_message(msg) + [stop_token]

extracted_info = openai_tool_parser.extract_tool_calls("",
request=None,
token_ids=token_ids)
assert not extracted_info.tools_called
assert extracted_info.tool_calls == []
assert extracted_info.content == "This is a test"


def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding):
msg = Message.from_role_and_content(
Role.ASSISTANT, '{"city": "Dallas"}').with_channel("commentary"). \
with_recipient("functions.get_current_weather").with_content_type("json")
stop_token = harmony_encoding.token_from_string("<|call|>")
token_ids = harmony_encoding.render_message(msg) + [stop_token]

extracted_info = openai_tool_parser.extract_tool_calls("",
request=None,
token_ids=token_ids)
assert extracted_info.tools_called
expected_tool_calls = [
ToolCall(
function=FunctionCall(name="get_current_weather",
arguments=json.dumps({"city": "Dallas"})))
]
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
assert extracted_info.content is None


def test_extract_tool_calls_multiple_tools(openai_tool_parser,
harmony_encoding):
msg1 = Message.from_role_and_content(
Role.ASSISTANT, '{"city": "Dallas"}').with_channel("commentary"). \
with_recipient("functions.get_current_weather").with_content_type("json")
msg2 = Message.from_role_and_content(
Role.ASSISTANT, '{}').with_channel("commentary"). \
with_recipient("functions.get_user_location").with_content_type("json")
stop_token = harmony_encoding.token_from_string("<|call|>")
token_ids = harmony_encoding.render_message(
msg1) + harmony_encoding.render_message(msg2) + [stop_token]

extracted_info = openai_tool_parser.extract_tool_calls("",
request=None,
token_ids=token_ids)
assert extracted_info.tools_called
expected_tool_calls = [
ToolCall(
function=FunctionCall(name="get_current_weather",
arguments=json.dumps({"city": "Dallas"}))),
ToolCall(function=FunctionCall(name="get_user_location",
arguments=json.dumps({})))
]
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
assert extracted_info.content is None


def test_extract_tool_calls_with_reasoning(openai_tool_parser,
harmony_encoding):
msg1 = Message.from_role_and_content(
Role.ASSISTANT, "Thinking about the weather.").with_channel("analysis")
msg2 = Message.from_role_and_content(
Role.ASSISTANT, '{"city": "Dallas"}').with_channel("commentary"). \
with_recipient("functions.get_current_weather").with_content_type("json")
msg3 = Message.from_role_and_content(
Role.ASSISTANT, "The weather is nice.").with_channel("final")

stop_token = harmony_encoding.token_from_string("<|return|>")
token_ids = harmony_encoding.render_message(
msg1) + harmony_encoding.render_message(
msg2) + harmony_encoding.render_message(msg3) + [stop_token]

extracted_info = openai_tool_parser.extract_tool_calls("",
request=None,
token_ids=token_ids)
assert extracted_info.tools_called
assert extracted_info.reasoning_content == "Thinking about the weather."
expected_tool_calls = [
ToolCall(
function=FunctionCall(name="get_current_weather",
arguments=json.dumps({"city": "Dallas"})))
]
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
assert extracted_info.content == "The weather is nice."
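The fixtures above translate into a short standalone round trip. A minimal sketch, assuming openai_harmony and vLLM are installed; as in the fixture, the gpt2 tokenizer only satisfies the constructor and is not used by the parser.

# Render a Harmony tool-call message to token ids, then parse it back.
import json

from openai_harmony import (HarmonyEncodingName, Message, Role,
                            load_harmony_encoding)

from vllm.entrypoints.openai.tool_parsers import OpenAIToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer

encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
parser = OpenAIToolParser(get_tokenizer("gpt2"))  # tokenizer is unused by the parser

# An assistant tool call is a commentary-channel message addressed to
# functions.<name> with JSON content, terminated by <|call|>.
msg = (Message.from_role_and_content(Role.ASSISTANT, json.dumps({"city": "Dallas"}))
       .with_channel("commentary")
       .with_recipient("functions.get_current_weather")
       .with_content_type("json"))
token_ids = encoding.render_message(msg) + [encoding.token_from_string("<|call|>")]

info = parser.extract_tool_calls("", request=None, token_ids=token_ids)
print(info.tools_called)                              # True
print([tc.function.name for tc in info.tool_calls])   # ['get_current_weather']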
vllm/entrypoints/harmony_utils.py (62 changes: 51 additions & 11 deletions)
@@ -18,7 +18,8 @@
Role, StreamableParser, SystemContent, TextContent,
ToolDescription, load_harmony_encoding)

-from vllm.entrypoints.openai.protocol import ResponseInputOutputItem
+from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
+                                              ResponseInputOutputItem)
from vllm.utils import random_uuid

REASONING_EFFORT = {
@@ -63,6 +64,20 @@ def get_system_message(
return sys_msg


def create_tool_definition(tool):
if isinstance(tool, ChatCompletionToolsParam):
return ToolDescription.new(
name=tool.function.name,
description=tool.function.description,
parameters=tool.function.parameters,
)
return ToolDescription.new(
name=tool.name,
description=tool.description,
parameters=tool.parameters,
)


def get_developer_message(instructions: Optional[str] = None,
tools: Optional[list[Tool]] = None) -> Message:
dev_msg_content = DeveloperContent.new()
@@ -80,11 +95,7 @@ def get_developer_message(instructions: Optional[str] = None,
raise ValueError(f"tool type {tool.type} not supported")
if function_tools:
function_tool_descriptions = [
-            ToolDescription.new(
-                name=tool.name,
-                description=tool.description,
-                parameters=tool.parameters,
-            ) for tool in function_tools
+            create_tool_definition(tool) for tool in function_tools
]
dev_msg_content = dev_msg_content.with_function_tools(
function_tool_descriptions)
@@ -148,16 +159,45 @@ def parse_response_input(
return msg


-def parse_chat_input(chat_msg) -> Message:
-    role = chat_msg["role"]
-    content = chat_msg["content"]
+def parse_chat_input(chat_msg) -> list[Message]:
+    if not isinstance(chat_msg, dict):
+        # Handle Pydantic models
+        chat_msg = chat_msg.model_dump(exclude_none=True)
+
+    role = chat_msg.get("role")
+
+    # Assistant message with tool calls
+    tool_calls = chat_msg.get("tool_calls")
+    if role == "assistant" and tool_calls:
+        msgs: list[Message] = []
+        for call in tool_calls:
+            func = call.get("function", {})
+            name = func.get("name", "")
+            arguments = func.get("arguments", "") or ""
+            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
+            msg = msg.with_channel("commentary")
+            msg = msg.with_recipient(f"functions.{name}")
+            msg = msg.with_content_type("json")
+            msgs.append(msg)
+        return msgs
+
+    # Tool role message (tool output)
+    if role == "tool":
+        name = chat_msg.get("name", "")
+        content = chat_msg.get("content", "") or ""
+        msg = Message.from_author_and_content(
+            Author.new(Role.TOOL, f"functions.{name}"), content)
+        return [msg]
+
+    # Default: user/assistant/system messages with content
+    content = chat_msg.get("content", "")
    if isinstance(content, str):
        contents = [TextContent(text=content)]
    else:
        # TODO: Support refusal.
-        contents = [TextContent(text=c["text"]) for c in content]
+        contents = [TextContent(text=c.get("text", "")) for c in content]
    msg = Message.from_role_and_contents(role, contents)
-    return msg
+    return [msg]


def render_for_completion(messages: list[Message]) -> list[int]:
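To illustrate what the new parse_chat_input returns, here is a minimal sketch of converting an OpenAI-style history into Harmony messages; the message contents are illustrative. Note the list return type: one assistant turn with several tool calls expands into several Harmony messages.

# Sketch: converting an OpenAI-style history (user turn, assistant tool
# call, tool result) into Harmony messages via the function above.
from vllm.entrypoints.harmony_utils import parse_chat_input

history = [
    {"role": "user", "content": "What is the weather in Dallas, TX?"},
    {
        "role": "assistant",
        "tool_calls": [{
            "function": {
                "name": "get_current_weather",
                "arguments": '{"city": "Dallas"}',
            },
        }],
    },
    {"role": "tool", "name": "get_current_weather", "content": '{"temp_f": 75}'},
]

harmony_messages = []
for chat_msg in history:
    # Each chat message yields a list: tool calls become commentary-channel
    # messages addressed to functions.<name>; tool outputs become messages
    # authored by Role.TOOL.
    harmony_messages.extend(parse_chat_input(chat_msg))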