Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp >= 3.13.3
openai >= 1.99.1, < 2.25.0 # For Responses API with reasoning content
openai >= 2.0.0 # For Responses API with reasoning content
pydantic >= 2.12.0
prometheus_client >= 0.18.0
pillow # Required for image processing
Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/responses/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def log_response_diagnostics(
def default_server_args():
return [
"--max-model-len",
"8192",
"18192",
"--enforce-eager", # For faster startup.
"--enable-auto-tool-choice",
"--structured-outputs-config.backend",
Expand Down
15 changes: 9 additions & 6 deletions tests/entrypoints/openai/responses/test_function_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ async def test_function_tool_use(
tool_choice=tool_choice,
temperature=0.0,
)

assert len(response.output) >= 1
tool_call = None
reasoning = None
Expand All @@ -127,11 +126,15 @@ async def test_function_tool_use(
tool_call = out
if out.type == "reasoning":
reasoning = out
assert tool_call is not None
assert tool_call.type == "function_call"
assert json.loads(tool_call.arguments) is not None
assert reasoning is not None
assert reasoning.type == "reasoning"
if response.incomplete_details is None:
assert tool_call is not None
assert tool_call.type == "function_call"
assert json.loads(tool_call.arguments) is not None
assert reasoning is not None
assert reasoning.type == "reasoning"
else:
print(response.model_dump_json(indent=2))
assert response.incomplete_details.reason == "max_output_tokens"


@pytest.mark.asyncio
Expand Down
12 changes: 3 additions & 9 deletions vllm/entrypoints/openai/responses/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
ResponseReasoningTextDeltaEvent,
ResponseReasoningTextDoneEvent,
ResponseStatus,
ResponseTextConfig,
ResponseWebSearchCallCompletedEvent,
ResponseWebSearchCallInProgressEvent,
ResponseWebSearchCallSearchingEvent,
Expand All @@ -38,20 +39,13 @@
from openai.types.responses import (
ResponseInProgressEvent as OpenAIResponseInProgressEvent,
)
from openai.types.responses.tool import Tool
from openai_harmony import Message as OpenAIHarmonyMessage

# Backward compatibility for OpenAI client versions
try: # For older openai versions (< 1.100.0)
from openai.types.responses import ResponseTextConfig
except ImportError: # For newer openai versions (>= 1.100.0)
from openai.types.responses import ResponseFormatTextConfig as ResponseTextConfig

from openai.types.responses.response import IncompleteDetails, ToolChoice
from openai.types.responses.response_reasoning_item import (
Content as ResponseReasoningTextContent,
)
from openai.types.responses.tool import Tool
from openai.types.shared import Metadata, Reasoning
from openai_harmony import Message as OpenAIHarmonyMessage
from pydantic import (
Field,
ValidationError,
Expand Down
4 changes: 2 additions & 2 deletions vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from collections.abc import Callable, Sequence
from functools import cached_property

from openai.types.responses.response_format_text_json_schema_config import (
from openai.types.responses import (
ResponseFormatTextJSONSchemaConfig,
ResponseTextConfig,
)

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
Expand All @@ -17,7 +18,6 @@
)
from vllm.entrypoints.openai.responses.protocol import (
ResponsesRequest,
ResponseTextConfig,
)
from vllm.logger import init_logger
from vllm.sampling_params import (
Expand Down
Loading