vllm-project · DarkLight1337 · Mar 18, 2026 · Jan 14, 2026 · Jan 14, 2026 · Mar 16, 2026
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -12,7 +12,7 @@ tokenizers >= 0.21.1  # Required for fast incremental detokenization.
 protobuf >= 5.29.6, !=6.30.*, !=6.31.*, !=6.32.*, !=6.33.0.*, !=6.33.1.*, !=6.33.2.*, !=6.33.3.*, !=6.33.4.* # Required by LlamaTokenizer, gRPC. CVE-2026-0994
 fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
 aiohttp >= 3.13.3
-openai >= 1.99.1, < 2.25.0  # For Responses API with reasoning content
+openai >= 2.0.0  # For Responses API with reasoning content
 pydantic >= 2.12.0
 prometheus_client >= 0.18.0
 pillow  # Required for image processing

@@ -370,7 +370,7 @@ def log_response_diagnostics(
 def default_server_args():
     return [
         "--max-model-len",
-        "8192",
+        "18192",
         "--enforce-eager",  # For faster startup.
         "--enable-auto-tool-choice",
         "--structured-outputs-config.backend",

@@ -118,7 +118,6 @@ async def test_function_tool_use(
         tool_choice=tool_choice,
         temperature=0.0,
     )
-
     assert len(response.output) >= 1
     tool_call = None
     reasoning = None
@@ -127,11 +126,15 @@ async def test_function_tool_use(
             tool_call = out
         if out.type == "reasoning":
             reasoning = out
-    assert tool_call is not None
-    assert tool_call.type == "function_call"
-    assert json.loads(tool_call.arguments) is not None
-    assert reasoning is not None
-    assert reasoning.type == "reasoning"
+    if response.incomplete_details is None:
+        assert tool_call is not None
+        assert tool_call.type == "function_call"
+        assert json.loads(tool_call.arguments) is not None
+        assert reasoning is not None
+        assert reasoning.type == "reasoning"
+    else:
+        print(response.model_dump_json(indent=2))
+        assert response.incomplete_details.reason == "max_output_tokens"
 
 
 @pytest.mark.asyncio

@@ -27,6 +27,7 @@
     ResponseReasoningTextDeltaEvent,
     ResponseReasoningTextDoneEvent,
     ResponseStatus,
+    ResponseTextConfig,
     ResponseWebSearchCallCompletedEvent,
     ResponseWebSearchCallInProgressEvent,
     ResponseWebSearchCallSearchingEvent,
@@ -38,20 +39,13 @@
 from openai.types.responses import (
     ResponseInProgressEvent as OpenAIResponseInProgressEvent,
 )
-from openai.types.responses.tool import Tool
-from openai_harmony import Message as OpenAIHarmonyMessage
-
-# Backward compatibility for OpenAI client versions
-try:  # For older openai versions (< 1.100.0)
-    from openai.types.responses import ResponseTextConfig
-except ImportError:  # For newer openai versions (>= 1.100.0)
-    from openai.types.responses import ResponseFormatTextConfig as ResponseTextConfig
-
 from openai.types.responses.response import IncompleteDetails, ToolChoice
 from openai.types.responses.response_reasoning_item import (
     Content as ResponseReasoningTextContent,
 )
+from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
+from openai_harmony import Message as OpenAIHarmonyMessage
 from pydantic import (
     Field,
     ValidationError,

@@ -6,8 +6,9 @@
 from collections.abc import Callable, Sequence
 from functools import cached_property
 
-from openai.types.responses.response_format_text_json_schema_config import (
+from openai.types.responses import (
     ResponseFormatTextJSONSchemaConfig,
+    ResponseTextConfig,
 )
 
 from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
@@ -17,7 +18,6 @@
 )
 from vllm.entrypoints.openai.responses.protocol import (
     ResponsesRequest,
-    ResponseTextConfig,
 )
 from vllm.logger import init_logger
 from vllm.sampling_params import (