From 1147f21c4e0a9f3f34d34ba3743afc02584b16f7 Mon Sep 17 00:00:00 2001
From: QwertyJack <7554089+QwertyJack@users.noreply.github.com>
Date: Fri, 17 Apr 2026 13:15:36 +0000
Subject: [PATCH 1/4] fix(openai): tolerate empty content in forced tool choice

When forced tool choice runs after reasoning extraction and the remaining content is None, return an empty tool-call list (with None content) instead of asserting that content is not None. This avoids AssertionError in both chat-completions and responses parsing paths and adds regression coverage for the named tool-choice case.

Fixes #40147

Signed-off-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com>
---
 .../openai/test_tool_choice_content_none.py   | 94 +++++++++++++++++++
 .../openai/chat_completion/serving.py         | 22 +++--
 vllm/entrypoints/openai/engine/serving.py     |  6 +-
 vllm/parser/abstract_parser.py                |  3 +-
 4 files changed, 114 insertions(+), 11 deletions(-)
 create mode 100644 tests/entrypoints/openai/test_tool_choice_content_none.py

diff --git a/tests/entrypoints/openai/test_tool_choice_content_none.py b/tests/entrypoints/openai/test_tool_choice_content_none.py
new file mode 100644
index 000000000000..c1da5918697c
--- /dev/null
+++ b/tests/entrypoints/openai/test_tool_choice_content_none.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.engine.serving import OpenAIServing
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+from vllm.parser.abstract_parser import DelegatingParser
+
+pytestmark = pytest.mark.skip_global_cleanup
+
+
+class _DummyDelegatingParser(DelegatingParser):
+    def is_reasoning_end(self, input_ids: list[int]) -> bool:
+        return False
+
+    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
+        return input_ids
+
+    def extract_reasoning(self, model_output: str, request):
+        return None, model_output
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: list[int],
+        current_token_ids: list[int],
+        delta_token_ids: list[int],
+    ):
+        return None
+
+    def extract_tool_calls(self, model_output: str, request):
+        return None
+
+
+def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content():
+    request = ChatCompletionRequest.model_validate(
+        {
+            "model": "test-model",
+            "messages": [{"role": "user", "content": "test"}],
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "parameters": {"type": "object", "properties": {}},
+                    },
+                }
+            ],
+            "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
+        }
+    )
+
+    tool_calls, content = OpenAIServing._parse_tool_calls_from_content(
+        request=request,
+        tokenizer=None,
+        enable_auto_tools=True,
+        tool_parser_cls=None,
+        content=None,
+    )
+
+    assert content is None
+    assert tool_calls is not None
+    assert tool_calls == []
+
+
+def test_responses_parser_allows_named_tool_choice_with_none_content():
+    request = ResponsesRequest.model_validate(
+        {
+            "model": "test-model",
+            "input": "test",
+            "tools": [
+                {
+                    "type": "function",
+                    "name": "get_weather",
+                    "parameters": {"type": "object", "properties": {}},
+                }
+            ],
+            "tool_choice": {"type": "function", "name": "get_weather"},
+        }
+    )
+    parser = _DummyDelegatingParser(tokenizer=None)
+
+    tool_calls, content = parser._parse_tool_calls(
+        request=request,
+        content=None,
+        enable_auto_tools=False,
+    )
+
+    assert content is None
+    assert tool_calls == []
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index b8ad54adb5a6..7accf42a31a4 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1302,9 +1302,8 @@ async def chat_completion_full_generator(
                 request.tool_choice
                 and type(request.tool_choice) is ChatCompletionNamedToolChoiceParam
             ):
-                assert tool_calls is not None and len(tool_calls) > 0
                 tool_call_class_items = []
-                for idx, tc in enumerate(tool_calls):
+                for idx, tc in enumerate(tool_calls or []):
                     # Use native ID if available (e.g., Kimi K2),
                     # otherwise generate ID with correct id_type
                     if tc.id:
@@ -1327,12 +1326,19 @@ async def chat_completion_full_generator(
                                 tool_call_class(id=generated_id, function=tc)
                             )
                     history_tool_call_cnt += 1
-                message = ChatMessage(
-                    role=role,
-                    reasoning=reasoning,
-                    content="",
-                    tool_calls=tool_call_class_items,
-                )
+                if tool_call_class_items:
+                    message = ChatMessage(
+                        role=role,
+                        reasoning=reasoning,
+                        content="",
+                        tool_calls=tool_call_class_items,
+                    )
+                else:
+                    message = ChatMessage(
+                        role=role,
+                        reasoning=reasoning,
+                        content=content,
+                    )
 
             elif request.tool_choice and request.tool_choice == "required":
                 tool_call_class_items = []
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index 77cce6bec5b2..51fab7ba050f 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -638,8 +638,9 @@ def _parse_tool_calls_from_content(
             and request.tool_choice
             and isinstance(request.tool_choice, ToolChoiceFunction)
         ):
-            assert content is not None
             # Forced Function Call (Responses API)
+            if content is None:
+                return [], None
             function_calls.append(
                 FunctionCall(name=request.tool_choice.name, arguments=content)
             )
@@ -651,7 +652,8 @@ def _parse_tool_calls_from_content(
             and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
         ):
             # Named function with standard JSON-based parsing
-            assert content is not None
+            if content is None:
+                return [], None
             function_calls.append(
                 FunctionCall(name=request.tool_choice.function.name, arguments=content)
             )
diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py
index e7f83686dbef..03b9f211d858 100644
--- a/vllm/parser/abstract_parser.py
+++ b/vllm/parser/abstract_parser.py
@@ -459,7 +459,8 @@ def _parse_tool_calls(
             (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
         ):
             # Forced Function Call
-            assert content is not None
+            if content is None:
+                return [], None
             function_calls.append(
                 FunctionCall(name=self._get_function_name(request), arguments=content)
             )

From ea7301d1ecc393fc31a732f1ee9aff709d0cae85 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Thu, 30 Apr 2026 14:49:58 +0800
Subject: [PATCH 2/4] [Bugfix] Fix crash when tool_choice=named/forced exceeds
 max_tokens

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/chat_completion/serving.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 7accf42a31a4..b70cb7e38911 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1303,7 +1303,8 @@ async def chat_completion_full_generator(
                 and type(request.tool_choice) is ChatCompletionNamedToolChoiceParam
             ):
                 tool_call_class_items = []
-                for idx, tc in enumerate(tool_calls or []):
+                tool_calls = tool_calls or []
+                for idx, tc in enumerate(tool_calls):
                     # Use native ID if available (e.g., Kimi K2),
                     # otherwise generate ID with correct id_type
                     if tc.id:
@@ -1326,19 +1327,12 @@ async def chat_completion_full_generator(
                                 tool_call_class(id=generated_id, function=tc)
                             )
                     history_tool_call_cnt += 1
-                if tool_call_class_items:
                     message = ChatMessage(
                         role=role,
                         reasoning=reasoning,
                         content="",
                         tool_calls=tool_call_class_items,
                     )
-                else:
-                    message = ChatMessage(
-                        role=role,
-                        reasoning=reasoning,
-                        content=content,
-                    )
 
             elif request.tool_choice and request.tool_choice == "required":
                 tool_call_class_items = []

From af9ff270ae91cb9ed6415429379e4106147797f5 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Thu, 30 Apr 2026 14:51:02 +0800
Subject: [PATCH 3/4] [Bugfix] Fix crash when tool_choice=named/forced exceeds
 max_tokens

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/entrypoints/openai/chat_completion/serving.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index b70cb7e38911..c03a76e61df4 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1327,12 +1327,12 @@ async def chat_completion_full_generator(
                                 tool_call_class(id=generated_id, function=tc)
                             )
                     history_tool_call_cnt += 1
-                    message = ChatMessage(
-                        role=role,
-                        reasoning=reasoning,
-                        content="",
-                        tool_calls=tool_call_class_items,
-                    )
+                message = ChatMessage(
+                    role=role,
+                    reasoning=reasoning,
+                    content="",
+                    tool_calls=tool_call_class_items,
+                )
 
             elif request.tool_choice and request.tool_choice == "required":
                 tool_call_class_items = []

From a9b08dabe7d490cbe4ff7ecc4884b9f2c06052b2 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Thu, 30 Apr 2026 14:57:42 +0800
Subject: [PATCH 4/4] [Bugfix] Fix crash when tool_choice=named/forced exceeds
 max_tokens

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 .../test_completion_with_function_calling.py          | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
index 965b21351302..839793fde856 100644
--- a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py
@@ -518,7 +518,13 @@ async def test_inconsistent_tool_choice_and_tools(
 
 
 @pytest.mark.asyncio
-async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
+@pytest.mark.parametrize(
+    "tool_choice",
+    ["required", {"type": "function", "function": {"name": "get_current_weather"}}],
+)
+async def test_max_tokens_with_tool_choice_required(
+    client: openai.AsyncOpenAI, tool_choice
+):
     """ """
     models = await client.models.list()
     model_name: str = models.data[0].id
@@ -530,7 +536,7 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
         max_completion_tokens=1,
         model=model_name,
         tools=tools,
-        tool_choice="required",
+        tool_choice=tool_choice,
     )
     # When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`,
     # both `tool_calls` and `content` should be empty.
@@ -538,4 +544,3 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert len(choice.message.tool_calls) == 0
-    assert choice.message.content == ""