From 1147f21c4e0a9f3f34d34ba3743afc02584b16f7 Mon Sep 17 00:00:00 2001 From: QwertyJack <7554089+QwertyJack@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:15:36 +0000 Subject: [PATCH 1/4] fix(openai): tolerate empty content in forced tool choice When forced tool choice runs after reasoning extraction and content is None, return an empty tool-call list instead of asserting. This avoids AssertionError in both chat-completions and responses parsing paths and adds regression coverage for the named tool-choice case. Fixes #40147 Signed-off-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com> --- .../openai/test_tool_choice_content_none.py | 94 +++++++++++++++++++ .../openai/chat_completion/serving.py | 22 +++-- vllm/entrypoints/openai/engine/serving.py | 6 +- vllm/parser/abstract_parser.py | 3 +- 4 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 tests/entrypoints/openai/test_tool_choice_content_none.py diff --git a/tests/entrypoints/openai/test_tool_choice_content_none.py b/tests/entrypoints/openai/test_tool_choice_content_none.py new file mode 100644 index 000000000000..c1da5918697c --- /dev/null +++ b/tests/entrypoints/openai/test_tool_choice_content_none.py @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest + +from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.engine.serving import OpenAIServing +from vllm.entrypoints.openai.responses.protocol import ResponsesRequest +from vllm.parser.abstract_parser import DelegatingParser + +pytestmark = pytest.mark.skip_global_cleanup + + +class _DummyDelegatingParser(DelegatingParser): + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return False + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + return input_ids + + def extract_reasoning(self, model_output: str, request): + return None, model_output + + def 
extract_reasoning_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: list[int], + current_token_ids: list[int], + delta_token_ids: list[int], + ): + return None + + def extract_tool_calls(self, model_output: str, request): + return None + + +def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content(): + request = ChatCompletionRequest.model_validate( + { + "model": "test-model", + "messages": [{"role": "user", "content": "test"}], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + "tool_choice": {"type": "function", "function": {"name": "get_weather"}}, + } + ) + + tool_calls, content = OpenAIServing._parse_tool_calls_from_content( + request=request, + tokenizer=None, + enable_auto_tools=True, + tool_parser_cls=None, + content=None, + ) + + assert content is None + assert tool_calls is not None + assert tool_calls == [] + + +def test_responses_parser_allows_named_tool_choice_with_none_content(): + request = ResponsesRequest.model_validate( + { + "model": "test-model", + "input": "test", + "tools": [ + { + "type": "function", + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + } + ], + "tool_choice": {"type": "function", "name": "get_weather"}, + } + ) + parser = _DummyDelegatingParser(tokenizer=None) + + tool_calls, content = parser._parse_tool_calls( + request=request, + content=None, + enable_auto_tools=False, + ) + + assert content is None + assert tool_calls == [] diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index b8ad54adb5a6..7accf42a31a4 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1302,9 +1302,8 @@ async def chat_completion_full_generator( request.tool_choice and type(request.tool_choice) is 
ChatCompletionNamedToolChoiceParam ): - assert tool_calls is not None and len(tool_calls) > 0 tool_call_class_items = [] - for idx, tc in enumerate(tool_calls): + for idx, tc in enumerate(tool_calls or []): # Use native ID if available (e.g., Kimi K2), # otherwise generate ID with correct id_type if tc.id: @@ -1327,12 +1326,19 @@ async def chat_completion_full_generator( tool_call_class(id=generated_id, function=tc) ) history_tool_call_cnt += 1 - message = ChatMessage( - role=role, - reasoning=reasoning, - content="", - tool_calls=tool_call_class_items, - ) + if tool_call_class_items: + message = ChatMessage( + role=role, + reasoning=reasoning, + content="", + tool_calls=tool_call_class_items, + ) + else: + message = ChatMessage( + role=role, + reasoning=reasoning, + content=content, + ) elif request.tool_choice and request.tool_choice == "required": tool_call_class_items = [] diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py index 77cce6bec5b2..51fab7ba050f 100644 --- a/vllm/entrypoints/openai/engine/serving.py +++ b/vllm/entrypoints/openai/engine/serving.py @@ -638,8 +638,9 @@ def _parse_tool_calls_from_content( and request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction) ): - assert content is not None # Forced Function Call (Responses API) + if content is None: + return [], None function_calls.append( FunctionCall(name=request.tool_choice.name, arguments=content) ) @@ -651,7 +652,8 @@ def _parse_tool_calls_from_content( and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named) ): # Named function with standard JSON-based parsing - assert content is not None + if content is None: + return [], None function_calls.append( FunctionCall(name=request.tool_choice.function.name, arguments=content) ) diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py index e7f83686dbef..03b9f211d858 100644 --- a/vllm/parser/abstract_parser.py +++ 
b/vllm/parser/abstract_parser.py @@ -459,7 +459,8 @@ def _parse_tool_calls( (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam), ): # Forced Function Call - assert content is not None + if content is None: + return [], None function_calls.append( FunctionCall(name=self._get_function_name(request), arguments=content) ) From ea7301d1ecc393fc31a732f1ee9aff709d0cae85 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 30 Apr 2026 14:49:58 +0800 Subject: [PATCH 2/4] [Bugfix] Fix crash when tool_choice=named/forced exceeds max_tokens Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/chat_completion/serving.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 7accf42a31a4..b70cb7e38911 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1303,7 +1303,8 @@ async def chat_completion_full_generator( and type(request.tool_choice) is ChatCompletionNamedToolChoiceParam ): tool_call_class_items = [] - for idx, tc in enumerate(tool_calls or []): + tool_calls = tool_calls or [] + for idx, tc in enumerate(tool_calls): # Use native ID if available (e.g., Kimi K2), # otherwise generate ID with correct id_type if tc.id: @@ -1326,19 +1327,12 @@ async def chat_completion_full_generator( tool_call_class(id=generated_id, function=tc) ) history_tool_call_cnt += 1 - if tool_call_class_items: message = ChatMessage( role=role, reasoning=reasoning, content="", tool_calls=tool_call_class_items, ) - else: - message = ChatMessage( - role=role, - reasoning=reasoning, - content=content, - ) elif request.tool_choice and request.tool_choice == "required": tool_call_class_items = [] From af9ff270ae91cb9ed6415429379e4106147797f5 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 30 Apr 2026 14:51:02 +0800 Subject: [PATCH 3/4] [Bugfix] Fix crash when tool_choice=named/forced 
exceeds max_tokens Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/chat_completion/serving.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index b70cb7e38911..c03a76e61df4 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1327,12 +1327,12 @@ async def chat_completion_full_generator( tool_call_class(id=generated_id, function=tc) ) history_tool_call_cnt += 1 - message = ChatMessage( - role=role, - reasoning=reasoning, - content="", - tool_calls=tool_call_class_items, - ) + message = ChatMessage( + role=role, + reasoning=reasoning, + content="", + tool_calls=tool_call_class_items, + ) elif request.tool_choice and request.tool_choice == "required": tool_call_class_items = [] From a9b08dabe7d490cbe4ff7ecc4884b9f2c06052b2 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Thu, 30 Apr 2026 14:57:42 +0800 Subject: [PATCH 4/4] [Bugfix] Fix crash when tool_choice=named/forced exceeds max_tokens Signed-off-by: chaunceyjiang --- .../test_completion_with_function_calling.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py index 965b21351302..839793fde856 100644 --- a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py @@ -518,7 +518,13 @@ async def test_inconsistent_tool_choice_and_tools( @pytest.mark.asyncio -async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI): +@pytest.mark.parametrize( + "tool_choice", + ["required", {"type": "function", "function": {"name": "get_current_weather"}}], +) +async def 
test_max_tokens_with_tool_choice_required( + client: openai.AsyncOpenAI, tool_choice +): """ """ models = await client.models.list() model_name: str = models.data[0].id @@ -530,7 +536,7 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI): max_completion_tokens=1, model=model_name, tools=tools, - tool_choice="required", + tool_choice=tool_choice, ) # When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`, # both `tool_calls` and `content` should be empty. @@ -538,4 +544,3 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI): choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert len(choice.message.tool_calls) == 0 - assert choice.message.content == ""