Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions tests/ut/patch/platform/test_patch_tool_choice_none_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: Apache-2.0

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
from vllm.parser.abstract_parser import DelegatingParser

from vllm_ascend.patch.platform import patch_tool_choice_none_content # noqa: F401


class _DummyDelegatingParser(DelegatingParser):
    """Stub ``DelegatingParser`` subclass whose hooks all return fixed values.

    The tests in this module instantiate it only to call
    ``_parse_tool_calls``; none of the hooks below inspect their arguments
    beyond handing them straight back.
    """

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Always return ``False``, whatever ``input_ids`` contains."""
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return ``input_ids`` unchanged (the very same list object)."""
        return input_ids

    def extract_reasoning(self, model_output: str, request):
        """Return ``(None, model_output)``: no reasoning, output passed through."""
        reasoning = None
        return reasoning, model_output

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: list[int],
        current_token_ids: list[int],
        delta_token_ids: list[int],
    ):
        """Always return ``None``; every argument is ignored."""
        return None

    def extract_tool_calls(self, model_output: str, request):
        """Always return ``None``; every argument is ignored."""
        return None


def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content():
    """Chat-completions path: a named tool choice with ``content=None`` yields one empty-argument call."""
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    payload = {
        "model": "test-model",
        "messages": [{"role": "user", "content": "test"}],
        "tools": [weather_tool],
        # Named (forced) tool choice in the chat-completions request shape.
        "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
    }
    request = ChatCompletionRequest.model_validate(payload)

    # content=None is the input under test.
    tool_calls, content = OpenAIServing._parse_tool_calls_from_content(
        request=request,
        tokenizer=None,
        enable_auto_tools=True,
        tool_parser_cls=None,
        content=None,
    )

    assert content is None
    assert tool_calls is not None
    assert len(tool_calls) == 1
    forced_call = tool_calls[0]
    assert forced_call.name == "get_weather"
    assert forced_call.arguments == ""


def test_responses_parser_allows_named_tool_choice_with_none_content():
    """Responses path: a named tool choice with ``content=None`` yields one empty-argument call."""
    weather_tool = {
        "type": "function",
        "name": "get_weather",
        "parameters": {"type": "object", "properties": {}},
    }
    payload = {
        "model": "test-model",
        "input": "test",
        "tools": [weather_tool],
        # Named (forced) tool choice in the Responses request shape.
        "tool_choice": {"type": "function", "name": "get_weather"},
    }
    request = ResponsesRequest.model_validate(payload)
    parser = _DummyDelegatingParser(tokenizer=None)

    # content=None is the input under test.
    tool_calls, content = parser._parse_tool_calls(
        request=request,
        content=None,
        enable_auto_tools=False,
    )

    assert content is None
    assert len(tool_calls) == 1
    forced_call = tool_calls[0]
    assert forced_call.name == "get_weather"
    assert forced_call.arguments == ""
20 changes: 20 additions & 0 deletions vllm_ascend/patch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,26 @@
# finish-backfill fix are present in the runtime vLLM version used by
# vllm-ascend.
#
# ** 11. File: platform/patch_tool_choice_none_content.py**
Comment thread
QwertyJack marked this conversation as resolved.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 1. `vllm.entrypoints.openai.engine.serving.OpenAIServing`
# `vllm.parser.abstract_parser.DelegatingParser`
# Why:
# Some reasoning parsers can consume the full model output and return
# `content=None`. In the vLLM release currently used as the runtime, the
# forced named-tool-choice path still asserts that content is not None
# before constructing the function call, so the request can fail on the
# server side instead of returning a tool call with empty arguments.
# How:
# Monkey-patch the forced-tool-choice parsing entry points to normalize
# `content=None` to `""` before delegating back to the original upstream
# implementations.
# Related PR (if no, explain why):
# https://github.com/vllm-project/vllm/pull/40148
# Future Plan:
# Remove this patch once the upstream forced-tool-choice fix is included
# in the runtime vLLM version used by vllm-ascend.
#
# * Worker Patch:
# ===============
#
Expand Down
1 change: 1 addition & 0 deletions vllm_ascend/patch/platform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import vllm_ascend.patch.platform.patch_torch_accelerator # noqa
import vllm_ascend.patch.platform.patch_minimax_usage_accounting # noqa
import vllm_ascend.patch.platform.patch_glm_tool_call_parser # noqa
import vllm_ascend.patch.platform.patch_tool_choice_none_content # noqa

if envs.VLLM_ASCEND_BALANCE_SCHEDULING:
import vllm_ascend.patch.platform.patch_balance_schedule # noqa
86 changes: 86 additions & 0 deletions vllm_ascend/patch/platform/patch_tool_choice_none_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# OpenAI forced tool choice: tolerate None content after reasoning extraction.
#

from __future__ import annotations

from openai.types.responses import ToolChoiceFunction
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionNamedToolChoiceParam,
)
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.parser.abstract_parser import DelegatingParser


def _normalize_tool_choice_content(
request,
content: str | None,
) -> str | None:
if content is not None:
return content

tool_choice = getattr(request, "tool_choice", None)
if isinstance(
tool_choice,
(ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
):
return ""
return content


# Capture the current implementation before it is replaced so the wrapper
# below can delegate to it.
_original_parse_tool_calls_from_content = OpenAIServing._parse_tool_calls_from_content


def _patched_parse_tool_calls_from_content(
    request,
    tokenizer,
    enable_auto_tools: bool,
    tool_parser_cls,
    content: str | None = None,
):
    """Normalize ``content`` and delegate to the captured original implementation.

    ``content`` is first passed through ``_normalize_tool_choice_content``;
    every other argument is forwarded untouched, by keyword. The return value
    is whatever the original implementation returns.
    """
    return _original_parse_tool_calls_from_content(
        request=request,
        tokenizer=tokenizer,
        enable_auto_tools=enable_auto_tools,
        tool_parser_cls=tool_parser_cls,
        content=_normalize_tool_choice_content(request, content),
    )


# Install the wrapper as a staticmethod so it receives no implicit self/cls.
OpenAIServing._parse_tool_calls_from_content = staticmethod(_patched_parse_tool_calls_from_content)

# Capture the current method before it is replaced so the wrapper below can
# delegate to it.
_original_delegating_parse_tool_calls = DelegatingParser._parse_tool_calls


def _patched_delegating_parse_tool_calls(
    self,
    request,
    content: str | None,
    enable_auto_tools: bool,
):
    """Normalize ``content`` and delegate to the captured original method.

    ``content`` is first passed through ``_normalize_tool_choice_content``;
    ``self``, ``request`` and ``enable_auto_tools`` are forwarded positionally
    and unchanged. The return value is whatever the original method returns.
    """
    normalized = _normalize_tool_choice_content(request, content)
    return _original_delegating_parse_tool_calls(self, request, normalized, enable_auto_tools)


# Install the wrapper as a plain function so it binds as an instance method.
DelegatingParser._parse_tool_calls = _patched_delegating_parse_tool_calls
Loading