Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,13 @@ async def test_inconsistent_tool_choice_and_tools(


@pytest.mark.asyncio
async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
@pytest.mark.parametrize(
"tool_choice",
["required", {"type": "function", "function": {"name": "get_current_weather"}}],
)
async def test_max_tokens_with_tool_choice_required(
client: openai.AsyncOpenAI, tool_choice
):
""" """
models = await client.models.list()
model_name: str = models.data[0].id
Expand All @@ -530,12 +536,11 @@ async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
max_completion_tokens=1,
model=model_name,
tools=tools,
tool_choice="required",
tool_choice=tool_choice,
)
# When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`,
# both `tool_calls` and `content` should be empty.
# This behavior should be consistent with OpenAI.
choice = chat_completion.choices[0]
assert choice.finish_reason == "length"
assert len(choice.message.tool_calls) == 0
assert choice.message.content == ""
94 changes: 94 additions & 0 deletions tests/entrypoints/openai/test_tool_choice_content_none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
from vllm.parser.abstract_parser import DelegatingParser

pytestmark = pytest.mark.skip_global_cleanup


class _DummyDelegatingParser(DelegatingParser):
    """Inert ``DelegatingParser`` subclass used as a test double.

    Every hook below returns a fixed value or echoes its input, so the tests
    in this module can call the inherited ``_parse_tool_calls`` without a
    real reasoning or tool parser.
    """

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Return ``False`` regardless of ``input_ids``."""
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return ``input_ids`` unchanged."""
        return input_ids

    def extract_reasoning(self, model_output: str, request):
        """Return ``(None, model_output)``; ``request`` is ignored."""
        return (None, model_output)

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: list[int],
        current_token_ids: list[int],
        delta_token_ids: list[int],
    ):
        """Return ``None`` for every streaming step; all arguments are ignored."""
        return None

    def extract_tool_calls(self, model_output: str, request):
        """Return ``None``; both arguments are ignored."""
        return None


def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content():
    """Named ``tool_choice`` combined with ``content=None`` must not raise.

    Builds a chat-completion request that names the ``get_weather`` function
    as its tool choice, calls ``OpenAIServing._parse_tool_calls_from_content``
    with ``content=None`` and checks that the result is an empty tool-call
    list together with ``None`` content.
    """
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    named_choice = {"type": "function", "function": {"name": "get_weather"}}
    payload = {
        "model": "test-model",
        "messages": [{"role": "user", "content": "test"}],
        "tools": [weather_tool],
        "tool_choice": named_choice,
    }
    request = ChatCompletionRequest.model_validate(payload)

    parsed = OpenAIServing._parse_tool_calls_from_content(
        request=request,
        tokenizer=None,
        enable_auto_tools=True,
        tool_parser_cls=None,
        content=None,
    )
    tool_calls, content = parsed

    # Same checks, in the same order, as before the restructuring.
    assert content is None
    assert tool_calls is not None
    assert tool_calls == []


def test_responses_parser_allows_named_tool_choice_with_none_content():
    """Responses-style named ``tool_choice`` with ``content=None`` must not raise.

    Builds a ``ResponsesRequest`` that names the ``get_weather`` function as
    its tool choice, runs ``_parse_tool_calls`` on a ``_DummyDelegatingParser``
    with ``content=None`` and checks that the result is an empty tool-call
    list together with ``None`` content.
    """
    weather_tool = {
        "type": "function",
        "name": "get_weather",
        "parameters": {"type": "object", "properties": {}},
    }
    payload = {
        "model": "test-model",
        "input": "test",
        "tools": [weather_tool],
        "tool_choice": {"type": "function", "name": "get_weather"},
    }
    request = ResponsesRequest.model_validate(payload)
    parser = _DummyDelegatingParser(tokenizer=None)

    parsed = parser._parse_tool_calls(
        request=request,
        content=None,
        enable_auto_tools=False,
    )
    tool_calls, content = parsed

    assert content is None
    assert tool_calls == []
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/chat_completion/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,8 +1307,8 @@ async def chat_completion_full_generator(
request.tool_choice
and type(request.tool_choice) is ChatCompletionNamedToolChoiceParam
):
assert tool_calls is not None and len(tool_calls) > 0
tool_call_class_items = []
tool_calls = tool_calls or []
for idx, tc in enumerate(tool_calls):
# Use native ID if available (e.g., Kimi K2),
# otherwise generate ID with correct id_type
Expand Down
6 changes: 4 additions & 2 deletions vllm/entrypoints/openai/engine/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,9 @@ def _parse_tool_calls_from_content(
and request.tool_choice
and isinstance(request.tool_choice, ToolChoiceFunction)
):
assert content is not None
# Forced Function Call (Responses API)
if content is None:
return [], None
function_calls.append(
FunctionCall(name=request.tool_choice.name, arguments=content)
)
Expand All @@ -651,7 +652,8 @@ def _parse_tool_calls_from_content(
and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
):
# Named function with standard JSON-based parsing
assert content is not None
if content is None:
return [], None
function_calls.append(
FunctionCall(name=request.tool_choice.function.name, arguments=content)
)
Expand Down
3 changes: 2 additions & 1 deletion vllm/parser/abstract_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,8 @@ def _parse_tool_calls(
(ToolChoiceFunction, ChatCompletionNamedToolChoiceParam),
):
# Forced Function Call
assert content is not None
if content is None:
return [], None
function_calls.append(
FunctionCall(name=self._get_function_name(request), arguments=content)
)
Expand Down
Loading