Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -514,3 +514,27 @@ async def test_inconsistent_tool_choice_and_tools(
],
tool_choice={},
)


@pytest.mark.asyncio
async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
    """Regression test: `tool_choice="required"` with a tiny `max_completion_tokens`.

    Previously this combination crashed the engine because the server asserted
    that at least one tool call was produced; when the token budget is too
    small to emit any tool call, the request must instead complete gracefully
    with an empty response, matching OpenAI's behavior (which returns
    `finish_reason="length"`, empty `content`, and no `tool_calls`).
    """
    models = await client.models.list()
    model_name: str = models.data[0].id

    # This combination previously crashed the engine.
    chat_completion = await client.chat.completions.create(
        messages=messages,
        temperature=0,
        max_completion_tokens=1,
        model=model_name,
        tools=tools,
        tool_choice="required",
    )
    # When `tool_choice="required"` and the tokens needed for `tools` exceed
    # `max_tokens`, both `tool_calls` and `content` should be empty.
    # This behavior should be consistent with OpenAI.
    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    # OpenAI reports the absence of tool calls as `tool_calls=None` (observed
    # with gpt-5), while an empty list is equally acceptable — accept both
    # rather than calling len() on a potential None.
    assert not choice.message.tool_calls
    assert choice.message.content == ""
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/chat_completion/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -1507,7 +1507,7 @@ async def chat_completion_full_generator(

elif request.tool_choice and request.tool_choice == "required":
tool_call_class_items = []
assert tool_calls is not None and len(tool_calls) > 0
tool_calls = tool_calls or []
for idx, tool_call in enumerate(tool_calls):
# Use native ID if available,
# otherwise generate ID with correct id_type
Expand Down
19 changes: 11 additions & 8 deletions vllm/entrypoints/openai/engine/serving.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import contextlib
import json
import time
from collections.abc import AsyncGenerator, Callable, Mapping, Sequence
Expand All @@ -13,7 +14,7 @@
from openai.types.responses import (
ToolChoiceFunction,
)
from pydantic import ConfigDict, TypeAdapter
from pydantic import ConfigDict, TypeAdapter, ValidationError
from starlette.datastructures import Headers

import vllm.envs as envs
Expand Down Expand Up @@ -1125,17 +1126,19 @@ def _parse_tool_calls_from_content(
)
content = None # Clear content since tool is called.
elif request.tool_choice == "required":
assert content is not None
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
function_calls.extend(
[
tool_calls = []
with contextlib.suppress(ValidationError):
content = content or ""
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
content
)
for tool_call in tool_calls:
function_calls.append(
FunctionCall(
name=tool_call.name,
arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
)
for tool_call in tool_calls
]
)
)
content = None # Clear content since tool is called.
elif (
tool_parser_cls
Expand Down
Loading