diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py
index 15a2fb85f489..39ab13213134 100644
--- a/tests/entrypoints/openai/test_completion_with_function_calling.py
+++ b/tests/entrypoints/openai/test_completion_with_function_calling.py
@@ -514,3 +514,27 @@ async def test_inconsistent_tool_choice_and_tools(
         ],
         tool_choice={},
     )
+
+
+@pytest.mark.asyncio
+async def test_max_tokens_with_tool_choice_required(client: openai.AsyncOpenAI):
+    """tool_choice="required" with a 1-token budget must not crash the engine."""
+    models = await client.models.list()
+    model_name: str = models.data[0].id
+
+    # This combination previously crashed the engine
+    chat_completion = await client.chat.completions.create(
+        messages=messages,
+        temperature=0,
+        max_completion_tokens=1,
+        model=model_name,
+        tools=tools,
+        tool_choice="required",
+    )
+    # When `tool_choice="required"` and the tokens of `tools` exceed `max_tokens`,
+    # both `tool_calls` and `content` should be empty.
+    # This behavior should be consistent with OpenAI.
+    choice = chat_completion.choices[0]
+    assert choice.finish_reason == "length"
+    assert len(choice.message.tool_calls) == 0
+    assert choice.message.content == ""
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index eb39e649a7e4..da934c05d5a7 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1507,7 +1507,7 @@ async def chat_completion_full_generator(
         elif request.tool_choice and request.tool_choice == "required":
             tool_call_class_items = []
 
-            assert tool_calls is not None and len(tool_calls) > 0
+            tool_calls = tool_calls or []
             for idx, tool_call in enumerate(tool_calls):
                 # Use native ID if available,
                 # otherwise generate ID with correct id_type
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index fad2a7f8c2eb..2049b3adfd3c 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
+import contextlib
 import json
 import time
 from collections.abc import AsyncGenerator, Callable, Mapping, Sequence
@@ -13,7 +14,7 @@ from openai.types.responses import (
     ToolChoiceFunction,
 )
 
-from pydantic import ConfigDict, TypeAdapter
+from pydantic import ConfigDict, TypeAdapter, ValidationError
 from starlette.datastructures import Headers
 
 import vllm.envs as envs
@@ -1125,17 +1126,19 @@ def _parse_tool_calls_from_content(
             )
             content = None  # Clear content since tool is called.
         elif request.tool_choice == "required":
-            assert content is not None
-            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
-            function_calls.extend(
-                [
+            tool_calls = []
+            with contextlib.suppress(ValidationError):
+                content = content or ""
+                tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
+                    content
+                )
+            for tool_call in tool_calls:
+                function_calls.append(
                     FunctionCall(
                         name=tool_call.name,
                         arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
                     )
-                    for tool_call in tool_calls
-                ]
-            )
+                )
             content = None  # Clear content since tool is called.
         elif (
             tool_parser_cls