150 changes: 150 additions & 0 deletions tests/entrypoints/openai/test_tool_calls_serialization.py
@@ -0,0 +1,150 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for tool_calls Iterable → list materialisation.

Regression tests for https://github.com/vllm-project/vllm/issues/34792.

Setting VLLM_LOGGING_LEVEL=debug caused tool calling to break for Mistral
models because:
1. The OpenAI Python SDK types tool_calls as Iterable[...] in
ChatCompletionAssistantMessageParam.
2. Pydantic v2, when validating from Python objects (not from raw JSON),
wraps Iterable fields in a one-shot lazy iterator.
3. Debug logging called model_dump_json() which consumed that iterator.
4. The Mistral tokenizer then saw empty tool_calls and raised
"ValueError: Unexpected tool call id ...".
"""

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest


def _make_tool_call(tc_id: str, name: str, args: str) -> dict:
return {
"id": tc_id,
"type": "function",
"function": {"name": name, "arguments": args},
}


def _make_request(messages: list) -> ChatCompletionRequest:
return ChatCompletionRequest(
model="test-model",
messages=messages,
)


def test_tool_calls_list_preserved_after_model_dump():
"""tool_calls in assistant messages must be readable after model_dump_json.

When the request is built from Python dicts (as in the Anthropic → OpenAI
conversion path), Pydantic v2 previously wrapped the Iterable tool_calls
in a one-shot iterator. model_dump_json() consumed it, leaving subsequent
readers (e.g. the Mistral tokenizer) with an empty sequence.
"""
tool_call = _make_tool_call("call_abc123", "get_weather", '{"city": "Paris"}')
messages = [
{"role": "user", "content": "What is the weather in Paris?"},
{"role": "assistant", "content": None, "tool_calls": [tool_call]},
{
"role": "tool",
"tool_call_id": "call_abc123",
"content": '{"temperature": 20}',
},
]

req = _make_request(messages)

# Simulate debug logging: serialize the model (this was the trigger)
_ = req.model_dump_json()

# The assistant message must still have accessible tool_calls afterwards
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)
tool_calls = assistant_msg.get("tool_calls")
assert tool_calls is not None, "tool_calls must not be None after model_dump_json"
assert isinstance(tool_calls, list), "tool_calls must be a list"
assert len(tool_calls) > 0, "tool_calls must not be empty after model_dump_json"


def test_tool_calls_from_generator_are_materialised():
"""tool_calls passed as a generator must be converted to list on validation."""
tool_call = _make_tool_call("call_gen1", "search", '{"query": "vllm"}')

def tool_calls_gen():
yield tool_call

messages = [
{"role": "user", "content": "Search for vllm"},
{
"role": "assistant",
"content": None,
"tool_calls": tool_calls_gen(), # one-shot generator
},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)

# Iterate twice — must not raise or return empty on second pass
tool_calls_first = list(assistant_msg.get("tool_calls", []))
tool_calls_second = list(assistant_msg.get("tool_calls", []))

assert len(tool_calls_first) == 1, "First read must return the tool call"
assert len(tool_calls_second) == 1, "Second read must also return the tool call"


def test_tool_calls_list_passthrough():
"""tool_calls already provided as a list must remain a list."""
tool_call = _make_tool_call("call_list1", "calculate", '{"expr": "2+2"}')
messages = [
{"role": "user", "content": "Calculate 2+2"},
{"role": "assistant", "content": None, "tool_calls": [tool_call]},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)
assert isinstance(assistant_msg.get("tool_calls"), list)


def test_messages_without_tool_calls_unaffected():
"""Messages without tool_calls must be handled correctly."""
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Hi there!"},
]

req = _make_request(messages)
# None of the messages should have tool_calls injected
for msg in req.messages:
assert isinstance(msg, dict)
assert msg.get("tool_calls") is None or msg.get("tool_calls") == []


@pytest.mark.parametrize("num_tool_calls", [1, 3])
def test_multiple_tool_calls_materialised(num_tool_calls: int):
"""Multiple tool calls in a single message are all preserved."""
tool_calls = [
_make_tool_call(f"call_{i}", f"func_{i}", f'{{"arg": {i}}}')
for i in range(num_tool_calls)
]
messages = [
{"role": "user", "content": "Do things"},
{"role": "assistant", "content": None, "tool_calls": iter(tool_calls)},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)

result_tool_calls = assistant_msg.get("tool_calls")
assert isinstance(result_tool_calls, list)
assert len(result_tool_calls) == num_tool_calls

# Verify after model_dump_json too
_ = req.model_dump_json()
assert len(assistant_msg.get("tool_calls", [])) == num_tool_calls
4 changes: 2 additions & 2 deletions vllm/entrypoints/chat_utils.py
@@ -290,7 +290,7 @@ class CustomChatCompletionMessageParam(TypedDict, total=False):
tool_call_id: str | None
"""Tool call that this message is responding to."""

-tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+tool_calls: list[ChatCompletionMessageToolCallParam] | None
"""The tool calls generated by the model, such as function calls."""

reasoning: str | None
@@ -321,7 +321,7 @@ class ConversationMessage(TypedDict, total=False):
name: str | None
"""The name of the function to call"""

-tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+tool_calls: list[ChatCompletionMessageToolCallParam] | None
"""The tool calls generated by the model, such as function calls."""

reasoning: str | None
41 changes: 41 additions & 0 deletions vllm/entrypoints/openai/chat_completion/protocol.py
@@ -357,6 +357,47 @@ class ChatCompletionRequest(OpenAIBaseModel):

# --8<-- [end:chat-completion-extra-params]

@model_validator(mode="before")
@classmethod
def _materialize_tool_calls_before(cls, data: Any) -> Any:
"""Eagerly convert tool_calls generators/iterators to lists.

Must run before Pydantic field validation so that one-shot
generators are not consumed during union type matching of
ChatCompletionAssistantMessageParam (which types tool_calls
as Iterable[...]).
"""
if not isinstance(data, dict):
return data
messages = data.get("messages")
if not isinstance(messages, list):
return data
for msg in messages:
if not isinstance(msg, dict):
continue
tool_calls = msg.get("tool_calls")
if tool_calls is not None and not isinstance(tool_calls, list):
msg["tool_calls"] = list(tool_calls)
return data

@model_validator(mode="after")
def _materialize_tool_calls_after(self) -> "ChatCompletionRequest":
"""Convert Pydantic ValidatorIterator wrappers back to lists.

Even after the "before" validator converts iterables to lists,
Pydantic re-wraps them in a ValidatorIterator when validating
against ChatCompletionAssistantMessageParam's Iterable[...] type.
This "after" pass materialises those wrappers so downstream code
(tokenizers, model_dump_json) always sees plain lists.
"""
for msg in self.messages:
if not isinstance(msg, dict):
continue
tool_calls = msg.get("tool_calls")
if tool_calls is not None and not isinstance(tool_calls, list):
msg["tool_calls"] = list(tool_calls)
return self
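
# Illustrative usage sketch (comments only, hypothetical model name and field
# values): with both validators in place, tool_calls built from a one-shot
# iterator survive serialisation, e.g.
#
#   req = ChatCompletionRequest(
#       model="some-model",
#       messages=[{
#           "role": "assistant",
#           "content": None,
#           "tool_calls": iter([{
#               "id": "call_1",
#               "type": "function",
#               "function": {"name": "f", "arguments": "{}"},
#           }]),
#       }],
#   )
#   req.model_dump_json()  # debug logging no longer empties tool_calls
#   assert isinstance(req.messages[0]["tool_calls"], list)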

def build_chat_params(
self,
default_template: str | None,