150 changes: 150 additions & 0 deletions tests/entrypoints/openai/test_tool_calls_serialization.py
@@ -0,0 +1,150 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for tool_calls Iterable → list materialisation.

Regression tests for https://github.com/vllm-project/vllm/issues/34792.

Setting VLLM_LOGGING_LEVEL=debug caused tool calling to break for Mistral
models because:
1. The OpenAI Python SDK types tool_calls as Iterable[...] in
ChatCompletionAssistantMessageParam.
2. Pydantic v2, when validating from Python objects (not from raw JSON),
wraps Iterable fields in a one-shot lazy iterator.
3. Debug logging called model_dump_json() which consumed that iterator.
4. The Mistral tokenizer then saw empty tool_calls and raised
"ValueError: Unexpected tool call id ...".
"""

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest


def _make_tool_call(tc_id: str, name: str, args: str) -> dict:
return {
"id": tc_id,
"type": "function",
"function": {"name": name, "arguments": args},
}


def _make_request(messages: list) -> ChatCompletionRequest:
return ChatCompletionRequest(
model="test-model",
messages=messages,
)


def test_tool_calls_list_preserved_after_model_dump():
"""tool_calls in assistant messages must be readable after model_dump_json.

When the request is built from Python dicts (as in the Anthropic → OpenAI
conversion path), Pydantic v2 previously wrapped the Iterable tool_calls
in a one-shot iterator. model_dump_json() consumed it, leaving subsequent
readers (e.g. the Mistral tokenizer) with an empty sequence.
"""
tool_call = _make_tool_call("call_abc123", "get_weather", '{"city": "Paris"}')
messages = [
{"role": "user", "content": "What is the weather in Paris?"},
{"role": "assistant", "content": None, "tool_calls": [tool_call]},
{
"role": "tool",
"tool_call_id": "call_abc123",
"content": '{"temperature": 20}',
},
]

req = _make_request(messages)

# Simulate debug logging: serialize the model (this was the trigger)
_ = req.model_dump_json()

# The assistant message must still have accessible tool_calls afterwards
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)
tool_calls = assistant_msg.get("tool_calls")
assert tool_calls is not None, "tool_calls must not be None after model_dump_json"
assert isinstance(tool_calls, list), "tool_calls must be a list"
assert len(tool_calls) > 0, "tool_calls must not be empty after model_dump_json"


def test_tool_calls_from_generator_are_materialised():
"""tool_calls passed as a generator must be converted to list on validation."""
tool_call = _make_tool_call("call_gen1", "search", '{"query": "vllm"}')

def tool_calls_gen():
yield tool_call

messages = [
{"role": "user", "content": "Search for vllm"},
{
"role": "assistant",
"content": None,
"tool_calls": tool_calls_gen(), # one-shot generator
},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)

# Iterate twice — must not raise or return empty on second pass
tool_calls_first = list(assistant_msg.get("tool_calls", []))
tool_calls_second = list(assistant_msg.get("tool_calls", []))

assert len(tool_calls_first) == 1, "First read must return the tool call"
assert len(tool_calls_second) == 1, "Second read must also return the tool call"


def test_tool_calls_list_passthrough():
"""tool_calls already provided as a list must remain a list."""
tool_call = _make_tool_call("call_list1", "calculate", '{"expr": "2+2"}')
messages = [
{"role": "user", "content": "Calculate 2+2"},
{"role": "assistant", "content": None, "tool_calls": [tool_call]},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)
assert isinstance(assistant_msg.get("tool_calls"), list)


def test_messages_without_tool_calls_unaffected():
"""Messages without tool_calls must be handled correctly."""
messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Hi there!"},
]

req = _make_request(messages)
# None of the messages should have tool_calls injected
for msg in req.messages:
assert isinstance(msg, dict)
assert msg.get("tool_calls") is None or msg.get("tool_calls") == []


@pytest.mark.parametrize("num_tool_calls", [1, 3])
def test_multiple_tool_calls_materialised(num_tool_calls: int):
"""Multiple tool calls in a single message are all preserved."""
tool_calls = [
_make_tool_call(f"call_{i}", f"func_{i}", f'{{"arg": {i}}}')
for i in range(num_tool_calls)
]
messages = [
{"role": "user", "content": "Do things"},
{"role": "assistant", "content": None, "tool_calls": iter(tool_calls)},
]

req = _make_request(messages)
assistant_msg = req.messages[1]
assert isinstance(assistant_msg, dict)

result_tool_calls = assistant_msg.get("tool_calls")
assert isinstance(result_tool_calls, list)
assert len(result_tool_calls) == num_tool_calls

# Verify after model_dump_json too
_ = req.model_dump_json()
assert len(assistant_msg.get("tool_calls", [])) == num_tool_calls
4 changes: 2 additions & 2 deletions vllm/entrypoints/chat_utils.py
@@ -290,7 +290,7 @@ class CustomChatCompletionMessageParam(TypedDict, total=False):
tool_call_id: str | None
"""Tool call that this message is responding to."""

-tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+tool_calls: list[ChatCompletionMessageToolCallParam] | None
"""The tool calls generated by the model, such as function calls."""

reasoning: str | None
@@ -321,7 +321,7 @@ class ConversationMessage(TypedDict, total=False):
name: str | None
"""The name of the function to call"""

-tool_calls: Iterable[ChatCompletionMessageToolCallParam] | None
+tool_calls: list[ChatCompletionMessageToolCallParam] | None
"""The tool calls generated by the model, such as function calls."""

reasoning: str | None
41 changes: 41 additions & 0 deletions vllm/entrypoints/openai/chat_completion/protocol.py
@@ -357,6 +357,47 @@ class ChatCompletionRequest(OpenAIBaseModel):

# --8<-- [end:chat-completion-extra-params]

@model_validator(mode="before")
@classmethod
def _materialize_tool_calls_before(cls, data: Any) -> Any:
"""Eagerly convert tool_calls generators/iterators to lists.

Must run before Pydantic field validation so that one-shot
generators are not consumed during union type matching of
ChatCompletionAssistantMessageParam (which types tool_calls
as Iterable[...]).
"""
if not isinstance(data, dict):
return data
messages = data.get("messages")
if not isinstance(messages, list):
return data
for msg in messages:
if not isinstance(msg, dict):
continue
tool_calls = msg.get("tool_calls")
if tool_calls is not None and not isinstance(tool_calls, list):
msg["tool_calls"] = list(tool_calls)
return data

@model_validator(mode="after")
def _materialize_tool_calls_after(self) -> "ChatCompletionRequest":
"""Convert Pydantic ValidatorIterator wrappers back to lists.

Even after the "before" validator converts iterables to lists,
Pydantic re-wraps them in a ValidatorIterator when validating
against ChatCompletionAssistantMessageParam's Iterable[...] type.
This "after" pass materialises those wrappers so downstream code
(tokenizers, model_dump_json) always sees plain lists.
"""
for msg in self.messages:
if not isinstance(msg, dict):
continue
tool_calls = msg.get("tool_calls")
if tool_calls is not None and not isinstance(tool_calls, list):
msg["tool_calls"] = list(tool_calls)
return self
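
# Illustrative usage sketch (comments only, hypothetical model name and field
# values): with both validators in place, tool_calls built from a one-shot
# iterator survive serialisation, e.g.
#
#   req = ChatCompletionRequest(
#       model="some-model",
#       messages=[{
#           "role": "assistant",
#           "content": None,
#           "tool_calls": iter([{
#               "id": "call_1",
#               "type": "function",
#               "function": {"name": "f", "arguments": "{}"},
#           }]),
#       }],
#   )
#   req.model_dump_json()  # debug logging no longer empties tool_calls
#   assert isinstance(req.messages[0]["tool_calls"], list)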

def build_chat_params(
self,
default_template: str | None,