diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py
index 14462da5b9cb..9c5020f59b68 100644
--- a/tests/tool_parsers/test_deepseekv32_tool_parser.py
+++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py
@@ -11,6 +11,7 @@
import pytest
+from tests.tool_parsers.utils import run_tool_extraction_streaming
from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser
# ---------------------------------------------------------------------------
@@ -21,6 +22,7 @@
# tokenizer object to be truthy (the parser checks `if not self.model_tokenizer`).
MOCK_TOKENIZER = MagicMock()
MOCK_TOKENIZER.get_vocab.return_value = {}
+MOCK_TOKENIZER.tokenize.return_value = []
def make_parser() -> DeepSeekV32ToolParser:
@@ -474,3 +476,85 @@ def test_no_emission_while_incomplete(self, parser):
deltas = self._stream(parser, partial_text)
# Should have no tool call deltas yet
assert all(not d.tool_calls for d in deltas)
+
+
+class TestDelimiterPreservation:
+ """Regression: fast detokenization skipping DSML delimiters (PR #33964)."""
+
+ @pytest.fixture
+ def parser(self):
+ return make_parser()
+
+ def test_delimiter_preserved_fast_detokenization(self, parser):
+ """DSML delimiters as literal text must still be detected."""
+ # Delimiters appear as regular text (fast detokenization scenario).
+ model_output = (
+ f"{FC_START}\n"
+ f'{INV_START}get_weather">\n'
+ f'{PARAM_START}location" string="true">Tokyo{PARAM_END}\n'
+ f"{INV_END}\n"
+ f"{FC_END}"
+ )
+
+ # Non-streaming: parser must detect the tool call
+ result = parser.extract_tool_calls(model_output, None)
+ assert result.tools_called
+ assert len(result.tool_calls) == 1
+ assert result.tool_calls[0].function.name == "get_weather"
+ assert json.loads(result.tool_calls[0].function.arguments) == {
+ "location": "Tokyo"
+ }
+
+ assert result.content is None
+
+ # With content prefix
+ prefixed_output = "Here is the weather: " + model_output
+ result2 = parser.extract_tool_calls(prefixed_output, None)
+ assert result2.tools_called
+ assert result2.content == "Here is the weather: "
+
+ def test_tool_detection_skip_special_tokens_false(self, parser):
+ """Regression: skip_special_tokens must be False when tools are enabled."""
+ # adjust_request must set skip_special_tokens=False
+ tool = make_tool_param(
+ "search",
+ {
+ "type": "object",
+ "properties": {
+ "query": {"type": "string"},
+ },
+ },
+ )
+ request = make_request(tools=[tool])
+ request.tool_choice = "auto"
+ adjusted = parser.adjust_request(request)
+ assert adjusted.skip_special_tokens is False
+
+ full_text = build_tool_call("search", {"query": "vllm documentation"})
+
+ # Non-streaming extraction
+ non_stream_result = parser.extract_tool_calls(full_text, request)
+ assert non_stream_result.tools_called
+ assert len(non_stream_result.tool_calls) == 1
+ assert non_stream_result.tool_calls[0].function.name == "search"
+ ns_args = json.loads(non_stream_result.tool_calls[0].function.arguments)
+ assert ns_args == {"query": "vllm documentation"}
+
+ # Streaming extraction: drive the parser line-by-line
+ chunks: list[str] = []
+ remaining = full_text
+ while remaining:
+ nl = remaining.find("\n")
+ if nl == -1:
+ chunks.append(remaining)
+ break
+ chunks.append(remaining[: nl + 1])
+ remaining = remaining[nl + 1 :]
+
+ reconstructor = run_tool_extraction_streaming(
+ parser, chunks, request, assert_one_tool_per_delta=False
+ )
+ assert len(reconstructor.tool_calls) == 1
+ assert reconstructor.tool_calls[0].function.name == "search"
+ streamed_args = json.loads(reconstructor.tool_calls[0].function.arguments)
+ assert streamed_args == ns_args
diff --git a/tests/tool_parsers/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py
index 213cc75db7ea..1cfdbc97bbcc 100644
--- a/tests/tool_parsers/test_glm4_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py
@@ -817,3 +817,108 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_r
# Boolean should be deserialized as bool
assert args["enabled"] is True
assert isinstance(args["enabled"], bool)
+
+
+def test_zero_argument_tool_call(glm4_moe_tool_parser, mock_request):
+ """Regression: zero-argument tool call crash (PR #32321)."""
+    model_output = """<tool_call>get_time
+</tool_call>"""
+
+ extracted = glm4_moe_tool_parser.extract_tool_calls(
+ model_output, request=mock_request
+ ) # type: ignore[arg-type]
+
+ assert extracted.tools_called
+ assert len(extracted.tool_calls) == 1
+ assert extracted.tool_calls[0].function.name == "get_time"
+ args = json.loads(extracted.tool_calls[0].function.arguments)
+ assert args == {}
+
+
+def test_malformed_tool_call_no_regex_match(glm4_moe_tool_parser, mock_request):
+ """Regression: malformed tool_call with no regex match (PR #32321)."""
+    model_output = "<tool_call> </tool_call>"
+
+ extracted = glm4_moe_tool_parser.extract_tool_calls(
+ model_output, request=mock_request
+ ) # type: ignore[arg-type]
+
+ assert extracted.tools_called is False
+ assert extracted.tool_calls == []
+
+
+def test_delimiter_preserved_transformers_5x(glm4_moe_tool_parser):
+ """Regression: adjust_request sets skip_special_tokens=False (PR #31622)."""
+ # Tools enabled
+ request_with_tools = ChatCompletionRequest(
+ model=MODEL,
+ messages=[],
+ tools=[
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "parameters": {
+ "type": "object",
+ "properties": {"city": {"type": "string"}},
+ },
+ },
+ }
+ ],
+ ) # type: ignore
+ adjusted = glm4_moe_tool_parser.adjust_request(request_with_tools)
+ assert adjusted.skip_special_tokens is False
+
+ # tool_choice="none"
+ request_no_choice = ChatCompletionRequest(
+ model=MODEL,
+ messages=[],
+ tools=[
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "parameters": {
+ "type": "object",
+ "properties": {"city": {"type": "string"}},
+ },
+ },
+ }
+ ],
+ tool_choice="none",
+ ) # type: ignore
+ adjusted_none = glm4_moe_tool_parser.adjust_request(request_no_choice)
+ assert adjusted_none.skip_special_tokens is True
+
+ # No tools at all
+ request_no_tools = ChatCompletionRequest(
+ model=MODEL,
+ messages=[],
+ ) # type: ignore
+ adjusted_empty = glm4_moe_tool_parser.adjust_request(request_no_tools)
+ assert adjusted_empty.skip_special_tokens is True
+
+
+def test_unicode_characters_preserved(glm4_moe_tool_parser, mock_request):
+ """Regression: Unicode chars must not be escaped to \\uXXXX (PR #30920)."""
+    model_output = """<tool_call>send_message
+<arg_key>greeting</arg_key>
+<arg_value>你好世界</arg_value>
+<arg_key>emoji</arg_key>
+<arg_value>🎉</arg_value>
+</tool_call>"""
+
+ extracted = glm4_moe_tool_parser.extract_tool_calls(
+ model_output, request=mock_request
+ ) # type: ignore[arg-type]
+
+ assert extracted.tools_called
+ assert len(extracted.tool_calls) == 1
+
+ raw_args = extracted.tool_calls[0].function.arguments
+ assert "你好世界" in raw_args
+ assert "🎉" in raw_args
+ assert "\\u4f60" not in raw_args
+ parsed_args = json.loads(raw_args)
+ assert parsed_args["greeting"] == "你好世界"
+ assert parsed_args["emoji"] == "🎉"
diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
index 21b3d5adfde1..09c1c461c100 100644
--- a/tests/tool_parsers/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -872,6 +872,59 @@ def test_streaming_tool_call_markers_not_leaked(kimi_k2_tool_parser):
assert "I'll check the weather." in full_content or len(all_content) > 0
+def test_native_id_extracted_and_placed_on_tool_call(kimi_k2_tool_parser):
+ """Regression: parser extracts native ID onto ToolCall (PR #32768)."""
+ model_output = (
+ "Checking weather. "
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.get_weather:0"
+ '<|tool_call_argument_begin|>{"city": "Tokyo"}'
+ "<|tool_call_end|>"
+ "<|tool_calls_section_end|>"
+ )
+
+ result = kimi_k2_tool_parser.extract_tool_calls(model_output, request=None)
+ assert result.tools_called
+ assert len(result.tool_calls) == 1
+
+ tc = result.tool_calls[0]
+ # Native ID from model output must be used as the tool call ID
+ assert tc.id == "functions.get_weather:0"
+ assert tc.function.name == "get_weather"
+ assert json.loads(tc.function.arguments) == {"city": "Tokyo"}
+
+
+def test_multi_turn_native_id_continuity(kimi_k2_tool_parser, kimi_k2_tokenizer):
+ """Regression: native IDs from turn 1 preserved across turns (PR #32768)."""
+ turn1_output = (
+ "Let me check. "
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.get_weather:0"
+ '<|tool_call_argument_begin|>{"city": "Beijing"}'
+ "<|tool_call_end|>"
+ "<|tool_calls_section_end|>"
+ )
+
+ turn1_result = kimi_k2_tool_parser.extract_tool_calls(turn1_output, request=None)
+ assert turn1_result.tools_called
+ assert turn1_result.tool_calls[0].id == "functions.get_weather:0"
+
+ # Fresh parser for turn 2
+ turn2_parser = KimiK2ToolParser(kimi_k2_tokenizer)
+ turn2_output = (
+ "Now let me get news. "
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.get_news:0"
+ '<|tool_call_argument_begin|>{"topic": "weather in Beijing"}'
+ "<|tool_call_end|>"
+ "<|tool_calls_section_end|>"
+ )
+
+ turn2_result = turn2_parser.extract_tool_calls(turn2_output, request=None)
+ assert turn2_result.tools_called
+ assert turn2_result.tool_calls[0].id == "functions.get_news:0"
+
+
def test_streaming_multiple_tool_calls_not_leaked(kimi_k2_tool_parser):
"""
Test that MULTIPLE tool calls in streaming mode do not leak into content.
diff --git a/tests/tool_parsers/test_minimax_m2_tool_parser.py b/tests/tool_parsers/test_minimax_m2_tool_parser.py
index d61b6b6201cd..aa396492d262 100644
--- a/tests/tool_parsers/test_minimax_m2_tool_parser.py
+++ b/tests/tool_parsers/test_minimax_m2_tool_parser.py
@@ -5,6 +5,11 @@
import pytest
+from vllm.entrypoints.openai.chat_completion.protocol import (
+ ChatCompletionRequest,
+ ChatCompletionToolsParam,
+ FunctionDefinition,
+)
from vllm.tool_parsers.minimax_m2_tool_parser import (
MinimaxM2ToolParser,
)
@@ -442,3 +447,127 @@ def test_header_and_params_in_separate_chunks(self, parser):
"city": "Seattle",
"days": "5",
}
+
+
+def _make_request_with_tools(tools_spec):
+ """Build a ChatCompletionRequest with tool definitions.
+
+ *tools_spec* is a list of dicts, each with 'name' and 'parameters' keys.
+ """
+ tools = []
+ for spec in tools_spec:
+ tools.append(
+ ChatCompletionToolsParam(
+ function=FunctionDefinition(
+ name=spec["name"],
+ parameters=spec["parameters"],
+ ),
+ )
+ )
+ return ChatCompletionRequest(
+ messages=[],
+ model="test-model",
+ tools=tools,
+ )
+
+
+class TestAnyOfNullableParam:
+ """Regression: anyOf nullable parameter parsing (PR #32342)."""
+
+ def test_anyof_nullable_param_non_null_value(self, parser):
+ """A valid non-null string should be preserved, not collapsed to None."""
+ request = _make_request_with_tools(
+ [
+ {
+ "name": "update_profile",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "nickname": {
+ "anyOf": [{"type": "string"}, {"type": "null"}],
+ },
+ },
+ },
+ }
+ ]
+ )
+
+ results = _feed(
+ parser,
+ [
+            '<minimax:tool_call><invoke name="update_profile">'
+            '<parameter name="nickname">Alice</parameter>'
+            "</invoke></minimax:tool_call>",
+ ],
+ request=request,
+ )
+ tc = _collect_tool_calls(results)
+ assert len(tc) == 1
+ parsed = json.loads(tc[0]["arguments"])
+ assert parsed["nickname"] == "Alice"
+
+ def test_anyof_nullable_param_null_value(self, parser):
+ """An actual null-like value should be returned as None/null."""
+ request = _make_request_with_tools(
+ [
+ {
+ "name": "update_profile",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "nickname": {
+ "anyOf": [{"type": "string"}, {"type": "null"}],
+ },
+ },
+ },
+ }
+ ]
+ )
+
+ results = _feed(
+ parser,
+ [
+            '<minimax:tool_call><invoke name="update_profile">'
+            '<parameter name="nickname">null</parameter>'
+            "</invoke></minimax:tool_call>",
+ ],
+ request=request,
+ )
+ tc = _collect_tool_calls(results)
+ assert len(tc) == 1
+ parsed = json.loads(tc[0]["arguments"])
+ assert parsed["nickname"] is None
+
+ def test_anyof_nullable_param_object_value(self, parser):
+ """A valid object value in anyOf with null should parse as dict."""
+ request = _make_request_with_tools(
+ [
+ {
+ "name": "update_settings",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "config": {
+ "anyOf": [{"type": "object"}, {"type": "null"}],
+ },
+ },
+ },
+ }
+ ]
+ )
+
+ results = _feed(
+ parser,
+ [
+            '<minimax:tool_call><invoke name="update_settings">'
+            '<parameter name="config">{"theme": "dark", "fontSize": 14}</parameter>'
+            "</invoke>"
+            "</minimax:tool_call>",
+ ],
+ request=request,
+ )
+ tc = _collect_tool_calls(results)
+ assert len(tc) == 1
+ parsed = json.loads(tc[0]["arguments"])
+ assert parsed["config"] == {"theme": "dark", "fontSize": 14}
+ assert isinstance(parsed["config"], dict)
diff --git a/tests/tool_parsers/test_mistral_tool_parser.py b/tests/tool_parsers/test_mistral_tool_parser.py
index bf2fba8a8655..4be5646669be 100644
--- a/tests/tool_parsers/test_mistral_tool_parser.py
+++ b/tests/tool_parsers/test_mistral_tool_parser.py
@@ -890,3 +890,64 @@ def test_extract_tool_calls_streaming_pre_v11_tokenizer_one_chunk(
assert expected_content == ""
else:
assert delta_message.content == expected_content
+
+
+def test_fast_detokenization_text_detection(mistral_tool_parser):
+ """Regression: bot_token in text but not token_ids (PR #37209)."""
+ model_output = '[TOOL_CALLS]add{"a": 1, "b": 2}'
+ # Token IDs that do NOT contain bot_token_id.
+ fake_token_ids = list(range(99, 99 + 20))
+
+ # First delta: pure content, no bot token yet
+ delta_message_before = mistral_tool_parser.extract_tool_calls_streaming(
+ previous_text="",
+ current_text="Hello",
+ delta_text="Hello",
+ previous_token_ids=[],
+ current_token_ids=[99],
+ delta_token_ids=[99],
+ request=None,
+ )
+ assert delta_message_before is not None
+ assert delta_message_before.content == "Hello"
+ assert not delta_message_before.tool_calls
+
+ # Second delta: bot token in text but NOT in token_ids
+ delta_message = mistral_tool_parser.extract_tool_calls_streaming(
+ previous_text="Hello",
+ current_text="Hello" + model_output,
+ delta_text=model_output,
+ previous_token_ids=[99],
+ current_token_ids=fake_token_ids,
+ delta_token_ids=fake_token_ids[1:],
+ request=None,
+ )
+ assert delta_message is not None
+ assert delta_message.tool_calls is not None
+ assert len(delta_message.tool_calls) > 0
+ assert delta_message.tool_calls[0].function is not None
+ assert delta_message.tool_calls[0].function.name == "add"
+
+
+def test_fast_detokenization_text_detection_pre_v11(
+ mistral_pre_v11_tool_parser,
+):
+ """Regression: bot_token text detection for pre-v11 tokenizer (PR #37209)."""
+ model_output = '[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1, "b": 2}}]'
+
+ fake_token_ids = list(range(99, 99 + 30))
+
+ delta_message = mistral_pre_v11_tool_parser.extract_tool_calls_streaming(
+ previous_text="",
+ current_text=model_output,
+ delta_text=model_output,
+ previous_token_ids=[],
+ current_token_ids=fake_token_ids,
+ delta_token_ids=fake_token_ids,
+ request=None,
+ )
+ assert delta_message is not None
+ assert delta_message.tool_calls is not None
+ assert len(delta_message.tool_calls) > 0
+ assert delta_message.tool_calls[0].function is not None
+ assert delta_message.tool_calls[0].function.name == "add"
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index 3d46f73de612..ae1d09532e55 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -976,3 +976,155 @@ def test_extract_tool_calls_streaming_missing_opening_tag(
assert args["city"] == "Dallas"
assert args["state"] == "TX"
assert args["unit"] == "fahrenheit"
+
+
+def test_malformed_xml_no_gt_delimiter(qwen3_tool_parser, sample_tools):
+ """Regression: malformed XML without '>' must not crash (PR #36774)."""
+ model_output = (
+        "<tool_call>\n"
+        "<function=get_current_weather\n"
+        "<parameter=city>Dallas\n"
+        "</tool_call>"
+ )
+
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+ result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+ assert result is not None
+ assert isinstance(result.tool_calls, list)
+ assert all(tc is not None for tc in result.tool_calls)
+
+
+def test_none_tool_calls_filtered(qwen3_tool_parser, sample_tools):
+ """Regression: None tool calls filtered from output (PR #36774)."""
+ model_output = (
+        "<tool_call>\n"
+        "</tool_call>\n"
+        "<tool_call>\n"
+        "<function=get_current_weather>\n"
+        "<parameter=city>\n"
+        "Dallas\n</parameter>\n"
+        "<parameter=state>\nTX\n</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+ )
+
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+ result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+ assert all(tc is not None for tc in result.tool_calls)
+ assert result.tools_called
+ assert len(result.tool_calls) == 1
+ assert result.tool_calls[0].function.name == "get_current_weather"
+ args = json.loads(result.tool_calls[0].function.arguments)
+ assert args["city"] == "Dallas"
+ assert args["state"] == "TX"
+
+
+def test_anyof_parameter_not_double_encoded(qwen3_tool_parser):
+ """Regression: anyOf parameters must not be double-encoded (PR #36032)."""
+ tools = [
+ ChatCompletionToolsParam(
+ type="function",
+ function={
+ "name": "update_record",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "anyOf": [{"type": "object"}, {"type": "null"}],
+ },
+ },
+ },
+ },
+ )
+ ]
+
+ model_output = (
+        "<tool_call>\n"
+        "<function=update_record>\n<parameter=data>\n"
+        '{"key": "value", "count": 42}\n'
+        "</parameter>\n</function>\n"
+        "</tool_call>"
+ )
+
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+ result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+
+ assert result.tools_called
+ assert len(result.tool_calls) == 1
+ args = json.loads(result.tool_calls[0].function.arguments)
+ assert isinstance(args["data"], dict)
+ assert args["data"] == {"key": "value", "count": 42}
+
+
+def test_streaming_multi_param_single_chunk(
+ qwen3_tool_parser, qwen3_tokenizer, sample_tools
+):
+ """Regression: speculative decode delivering multiple params at once (PR #35615)."""
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+
+ deltas = [
+ "",
+ "\n",
+ "\n", # triggers json_started -> sends "{"
+ # This single delta delivers all three parameters at once
+ "\nDallas\n"
+ "\n\nTX\n"
+ "\n\nfahrenheit\n",
+ "\n",
+ "\n",
+ ]
+
+ from tests.tool_parsers.utils import (
+ run_tool_extraction_streaming,
+ )
+
+ reconstructor = run_tool_extraction_streaming(
+ qwen3_tool_parser,
+ deltas,
+ request,
+ assert_one_tool_per_delta=False,
+ )
+
+ assert len(reconstructor.tool_calls) == 1
+ args = json.loads(reconstructor.tool_calls[0].function.arguments)
+ assert args["city"] == "Dallas"
+ assert args["state"] == "TX"
+ assert args["unit"] == "fahrenheit"
+
+
+def test_no_double_serialization_string_args(qwen3_tool_parser):
+ """Regression: string arguments must not be double-serialized (PR #35615)."""
+ tools = [
+ ChatCompletionToolsParam(
+ type="function",
+ function={
+ "name": "greet",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "message": {"type": "string"},
+ },
+ },
+ },
+ )
+ ]
+
+ model_output = (
+        "<tool_call>\n"
+        "<function=greet>\n<parameter=message>\n"
+        "hello world\n"
+        "</parameter>\n</function>\n"
+        "</tool_call>"
+ )
+
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+ result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
+
+ assert result.tools_called
+ assert len(result.tool_calls) == 1
+ raw_arguments = result.tool_calls[0].function.arguments
+ args = json.loads(raw_arguments)
+ assert args["message"] == "hello world"
+ assert '\\"hello world\\"' not in raw_arguments
diff --git a/tests/tool_parsers/test_step3p5_tool_parser.py b/tests/tool_parsers/test_step3p5_tool_parser.py
index b3cb4e20fb9c..9ec740760f71 100644
--- a/tests/tool_parsers/test_step3p5_tool_parser.py
+++ b/tests/tool_parsers/test_step3p5_tool_parser.py
@@ -1433,3 +1433,140 @@ def test_extract_tool_calls_non_streaming_multiple_tool_calls_no_content_between
assert "" not in extracted_tool_calls.content, (
"Second tool call should not be in content"
)
+
+
+def _accumulate_tool_states(delta_messages):
+ """Accumulate tool call state from a stream of DeltaMessage objects."""
+ content = ""
+ tool_states = {}
+ for delta_message in delta_messages:
+ if delta_message.content:
+ content += delta_message.content
+ if delta_message.tool_calls:
+ for tool_call in delta_message.tool_calls:
+ idx = tool_call.index
+ if idx not in tool_states:
+ tool_states[idx] = {
+ "id": None,
+ "name": None,
+ "arguments": "",
+ "type": None,
+ }
+ if tool_call.id:
+ tool_states[idx]["id"] = tool_call.id
+ if tool_call.type:
+ tool_states[idx]["type"] = tool_call.type
+ if tool_call.function:
+ if tool_call.function.name:
+ tool_states[idx]["name"] = tool_call.function.name
+ if tool_call.function.arguments is not None:
+ tool_states[idx]["arguments"] += tool_call.function.arguments
+ return content, tool_states
+
+
+def test_streaming_mtp_variable_chunks(
+ step3p5_tool_parser, step3p5_tokenizer, sample_tools
+):
+ """Regression: MTP variable-size chunks spanning param boundaries (PR #33690)."""
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+
+ delta_text_chunks = [
+ "\n\n\n",
+ "Dallas\n\n\nTX",
+ "\n\n\nfahrenheit\n",
+ "\n\n",
+ ]
+
+ _, tool_states = _accumulate_tool_states(
+ stream_delta_message_generator_from_chunks(
+ step3p5_tool_parser, step3p5_tokenizer, delta_text_chunks, request
+ )
+ )
+
+ assert len(tool_states) == 1
+
+ state = tool_states[0]
+ assert state["id"] is not None
+ assert state["type"] == "function"
+ assert state["name"] == "get_current_weather"
+
+ args = json.loads(state["arguments"])
+ assert args["city"] == "Dallas"
+ assert args["state"] == "TX"
+ assert args["unit"] == "fahrenheit"
+
+
+def test_streaming_multi_token_per_step(
+ step3p5_tool_parser, step3p5_tokenizer, sample_tools
+):
+ """Regression: MTP large chunks spanning multiple tool calls (PR #33690)."""
+ model_output = """
+
+
+Dallas
+
+
+TX
+
+
+fahrenheit
+
+
+
+
+
+
+Orlando
+
+
+FL
+
+
+celsius
+
+
+"""
+
+ request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
+
+ # MTP-style large chunks
+ mtp_chunks = [
+ (
+ "\n\n"
+ "\nDallas\n\n"
+ "\nTX"
+ ),
+ (
+ "\n\n\nfahrenheit\n\n"
+ "\n\n"
+ "\n\n"
+ "\nOrlando\n\n"
+ "\nFL\n\n"
+ "\ncelsius\n\n"
+ "\n"
+ ),
+ ]
+
+ _, mtp_tool_states = _accumulate_tool_states(
+ stream_delta_message_generator_from_chunks(
+ step3p5_tool_parser, step3p5_tokenizer, mtp_chunks, request
+ )
+ )
+
+ # Token-by-token streaming (reference)
+ step3p5_tool_parser_ref = Step3p5ToolParser(step3p5_tokenizer)
+ _, ref_tool_states = _accumulate_tool_states(
+ stream_delta_message_generator(
+ step3p5_tool_parser_ref, step3p5_tokenizer, model_output, request
+ )
+ )
+
+ assert len(mtp_tool_states) == 2
+ assert len(ref_tool_states) == 2
+
+ # MTP results must match reference
+ for idx in range(2):
+ assert mtp_tool_states[idx]["name"] == ref_tool_states[idx]["name"]
+ mtp_args = json.loads(mtp_tool_states[idx]["arguments"])
+ ref_args = json.loads(ref_tool_states[idx]["arguments"])
+ assert mtp_args == ref_args