Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions tests/tool_parsers/test_deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pytest

from tests.tool_parsers.utils import run_tool_extraction_streaming
from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser

# ---------------------------------------------------------------------------
Expand All @@ -21,6 +22,7 @@
# tokenizer object to be truthy (the parser checks `if not self.model_tokenizer`).
MOCK_TOKENIZER = MagicMock()
MOCK_TOKENIZER.get_vocab.return_value = {}
MOCK_TOKENIZER.tokenize.return_value = []


def make_parser() -> DeepSeekV32ToolParser:
Expand Down Expand Up @@ -474,3 +476,85 @@ def test_no_emission_while_incomplete(self, parser):
deltas = self._stream(parser, partial_text)
# Should have no tool call deltas yet
assert all(not d.tool_calls for d in deltas)


class TestDelimiterPreservation:
    """Regression: fast detokenization skipping DSML delimiters (PR #33964)."""

    @pytest.fixture
    def parser(self):
        """Provide a fresh parser instance for each test."""
        return make_parser()

    def test_delimiter_preserved_fast_detokenization(self, parser):
        """DSML delimiters as literal text must still be detected."""
        # Delimiters appear as regular text (fast detokenization scenario).
        model_output = (
            f"{FC_START}\n"
            f'{INV_START}get_weather">\n'
            f'{PARAM_START}location" string="true">Tokyo{PARAM_END}\n'
            f"{INV_END}\n"
            f"{FC_END}"
        )

        # Non-streaming: parser must detect the tool call.
        result = parser.extract_tool_calls(model_output, None)
        assert result.tools_called
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].function.name == "get_weather"
        assert json.loads(result.tool_calls[0].function.arguments) == {
            "location": "Tokyo"
        }
        # No text precedes the tool call, so content must be None (not "").
        assert result.content is None

        # With a content prefix: the prefix must survive as content.
        prefixed_output = "Here is the weather: " + model_output
        result2 = parser.extract_tool_calls(prefixed_output, None)
        assert result2.tools_called
        assert result2.content == "Here is the weather: "

    def test_tool_detection_skip_special_tokens_false(self, parser):
        """Regression: skip_special_tokens must be False when tools are enabled."""
        # adjust_request must set skip_special_tokens=False so the DSML
        # delimiter tokens reach the parser instead of being stripped.
        tool = make_tool_param(
            "search",
            {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
            },
        )
        request = make_request(tools=[tool])
        request.tool_choice = "auto"
        adjusted = parser.adjust_request(request)
        assert adjusted.skip_special_tokens is False

        full_text = build_tool_call("search", {"query": "vllm documentation"})

        # Non-streaming extraction.
        non_stream_result = parser.extract_tool_calls(full_text, request)
        assert non_stream_result.tools_called
        assert len(non_stream_result.tool_calls) == 1
        assert non_stream_result.tool_calls[0].function.name == "search"
        ns_args = json.loads(non_stream_result.tool_calls[0].function.arguments)
        assert ns_args == {"query": "vllm documentation"}

        # Streaming extraction: drive the parser line-by-line.
        # splitlines(keepends=True) yields each line with its trailing "\n"
        # intact, replacing the original manual find("\n") slicing loop.
        chunks: list[str] = full_text.splitlines(keepends=True)

        reconstructor = run_tool_extraction_streaming(
            parser, chunks, request, assert_one_tool_per_delta=False
        )
        assert len(reconstructor.tool_calls) == 1
        assert reconstructor.tool_calls[0].function.name == "search"
        streamed_args = json.loads(reconstructor.tool_calls[0].function.arguments)
        assert streamed_args == ns_args
105 changes: 105 additions & 0 deletions tests/tool_parsers/test_glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,3 +817,108 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_r
# Boolean should be deserialized as bool
assert args["enabled"] is True
assert isinstance(args["enabled"], bool)


def test_zero_argument_tool_call(glm4_moe_tool_parser, mock_request):
    """Regression: zero-argument tool call crash (PR #32321)."""
    # A call with a name but no <arg_key>/<arg_value> pairs at all.
    model_output = "<tool_call>get_time\n</tool_call>"

    result = glm4_moe_tool_parser.extract_tool_calls(
        model_output, request=mock_request
    )  # type: ignore[arg-type]

    assert result.tools_called
    assert len(result.tool_calls) == 1
    call = result.tool_calls[0]
    assert call.function.name == "get_time"
    # Arguments must deserialize to an empty object, not crash the parser.
    assert json.loads(call.function.arguments) == {}


def test_malformed_tool_call_no_regex_match(glm4_moe_tool_parser, mock_request):
    """Regression: malformed tool_call with no regex match (PR #32321)."""
    # The block contains only whitespace, so no function name can match.
    result = glm4_moe_tool_parser.extract_tool_calls(
        "<tool_call> </tool_call>", request=mock_request
    )  # type: ignore[arg-type]

    # Parser must report "no tools" instead of raising.
    assert result.tools_called is False
    assert result.tool_calls == []


def test_delimiter_preserved_transformers_5x(glm4_moe_tool_parser):
    """Regression: adjust_request sets skip_special_tokens=False (PR #31622)."""
    # Shared tool definition — previously duplicated verbatim in two requests.
    weather_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                },
            },
        }
    ]

    # Tools enabled: special tokens must be preserved so delimiters survive.
    request_with_tools = ChatCompletionRequest(
        model=MODEL,
        messages=[],
        tools=weather_tools,
    )  # type: ignore
    adjusted = glm4_moe_tool_parser.adjust_request(request_with_tools)
    assert adjusted.skip_special_tokens is False

    # tool_choice="none": tools are disabled, so default stripping applies.
    request_no_choice = ChatCompletionRequest(
        model=MODEL,
        messages=[],
        tools=weather_tools,
        tool_choice="none",
    )  # type: ignore
    adjusted_none = glm4_moe_tool_parser.adjust_request(request_no_choice)
    assert adjusted_none.skip_special_tokens is True

    # No tools at all: default stripping applies.
    request_no_tools = ChatCompletionRequest(
        model=MODEL,
        messages=[],
    )  # type: ignore
    adjusted_empty = glm4_moe_tool_parser.adjust_request(request_no_tools)
    assert adjusted_empty.skip_special_tokens is True


def test_unicode_characters_preserved(glm4_moe_tool_parser, mock_request):
    """Regression: Unicode chars must not be escaped to \\uXXXX (PR #30920)."""
    model_output = (
        "<tool_call>send_message\n"
        "<arg_key>greeting</arg_key>\n"
        "<arg_value>你好世界</arg_value>\n"
        "<arg_key>emoji</arg_key>\n"
        "<arg_value>🎉</arg_value>\n"
        "</tool_call>"
    )

    result = glm4_moe_tool_parser.extract_tool_calls(
        model_output, request=mock_request
    )  # type: ignore[arg-type]

    assert result.tools_called
    assert len(result.tool_calls) == 1

    serialized = result.tool_calls[0].function.arguments
    # The raw JSON must carry the characters themselves, not \uXXXX escapes.
    assert "你好世界" in serialized
    assert "🎉" in serialized
    assert "\\u4f60" not in serialized
    decoded = json.loads(serialized)
    assert decoded["greeting"] == "你好世界"
    assert decoded["emoji"] == "🎉"
53 changes: 53 additions & 0 deletions tests/tool_parsers/test_kimi_k2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,59 @@ def test_streaming_tool_call_markers_not_leaked(kimi_k2_tool_parser):
assert "I'll check the weather." in full_content or len(all_content) > 0


def test_native_id_extracted_and_placed_on_tool_call(kimi_k2_tool_parser):
    """Regression: parser extracts native ID onto ToolCall (PR #32768)."""
    model_output = (
        "Checking weather. "
        "<|tool_calls_section_begin|>"
        "<|tool_call_begin|>functions.get_weather:0"
        '<|tool_call_argument_begin|>{"city": "Tokyo"}'
        "<|tool_call_end|>"
        "<|tool_calls_section_end|>"
    )

    result = kimi_k2_tool_parser.extract_tool_calls(model_output, request=None)

    assert result.tools_called
    assert len(result.tool_calls) == 1
    call = result.tool_calls[0]
    # The model-emitted ID, not a generated one, must be the tool call ID.
    assert call.id == "functions.get_weather:0"
    assert call.function.name == "get_weather"
    assert json.loads(call.function.arguments) == {"city": "Tokyo"}


def test_multi_turn_native_id_continuity(kimi_k2_tool_parser, kimi_k2_tokenizer):
    """Regression: native IDs from turn 1 preserved across turns (PR #32768)."""

    def build_output(prefix, call_id, args_json):
        # Assemble a Kimi-K2 tool-call section around a native call ID.
        return (
            f"{prefix}"
            "<|tool_calls_section_begin|>"
            f"<|tool_call_begin|>{call_id}"
            f"<|tool_call_argument_begin|>{args_json}"
            "<|tool_call_end|>"
            "<|tool_calls_section_end|>"
        )

    first_turn = build_output(
        "Let me check. ", "functions.get_weather:0", '{"city": "Beijing"}'
    )
    first_result = kimi_k2_tool_parser.extract_tool_calls(first_turn, request=None)
    assert first_result.tools_called
    assert first_result.tool_calls[0].id == "functions.get_weather:0"

    # A brand-new parser simulates the second conversation turn.
    second_parser = KimiK2ToolParser(kimi_k2_tokenizer)
    second_turn = build_output(
        "Now let me get news. ",
        "functions.get_news:0",
        '{"topic": "weather in Beijing"}',
    )
    second_result = second_parser.extract_tool_calls(second_turn, request=None)
    assert second_result.tools_called
    assert second_result.tool_calls[0].id == "functions.get_news:0"


def test_streaming_multiple_tool_calls_not_leaked(kimi_k2_tool_parser):
"""
Test that MULTIPLE tool calls in streaming mode do not leak into content.
Expand Down
Loading
Loading