Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions tests/tool_parsers/test_deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pytest

from tests.tool_parsers.utils import run_tool_extraction_streaming
from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser

# ---------------------------------------------------------------------------
Expand All @@ -21,6 +22,7 @@
# tokenizer object to be truthy (the parser checks `if not self.model_tokenizer`).
MOCK_TOKENIZER = MagicMock()
MOCK_TOKENIZER.get_vocab.return_value = {}
MOCK_TOKENIZER.tokenize.return_value = []


def make_parser() -> DeepSeekV32ToolParser:
Expand Down Expand Up @@ -474,3 +476,85 @@ def test_no_emission_while_incomplete(self, parser):
deltas = self._stream(parser, partial_text)
# Should have no tool call deltas yet
assert all(not d.tool_calls for d in deltas)


class TestDelimiterPreservation:
    """Regression: fast detokenization skipping DSML delimiters (PR #33964)."""

    @pytest.fixture
    def parser(self):
        """Provide a fresh parser instance for each test."""
        return make_parser()

    def test_delimiter_preserved_fast_detokenization(self, parser):
        """DSML delimiters as literal text must still be detected."""
        # Delimiters appear as regular text (fast detokenization scenario).
        model_output = (
            f"{FC_START}\n"
            f'{INV_START}get_weather">\n'
            f'{PARAM_START}location" string="true">Tokyo{PARAM_END}\n'
            f"{INV_END}\n"
            f"{FC_END}"
        )

        # Non-streaming: parser must detect the tool call.
        result = parser.extract_tool_calls(model_output, None)
        assert result.tools_called
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].function.name == "get_weather"
        assert json.loads(result.tool_calls[0].function.arguments) == {
            "location": "Tokyo"
        }
        # No text precedes the tool call, so content must be None (not "").
        assert result.content is None

        # With a content prefix: the prefix must survive as content.
        prefixed_output = "Here is the weather: " + model_output
        result2 = parser.extract_tool_calls(prefixed_output, None)
        assert result2.tools_called
        assert result2.content == "Here is the weather: "

    def test_tool_detection_skip_special_tokens_false(self, parser):
        """Regression: skip_special_tokens must be False when tools are enabled."""
        # adjust_request must set skip_special_tokens=False so the DSML
        # delimiter tokens reach the parser instead of being stripped.
        tool = make_tool_param(
            "search",
            {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
            },
        )
        request = make_request(tools=[tool])
        request.tool_choice = "auto"
        adjusted = parser.adjust_request(request)
        assert adjusted.skip_special_tokens is False

        full_text = build_tool_call("search", {"query": "vllm documentation"})

        # Non-streaming extraction.
        non_stream_result = parser.extract_tool_calls(full_text, request)
        assert non_stream_result.tools_called
        assert len(non_stream_result.tool_calls) == 1
        assert non_stream_result.tool_calls[0].function.name == "search"
        ns_args = json.loads(non_stream_result.tool_calls[0].function.arguments)
        assert ns_args == {"query": "vllm documentation"}

        # Streaming extraction: drive the parser line-by-line.
        # splitlines(keepends=True) yields each line with its trailing "\n"
        # intact, replacing the original manual find("\n") slicing loop.
        chunks: list[str] = full_text.splitlines(keepends=True)

        reconstructor = run_tool_extraction_streaming(
            parser, chunks, request, assert_one_tool_per_delta=False
        )
        assert len(reconstructor.tool_calls) == 1
        assert reconstructor.tool_calls[0].function.name == "search"
        streamed_args = json.loads(reconstructor.tool_calls[0].function.arguments)
        assert streamed_args == ns_args
105 changes: 105 additions & 0 deletions tests/tool_parsers/test_glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,3 +817,108 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_r
# Boolean should be deserialized as bool
assert args["enabled"] is True
assert isinstance(args["enabled"], bool)


def test_zero_argument_tool_call(glm4_moe_tool_parser, mock_request):
    """Regression: zero-argument tool call crash (PR #32321)."""
    # A call with a name but no <arg_key>/<arg_value> pairs at all.
    model_output = "<tool_call>get_time\n</tool_call>"

    result = glm4_moe_tool_parser.extract_tool_calls(
        model_output, request=mock_request
    )  # type: ignore[arg-type]

    assert result.tools_called
    assert len(result.tool_calls) == 1
    call = result.tool_calls[0]
    assert call.function.name == "get_time"
    # Arguments must deserialize to an empty object, not crash the parser.
    assert json.loads(call.function.arguments) == {}


def test_malformed_tool_call_no_regex_match(glm4_moe_tool_parser, mock_request):
    """Regression: malformed tool_call with no regex match (PR #32321)."""
    # The block contains only whitespace, so no function name can match.
    result = glm4_moe_tool_parser.extract_tool_calls(
        "<tool_call> </tool_call>", request=mock_request
    )  # type: ignore[arg-type]

    # Parser must report "no tools" instead of raising.
    assert result.tools_called is False
    assert result.tool_calls == []


def test_delimiter_preserved_transformers_5x(glm4_moe_tool_parser):
    """Regression: adjust_request sets skip_special_tokens=False (PR #31622)."""
    # Shared tool definition — previously duplicated verbatim in two requests.
    weather_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                },
            },
        }
    ]

    # Tools enabled: special tokens must be preserved so delimiters survive.
    request_with_tools = ChatCompletionRequest(
        model=MODEL,
        messages=[],
        tools=weather_tools,
    )  # type: ignore
    adjusted = glm4_moe_tool_parser.adjust_request(request_with_tools)
    assert adjusted.skip_special_tokens is False

    # tool_choice="none": tools are disabled, so default stripping applies.
    request_no_choice = ChatCompletionRequest(
        model=MODEL,
        messages=[],
        tools=weather_tools,
        tool_choice="none",
    )  # type: ignore
    adjusted_none = glm4_moe_tool_parser.adjust_request(request_no_choice)
    assert adjusted_none.skip_special_tokens is True

    # No tools at all: default stripping applies.
    request_no_tools = ChatCompletionRequest(
        model=MODEL,
        messages=[],
    )  # type: ignore
    adjusted_empty = glm4_moe_tool_parser.adjust_request(request_no_tools)
    assert adjusted_empty.skip_special_tokens is True


def test_unicode_characters_preserved(glm4_moe_tool_parser, mock_request):
    """Regression: Unicode chars must not be escaped to \\uXXXX (PR #30920)."""
    model_output = (
        "<tool_call>send_message\n"
        "<arg_key>greeting</arg_key>\n"
        "<arg_value>你好世界</arg_value>\n"
        "<arg_key>emoji</arg_key>\n"
        "<arg_value>🎉</arg_value>\n"
        "</tool_call>"
    )

    result = glm4_moe_tool_parser.extract_tool_calls(
        model_output, request=mock_request
    )  # type: ignore[arg-type]

    assert result.tools_called
    assert len(result.tool_calls) == 1

    serialized = result.tool_calls[0].function.arguments
    # The raw JSON must carry the characters themselves, not \uXXXX escapes.
    assert "你好世界" in serialized
    assert "🎉" in serialized
    assert "\\u4f60" not in serialized
    decoded = json.loads(serialized)
    assert decoded["greeting"] == "你好世界"
    assert decoded["emoji"] == "🎉"
53 changes: 53 additions & 0 deletions tests/tool_parsers/test_kimi_k2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,59 @@ def test_streaming_tool_call_markers_not_leaked(kimi_k2_tool_parser):
assert "I'll check the weather." in full_content or len(all_content) > 0


def test_native_id_extracted_and_placed_on_tool_call(kimi_k2_tool_parser):
    """Regression: parser extracts native ID onto ToolCall (PR #32768)."""
    model_output = (
        "Checking weather. "
        "<|tool_calls_section_begin|>"
        "<|tool_call_begin|>functions.get_weather:0"
        '<|tool_call_argument_begin|>{"city": "Tokyo"}'
        "<|tool_call_end|>"
        "<|tool_calls_section_end|>"
    )

    result = kimi_k2_tool_parser.extract_tool_calls(model_output, request=None)

    assert result.tools_called
    assert len(result.tool_calls) == 1
    call = result.tool_calls[0]
    # The model-emitted ID, not a generated one, must be the tool call ID.
    assert call.id == "functions.get_weather:0"
    assert call.function.name == "get_weather"
    assert json.loads(call.function.arguments) == {"city": "Tokyo"}


def test_multi_turn_native_id_continuity(kimi_k2_tool_parser, kimi_k2_tokenizer):
    """Regression: native IDs from turn 1 preserved across turns (PR #32768)."""

    def build_output(prefix, call_id, args_json):
        # Assemble a Kimi-K2 tool-call section around a native call ID.
        return (
            f"{prefix}"
            "<|tool_calls_section_begin|>"
            f"<|tool_call_begin|>{call_id}"
            f"<|tool_call_argument_begin|>{args_json}"
            "<|tool_call_end|>"
            "<|tool_calls_section_end|>"
        )

    first_turn = build_output(
        "Let me check. ", "functions.get_weather:0", '{"city": "Beijing"}'
    )
    first_result = kimi_k2_tool_parser.extract_tool_calls(first_turn, request=None)
    assert first_result.tools_called
    assert first_result.tool_calls[0].id == "functions.get_weather:0"

    # A brand-new parser simulates the second conversation turn.
    second_parser = KimiK2ToolParser(kimi_k2_tokenizer)
    second_turn = build_output(
        "Now let me get news. ",
        "functions.get_news:0",
        '{"topic": "weather in Beijing"}',
    )
    second_result = second_parser.extract_tool_calls(second_turn, request=None)
    assert second_result.tools_called
    assert second_result.tool_calls[0].id == "functions.get_news:0"


def test_streaming_multiple_tool_calls_not_leaked(kimi_k2_tool_parser):
"""
Test that MULTIPLE tool calls in streaming mode do not leak into content.
Expand Down
Loading
Loading