diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index 14462da5b9cb..9c5020f59b68 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -11,6 +11,7 @@ import pytest +from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser # --------------------------------------------------------------------------- @@ -21,6 +22,7 @@ # tokenizer object to be truthy (the parser checks `if not self.model_tokenizer`). MOCK_TOKENIZER = MagicMock() MOCK_TOKENIZER.get_vocab.return_value = {} +MOCK_TOKENIZER.tokenize.return_value = [] def make_parser() -> DeepSeekV32ToolParser: @@ -474,3 +476,85 @@ def test_no_emission_while_incomplete(self, parser): deltas = self._stream(parser, partial_text) # Should have no tool call deltas yet assert all(not d.tool_calls for d in deltas) + + +class TestDelimiterPreservation: + """Regression: fast detokenization skipping DSML delimiters (PR #33964).""" + + @pytest.fixture + def parser(self): + return make_parser() + + def test_delimiter_preserved_fast_detokenization(self, parser): + """DSML delimiters as literal text must still be detected.""" + # Delimiters appear as regular text (fast detokenization scenario). + model_output = ( + f"{FC_START}\n" + f'{INV_START}get_weather">\n' + f'{PARAM_START}location" string="true">Tokyo{PARAM_END}\n' + f"{INV_END}\n" + f"{FC_END}" + ) + + # Non-streaming: parser must detect the tool call + result = parser.extract_tool_calls(model_output, None) + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "get_weather" + assert json.loads(result.tool_calls[0].function.arguments) == { + "location": "Tokyo" + } + + assert result.content is None + + # With content prefix + prefixed_output = "Here is the weather: " + model_output + result2 = parser.extract_tool_calls(prefixed_output, None) + assert result2.tools_called + assert result2.content == "Here is the weather: " + + def test_tool_detection_skip_special_tokens_false(self, parser): + """Regression: skip_special_tokens must be False when tools are enabled.""" + # adjust_request must set skip_special_tokens=False + tool = make_tool_param( + "search", + { + "type": "object", + "properties": { + "query": {"type": "string"}, + }, + }, + ) + request = make_request(tools=[tool]) + request.tool_choice = "auto" + adjusted = parser.adjust_request(request) + assert adjusted.skip_special_tokens is False + + full_text = build_tool_call("search", {"query": "vllm documentation"}) + + # Non-streaming extraction + non_stream_result = parser.extract_tool_calls(full_text, request) + assert non_stream_result.tools_called + assert len(non_stream_result.tool_calls) == 1 + assert non_stream_result.tool_calls[0].function.name == "search" + ns_args = json.loads(non_stream_result.tool_calls[0].function.arguments) + assert ns_args == {"query": "vllm documentation"} + + # Streaming extraction: drive the parser line-by-line + chunks: list[str] = [] + remaining = full_text + while remaining: + nl = remaining.find("\n") + if nl == -1: + chunks.append(remaining) + break + chunks.append(remaining[: nl + 1]) + remaining = remaining[nl + 1 :] + + reconstructor = run_tool_extraction_streaming( + parser, chunks, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + assert reconstructor.tool_calls[0].function.name == "search" + streamed_args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert streamed_args == ns_args diff --git a/tests/tool_parsers/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py index 213cc75db7ea..1cfdbc97bbcc 100644 --- a/tests/tool_parsers/test_glm4_moe_tool_parser.py +++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py @@ -817,3 +817,108 @@ def test_extract_tool_calls_numeric_deserialization(glm4_moe_tool_parser, mock_r # Boolean should be deserialized as bool assert args["enabled"] is True assert isinstance(args["enabled"], bool) + + +def test_zero_argument_tool_call(glm4_moe_tool_parser, mock_request): + """Regression: zero-argument tool call crash (PR #32321).""" + model_output = """get_time +""" + + extracted = glm4_moe_tool_parser.extract_tool_calls( + model_output, request=mock_request + ) # type: ignore[arg-type] + + assert extracted.tools_called + assert len(extracted.tool_calls) == 1 + assert extracted.tool_calls[0].function.name == "get_time" + args = json.loads(extracted.tool_calls[0].function.arguments) + assert args == {} + + +def test_malformed_tool_call_no_regex_match(glm4_moe_tool_parser, mock_request): + """Regression: malformed tool_call with no regex match (PR #32321).""" + model_output = " " + + extracted = glm4_moe_tool_parser.extract_tool_calls( + model_output, request=mock_request + ) # type: ignore[arg-type] + + assert extracted.tools_called is False + assert extracted.tool_calls == [] + + +def test_delimiter_preserved_transformers_5x(glm4_moe_tool_parser): + """Regression: adjust_request sets skip_special_tokens=False (PR #31622).""" + # Tools enabled + request_with_tools = ChatCompletionRequest( + model=MODEL, + messages=[], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + } + ], + ) # type: ignore + adjusted = glm4_moe_tool_parser.adjust_request(request_with_tools) + assert adjusted.skip_special_tokens is False + + # tool_choice="none" + request_no_choice = ChatCompletionRequest( + model=MODEL, + messages=[], + tools=[ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + } + ], + tool_choice="none", + ) # type: ignore + adjusted_none = glm4_moe_tool_parser.adjust_request(request_no_choice) + assert adjusted_none.skip_special_tokens is True + + # No tools at all + request_no_tools = ChatCompletionRequest( + model=MODEL, + messages=[], + ) # type: ignore + adjusted_empty = glm4_moe_tool_parser.adjust_request(request_no_tools) + assert adjusted_empty.skip_special_tokens is True + + +def test_unicode_characters_preserved(glm4_moe_tool_parser, mock_request): + """Regression: Unicode chars must not be escaped to \\uXXXX (PR #30920).""" + model_output = """send_message +greeting +你好世界 +emoji +🎉 +""" + + extracted = glm4_moe_tool_parser.extract_tool_calls( + model_output, request=mock_request + ) # type: ignore[arg-type] + + assert extracted.tools_called + assert len(extracted.tool_calls) == 1 + + raw_args = extracted.tool_calls[0].function.arguments + assert "你好世界" in raw_args + assert "🎉" in raw_args + assert "\\u4f60" not in raw_args + parsed_args = json.loads(raw_args) + assert parsed_args["greeting"] == "你好世界" + assert parsed_args["emoji"] == "🎉" diff --git a/tests/tool_parsers/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py index 21b3d5adfde1..09c1c461c100 100644 --- a/tests/tool_parsers/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -872,6 +872,59 @@ def test_streaming_tool_call_markers_not_leaked(kimi_k2_tool_parser): assert "I'll check the weather." in full_content or len(all_content) > 0 +def test_native_id_extracted_and_placed_on_tool_call(kimi_k2_tool_parser): + """Regression: parser extracts native ID onto ToolCall (PR #32768).""" + model_output = ( + "Checking weather. " + "<|tool_calls_section_begin|>" + "<|tool_call_begin|>functions.get_weather:0" + '<|tool_call_argument_begin|>{"city": "Tokyo"}' + "<|tool_call_end|>" + "<|tool_calls_section_end|>" + ) + + result = kimi_k2_tool_parser.extract_tool_calls(model_output, request=None) + assert result.tools_called + assert len(result.tool_calls) == 1 + + tc = result.tool_calls[0] + # Native ID from model output must be used as the tool call ID + assert tc.id == "functions.get_weather:0" + assert tc.function.name == "get_weather" + assert json.loads(tc.function.arguments) == {"city": "Tokyo"} + + +def test_multi_turn_native_id_continuity(kimi_k2_tool_parser, kimi_k2_tokenizer): + """Regression: native IDs from turn 1 preserved across turns (PR #32768).""" + turn1_output = ( + "Let me check. " + "<|tool_calls_section_begin|>" + "<|tool_call_begin|>functions.get_weather:0" + '<|tool_call_argument_begin|>{"city": "Beijing"}' + "<|tool_call_end|>" + "<|tool_calls_section_end|>" + ) + + turn1_result = kimi_k2_tool_parser.extract_tool_calls(turn1_output, request=None) + assert turn1_result.tools_called + assert turn1_result.tool_calls[0].id == "functions.get_weather:0" + + # Fresh parser for turn 2 + turn2_parser = KimiK2ToolParser(kimi_k2_tokenizer) + turn2_output = ( + "Now let me get news. " + "<|tool_calls_section_begin|>" + "<|tool_call_begin|>functions.get_news:0" + '<|tool_call_argument_begin|>{"topic": "weather in Beijing"}' + "<|tool_call_end|>" + "<|tool_calls_section_end|>" + ) + + turn2_result = turn2_parser.extract_tool_calls(turn2_output, request=None) + assert turn2_result.tools_called + assert turn2_result.tool_calls[0].id == "functions.get_news:0" + + def test_streaming_multiple_tool_calls_not_leaked(kimi_k2_tool_parser): """ Test that MULTIPLE tool calls in streaming mode do not leak into content. diff --git a/tests/tool_parsers/test_minimax_m2_tool_parser.py b/tests/tool_parsers/test_minimax_m2_tool_parser.py index d61b6b6201cd..aa396492d262 100644 --- a/tests/tool_parsers/test_minimax_m2_tool_parser.py +++ b/tests/tool_parsers/test_minimax_m2_tool_parser.py @@ -5,6 +5,11 @@ import pytest +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, + FunctionDefinition, +) from vllm.tool_parsers.minimax_m2_tool_parser import ( MinimaxM2ToolParser, ) @@ -442,3 +447,127 @@ def test_header_and_params_in_separate_chunks(self, parser): "city": "Seattle", "days": "5", } + + +def _make_request_with_tools(tools_spec): + """Build a ChatCompletionRequest with tool definitions. + + *tools_spec* is a list of dicts, each with 'name' and 'parameters' keys. + """ + tools = [] + for spec in tools_spec: + tools.append( + ChatCompletionToolsParam( + function=FunctionDefinition( + name=spec["name"], + parameters=spec["parameters"], + ), + ) + ) + return ChatCompletionRequest( + messages=[], + model="test-model", + tools=tools, + ) + + +class TestAnyOfNullableParam: + """Regression: anyOf nullable parameter parsing (PR #32342).""" + + def test_anyof_nullable_param_non_null_value(self, parser): + """A valid non-null string should be preserved, not collapsed to None.""" + request = _make_request_with_tools( + [ + { + "name": "update_profile", + "parameters": { + "type": "object", + "properties": { + "nickname": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + }, + }, + } + ] + ) + + results = _feed( + parser, + [ + '' + 'Alice' + "", + ], + request=request, + ) + tc = _collect_tool_calls(results) + assert len(tc) == 1 + parsed = json.loads(tc[0]["arguments"]) + assert parsed["nickname"] == "Alice" + + def test_anyof_nullable_param_null_value(self, parser): + """An actual null-like value should be returned as None/null.""" + request = _make_request_with_tools( + [ + { + "name": "update_profile", + "parameters": { + "type": "object", + "properties": { + "nickname": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + }, + }, + } + ] + ) + + results = _feed( + parser, + [ + '' + 'null' + "", + ], + request=request, + ) + tc = _collect_tool_calls(results) + assert len(tc) == 1 + parsed = json.loads(tc[0]["arguments"]) + assert parsed["nickname"] is None + + def test_anyof_nullable_param_object_value(self, parser): + """A valid object value in anyOf with null should parse as dict.""" + request = _make_request_with_tools( + [ + { + "name": "update_settings", + "parameters": { + "type": "object", + "properties": { + "config": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + }, + }, + } + ] + ) + + results = _feed( + parser, + [ + '' + '{"theme": "dark", "fontSize": 14}' + "" + "", + ], + request=request, + ) + tc = _collect_tool_calls(results) + assert len(tc) == 1 + parsed = json.loads(tc[0]["arguments"]) + assert parsed["config"] == {"theme": "dark", "fontSize": 14} + assert isinstance(parsed["config"], dict) diff --git a/tests/tool_parsers/test_mistral_tool_parser.py b/tests/tool_parsers/test_mistral_tool_parser.py index bf2fba8a8655..4be5646669be 100644 --- a/tests/tool_parsers/test_mistral_tool_parser.py +++ b/tests/tool_parsers/test_mistral_tool_parser.py @@ -890,3 +890,64 @@ def test_extract_tool_calls_streaming_pre_v11_tokenizer_one_chunk( assert expected_content == "" else: assert delta_message.content == expected_content + + +def test_fast_detokenization_text_detection(mistral_tool_parser): + """Regression: bot_token in text but not token_ids (PR #37209).""" + model_output = '[TOOL_CALLS]add{"a": 1, "b": 2}' + # Token IDs that do NOT contain bot_token_id. + fake_token_ids = list(range(99, 99 + 20)) + + # First delta: pure content, no bot token yet + delta_message_before = mistral_tool_parser.extract_tool_calls_streaming( + previous_text="", + current_text="Hello", + delta_text="Hello", + previous_token_ids=[], + current_token_ids=[99], + delta_token_ids=[99], + request=None, + ) + assert delta_message_before is not None + assert delta_message_before.content == "Hello" + assert not delta_message_before.tool_calls + + # Second delta: bot token in text but NOT in token_ids + delta_message = mistral_tool_parser.extract_tool_calls_streaming( + previous_text="Hello", + current_text="Hello" + model_output, + delta_text=model_output, + previous_token_ids=[99], + current_token_ids=fake_token_ids, + delta_token_ids=fake_token_ids[1:], + request=None, + ) + assert delta_message is not None + assert delta_message.tool_calls is not None + assert len(delta_message.tool_calls) > 0 + assert delta_message.tool_calls[0].function is not None + assert delta_message.tool_calls[0].function.name == "add" + + +def test_fast_detokenization_text_detection_pre_v11( + mistral_pre_v11_tool_parser, +): + """Regression: bot_token text detection for pre-v11 tokenizer (PR #37209).""" + model_output = '[TOOL_CALLS] [{"name": "add", "arguments":{"a": 1, "b": 2}}]' + + fake_token_ids = list(range(99, 99 + 30)) + + delta_message = mistral_pre_v11_tool_parser.extract_tool_calls_streaming( + previous_text="", + current_text=model_output, + delta_text=model_output, + previous_token_ids=[], + current_token_ids=fake_token_ids, + delta_token_ids=fake_token_ids, + request=None, + ) + assert delta_message is not None + assert delta_message.tool_calls is not None + assert len(delta_message.tool_calls) > 0 + assert delta_message.tool_calls[0].function is not None + assert delta_message.tool_calls[0].function.name == "add" diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 3d46f73de612..ae1d09532e55 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -976,3 +976,155 @@ def test_extract_tool_calls_streaming_missing_opening_tag( assert args["city"] == "Dallas" assert args["state"] == "TX" assert args["unit"] == "fahrenheit" + + +def test_malformed_xml_no_gt_delimiter(qwen3_tool_parser, sample_tools): + """Regression: malformed XML without '>' must not crash (PR #36774).""" + model_output = ( + "\n" + "Dallas\n" + "\n" + "" + ) + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) + assert result is not None + assert isinstance(result.tool_calls, list) + assert all(tc is not None for tc in result.tool_calls) + + +def test_none_tool_calls_filtered(qwen3_tool_parser, sample_tools): + """Regression: None tool calls filtered from output (PR #36774).""" + model_output = ( + "\n" + "\n" + "\n" + "\n" + "\n" + "Dallas\n" + "TX\n" + "\n" + "" + ) + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) + assert all(tc is not None for tc in result.tool_calls) + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "get_current_weather" + args = json.loads(result.tool_calls[0].function.arguments) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + + +def test_anyof_parameter_not_double_encoded(qwen3_tool_parser): + """Regression: anyOf parameters must not be double-encoded (PR #36032).""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "update_record", + "parameters": { + "type": "object", + "properties": { + "data": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + + model_output = ( + "\n" + "\n" + '{"key": "value", "count": 42}\n' + "\n" + "" + ) + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + assert isinstance(args["data"], dict) + assert args["data"] == {"key": "value", "count": 42} + + +def test_streaming_multi_param_single_chunk( + qwen3_tool_parser, qwen3_tokenizer, sample_tools +): + """Regression: speculative decode delivering multiple params at once (PR #35615).""" + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + deltas = [ + "", + "\n", + "\n", # triggers json_started -> sends "{" + # This single delta delivers all three parameters at once + "\nDallas\n" + "\n\nTX\n" + "\n\nfahrenheit\n", + "\n", + "\n", + ] + + from tests.tool_parsers.utils import ( + run_tool_extraction_streaming, + ) + + reconstructor = run_tool_extraction_streaming( + qwen3_tool_parser, + deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + +def test_no_double_serialization_string_args(qwen3_tool_parser): + """Regression: string arguments must not be double-serialized (PR #35615).""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "greet", + "parameters": { + "type": "object", + "properties": { + "message": {"type": "string"}, + }, + }, + }, + ) + ] + + model_output = ( + "\n" + "\n" + "hello world\n" + "\n" + "" + ) + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + raw_arguments = result.tool_calls[0].function.arguments + args = json.loads(raw_arguments) + assert args["message"] == "hello world" + assert '\\"hello world\\"' not in raw_arguments diff --git a/tests/tool_parsers/test_step3p5_tool_parser.py b/tests/tool_parsers/test_step3p5_tool_parser.py index b3cb4e20fb9c..9ec740760f71 100644 --- a/tests/tool_parsers/test_step3p5_tool_parser.py +++ b/tests/tool_parsers/test_step3p5_tool_parser.py @@ -1433,3 +1433,140 @@ def test_extract_tool_calls_non_streaming_multiple_tool_calls_no_content_between assert "" not in extracted_tool_calls.content, ( "Second tool call should not be in content" ) + + +def _accumulate_tool_states(delta_messages): + """Accumulate tool call state from a stream of DeltaMessage objects.""" + content = "" + tool_states = {} + for delta_message in delta_messages: + if delta_message.content: + content += delta_message.content + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + if tool_call.type: + tool_states[idx]["type"] = tool_call.type + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + return content, tool_states + + +def test_streaming_mtp_variable_chunks( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Regression: MTP variable-size chunks spanning param boundaries (PR #33690).""" + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + delta_text_chunks = [ + "\n\n\n", + "Dallas\n\n\nTX", + "\n\n\nfahrenheit\n", + "\n\n", + ] + + _, tool_states = _accumulate_tool_states( + stream_delta_message_generator_from_chunks( + step3p5_tool_parser, step3p5_tokenizer, delta_text_chunks, request + ) + ) + + assert len(tool_states) == 1 + + state = tool_states[0] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == "get_current_weather" + + args = json.loads(state["arguments"]) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + +def test_streaming_multi_token_per_step( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Regression: MTP large chunks spanning multiple tool calls (PR #33690).""" + model_output = """ + + +Dallas + + +TX + + +fahrenheit + + + + + + +Orlando + + +FL + + +celsius + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + # MTP-style large chunks + mtp_chunks = [ + ( + "\n\n" + "\nDallas\n\n" + "\nTX" + ), + ( + "\n\n\nfahrenheit\n\n" + "\n\n" + "\n\n" + "\nOrlando\n\n" + "\nFL\n\n" + "\ncelsius\n\n" + "\n" + ), + ] + + _, mtp_tool_states = _accumulate_tool_states( + stream_delta_message_generator_from_chunks( + step3p5_tool_parser, step3p5_tokenizer, mtp_chunks, request + ) + ) + + # Token-by-token streaming (reference) + step3p5_tool_parser_ref = Step3p5ToolParser(step3p5_tokenizer) + _, ref_tool_states = _accumulate_tool_states( + stream_delta_message_generator( + step3p5_tool_parser_ref, step3p5_tokenizer, model_output, request + ) + ) + + assert len(mtp_tool_states) == 2 + assert len(ref_tool_states) == 2 + + # MTP results must match reference + for idx in range(2): + assert mtp_tool_states[idx]["name"] == ref_tool_states[idx]["name"] + mtp_args = json.loads(mtp_tool_states[idx]["arguments"]) + ref_args = json.loads(ref_tool_states[idx]["arguments"]) + assert mtp_args == ref_args