From 7fc99edcdd2b0cd6d9a24dd5ac4254212cbf14d7 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 09:42:11 +0200 Subject: [PATCH 01/21] fix split tag detection in tool parser : qwen3_coder (streaming mode) Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3coder_tool_parser.py | 47 +++++++++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 27 +++++++---- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index c62e95830243..db0d1050e36f 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1146,3 +1146,50 @@ def test_no_double_serialization_string_args(qwen3_tool_parser): args = json.loads(raw_arguments) assert args["message"] == "hello world" assert '\\"hello world\\"' not in raw_arguments + + +def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): + """ + This highlights the need to use current_text instead of delta_text. + """ + request = ChatCompletionRequest(model=MODEL, messages=[]) + + # Iteration 1: "" + prev_text_2 = curr_text_1 + delta_text_2 = "_call>" + curr_text_2 = prev_text_2 + delta_text_2 + + msg2 = qwen3_tool_parser.extract_tool_calls_streaming( + previous_text=prev_text_2, + current_text=curr_text_2, + delta_text=delta_text_2, + previous_token_ids=[1, 2, 3, 4], + current_token_ids=[1, 2, 3, 4, 5], + delta_token_ids=[5], + request=request + ) + + # The assertion must verify that the is_tool_call_started variable correctly switches to True + assert qwen3_tool_parser.is_tool_call_started is True, "is_tool_call_started should be True when '' is completed in current_text." + + # and that the function does not return fragments of the tag in DeltaMessage(content=...) 
+ if msg1 and msg1.content: + assert "" not in msg2.content + diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 7b089ceffbc0..a3146693d791 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -25,7 +25,7 @@ Tool, ToolParser, ) -from vllm.tool_parsers.utils import find_tool_properties +from vllm.tool_parsers.utils import find_tool_properties, partial_tag_overlap logger = init_logger(__name__) @@ -109,6 +109,7 @@ def _reset_streaming_state(self): # Store accumulated parameters for type conversion self.accumulated_params = {} self.streaming_request = None + self._sent_content_idx = 0 def _convert_param_value( self, param_value: str, param_name: str, param_config: dict, func_name: str @@ -391,29 +392,39 @@ def extract_tool_calls_streaming( # Handle normal content before tool calls if not self.is_tool_call_started: # Check if tool call is starting + tool_starts_count = current_text.count(self.tool_call_start_token) if ( self.tool_call_start_token_id in delta_token_ids - or self.tool_call_start_token in delta_text + or tool_starts_count > self.current_tool_index ): self.is_tool_call_started = True # Return any content before the tool call - if self.tool_call_start_token in delta_text: - content_before = delta_text[ - : delta_text.index(self.tool_call_start_token) - ] + last_start = current_text.rfind(self.tool_call_start_token) + if last_start > self._sent_content_idx: + content_before = current_text[self._sent_content_idx:last_start] + self._sent_content_idx = last_start if content_before: return DeltaMessage(content=content_before) return None else: + overlap = partial_tag_overlap(current_text, self.tool_call_start_token) + sendable_idx = len(current_text) - overlap + # Check if we're between tool calls - skip whitespace if ( current_text.rstrip().endswith(self.tool_call_end_token) and delta_text.strip() == "" ): # We just ended a tool call, skip 
whitespace + self._sent_content_idx = len(current_text) return None - # Normal content, no tool call - return DeltaMessage(content=delta_text) + + if sendable_idx > self._sent_content_idx: + content = current_text[self._sent_content_idx:sendable_idx] + self._sent_content_idx = sendable_idx + if content: + return DeltaMessage(content=content) + return None # Check if we're between tool calls (waiting for next one) # Count tool calls we've seen vs processed From f1785b38968d0bfec89b84d6dff0e7806338b779 Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:01:13 +0200 Subject: [PATCH 02/21] Update vllm/tool_parsers/qwen3coder_tool_parser.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> --- vllm/tool_parsers/qwen3coder_tool_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index a3146693d791..367260565efa 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -110,6 +110,7 @@ def _reset_streaming_state(self): self.accumulated_params = {} self.streaming_request = None self._sent_content_idx = 0 + self.current_tool_index = 0 def _convert_param_value( self, param_value: str, param_name: str, param_config: dict, func_name: str From 68694d73bde143a681bfaddb390c01e5602c4a98 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 10:03:50 +0200 Subject: [PATCH 03/21] Fix delayed text emission between tool calls in Qwen3XML Signed-off-by: CNE Pierre FICHEPOIL --- .../tool_parsers/test_qwen3xml_tool_parser.py | 64 ++++++++++++++++++- vllm/tool_parsers/qwen3xml_tool_parser.py | 9 ++- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py 
b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 1ea9a1d65c04..8fc816582d1e 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -8,7 +8,9 @@ ToolParserTestConfig, ToolParserTests, ) - +from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser +from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from transformers import AutoTokenizer class TestQwen3xmlToolParser(ToolParserTests): @pytest.fixture @@ -70,3 +72,63 @@ def test_config(self) -> ToolParserTestConfig: }, supports_typed_arguments=False, ) + + @pytest.mark.asyncio + async def test_qwen3xml_async_streaming_free_text(self): + + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct") + parser = Qwen3XMLToolParser(tokenizer) + + # 1. First tool call + # 2. Free text + # 3. Second tool call + + text_to_stream = ( + "\n\nParis\n\n" + "\nNext, I will check the weather for London:\n" + "\n\nLondon\n\n" + ) + + request = ChatCompletionRequest(messages=[], model="test") + + emitted_messages = [] + previous_text = "" + previous_tokens = [] + token_ids = tokenizer.encode(text_to_stream, add_special_tokens=False) + + for i in range(1, len(token_ids) + 1): + current_token_ids = token_ids[:i] + current_text = tokenizer.decode(current_token_ids) + delta_text = current_text[len(previous_text):] + token_delta = current_token_ids[len(previous_tokens):] + + delta = parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_tokens, + current_token_ids, + token_delta, + request + ) + if delta is not None: + emitted_messages.append(delta) + + previous_text = current_text + previous_tokens = current_token_ids + + # Check that the free text is emitted BEFORE London's arguments are emitted. 
+ found_early = False + for i, msg in enumerate(emitted_messages): + if msg.content and "Next, I will check the weather for London" in msg.content: + # Check if we already saw "London" in any previous or current tool call arguments + is_london_emitted = any( + tc.function.arguments and "London" in tc.function.arguments + for m in emitted_messages[:i+1] if m.tool_calls + for tc in m.tool_calls + ) + if not is_london_emitted: + found_early = True + break + + assert found_early, "Free text between tool calls should be emitted as soon as the second tool call starts, not delayed." diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index 8ee10dcbc9e6..b176cb542ffc 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -251,16 +251,15 @@ def _process_complete_xml_elements(self) -> bool: # Found complete XML element, process it try: preprocessed_element = self._preprocess_xml_chunk(element) - # Check if this is the first tool_call start + # Check if a new tool_call starts and we have buffered text content if ( ( preprocessed_element.strip().startswith("") or preprocessed_element.strip().startswith(" ToolParserTestConfig: supports_typed_arguments=False, ) - @pytest.mark.asyncio - async def test_qwen3xml_async_streaming_free_text(self): - - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct") - parser = Qwen3XMLToolParser(tokenizer) + def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): + parser = Qwen3XMLToolParser(qwen3_tokenizer) # 1. First tool call # 2. Free text # 3. 
Second tool call - text_to_stream = ( "\n\nParis\n\n" "\nNext, I will check the weather for London:\n" @@ -90,15 +92,14 @@ async def test_qwen3xml_async_streaming_free_text(self): ) request = ChatCompletionRequest(messages=[], model="test") - emitted_messages = [] previous_text = "" previous_tokens = [] - token_ids = tokenizer.encode(text_to_stream, add_special_tokens=False) + token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False) for i in range(1, len(token_ids) + 1): current_token_ids = token_ids[:i] - current_text = tokenizer.decode(current_token_ids) + current_text = qwen3_tokenizer.decode(current_token_ids) delta_text = current_text[len(previous_text):] token_delta = current_token_ids[len(previous_tokens):] From 3ffd7691b0cfa0ac1a78fc9565a9f2187f7a3589 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 11:11:01 +0200 Subject: [PATCH 07/21] gemini is right, ensure last content message is flushed to client after tool calls Signed-off-by: CNE Pierre FICHEPOIL --- .../tool_parsers/test_qwen3xml_tool_parser.py | 47 ++++++++++++++++++- vllm/tool_parsers/qwen3xml_tool_parser.py | 4 +- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 9001e4d37c79..21095f8e86af 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -120,8 +120,12 @@ def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): # Check that the free text is emitted BEFORE London's arguments are emitted. 
found_early = False + accumulated_content = "" for i, msg in enumerate(emitted_messages): - if msg.content and "Next, I will check the weather for London" in msg.content: + if msg.content: + accumulated_content += msg.content + + if "Next, I will check the weather for London" in accumulated_content: # Check if we already saw "London" in any previous or current tool call arguments is_london_emitted = any( tc.function.arguments and "London" in tc.function.arguments @@ -133,3 +137,44 @@ def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): break assert found_early, "Free text between tool calls should be emitted as soon as the second tool call starts, not delayed." + + def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): + parser = Qwen3XMLToolParser(qwen3_tokenizer) + + # Tool call followed by free text + text_to_stream = ( + "\n\nParis\n\n" + "\nI hope this helps!" + ) + + request = ChatCompletionRequest(messages=[], model="test") + emitted_messages = [] + previous_text = "" + previous_tokens = [] + token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False) + + for i in range(1, len(token_ids) + 1): + current_token_ids = token_ids[:i] + current_text = qwen3_tokenizer.decode(current_token_ids) + delta_text = current_text[len(previous_text):] + token_delta = current_token_ids[len(previous_tokens):] + + delta = parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_tokens, + current_token_ids, + token_delta, + request + ) + if delta is not None: + emitted_messages.append(delta) + + previous_text = current_text + previous_tokens = current_token_ids + + # Aggregate all emitted content + all_content = "".join([m.content for m in emitted_messages if m.content]) + + assert "I hope this helps!" in all_content, "Free text after the last tool call should be emitted." 
diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index b176cb542ffc..aae1b670d049 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -173,8 +173,8 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: return result_delta else: # No complete elements, check if there's unoutput text content - if self.text_content_buffer and self.tool_call_index == 0: - # Has text content but no tool_call yet, output text content + if self.text_content_buffer: + # Output buffered text content text_delta = DeltaMessage(content=self.text_content_buffer) self._emit_delta(text_delta) # Clear buffer to avoid duplicate output From 721b10e7d802987b5df9b4d873ad4e9da5d9bbd6 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 15:24:00 +0200 Subject: [PATCH 08/21] fixed tests Signed-off-by: CNE Pierre FICHEPOIL --- tests/tool_parsers/test_qwen3coder_tool_parser.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index c62e95830243..0bb191fba45d 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -322,10 +322,9 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized): ), ) ), - ], - None, - ), - ( + ], + "\n", + ), ( """Let me calculate that area for you. @@ -589,10 +588,9 @@ def test_extract_tool_calls_type_conversion(qwen3_tokenizer): ), ) ), - ], - None, - ), - # Added tool_with_typed_params test case + ], + "\n", + ), # Added tool_with_typed_params test case ( """Let me calculate that area for you. 
From 77d9e95de68c84254c7df3de05d161de3d401463 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 16:11:25 +0200 Subject: [PATCH 09/21] Fixed edge case streamed tool call started in delta1 (tool call start + function name only) + delta2 (params + tool call end) was dropping params Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3coder_tool_parser.py | 107 ++++++++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 80 +++++-------- 2 files changed, 138 insertions(+), 49 deletions(-) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index db0d1050e36f..d761ab6580cd 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1193,3 +1193,110 @@ def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): if msg2 and msg2.content: assert "_call>" not in msg2.content + + +def test_extract_tool_calls_streaming_speculative_decode_loss(qwen3_tool_parser): + """ + if json_started=False, and the delta contains the parameters AND the end of the tool call, + the parser should not just return '{' and lose the parameters. + """ + + request = ChatCompletionRequest(model="test", messages=[]) + + text1 = "\n\n" + qwen3_tool_parser.extract_tool_calls_streaming( + "", text1, text1, [], [1], [1], request + ) + + # Delta 2 has the rest of the tool call + delta_str = "\nParis\n\n\n" + text2 = text1 + delta_str + delta2 = qwen3_tool_parser.extract_tool_calls_streaming( + text1, text2, delta_str, [1], [1,2], [2], request + ) + + # The parameters should be in delta2! + assert delta2 is not None + assert delta2.tool_calls is not None + assert len(delta2.tool_calls) == 1 + args = delta2.tool_calls[0].function.arguments + assert "Paris" in args, f"Arguments lost! 
Got: {args}" + + +def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): + """ + Test streaming with various chunk sizes using the exact template from Qwen 3.6. + """ + + request = ChatCompletionRequest(model="test", messages=[]) + + # Exact template format from Qwen 3.6 + template_text = """ + + +value_1 + + +This is the value for the second parameter +that can span +multiple lines + + +""" + + # Test with different chunk sizes to simulate different network/speculative decoding behaviors + for chunk_size in [1, 3, 15, len(template_text)]: + # Reset parser state + qwen3_tool_parser._reset_streaming_state() + + tool_states = {} + + # Simulate custom streaming to precisely control chunk sizes + current_text = "" + previous_text = "" + ptr = 0 + + while ptr < len(template_text): + delta = template_text[ptr:ptr+chunk_size] + previous_text = current_text + current_text += delta + ptr += chunk_size + + delta_message = qwen3_tool_parser.extract_tool_calls_streaming( + previous_text=previous_text, + current_text=current_text, + delta_text=delta, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=request + ) + + if delta_message and delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + if tool_call.type: + tool_states[idx]["type"] = tool_call.type + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + assert 0 in tool_states + assert tool_states[0]["name"] == "example_function_name" + + import json + args = json.loads(tool_states[0]["arguments"]) + assert args["example_parameter_1"] == "value_1" + assert args["example_parameter_2"] == 
"This is the value for the second parameter\nthat can span\nmultiple lines" diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index df80367674d8..7f75f2451554 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -405,6 +405,7 @@ def extract_tool_calls_streaming( # Continue processing next tool return None + content_message = None # Handle normal content before tool calls if not self.is_tool_call_started: # Check if tool call is starting @@ -420,8 +421,7 @@ def extract_tool_calls_streaming( content_before = current_text[self._sent_content_idx:last_start] self._sent_content_idx = last_start if content_before: - return DeltaMessage(content=content_before) - return None + content_message = DeltaMessage(content=content_before) else: overlap = partial_tag_overlap(current_text, self.tool_call_start_token) sendable_idx = len(current_text) - overlap @@ -446,8 +446,7 @@ def extract_tool_calls_streaming( # Count tool calls we've seen vs processed tool_starts_count = current_text.count(self.tool_call_start_token) if self.current_tool_index >= tool_starts_count: - # We're past all tool calls, shouldn't be here - return None + return content_message # We're in a tool call, find the current tool call portion # Need to find the correct tool call based on current_tool_index @@ -461,8 +460,7 @@ def extract_tool_calls_streaming( idx += len(self.tool_call_start_token) if self.current_tool_index >= len(tool_start_positions): - # No more tool calls to process yet - return None + return content_message tool_start_idx = tool_start_positions[self.current_tool_index] # Find where this tool call ends (or current position if not ended yet) @@ -474,6 +472,7 @@ def extract_tool_calls_streaming( tool_start_idx : tool_end_idx + len(self.tool_call_end_token) ] + tool_call_fragments = None # Looking for function header if not self.header_sent: if self.tool_call_prefix in tool_text: @@ 
-506,21 +505,16 @@ def extract_tool_calls_streaming( # accesses streamed_args_for_tool[index]. self.streamed_args_for_tool.append("") - # Send header with function info - return DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.current_tool_index, - id=self.current_tool_id, - function=DeltaFunctionCall( - name=self.current_function_name, arguments="" - ), - type="function", - ) - ] + tool_call_fragments = DeltaToolCall( + index=self.current_tool_index, + id=self.current_tool_id, + function=DeltaFunctionCall(name=self.current_function_name, arguments=""), + type="function", ) - return None + if not self.header_sent: + return content_message + arguments_to_emit = "" # We've sent header, now handle function body if self.in_function: # Always send opening brace first, regardless of whether @@ -531,16 +525,8 @@ def extract_tool_calls_streaming( if not self.json_started: self.json_started = True self.streamed_args_for_tool[self.current_tool_index] += "{" - return DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.current_tool_index, - function=DeltaFunctionCall(arguments="{"), - ) - ] - ) + arguments_to_emit += "{" - # Find all parameter start positions in current tool_text param_starts = [] search_idx = 0 while True: @@ -641,15 +627,7 @@ def extract_tool_calls_streaming( self.current_tool_index, len(self.streamed_args_for_tool), ) - - return DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.current_tool_index, - function=DeltaFunctionCall(arguments=combined), - ) - ] - ) + arguments_to_emit += combined # Check for function end AFTER processing parameters. 
# This ordering is critical: with speculative decoding a @@ -691,20 +669,24 @@ def extract_tool_calls_streaming( self.current_tool_index, len(self.streamed_args_for_tool), ) - - result = DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.current_tool_index, - function=DeltaFunctionCall(arguments="}"), - ) - ] - ) - + arguments_to_emit += "}" self.in_function = False self.json_closed = True self.accumulated_params = {} - return result + if tool_call_fragments or arguments_to_emit: + if not tool_call_fragments: + tool_call_fragments = DeltaToolCall( + index=self.current_tool_index, + function=DeltaFunctionCall(arguments=arguments_to_emit), + ) + else: + tool_call_fragments.function.arguments += arguments_to_emit + + if content_message: + content_message.tool_calls = [tool_call_fragments] + return content_message + else: + return DeltaMessage(tool_calls=[tool_call_fragments]) - return None + return content_message From a4ef7d01724d0eb384e72235156abe1fb57180a8 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 17:27:00 +0200 Subject: [PATCH 10/21] fixed and re-enabled broken tests Signed-off-by: CNE Pierre FICHEPOIL --- .../tool_parsers/test_qwen3xml_tool_parser.py | 14 -------- vllm/tool_parsers/qwen3xml_tool_parser.py | 36 +++++++++++-------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 21095f8e86af..7291cef2a18e 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -62,20 +62,6 @@ def test_config(self) -> ToolParserTestConfig: single_tool_call_expected_args={"city": "Tokyo"}, parallel_tool_calls_count=2, parallel_tool_calls_names=["get_weather", "get_time"], - # xfail markers - Qwen3XML has systematic streaming issues - xfail_streaming={ - "test_single_tool_call_simple_args": ( - "Qwen3XML streaming has systematic issues" - ), - 
"test_parallel_tool_calls": "Qwen3XML streaming has systematic issues", - "test_various_data_types": "Qwen3XML streaming has systematic issues", - "test_empty_arguments": "Qwen3XML streaming has systematic issues", - "test_surrounding_text": "Qwen3XML streaming has systematic issues", - "test_escaped_strings": "Qwen3XML streaming has systematic issues", - "test_streaming_reconstruction": ( - "Qwen3XML streaming reconstruction has known issues" - ), - }, supports_typed_arguments=False, ) diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index aae1b670d049..cc06fce32ec3 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -56,6 +56,7 @@ def reset_streaming_state(self): # state for streaming self.tool_call_index = 0 self.current_call_id = None + self.id_emitted = False self.last_completed_call_id = None self.current_function_name = None self.current_function_open = False @@ -285,7 +286,7 @@ def _process_complete_xml_elements(self) -> bool: tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments=""), ) @@ -440,10 +441,10 @@ def _merge_new_deltas_to_single_response(self, initial_count: int) -> DeltaMessa if delta.tool_calls: # For tool_calls, we need to intelligently merge arguments for tool_call in delta.tool_calls: - # Find if there's already a tool_call with the same call_id + # Find if there's already a tool_call with the same index existing_call = None for existing in merged_tool_calls: - if existing.id == tool_call.id: + if existing.index == tool_call.index: existing_call = existing break @@ -592,6 +593,12 @@ def _emit_delta(self, delta: DeltaMessage): """Emit Delta response (streaming output)""" self.deltas.append(delta) + def _get_call_id_for_delta(self) -> str | None: + if not self.id_emitted: + self.id_emitted = True + return 
self.current_call_id + return None + def _auto_close_open_parameter_if_needed(self, incoming_tag: str | None = None): """Before starting to process new elements, if there are unclosed tags from before, @@ -647,7 +654,7 @@ def _start_element(self, name: str, attrs: dict[str, str]): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall( name=function_name, arguments="" @@ -678,7 +685,7 @@ def _start_element(self, name: str, attrs: dict[str, str]): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall( name=None, arguments=json_start @@ -696,7 +703,7 @@ def _start_element(self, name: str, attrs: dict[str, str]): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall( name=None, arguments=json_continue @@ -739,7 +746,7 @@ def _char_data(self, data: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments='"'), ) @@ -774,7 +781,7 @@ def _char_data(self, data: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments=delta_data), ) @@ -831,7 +838,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall( name=None, arguments=output_arguments @@ -867,7 +874,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), 
type="function", function=DeltaFunctionCall(name=None, arguments='""'), ) @@ -880,7 +887,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments='"'), ) @@ -903,7 +910,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments="}"), ) @@ -916,7 +923,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments="{}"), ) @@ -939,7 +946,7 @@ def _end_element(self, name: str): tool_calls=[ DeltaToolCall( index=self.tool_call_index - 1, - id=self.current_call_id, + id=self._get_call_id_for_delta(), type="function", function=DeltaFunctionCall(name=None, arguments=""), ) @@ -1125,6 +1132,7 @@ def _reset_xml_parser_after_tool_call(self): if self.current_call_id: self.last_completed_call_id = self.current_call_id self.current_call_id = None + self.id_emitted = False self.current_function_name = None self.current_function_open = False self.parameters = {} From 74783cd0e169e0cbfee307a22514fa91046ed49a Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Fri, 24 Apr 2026 19:29:02 +0200 Subject: [PATCH 11/21] Fix StreamingXMLToolCallParser: anyOf type detection and double-close fallback Signed-off-by: CNE Pierre FICHEPOIL --- .../tool_parsers/test_qwen3xml_tool_parser.py | 197 +++++++++++++++++- vllm/tool_parsers/qwen3xml_tool_parser.py | 72 +++---- 2 files changed, 219 insertions(+), 50 deletions(-) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 7291cef2a18e..4ae1961bda5d 100644 --- 
a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import json import pytest @@ -8,9 +9,14 @@ ToolParserTestConfig, ToolParserTests, ) -from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser -from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) from vllm.tokenizers import get_tokenizer +from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser +from tests.tool_parsers.utils import run_tool_extraction_streaming + MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" @@ -164,3 +170,190 @@ def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): all_content = "".join([m.content for m in emitted_messages if m.content]) assert "I hope this helps!" in all_content, "Free text after the last tool call should be emitted." + + +def test_qwen36_anyof_parameter_xml_not_double_encoded(qwen3_tokenizer): + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "update_record", + "parameters": { + "type": "object", + "properties": { + # anyOf schema — no top-level "type" key + "data": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + '{"key": "value", "count": 42}\n' + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + + assert isinstance(args["data"], dict), ( + f"anyOf parameter was double-encoded: data={args['data']!r}. 
" + "StreamingXMLToolCallParser._get_param_type ignores anyOf schemas." + ) + assert args["data"] == {"key": "value", "count": 42} + + +def test_qwen36_anyof_parameter_xml_streaming_not_double_encoded(qwen3_tokenizer): + + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "update_record", + "parameters": { + "type": "object", + "properties": { + "data": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + # Deltas are pre-formed XML element chunks (one element per delta), + # which is the same pattern used by speculative decoding. + deltas = [ + "", + "\n", + '\n{"key": "value", "count": 42}', + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert isinstance(args["data"], dict), ( + f"anyOf parameter was double-encoded in streaming: data={args['data']!r}" + ) + + +def test_qwen36_xml_streaming_double_close_brace(qwen3_tokenizer): + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + deltas = [ + "", + "\n", + "\n\nDallas\n", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + full_args = reconstructor.tool_calls[0].function.arguments + + assert not full_args.endswith("}}"), ( + f"XML streaming parser emitted double closing brace: {full_args!r}. 
" + "parse_single_streaming_chunks fallback called _end_element('function') twice." + ) + args = json.loads(full_args) + assert args == {"city": "Dallas"} + + +def test_xml_streaming_parallel_tool_calls_preformed_chunks(qwen3_tokenizer): + """ + Note: in normal token-by-token streaming this rarely triggers because + the tokenizer splits XML tags across multiple tokens. It CAN trigger with + speculative decoding multi-token flushes. + """ + + + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + deltas = [ + "", + "\n", + "\nParis", + "\n", + "\n", + "", + "\n", + "\nLondon", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 2, ( + f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}" + ) + + args0 = json.loads(reconstructor.tool_calls[0].function.arguments) + args1 = json.loads(reconstructor.tool_calls[1].function.arguments) + + assert reconstructor.tool_calls[0].function.name == "get_weather" + assert reconstructor.tool_calls[1].function.name == "get_weather" + assert args0 == {"city": "Paris"}, f"First call args wrong: {args0!r}" + assert args1 == {"city": "London"}, f"Second call args wrong: {args1!r}" diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index cc06fce32ec3..f22f30288f98 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -113,58 +113,25 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: if ( self.current_call_id is not None and self.function_end_token in xml_chunk + and self.current_function_open ): - # 
- Added '}' (non-empty parameter ending) - # - Added '{}' (empty parameter function) - has_function_close = any( - ( - td.tool_calls - and any( - ( - tc.function - and tc.id == self.current_call_id - and isinstance(tc.function.arguments, str) - and (tc.function.arguments in ("}", "{}")) - ) - for tc in td.tool_calls - ) - ) - for td in new_deltas - ) - if not has_function_close: - # Close potentially unclosed element - if self.current_param_name: - self._end_element("parameter") - if self.current_function_name: - self._end_element("function") + # Close potentially unclosed element + if self.current_param_name: + self._end_element("parameter") + if self.current_function_name: + self._end_element("function") # If this chunk contains # but didn't generate final empty delta, then complete it if ( self.current_call_id is not None and self.tool_call_end_token in xml_chunk ): - has_toolcall_close = any( - ( - td.tool_calls - and any( - ( - tc.type == "function" - and tc.function - and tc.function.arguments == "" - and tc.id == self.current_call_id - ) - for tc in td.tool_calls - ) - ) - for td in new_deltas - ) - if not has_toolcall_close: - # Close potentially unclosed element - if self.current_param_name: - self._end_element("parameter") - if self.current_function_name: - self._end_element("function") - self._end_element("tool_call") + # Close potentially unclosed elements + if self.current_param_name: + self._end_element("parameter") + if self.current_function_open: + self._end_element("function") + self._end_element("tool_call") except Exception as e: logger.warning("Error with fallback parsing: %s", e) # Merge newly generated deltas into single response @@ -1009,9 +976,18 @@ def _get_param_type(self, param_name: str) -> str: properties = find_tool_properties(self.tools, self.current_function_name) if param_name in properties and isinstance(properties[param_name], dict): - return self.repair_param_type( - str(properties[param_name].get("type", "string")) - ) + prop = 
properties[param_name] + param_type = prop.get("type") + if param_type is None and "anyOf" in prop: + # Handle anyOf schemas (common in Qwen 3.6) + for option in prop["anyOf"]: + if isinstance(option, dict) and "type" in option: + opt_type = str(option["type"]) + if opt_type in ["object", "array", "arr", "sequence"]: + return opt_type + return "string" + + return self.repair_param_type(str(param_type or "string")) return "string" def repair_param_type(self, param_type: str) -> str: From c164555e412e3a867d3fcfb40033020fe879596d Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 05:58:34 +0200 Subject: [PATCH 12/21] fix Qwen3 XML and Coder tool parser regressions on merged branches Combined fixes for the XML and Coder tool parsers that surfaced once the two PR branches were merged together. Qwen3XML parser: * Reorder _convert_param_value: check string type BEFORE the "null" shortcut so a string param with literal value "null" stays "null" instead of becoming JSON null. Fix logger.warning argument count. * _convert_for_json_streaming: emit "null" (not "") when converted_value is None so nullable integer/object params serialize correctly. * _get_param_type: anyOf returns the first non-null type instead of falling back to "string" for nullable integer/boolean schemas. * _preprocess_xml_chunk: defer streaming for boolean params (avoids emitting "false" on the first 't' of "true") and for all container types regardless of single-quote hint. * _end_element deferred path: try json.loads BEFORE ast.literal_eval so arrays/objects containing JSON true/false/null parse natively; double-decode strings to recover from buggy json.dumps(str(dict)) templates. * Add structural-aware helpers: _is_structural_tag_position, _get_valid_param_names, _is_structural_closing_tag (with partial-tag prefix safety), _chunk_has_structural_function_end, _chunk_has_structural_tool_call_end. 
* _preprocess_xml_chunk: when SAX state is inside a parameter value, escape / always, and /closing tags only when they are not structural delimiters. * _process_complete_xml_elements: defer when streaming with empty lookahead (more tokens may still arrive). * parse_single_streaming_chunks: fallback close uses _chunk_has_structural_*_end instead of plain "in xml_chunk" so a literal in a parameter value doesn't trigger a double close. * extract_tool_calls_streaming: enable _streaming_mode=True on first delta. Qwen3Coder parser: * Reorder _convert_param_value the same way (string-first, then null). * anyOf picks the first non-null type instead of treating it as "object". * Container handling: try json.loads then double-decode via ast.literal_eval to recover from buggy json.dumps(str(dict)) outputs. * Add structural-aware helpers: _next_structural_param_start, _find_true_function_end, _find_true_tool_call_end, _find_true_param_end (with require_lookahead for streaming). * _parse_xml_function_call: top-level params are NOT filtered by schema (callers may rename fields) but nested boundaries inside a value ARE, so literal lines in file content don't terminate the param early. * _get_function_calls: structural-aware ( must be followed by another or EOS; same for ). * Streaming param_starts uses the helpers; close check uses _find_true_function_end so a literal in a value doesn't prematurely emit "}". * tool_start_positions skips past each of completed calls so a literal inside a parameter value of a closed call doesn't spawn a phantom new tool call. * Multi-tool-call delta (speculative decoding): when one tool call closes and another full ... remains in current_text, advance manually and re-enter with a sentinel previous_text so reset_streaming_state isn't triggered (which would loop forever). These fix the agentic-streaming bug where Qwen3.5 would freeze mid-tool-call when a parameter value contained , , , or as literal text (e.g. 
writing a Jinja2 template, a heredoc, or any file describing the tool-call format), as well as several value-conversion bugs (string "null" -> JSON null, anyOf nullable -> wrong type, double-encoded objects -> string). Add 16 regression tests in test_qwen3xml_tool_parser.py, 10 in test_qwen3coder_tool_parser.py, and a new test_qwen36_bugs.py covering bugs that span both parsers (XML array with JSON true/false, Coder multi-tool-call in one streaming delta). 98 tests pass across the three test files. Signed-off-by: CNE Pierre FICHEPOIL --- tests/tool_parsers/test_qwen36_bugs.py | 189 +++++++ .../test_qwen3coder_tool_parser.py | 398 ++++++++++++- .../tool_parsers/test_qwen3xml_tool_parser.py | 534 ++++++++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 480 ++++++++++++++-- vllm/tool_parsers/qwen3xml_tool_parser.py | 382 ++++++++++--- 5 files changed, 1842 insertions(+), 141 deletions(-) create mode 100644 tests/tool_parsers/test_qwen36_bugs.py diff --git a/tests/tool_parsers/test_qwen36_bugs.py b/tests/tool_parsers/test_qwen36_bugs.py new file mode 100644 index 000000000000..65671e9fafbe --- /dev/null +++ b/tests/tool_parsers/test_qwen36_bugs.py @@ -0,0 +1,189 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Bug-confirmation tests for the merged Qwen 3.5 parser changes. + +Each test is a minimal reproducer of a real issue; they are meant to FAIL +until the corresponding bug is fixed. Each scenario is also contrasted +against the Coder parser (for XML bugs) or the XML parser (for Coder bugs) +when one of the two already behaves correctly, which helps narrow down +where the fix belongs. 
+ +Run with: + .venv/bin/python -m pytest tests/tool_parsers/test_qwen36_bugs.py -v +""" +import json + +import pytest + +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) +from vllm.tokenizers import get_tokenizer +from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser +from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser + +MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" + + +@pytest.fixture(scope="module") +def qwen3_tokenizer(): + return get_tokenizer(tokenizer_name=MODEL) + + +def _stream(parser, chunks, request): + """Feed pre-shaped string chunks and collect emitted tool-call pieces. + + Returns (content_str, tool_calls_dict_by_index). + """ + prev_text = "" + prev_ids: list[int] = [] + content_out = "" + events: list[tuple] = [] + for chunk in chunks: + cur_text = prev_text + chunk + # Approximate: tokenize incrementally. + dt_ids = parser.model_tokenizer.encode(chunk, add_special_tokens=False) + cur_ids = prev_ids + dt_ids + msg = parser.extract_tool_calls_streaming( + prev_text, cur_text, chunk, prev_ids, cur_ids, dt_ids, request + ) + if msg is not None: + if msg.content: + content_out += msg.content + if msg.tool_calls: + for tc in msg.tool_calls: + events.append(( + tc.index, + tc.function.name if tc.function else None, + tc.function.arguments if tc.function else None, + )) + prev_text, prev_ids = cur_text, cur_ids + tcs: dict[int, dict] = {} + for idx, name, args in events: + tcs.setdefault(idx, {"name": name, "args": ""}) + if args: + tcs[idx]["args"] += args + return content_out, tcs + + +# --------------------------------------------------------------------------- +# BUG 1: XML parser -- array parameter containing JSON true/false/null is +# emitted as a JSON string instead of being parsed as a JSON array. +# +# Root cause: in _end_element the deferred parser calls ast.literal_eval on +# the raw text. 
ast.literal_eval does NOT understand JSON tokens `true`, +# `false`, `null` (Python uses True/False/None), so it raises and the fallback +# path emits the raw string wrapped with json.dumps. +# +# The Coder parser uses json.loads first, so it gets this scenario right -- +# the test contrasts the two parsers to prove the bug is XML-specific. +# --------------------------------------------------------------------------- + +_ARRAY_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "pick", + "parameters": { + "type": "object", + "properties": {"items": {"type": "array"}}, + }, + }, + ) +] + +_ARRAY_WITH_JSON_BOOL_OUTPUT = ( + "\n\n" + '\n["a", "b", 1, true]\n\n' + "\n" +) + + +def test_xml_array_with_json_bool_nonstreaming(qwen3_tokenizer): + """XML non-streaming: array containing `true` must be parsed as a list.""" + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_ARRAY_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS) + result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert isinstance(args["items"], list), ( + f"XML parser emitted items as {type(args['items']).__name__} " + f"({args['items']!r}). ast.literal_eval cannot parse JSON `true` and " + "the exception fallback wraps the raw string with json.dumps. " + "Use json.loads first (see the Coder parser)." 
+ ) + assert args["items"] == ["a", "b", 1, True] + + +def test_coder_array_with_json_bool_nonstreaming(qwen3_tokenizer): + """Contrast: Coder parser handles the same input correctly.""" + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_ARRAY_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS) + result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["items"] == ["a", "b", 1, True] + + +# --------------------------------------------------------------------------- +# BUG 2: Coder parser -- when two complete ... +# blocks arrive in a SINGLE streaming delta (typical for speculative +# decoding), only the first tool call is emitted, the second is dropped. +# +# Root cause: extract_tool_calls_streaming advances current_tool_index by +# one per delta. When a delta flushes two complete tool calls the parser +# processes call #0, sees tool_ends > current_tool_index, advances to #1, +# and returns None without re-processing the same delta. The XML parser +# processes all complete elements in a loop and does not drop the second. 
+# --------------------------------------------------------------------------- + +_WEATHER_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + ) +] + +_TWO_TOOL_CALLS_IN_ONE_CHUNK = ( + "\n\n\nParis\n\n" + "\n\n" + "\n\n\nLondon\n\n" + "\n" +) + + +def test_coder_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer): + """Coder streaming: a single delta that contains TWO complete tool calls + must emit both, not just the first.""" + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WEATHER_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) + _, tcs = _stream(parser, [_TWO_TOOL_CALLS_IN_ONE_CHUNK], request) + assert len(tcs) == 2, ( + f"Expected 2 tool calls, got {len(tcs)}. " + "The Coder parser drops the second tool call when both complete in " + "the same delta (speculative decoding scenario)." + ) + args0 = json.loads(tcs[0]["args"]) + args1 = json.loads(tcs[1]["args"]) + assert args0 == {"city": "Paris"} + assert args1 == {"city": "London"} + + +def test_xml_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer): + """Contrast: XML parser already handles this case correctly.""" + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WEATHER_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) + _, tcs = _stream(parser, [_TWO_TOOL_CALLS_IN_ONE_CHUNK], request) + assert len(tcs) == 2 + assert json.loads(tcs[0]["args"]) == {"city": "Paris"} + assert json.loads(tcs[1]["args"]) == {"city": "London"} diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 2dc5c2100dad..32d5a238914a 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -22,7 +22,7 @@ Qwen3CoderToolParser, ) from vllm.tool_parsers.qwen3xml_tool_parser import 
Qwen3XMLToolParser - +from tests.tool_parsers.utils import run_tool_extraction_streaming MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" @@ -1092,9 +1092,6 @@ def test_streaming_multi_param_single_chunk(qwen3_tool_parser, qwen3_tokenizer): "\n", ] - from tests.tool_parsers.utils import ( - run_tool_extraction_streaming, - ) reconstructor = run_tool_extraction_streaming( qwen3_tool_parser, @@ -1298,3 +1295,396 @@ def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): args = json.loads(tool_states[0]["arguments"]) assert args["example_parameter_1"] == "value_1" assert args["example_parameter_2"] == "This is the value for the second parameter\nthat can span\nmultiple lines" + + +def test_coder_string_null_value_not_converted_to_none(qwen3_tokenizer): + """Regression: string param with literal value 'null' must not become JSON null. + + The null-before-type-check in _convert_param_value returns Python None for + ANY parameter whose raw text is 'null', even when the schema says 'string'. + That turns {"param": "null"} into {"param": null}, which is wrong. + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_value", + "parameters": { + "type": "object", + "properties": { + "key": {"type": "string"}, + }, + }, + }, + ) + ] + + model_output = ( + "\n" + "\n" + "null\n" + "\n" + "" + ) + + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + # The value is the string "null", NOT JSON null + assert args["key"] == "null", ( + f"String param 'null' was converted to JSON null. Got: {args['key']!r}" + ) + + +def test_coder_anyof_string_null_numeric_value_stays_string(qwen3_tokenizer): + """Regression: anyOf with string+null must keep numeric-looking values as strings. 
+ + When anyOf is treated as 'object', json.loads('42') returns int 42 even + though the schema declares the type as 'string'. The correct behaviour is + to use the first non-null type from anyOf; for string, the raw text is + returned unchanged. + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_code", + "parameters": { + "type": "object", + "properties": { + "code": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + + model_output = ( + "\n" + "\n" + "42\n" + "\n" + "" + ) + + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + # "42" is a string in the schema — must NOT become integer 42 + assert args["code"] == "42", ( + f"anyOf string param '42' was parsed as {type(args['code']).__name__}: {args['code']!r}" + ) + assert isinstance(args["code"], str) + + +_WRITE_FILE_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) +] + +# Tool with an object-type parameter to test double-encoded values. +_OBJECT_PARAM_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "process", + "parameters": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "data": {"type": "object"}, + }, + }, + }, + ) +] + +# Model output as produced by a template with json.dumps(str(value)) bug: +# the dict argument is rendered as a JSON-encoded Python repr string. 
+_DOUBLE_ENCODED_OBJECT_OUTPUT = ( + "\n" + "\n" + "\nhello\n\n" + "\n\"{'key': 'value', 'n': 1}\"\n\n" + "\n" + "\n" +) + +# File content that contains and \\n\\n"""' +) + +_WRITE_FILE_OUTPUT = ( + "\n" + "\n" + "\ntest.py\n\n" + f"\n{_FILE_CONTENT_WITH_TOOL_CALL_TAG}\n\n" + "\n" + "\n" # trailing newline ensures a delta arrives after +) + + +def test_nonstreaming_content_param_with_tool_call_tag(qwen3_tokenizer): + """Non-streaming: literal inside a string param must not split it. + + When writing a file whose content contains '' as plain text, + extract_tool_calls must still produce exactly one tool call with the + correct path and full content. + """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + result = parser.extract_tool_calls(_WRITE_FILE_OUTPUT, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + args = json.loads(result.tool_calls[0].function.arguments) + assert args["path"] == "test.py" + assert args["content"] == _FILE_CONTENT_WITH_TOOL_CALL_TAG + + +def test_streaming_content_param_with_tool_call_tag(qwen3_tokenizer): + """Streaming: literal inside a string param must not be mistaken + for a second tool call. + + The streaming parser counted ALL occurrences in current_text, + including those inside parameter values. After completing the first tool + call it would set is_tool_call_started=True again and attempt to process + the embedded as a second invocation — producing garbage or an + extra spurious tool call. 
+ """ + + + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + + reconstructor = run_tool_extraction_streaming( + parser, + _WRITE_FILE_OUTPUT, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " + f"{[tc.function.name for tc in reconstructor.tool_calls]}" + ) + assert reconstructor.tool_calls[0].function.name == "write_file" + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["path"] == "test.py", f"path wrong: {args.get('path')!r}" + assert args["content"] == _FILE_CONTENT_WITH_TOOL_CALL_TAG, ( + f"content wrong: {args.get('content')!r}" + ) + + +# Python content containing ALL XML structural tags as literal strings. +# This is the hardest case: the parameter value looks like it could end at +# any of the embedded closing tags. +_XML_TAGS_IN_CONTENT = ( + 'char_deltas = [\n' + ' "\\n",\n' + ' "\\n",\n' + ' "\\n\\n",\n' + ' "\\n",\n' + ']\n' +) + +_WRITE_FILE_XML_TAGS_OUTPUT = ( + "\n" + "\n" + "\ntest.py\n\n" + f"\n{_XML_TAGS_IN_CONTENT}\n" + "\n" + "\n" +) + + +def test_nonstreaming_content_with_xml_structural_tags(qwen3_tokenizer): + """Non-streaming: parameter value containing , , + as literal text must be extracted intact without spurious + extra parameters being created. 
+ """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["file_path", "content"], ( + f"Unexpected keys (spurious params?): {list(args.keys())}" + ) + assert args["file_path"] == "test.py" + assert args["content"] == _XML_TAGS_IN_CONTENT.rstrip("\n"), ( + f"content wrong: {args.get('content')!r}" + ) + + +def test_streaming_content_with_xml_structural_tags(qwen3_tokenizer): + """Streaming: parameter value containing , , + as literal text must not terminate the parameter early and + must not create spurious extra parameters. + """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + + reconstructor = run_tool_extraction_streaming( + parser, + _WRITE_FILE_XML_TAGS_OUTPUT, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " + f"{[tc.function.name for tc in reconstructor.tool_calls]}" + ) + assert reconstructor.tool_calls[0].function.name == "write_file" + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["file_path", "content"], ( + f"Unexpected keys (spurious params?): {list(args.keys())}" + ) + assert args["file_path"] == "test.py" + assert args["content"] == _XML_TAGS_IN_CONTENT.rstrip("\n"), ( + f"content wrong: {args.get('content')!r}" + ) + + +# File content that contains and on their OWN +# LINES (preceded by \n). 
This occurs when writing a Jinja2 template, a test +# fixture for the parser itself, or any file that documents the tool-call +# format. "new_string" is intentionally NOT a parameter of write_file, so the +# schema filter must prevent it from being treated as a structural boundary. +_CONTENT_WITH_PARAM_LIKE_LINES = ( + 'TOOL_CALL_TEMPLATE = """\n' + "\n" + "\n" + "#!/usr/bin/env python3\n" + "\n" + '"""\n' +) + +_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT = ( + "\n" + "\n" + "\ntest_template.py\n\n" + f"\n{_CONTENT_WITH_PARAM_LIKE_LINES}\n" + "\n" + "\n" +) + + +def test_nonstreaming_content_with_param_like_lines(qwen3_tokenizer): + """Non-streaming: file content containing and + on their own lines must not be truncated at the first or + create spurious extra parameters. Requires schema-based filtering so that + "new_string" (not a real parameter of write_file) is ignored. + """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + result = parser.extract_tool_calls(_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious parameters created: {list(args.keys())}" + ) + assert args["path"] == "test_template.py" + assert args["content"] == _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n"), ( + f"content truncated or wrong: {args.get('content')!r}" + ) + + +def test_streaming_content_with_param_like_lines(qwen3_tokenizer): + """Streaming: file content containing and on + their own lines must not emit spurious extra tool calls or parameters. 
+ """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + + reconstructor = run_tool_extraction_streaming( + parser, + _WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " + f"{[tc.function.name for tc in reconstructor.tool_calls]}" + ) + assert reconstructor.tool_calls[0].function.name == "write_file" + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious parameters created: {list(args.keys())}" + ) + assert args["path"] == "test_template.py" + assert args["content"] == _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n"), ( + f"content truncated or wrong: {args.get('content')!r}" + ) + + +def test_nonstreaming_double_encoded_object_param(qwen3_tokenizer): + """Non-streaming: a model trained with a buggy template (json.dumps(str(dict))) + outputs object args as a JSON-encoded Python repr string like \"{'k': 'v'}\". + The parser must double-decode it back to a real dict. 
+ """ + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS + ) + result = parser.extract_tool_calls(_DOUBLE_ENCODED_OBJECT_OUTPUT, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} + + +def test_streaming_double_encoded_object_param(qwen3_tokenizer): + """Streaming: same double-encoded object parameter scenario.""" + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS + ) + reconstructor = run_tool_extraction_streaming( + parser, + _DOUBLE_ENCODED_OBJECT_OUTPUT, + request, + assert_one_tool_per_delta=False, + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 4ae1961bda5d..03b2d879c16f 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -357,3 +357,537 @@ def test_xml_streaming_parallel_tool_calls_preformed_chunks(qwen3_tokenizer): assert reconstructor.tool_calls[1].function.name == "get_weather" assert args0 == {"city": "Paris"}, f"First call args wrong: {args0!r}" assert args1 == {"city": "London"}, f"Second call args wrong: {args1!r}" + + +# --------------------------------------------------------------------------- +# 
Bug-confirmation tests (regressions to FIX) +# --------------------------------------------------------------------------- + + +def test_xml_string_null_value_not_emptied(qwen3_tokenizer): + """ + Bug A: _convert_param_value intercepts "null" before the type check. + For a STRING parameter with value "null", the parser should output + the JSON string "null", not an empty string "". + + Root cause: `if param_value.lower() == "null": return None` runs first, + then _convert_for_json_streaming(None, "string") returns "", so the + closing-quote _end_element emits "" instead of "null". + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "search", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string"}, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "null\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + + assert "query" in args, f"Parameter 'query' missing from args: {args!r}" + assert args["query"] == "null", ( + f"String parameter with literal value 'null' was incorrectly converted. " + f"Got: {args['query']!r}. " + f"Expected: 'null' (the string). " + f"_convert_param_value returns None before checking type, " + f"then _convert_for_json_streaming(None, 'string') returns ''." + ) + + +def test_xml_streaming_boolean_true_not_false(qwen3_tokenizer): + """ + Bug B: In streaming mode, a boolean parameter with value "true" is + streamed as "false". 
+ + Root cause: When "true" arrives character by character: + - 't' → _convert_param_value("t", "boolean") = False → emits "false" + - 'r','u','e' → no new delta (output_data[len("false"):] = "") + Final accumulated arguments contain "false" instead of "true". + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_flag", + "parameters": { + "type": "object", + "properties": { + "enabled": {"type": "boolean"}, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + # Feed character-by-character to trigger the streaming accumulation bug. + # Each chunk simulates a single-character token arriving in streaming. + char_deltas = [ + "", + "\n", + "\n", + "t", # ← first char triggers False → emits "false" + "r", + "u", + "e", # ← full "true" but delta = "true"[5:] = "" + "", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + char_deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + + assert args["enabled"] is True, ( + f"Boolean streaming bug: expected True, got {args['enabled']!r}. " + f"First char 't' emits 'false'; subsequent chars emit nothing; " + f"final value is 'false' even though the model said 'true'." + ) + + +def test_xml_streaming_string_null_last_char_not_dropped(qwen3_tokenizer): + """ + Bug A (streaming variant): String parameter with value "null" loses + the last character 'l' when tokens arrive one by one. + + Root cause: Accumulating 'n','u','l' emits correctly, but on the + fourth char 'l' the full value is "null" → + _convert_param_value("null", "string") → None → + _convert_for_json_streaming(None, "string") → "" → delta = ""[3:] = "". + The closing quote is then emitted, yielding "nul" not "null". 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "search", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string"}, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + char_deltas = [ + "", + "\n", + "\n", + "n", + "u", + "l", + "l", # ← triggers _convert_param_value("null",…) = None → nothing emitted + "", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + char_deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + + assert "query" in args + assert args["query"] == "null", ( + f"String 'null' streaming bug: last 'l' was dropped. " + f"Got: {args['query']!r}. " + f"When full value reaches 'null', _convert_param_value returns None " + f"and _convert_for_json_streaming(None, 'string') returns '', " + f"so the final delta is empty and the 'l' is never emitted." + ) + + +def test_xml_anyof_integer_null_type_detected(qwen3_tokenizer): + """ + Bug C: _get_param_type only returns non-string for anyOf schemas that + contain "object" or "array". For anyOf: [{type: "integer"}, {type: "null"}] + it falls through and returns "string", so integer parameters with + nullable schemas are incorrectly quoted and not converted. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_count", + "parameters": { + "type": "object", + "properties": { + "count": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "42\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + + assert args["count"] == 42, ( + f"anyOf integer+null: expected int 42, got {args['count']!r}. " + f"_get_param_type only checks for object/array in anyOf schemas, " + f"so integer anyOf schemas fall back to 'string', causing '42' " + f"to be returned as the JSON string '\"42\"' instead of the number 42." + ) + + +# --------------------------------------------------------------------------- +# Regression: XML structural tags as literal text inside string parameters +# --------------------------------------------------------------------------- + +_WRITE_FILE_TOOLS_XML = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "file_path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) +] + +# Python content that contains all four XML structural tags as literal strings. +# When qwen3xml encounters "" inside the content value it +# currently treats it as the structural end of the element, +# truncating the value and creating a spurious "query" parameter from the text +# that follows the fake . 
+_XML_TAGS_IN_CONTENT_XML = ( + 'char_deltas = [\n' + ' "\\n",\n' + ' "\\n",\n' + ' "\\n\\n",\n' + ' "\\n",\n' + ']\n' +) + +_WRITE_FILE_XML_TAGS_OUTPUT_XML = ( + "\n" + "\n" + "\ntest.py\n\n" + f"\n{_XML_TAGS_IN_CONTENT_XML}\n" + "\n" + "\n" +) + + +def test_xml_streaming_content_with_structural_xml_tags(qwen3_tokenizer): + """Streaming variant: pre-formed chunks, full content in one delta.""" + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML + ) + + char_deltas = [ + "\n", + "\n", + "\ntest.py\n\n", + f"\n{_XML_TAGS_IN_CONTENT_XML}\n", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + char_deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + assert reconstructor.tool_calls[0].function.name == "write_file" + + args = json.loads(reconstructor.tool_calls[0].function.arguments) + + assert list(args.keys()) == ["file_path", "content"], ( + f"Unexpected parameter keys (spurious params from embedded tags?): " + f"{list(args.keys())}" + ) + assert args["file_path"] == "test.py" + expected_content = _XML_TAGS_IN_CONTENT_XML.rstrip("\n") + assert args["content"] == expected_content, ( + f"content was truncated or corrupted by embedded XML tags.\n" + f"Got: {args.get('content')!r}\n" + f"Expected: {expected_content!r}" + ) + + +def test_xml_nonstreaming_content_with_structural_xml_tags(qwen3_tokenizer): + """Regression: string parameter containing , , + as literal text must be extracted intact. + + Bug: the SAX pre-processor (_preprocess_xml_chunk) returns + ``safe_text + ""`` when it sees ```` inside the + accumulated parameter buffer, terminating the current parameter too early. + The text that follows the spurious closing tag is then misinterpreted as a + new parameter named "query", creating a ghost parameter and truncating + the real "content" value. 
+ + Expected: exactly two parameters -- file_path and content -- with content + equal to the full Python snippet including the embedded XML tags. + """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML + ) + result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT_XML, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + + args = json.loads(result.tool_calls[0].function.arguments) + + assert list(args.keys()) == ["file_path", "content"], ( + f"Unexpected parameter keys (spurious params created from embedded tags?): " + f"{list(args.keys())}. " + f"_preprocess_xml_chunk sees '' inside the accumulated " + f"_pre_param_buffer and terminates the parameter early; the text after " + f"'' becomes a ghost 'query' parameter." + ) + assert args["file_path"] == "test.py" + expected_content = _XML_TAGS_IN_CONTENT_XML.rstrip("\n") + assert args["content"] == expected_content, ( + f"content was truncated or corrupted by embedded XML tags. " + f"Got: {args.get('content')!r}\n" + f"Expected: {expected_content!r}" + ) + + +# File content whose lines ARE standalone and +# tokens (preceded by \n). This simulates writing a Jinja2 template, a test +# fixture for the parser, or any file that references the tool-call format. +# "new_string" is intentionally NOT a parameter of write_file (schema has +# "file_path" and "content"), so the schema filter must prevent it from being +# treated as a structural boundary. 
+_CONTENT_WITH_PARAM_LIKE_LINES_XML = ( + 'TOOL_CALL_TEMPLATE = """\n' + "\n" + "\n" + "#!/usr/bin/env python3\n" + "\n" + '"""\n' +) + +_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT_XML = ( + "\n" + "\n" + "\ntest_template.py\n\n" + f"\n{_CONTENT_WITH_PARAM_LIKE_LINES_XML}\n" + "\n" + "\n" +) + + +def test_xml_nonstreaming_content_with_param_like_lines(qwen3_tokenizer): + """Non-streaming: file content containing and + on their own lines must not be truncated at the first or + create spurious extra parameters. Requires schema-based filtering so that + "new_string" (not a real parameter of write_file) is ignored. + """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML + ) + result = parser.extract_tool_calls(_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT_XML, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["file_path", "content"], ( + f"Spurious parameters created: {list(args.keys())}" + ) + assert args["file_path"] == "test_template.py" + expected = _CONTENT_WITH_PARAM_LIKE_LINES_XML.rstrip("\n") + assert args["content"] == expected, ( + f"content truncated or wrong: {args.get('content')!r}" + ) + + +def test_xml_streaming_content_with_param_like_lines(qwen3_tokenizer): + """Streaming: file content containing and on + their own lines — split into one chunk per structural token — must not + cause spurious extra parameters. + + The critical scenario: chunk 5 is '\\n' arriving ALONE so + the streaming buffer has nothing after it (rest='') which previously + triggered the 'not rest → structural' fallback, ending the 'content' + parameter prematurely. After the schema fix, the subsequent + '' is recognised as non-structural and the full + content is preserved. 
+ """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML + ) + + char_deltas = [ + "\n", + "\n", + "\ntest_template.py\n\n", + '\nTOOL_CALL_TEMPLATE = """\n', + "\n", # first literal close — alone in its delta + "\n", # literal new-param line + "#!/usr/bin/env python3\n", + "\n", # second literal close + '"""\n', + "\n", # REAL close of content + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + char_deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " + f"{[tc.function.name for tc in reconstructor.tool_calls]}" + ) + assert reconstructor.tool_calls[0].function.name == "write_file" + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["file_path", "content"], ( + f"Spurious parameters created: {list(args.keys())}" + ) + assert args["file_path"] == "test_template.py" + expected = _CONTENT_WITH_PARAM_LIKE_LINES_XML.rstrip("\n") + assert args["content"] == expected, ( + f"content truncated or wrong: {args.get('content')!r}" + ) + + +_OBJECT_PARAM_TOOLS_XML = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "process", + "parameters": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "data": {"type": "object"}, + }, + }, + }, + ) +] + +_DOUBLE_ENCODED_OBJECT_OUTPUT_XML = ( + "\n" + "\n" + "\nhello\n\n" + "\n\"{'key': 'value', 'n': 1}\"\n\n" + "\n" + "\n" +) + + +def test_xml_nonstreaming_double_encoded_object_param(qwen3_tokenizer): + """Non-streaming: model trained with buggy template (json.dumps(str(dict))) + outputs object args as a JSON-encoded Python repr. Parser must recover + the real dict via double-decode. 
+ """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS_XML + ) + result = parser.extract_tool_calls( + _DOUBLE_ENCODED_OBJECT_OUTPUT_XML, request=request + ) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} + + +def test_xml_streaming_double_encoded_object_param(qwen3_tokenizer): + """Streaming: same double-encoded object parameter scenario.""" + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS_XML) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS_XML + ) + reconstructor = run_tool_extraction_streaming( + parser, + _DOUBLE_ENCODED_OBJECT_OUTPUT_XML, + request, + assert_one_tool_per_delta=False, + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} + + +# ============================================================================ +# Qwen 3.6 Bug Confirmations (placeholder, truncated test removed) +# ============================================================================ + diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 7f75f2451554..aaa0d63c1f53 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -116,10 +116,6 @@ def _convert_param_value( self, param_value: str, param_name: str, param_config: dict, func_name: str ) -> Any: """Convert parameter 
value based on its type in the schema.""" - # Handle null value for any type - if param_value.lower() == "null": - return None - if param_name not in param_config: if param_config != {}: logger.debug( @@ -140,13 +136,26 @@ def _convert_param_value( isinstance(param_config[param_name], dict) and "anyOf" in param_config[param_name] ): - # anyOf has no top-level "type"; treat as object to trigger json.loads. - param_type = "object" + # Extract the first non-null type from the anyOf list so that + # nullable schemas like {"anyOf": [{"type": "string"}, + # {"type": "null"}]} behave as "string", not "object". + param_type = "string" + for option in param_config[param_name]["anyOf"]: + if isinstance(option, dict) and "type" in option: + opt_type = str(option["type"]).strip().lower() + if opt_type != "null": + param_type = opt_type + break else: param_type = "string" + # String type takes precedence: preserve the raw value (including + # the literal "null") rather than converting it to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: return param_value - elif ( + # For non-string types, "null" maps to JSON null. + if param_value.lower() == "null": + return None + if ( param_type.startswith("int") or param_type.startswith("uint") or param_type.startswith("long") @@ -194,14 +203,24 @@ def _convert_param_value( ) return param_value == "true" else: - if ( + is_container_type = ( param_type in ["object", "array", "arr"] or param_type.startswith("dict") or param_type.startswith("list") - ): + ) + if is_container_type: try: - param_value = json.loads(param_value) - return param_value + parsed = json.loads(param_value) + # A model trained with a buggy template + # (json.dumps(str(dict))) may output a JSON-encoded + # Python repr like "{'k': 'v'}". json.loads returns a + # string in that case — try one more parse. 
+ if isinstance(parsed, str): + try: + parsed = ast.literal_eval(parsed) + except (ValueError, SyntaxError, TypeError): + pass + return parsed except (json.JSONDecodeError, TypeError, ValueError): logger.debug( "Parsed value '%s' of parameter '%s' cannot be " @@ -213,6 +232,14 @@ def _convert_param_value( ) try: param_value = ast.literal_eval(param_value) # safer + # Same double-decode for container types whose raw text + # had no JSON outer layer (e.g. bare Python repr + # "{'k': 'v'}"). + if is_container_type and isinstance(param_value, str): + try: + param_value = ast.literal_eval(param_value) + except (ValueError, SyntaxError, TypeError): + pass except (ValueError, SyntaxError, TypeError): logger.debug( "Parsed value '%s' of parameter '%s' cannot be " @@ -224,6 +251,131 @@ def _convert_param_value( ) return param_value + def _next_structural_param_start( + self, + text: str, + start_pos: int = 0, + valid_param_names: set[str] | None = None, + ) -> int: + """Return index of next structural ```` from + start_pos. Structural means preceded by ``\\n`` or at position 0. + If valid_param_names is given, NAME must also be in that set. + Returns -1 if none found. + """ + ni = start_pos + prefix_len = len(self.parameter_prefix) + while True: + ni = text.find(self.parameter_prefix, ni) + if ni == -1: + return -1 + if ni == 0 or text[ni - 1] == "\n": + if valid_param_names is not None: + name_end = text.find(">", ni + prefix_len) + if ( + name_end != -1 + and text[ni + prefix_len : name_end] in valid_param_names + ): + return ni + ni += 1 + continue + return ni + ni += 1 + + def _find_true_function_end(self, text: str) -> int: + """Return the index of the real structural ```` in text + (followed with optional whitespace by ```` or end of + string), or -1 if none found. Skips ```` that appears + as literal text inside a parameter value. 
+ """ + search_pos = 0 + while True: + idx = text.find(self.function_end_token, search_pos) + if idx == -1: + return -1 + after = text[idx + len(self.function_end_token):] + stripped = after.lstrip() + if stripped == "" or stripped.startswith(self.tool_call_end_token): + return idx + search_pos = idx + len(self.function_end_token) + + def _find_true_tool_call_end(self, text: str) -> int: + """Return the index of the real structural ```` in + text (followed with optional whitespace by another ```` + or end of string), or -1 if none found. + """ + search_pos = 0 + while True: + idx = text.find(self.tool_call_end_token, search_pos) + if idx == -1: + return -1 + after = text[idx + len(self.tool_call_end_token):] + stripped = after.lstrip() + if stripped == "" or stripped.startswith(self.tool_call_start_token): + return idx + search_pos = idx + len(self.tool_call_end_token) + + def _find_true_param_end( + self, + value_text: str, + valid_param_names: set[str] | None = None, + require_lookahead: bool = False, + ) -> int: + """Find the true end of a parameter value in value_text. + + A ```` is structural only when it is followed by + another structural delimiter (schema-known ````, + ````, ````) or — in non-streaming mode — + end-of-string. Nested structural ```` tags + decrement depth like matched openings. + + Returns the index of the true ```` in value_text, or + -1 if incomplete. 
+ """ + depth = 0 + pos = 0 + param_prefix_len = len(self.parameter_prefix) + param_end_len = len(self.parameter_end_token) + + while pos < len(value_text): + next_open = self._next_structural_param_start( + value_text, pos, valid_param_names + ) + next_close = value_text.find(self.parameter_end_token, pos) + if next_close == -1: + return -1 + + if next_open != -1 and next_open < next_close: + depth += 1 + pos = next_open + param_prefix_len + elif depth == 0: + after = value_text[next_close + param_end_len:] + stripped = after.lstrip() + structural_next_param = False + if stripped.startswith(self.parameter_prefix): + if valid_param_names is not None: + name_start = len(self.parameter_prefix) + name_end = stripped.find(">", name_start) + if name_end != -1: + structural_next_param = ( + stripped[name_start:name_end] + in valid_param_names + ) + else: + structural_next_param = True + if ( + (stripped == "" and not require_lookahead) + or structural_next_param + or stripped.startswith(self.function_end_token) + or stripped.startswith(self.tool_call_end_token) + ): + return next_close + pos = next_close + param_end_len + else: + depth -= 1 + pos = next_close + param_end_len + + return -1 + def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: # Extract function name end_index = function_call_str.find(">") @@ -232,12 +384,53 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: return None function_name = function_call_str[:end_index] param_config = find_tool_properties(self.tools, function_name) + valid_param_names: set[str] | None = ( + set(param_config.keys()) if param_config else None + ) parameters = function_call_str[end_index + 1 :] - param_dict = {} - for match_text in self.tool_call_parameter_regex.findall(parameters): - idx = match_text.index(">") - param_name = match_text[:idx] - param_value = str(match_text[idx + 1 :]) + param_dict: dict = {} + pos = 0 + while True: + # Find next structural at the top 
level. We + # do NOT filter the outer search by schema: callers may + # legitimately send a parameter whose name is not declared + # in the schema (e.g. renamed fields). Schema filtering is + # applied only when scanning INSIDE a parameter value, to + # disambiguate real nested delimiters from literal text. + param_start = self._next_structural_param_start( + parameters, pos, None + ) + if param_start == -1: + break + name_start = param_start + len(self.parameter_prefix) + name_end = parameters.find(">", name_start) + if name_end == -1: + break + param_name = parameters[name_start:name_end] + value_text = parameters[name_end + 1 :] + + param_end = self._find_true_param_end(value_text, valid_param_names) + if param_end == -1: + # No true found (malformed XML or incomplete). + # Fallback 1: next structural boundary or end + func_end = self._find_true_function_end(value_text) + if func_end != -1: + param_value = value_text[:func_end] + else: + param_value = value_text + pos = len(parameters) + else: + param_value = value_text[:param_end] + pos = (name_end + 1) + param_end + len(self.parameter_end_token) + # Remove prefix and trailing \n if param_value.startswith("\n"): param_value = param_value[1:] @@ -255,23 +448,57 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: ) def _get_function_calls(self, model_output: str) -> list[str]: - # Find all tool calls - matched_ranges = self.tool_call_regex.findall(model_output) - raw_tool_calls = [ - match[0] if match[0] else match[1] for match in matched_ranges - ] + # Find tool_calls using a structural delimiter approach: + # a real is followed by another or + # end-of-text. This skips that appears as literal + # text inside a parameter value. 
+ raw_tool_calls: list[str] = [] + search_pos = 0 + while True: + tc_start = model_output.find(self.tool_call_start_token, search_pos) + if tc_start == -1: + break + after_open = model_output[tc_start + len(self.tool_call_start_token):] + tc_end = -1 + inner_search = 0 + while True: + idx = after_open.find(self.tool_call_end_token, inner_search) + if idx == -1: + tc_end = -1 + break + after_close = after_open[idx + len(self.tool_call_end_token):] + stripped = after_close.lstrip() + if stripped == "" or stripped.startswith(self.tool_call_start_token): + tc_end = idx + break + inner_search = idx + len(self.tool_call_end_token) + if tc_end == -1: + raw_tool_calls.append(after_open) + break + raw_tool_calls.append(after_open[:tc_end]) + search_pos = ( + tc_start + len(self.tool_call_start_token) + + tc_end + len(self.tool_call_end_token) + ) # Back-off strategy if no tool_call tags found if len(raw_tool_calls) == 0: raw_tool_calls = [model_output] - raw_function_calls = [] + # Use structural boundary instead of a greedy regex so + # that '' appearing as literal text inside a parameter + # value does not truncate the function body. 
+ function_calls: list[str] = [] for tool_call in raw_tool_calls: - raw_function_calls.extend(self.tool_call_function_regex.findall(tool_call)) - - function_calls = [ - match[0] if match[0] else match[1] for match in raw_function_calls - ] + func_start = tool_call.find(self.tool_call_prefix) + if func_start == -1: + continue + after_func_open = tool_call[func_start + len(self.tool_call_prefix):] + func_end = self._find_true_function_end(after_func_open) + if func_end == -1: + function_calls.append(after_func_open) + else: + function_calls.append(after_func_open[:func_end]) return function_calls def extract_tool_calls( @@ -442,22 +669,34 @@ def extract_tool_calls_streaming( return DeltaMessage(content=content) return None - # Check if we're between tool calls (waiting for next one) - # Count tool calls we've seen vs processed - tool_starts_count = current_text.count(self.tool_call_start_token) - if self.current_tool_index >= tool_starts_count: + # Check if we're between tool calls (waiting for next one). + # Only count structural starts (skip past each + # of completed calls) so that tokens + # embedded in a parameter value of a completed call are not + # counted as spurious new tool calls. + if self.tool_call_start_token not in current_text[self._sent_content_idx:]: return content_message - # We're in a tool call, find the current tool call portion - # Need to find the correct tool call based on current_tool_index + # We're in a tool call, find the current tool call portion. + # Build tool_start_positions by jumping OVER completed tool + # calls (past each ), so that tokens + # embedded in parameter values of completed calls are never + # included. 
tool_start_positions: list[int] = [] - idx = 0 - while True: - idx = current_text.find(self.tool_call_start_token, idx) + search_pos = 0 + for i in range(self.current_tool_index + 1): + idx = current_text.find(self.tool_call_start_token, search_pos) if idx == -1: break tool_start_positions.append(idx) - idx += len(self.tool_call_start_token) + if i < self.current_tool_index: + # Completed tool call: jump past its so the + # next search starts after it, skipping any content + # inside (including literal ). + end_idx = current_text.find(self.tool_call_end_token, idx) + if end_idx == -1: + break + search_pos = end_idx + len(self.tool_call_end_token) if self.current_tool_index >= len(tool_start_positions): return content_message @@ -527,14 +766,68 @@ def extract_tool_calls_streaming( self.streamed_args_for_tool[self.current_tool_index] += "{" arguments_to_emit += "{" - param_starts = [] + # Build param_starts using structural-aware lookup. Plain + # tool_text.find(parameter_prefix) would return positions + # inside parameter VALUES (e.g. Python code that embeds the + # XML format), creating spurious extra params. Use the + # schema to filter nested and advance + # sequentially past each complete parameter's value. + streaming_param_config = find_tool_properties( + self.tools, self.current_function_name or "" + ) + valid_param_names: set[str] | None = ( + set(streaming_param_config.keys()) + if streaming_param_config + else None + ) + param_starts: list[int] = [] search_idx = 0 while True: - search_idx = tool_text.find(self.parameter_prefix, search_idx) - if search_idx == -1: + # Don't filter top-level by schema: + # callers may send params whose names aren't declared + # (e.g. renamed fields). Schema filtering is applied + # below when walking INSIDE a parameter value to + # disambiguate nested literal XML. 
+ param_start_pos = self._next_structural_param_start( + tool_text, search_idx, None + ) + if param_start_pos == -1: break - param_starts.append(search_idx) - search_idx += len(self.parameter_prefix) + param_starts.append(param_start_pos) + # Advance past this parameter's content. + name_end_pos = tool_text.find( + ">", param_start_pos + len(self.parameter_prefix) + ) + if name_end_pos == -1: + break + after_name = tool_text[name_end_pos + 1:] + after_name_stripped = ( + after_name[1:] if after_name.startswith("\n") else after_name + ) + end_in_after = self._find_true_param_end( + after_name_stripped, + valid_param_names, + require_lookahead=True, + ) + if end_in_after == -1: + implicit_end = self._next_structural_param_start( + after_name_stripped, 0, valid_param_names + ) + if implicit_end != -1: + search_idx = ( + (name_end_pos + 1) + + (1 if after_name.startswith("\n") else 0) + + implicit_end + ) + else: + break + else: + search_idx = ( + (name_end_pos + 1) + + (1 if after_name.startswith("\n") else 0) + + end_in_after + + len(self.parameter_end_token) + ) # Process ALL complete params in a loop (spec decode fix). # With speculative decoding a single delta can deliver @@ -560,10 +853,21 @@ def extract_tool_calls_streaming( if value_text.startswith("\n"): value_text = value_text[1:] - param_end_idx = value_text.find(self.parameter_end_token) + param_end_idx = self._find_true_param_end( + value_text, valid_param_names, require_lookahead=True + ) if param_end_idx == -1: - next_param_idx = value_text.find(self.parameter_prefix) - func_end_idx = value_text.find(self.function_end_token) + # Fallback for malformed/incomplete XML: a structural + # and + # to avoid cutting the parameter at XML + # tags that appear as literal text inside the + # parameter value. 
+ func_end_idx = self._find_true_function_end(value_text) + tool_end_in_value = self._find_true_tool_call_end(value_text) if next_param_idx != -1 and ( func_end_idx == -1 or next_param_idx < func_end_idx @@ -571,19 +875,10 @@ def extract_tool_calls_streaming( param_end_idx = next_param_idx elif func_end_idx != -1: param_end_idx = func_end_idx + elif tool_end_in_value != -1: + param_end_idx = tool_end_in_value else: - # Fallback for malformed XML where - # is missing. Use as a delimiter - # if present in the value so we don't include - # the closing tag as part of the param value. - tool_end_in_value = value_text.find(self.tool_call_end_token) - if tool_end_in_value != -1: - param_end_idx = tool_end_in_value - else: - # Parameter incomplete — break so we still - # emit any fragments accumulated by earlier - # loop iterations. - break + break if param_end_idx == -1: break @@ -635,13 +930,17 @@ def extract_tool_calls_streaming( # . If the close check ran first it would emit # "}" and set in_function=False before the parameter loop # ever ran, causing the parameter to be silently dropped. - if not self.json_closed and self.function_end_token in tool_text: + # Use structural-aware search so a literal '' + # inside a parameter value does not trigger a premature + # close. 
+ true_func_end = self._find_true_function_end(tool_text) + if not self.json_closed and true_func_end != -1: self.json_closed = True func_start = tool_text.find(self.tool_call_prefix) + len( self.tool_call_prefix ) - func_content_end = tool_text.find(self.function_end_token, func_start) + func_content_end = true_func_end if func_content_end != -1: func_content = tool_text[func_start:func_content_end] try: @@ -685,8 +984,67 @@ def extract_tool_calls_streaming( if content_message: content_message.tool_calls = [tool_call_fragments] - return content_message + result = content_message else: - return DeltaMessage(tool_calls=[tool_call_fragments]) + result = DeltaMessage(tool_calls=[tool_call_fragments]) + + # Speculative decoding can deliver multiple complete tool + # calls in a single delta. If we just finished one and + # another complete ... remains in + # current_text, advance and re-enter to emit it. We pass a + # non-empty `previous_text` sentinel so reset_streaming_state + # is NOT triggered inside the recursion (which would clear + # current_tool_index back to 0 and loop forever). + if ( + self.json_closed + and not self.in_function + and current_text.count(self.tool_call_end_token) + > self.current_tool_index + 1 + ): + # Manually advance to the next tool: this mirrors the + # "advance to next tool" block executed at the top of + # this method on the next delta arrival. 
+ search_idx = 0 + for _ in range(self.current_tool_index + 1): + search_idx = current_text.find( + self.tool_call_start_token, search_idx + ) + if search_idx == -1: + break + end_idx = current_text.find( + self.tool_call_end_token, search_idx + ) + if end_idx != -1: + self._sent_content_idx = max( + self._sent_content_idx, + end_idx + len(self.tool_call_end_token), + ) + search_idx += len(self.tool_call_start_token) + self.current_tool_index += 1 + self.header_sent = False + self.param_count = 0 + self.json_started = False + self.json_closed = False + self.accumulated_params = {} + self.is_tool_call_started = False + + # Recurse with a sentinel previous_text so the entry + # check `if not previous_text` does NOT reset the state. + next_delta = self.extract_tool_calls_streaming( + previous_text or " ", + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request, + ) + if next_delta is not None and next_delta.tool_calls: + if result.tool_calls is None: + result.tool_calls = [] + result.tool_calls.extend(next_delta.tool_calls) + if next_delta.content and not result.content: + result.content = next_delta.content + return result return content_message diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index f22f30288f98..20b361320375 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -53,6 +53,12 @@ def reset_streaming_state(self): """Reset streaming parsing state""" self.deltas = [] + # When True (delta-by-delta streaming), _process_complete_xml_elements + # holds off on when nothing follows in the buffer yet — + # that would be ambiguous since more tokens may still arrive. When + # False (full output passed at once), an empty lookahead is a + # genuine end. 
+ self._streaming_mode: bool = False # state for streaming self.tool_call_index = 0 self.current_call_id = None @@ -105,28 +111,24 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: if found_elements: # If complete elements found, check if end events were missed - # some tags may not have been triggered + # some tags may not have been triggered. Use structural-aware + # checks so that / appearing as literal + # text inside a parameter value (e.g. file content) does NOT + # trigger a spurious close that emits a duplicate '}' or ''. try: - new_deltas = self.deltas[initial_delta_count:] - # If this chunk contains - # but didn't generate '}', then complete it if ( self.current_call_id is not None - and self.function_end_token in xml_chunk + and self._chunk_has_structural_function_end(xml_chunk) and self.current_function_open ): - # Close potentially unclosed element if self.current_param_name: self._end_element("parameter") if self.current_function_name: self._end_element("function") - # If this chunk contains - # but didn't generate final empty delta, then complete it if ( self.current_call_id is not None - and self.tool_call_end_token in xml_chunk + and self._chunk_has_structural_tool_call_end(xml_chunk) ): - # Close potentially unclosed elements if self.current_param_name: self._end_element("parameter") if self.current_function_open: @@ -149,21 +151,23 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: self.text_content_buffer = "" return text_delta - # If this chunk contains end tags but wasn't triggered by parser, - # manually complete end events - # Only execute when still on the same call as when entered, - # to prevent accidentally closing new calls - # in multi scenarios + # If this chunk contains structural end tags but wasn't + # triggered by parser, manually complete end events. 
Only + # execute when still on the same call as when entered, to + # prevent accidentally closing new calls in multi- + # scenarios. if self.current_call_id is not None and ( - self.function_end_token in xml_chunk - or self.tool_call_end_token in xml_chunk + self._chunk_has_structural_function_end(xml_chunk) + or self._chunk_has_structural_tool_call_end(xml_chunk) ): - # Close potentially unclosed element if self.current_param_name: self._end_element("parameter") - if self.function_end_token in xml_chunk and self.current_function_name: + if ( + self._chunk_has_structural_function_end(xml_chunk) + and self.current_function_name + ): self._end_element("function") - if self.tool_call_end_token in xml_chunk: + if self._chunk_has_structural_tool_call_end(xml_chunk): self._end_element("tool_call") # Return the merged delta result generated by this fallback result_delta = self._merge_new_deltas_to_single_response( @@ -195,6 +199,120 @@ def _escape_xml_special_chars(self, text: str) -> str: return text + def _is_structural_tag_position(self) -> bool: + """Return True when the current element is at a structural position. + + A structural opening tag (e.g. ) must appear at the + beginning of a line in the raw output — i.e. the character + immediately before it in the streaming buffer is a newline (or it + is at position 0). Opening tags inside parameter content (e.g. + '""') are preceded by a non-newline character + such as a quote. + """ + if self.last_processed_pos == 0: + return True + return self.streaming_buffer[self.last_processed_pos - 1] == "\n" + + def _get_valid_param_names(self) -> set[str] | None: + """Return the set of parameter names defined in the schema for the + current function, or None when the schema is not available. + + Used to filter structural-looking tokens that + appear as literal text inside a parameter value (e.g. Jinja2 + templates, test fixtures, or files that document the tool-call + format). 
+ """ + if not self.tools or not self.current_function_name: + return None + props = find_tool_properties(self.tools, self.current_function_name) + return set(props.keys()) if props else None + + def _is_structural_closing_tag(self, chunk: str) -> bool: + """Return True when a closing tag at the current buffer position is + a real structural delimiter rather than literal text content. + + A closing tag is structural when the text that follows it in the + streaming buffer (after stripping leading whitespace) begins with + another structural token or is empty (end of buffered output). + + When the schema is available, a following is only + considered structural if NAME is a known parameter of the current + function. This prevents literal lines like ```` + in file content from being mistaken for real structural boundaries. + """ + after_pos = self.last_processed_pos + len(chunk) + rest = self.streaming_buffer[after_pos:].lstrip() + + structural_param_follows = False + if rest.startswith(self.parameter_start_token): + valid_names = self._get_valid_param_names() + if valid_names is not None: + name_start = len(self.parameter_start_token) + name_end = rest.find(">", name_start) + if name_end != -1: + structural_param_follows = ( + rest[name_start:name_end] in valid_names + ) + else: + structural_param_follows = True # fallback: trust all + + # Return True when rest is an incomplete prefix of a structural + # closing token (e.g. rest="" hasn't fully + # arrived yet). The empty-rest case is handled by the deferral in + # _process_complete_xml_elements; this guards against the + # partial-tag scenario where the deferral does not fire (rest is + # non-empty) but the token is still incomplete. 
+ is_partial_structural_prefix = any( + tok.startswith(rest) + for tok in ( + self.parameter_end_token, + self.function_end_token, + self.tool_call_end_token, + ) + ) + + return ( + not rest + or is_partial_structural_prefix + or structural_param_follows + or rest.startswith(self.parameter_end_token) + or rest.startswith(self.function_end_token) + or rest.startswith(self.tool_call_end_token) + ) + + def _chunk_has_structural_function_end(self, chunk: str) -> bool: + """Return True if `chunk` contains a structural tag. + + A structural is followed (after optional whitespace) + by or end-of-string — not inside parameter content + such as a file whose body contains ''. + """ + search = 0 + token = self.function_end_token + end_token = self.tool_call_end_token + while True: + idx = chunk.find(token, search) + if idx == -1: + return False + rest = chunk[idx + len(token):].lstrip() + if not rest or rest.startswith(end_token): + return True + search = idx + len(token) + + def _chunk_has_structural_tool_call_end(self, chunk: str) -> bool: + """Return True if `chunk` contains a structural tag.""" + search = 0 + token = self.tool_call_end_token + start_token = self.tool_call_start_token + while True: + idx = chunk.find(token, search) + if idx == -1: + return False + rest = chunk[idx + len(token):].lstrip() + if not rest or rest.startswith(start_token): + return True + search = idx + len(token) + def _process_complete_xml_elements(self) -> bool: """ Process complete XML elements in buffer @@ -211,6 +329,23 @@ def _process_complete_xml_elements(self) -> bool: # No complete element found, wait for more data break + # In streaming mode, hold off on when nothing + # follows in the buffer yet. We need the lookahead to + # distinguish a real structural close (followed by + # or a schema-known ) from + # literal text content that happens to be ```` on + # its own line (e.g. Jinja2 template files). 
When not in + # _pre_inside_parameter mode the SAX-level decision is made + # here; skip for now and re-evaluate on the next delta. + if ( + self._streaming_mode + and element == self.parameter_end_token + and self.current_param_name is not None + and not self._pre_inside_parameter + and not self.streaming_buffer[end_pos:].lstrip() + ): + break + # Check if this element should be skipped if self._should_skip_element(element): self.last_processed_pos = end_pos @@ -501,37 +636,27 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: if self._pre_current_param_name else "string" ) - # Only these types need deferred parsing to - # handle Python literals containing single quotes - is_object_type = param_type in ["object"] + # Container types always need deferred parsing so the + # full value is available for json.loads / + # ast.literal_eval — even when the first streaming + # token is just "\n". + is_object_type = param_type == "object" is_complex_type = ( param_type in ["array", "arr", "sequence"] or param_type.startswith("dict") or param_type.startswith("list") ) - - # Only delay when contains container symbols - # and has single quotes and is complex type - has_container_hint = ( - ("[" in original_chunk) - or ("{" in original_chunk) - or ("(" in original_chunk) + # Boolean also needs deferral: streaming "t" as the + # first char would otherwise be converted to False and + # emit "false", shadowing the real "true" that follows. 
+ is_bool_type = param_type in ["boolean", "bool", "binary"] + + need_defer = ( + is_complex_type + or is_object_type + or is_bool_type ) - # Determine if deferred parsing is needed - need_defer = False - if is_complex_type: - # Complex type, always need deferred parsing - need_defer = True - elif ( - is_object_type - and has_container_hint - and ("'" in original_chunk) - ): - # Object type with container symbols - # and single quotes, need deferred parsing - need_defer = True - if not need_defer: # No need for deferred parsing, # exit parameter mode directly @@ -540,6 +665,44 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: self._pre_param_buffer += original_chunk return "" + # When a parameter value is being streamed (SAX state says we are + # inside a ), structural-looking tokens that arrive as + # subsequent elements are literal text — e.g. a file whose content + # describes the tool-call format. Escape them unless they are + # genuine structural delimiters. + if self.current_param_name is not None: + if ( + chunk.startswith(self.tool_call_start_token) + or chunk.startswith(self.function_start_token) + ): + # Opening tool_call/function tags are always literal inside + # a parameter value. + return self._escape_xml_special_chars(chunk) + if chunk.startswith(self.parameter_start_token): + # A structural always follows a newline in + # the buffer. When a schema is available, also require + # NAME to be a known parameter of the current function so + # that literal ```` inside file + # content is treated as text. 
+ if not self._is_structural_tag_position(): + return self._escape_xml_special_chars(chunk) + valid_names = self._get_valid_param_names() + if valid_names is not None: + name_start = len(self.parameter_start_token) + name_end = chunk.find(">", name_start) + if ( + name_end != -1 + and chunk[name_start:name_end] not in valid_names + ): + return self._escape_xml_special_chars(chunk) + if ( + chunk.startswith(self.parameter_end_token) + or chunk.startswith(self.function_end_token) + or chunk.startswith(self.tool_call_end_token) + ): + if not self._is_structural_closing_tag(chunk): + return self._escape_xml_special_chars(chunk) + # Parameter start: enable accumulation if processed.startswith("', processed) @@ -785,21 +948,81 @@ def _end_element(self, name: str): if self.deferred_param_raw_value else param_value ) - parsed_value = None - output_arguments = None - try: - # If previously delayed trailing newline, - # add it back before parsing - if self.should_emit_end_newline: - raw_for_parse = raw_text + "\n" - else: - raw_for_parse = raw_text - parsed_value = ast.literal_eval(raw_for_parse) - output_arguments = json.dumps(parsed_value, ensure_ascii=False) - except Exception: - # Fallback: output as string as-is - output_arguments = json.dumps(raw_text, ensure_ascii=False) - parsed_value = raw_text + parsed_value: Any = None + output_arguments: str | None = None + if self.should_emit_end_newline: + raw_for_parse = raw_text + "\n" + else: + raw_for_parse = raw_text + raw_lower = raw_for_parse.strip().lower() + # Handle JSON literals that ast.literal_eval cannot parse + # (true/false/null are JSON, not Python). 
+ if raw_lower == "null": + parsed_value = None + output_arguments = "null" + elif raw_lower == "true": + parsed_value = True + output_arguments = "true" + elif raw_lower == "false": + parsed_value = False + output_arguments = "false" + else: + # Try JSON first: handles arrays/objects that use JSON + # native tokens (true, false, null) which + # ast.literal_eval cannot parse. + try: + parsed_value = json.loads(raw_for_parse) + # A model trained with a buggy template + # (json.dumps(str(dict))) may output a JSON-encoded + # Python repr like "\"{'k': 'v'}\"". json.loads + # returns a str in that case — try one more level. + if isinstance(parsed_value, str): + try: + parsed_value = ast.literal_eval(parsed_value) + except (ValueError, SyntaxError, TypeError): + try: + parsed_value = json.loads(parsed_value) + except (json.JSONDecodeError, ValueError): + pass + output_arguments = json.dumps( + parsed_value, ensure_ascii=False + ) + except (json.JSONDecodeError, ValueError): + try: + parsed_value = ast.literal_eval(raw_for_parse) + # A model trained with a buggy template + # (json.dumps(str(dict))) may output a + # JSON-encoded Python repr like "{'k': 'v'}". + # ast.literal_eval returns a str in that + # case — try one more level. 
+ if isinstance(parsed_value, str): + try: + parsed_value = ast.literal_eval( + parsed_value + ) + except ( + ValueError, + SyntaxError, + TypeError, + ): + try: + parsed_value = json.loads( + parsed_value + ) + except ( + json.JSONDecodeError, + ValueError, + ): + pass + output_arguments = json.dumps( + parsed_value, ensure_ascii=False + ) + except (ValueError, SyntaxError, TypeError): + # Fallback: output as string as-is + output_arguments = json.dumps( + raw_text, ensure_ascii=False + ) + parsed_value = raw_text delta = DeltaMessage( tool_calls=[ @@ -979,12 +1202,14 @@ def _get_param_type(self, param_name: str) -> str: prop = properties[param_name] param_type = prop.get("type") if param_type is None and "anyOf" in prop: - # Handle anyOf schemas (common in Qwen 3.6) + # Handle anyOf schemas (e.g. nullable types like + # anyOf: [{type: "integer"}, {type: "null"}]). + # Pick the first non-null type; fall back to "string". for option in prop["anyOf"]: if isinstance(option, dict) and "type" in option: opt_type = str(option["type"]) - if opt_type in ["object", "array", "arr", "sequence"]: - return opt_type + if opt_type != "null": + return self.repair_param_type(opt_type) return "string" return self.repair_param_type(str(param_type or "string")) @@ -1027,13 +1252,15 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: Returns: Converted value """ - if param_value.lower() == "null": - return None - param_type = param_type.strip().lower() + # String type takes precedence: the literal value "null" must remain + # the string "null" instead of being converted to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: return param_value - elif ( + # Non-string: "null" → Python None → JSON null. 
+ if param_value.lower() == "null": + return None + if ( param_type.startswith("int") or param_type.startswith("uint") or param_type.startswith("long") @@ -1044,11 +1271,11 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: return int(param_value) except (ValueError, TypeError): logger.warning( - "Parsed value '%s' of parameter '%s' is not an integer " - "in tool '%s', degenerating to string.", + "Parsed value '%s' is not an integer, " + "degenerating to string.", param_value, ) - return param_value + return param_value elif param_type.startswith("num") or param_type.startswith("float"): try: float_param_value: float = float(param_value) @@ -1059,14 +1286,13 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: ) except (ValueError, TypeError): logger.warning( - "Parsed value '%s' of parameter '%s' is not a float " - "in tool '%s', degenerating to string.", + "Parsed value '%s' is not a float, " + "degenerating to string.", param_value, ) - return param_value + return param_value elif param_type in ["boolean", "bool", "binary"]: - param_value = param_value.lower() - return param_value == "true" + return param_value.lower() == "true" else: return param_value @@ -1080,9 +1306,12 @@ def _convert_for_json_streaming(self, converted_value: Any, param_type: str) -> Returns: Converted string for streaming output """ - # Check if value is empty, but exclude numeric 0 - if converted_value is None or converted_value == "": + # Empty string: no output. + if converted_value == "": return "" + # None → JSON null literal (e.g. for nullable integer/object params). 
+ if converted_value is None: + return "null" if param_type in ["string", "str", "text", "varchar", "char", "enum"]: # String type, remove double quotes @@ -1218,6 +1447,7 @@ def extract_tool_calls_streaming( ) -> DeltaMessage | None: if not previous_text: self.parser.reset_streaming_state() + self.parser._streaming_mode = True # Reset tool call tracking arrays for new streaming session self.prev_tool_call_arr = [] self.streamed_args_for_tool = [] From 249120528de68932982c9bf807306831986a5846 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 06:15:29 +0200 Subject: [PATCH 13/21] test: factor shared Qwen3 XML/Coder regression tests into one file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the XML and Coder tool parsers were tested against nearly identical regression scenarios in their respective files (string "null" preservation, anyOf nullable schemas, double-encoded objects, content with literal XML structural tags, content with param-like lines, etc.). Split the shared expectations into a single file with a parametrized parser fixture so that: * the same intent is tested against BOTH parsers automatically; * divergent behaviour is caught immediately instead of drifting; * parser-specific quirks (XML SAX double-close brace, char-by-char boolean streaming, Coder speculative-decoding chunk loss, etc.) stay in their parser-specific test file. New: tests/tool_parsers/test_qwen3_xml_coder_shared.py exposes a ``parser_cls`` fixture parametrized over Qwen3XMLToolParser and Qwen3CoderToolParser. Each shared test runs twice and prints ``[xml]``/``[coder]`` in the test id. 
Removed duplicates from: * tests/tool_parsers/test_qwen3xml_tool_parser.py: anyOf object param (streaming + non-streaming), string null preservation, anyOf integer/null type detection, content with structural tags (streaming + non-streaming), content with param-like lines (streaming + non-streaming), double-encoded object (streaming + non-streaming). * tests/tool_parsers/test_qwen3coder_tool_parser.py: anyOf parameter not double encoded, string null preservation, anyOf string/null numeric value, content with XML structural tags (streaming + non-streaming), content with param-like lines (streaming + non-streaming), double-encoded object (streaming + non-streaming), content param with tool_call tag (streaming + non-streaming — redundant with content_with_xml_structural_tags). Removed: tests/tool_parsers/test_qwen36_bugs.py. Its two scenarios (XML array containing JSON ``true``, Coder two complete tool calls in a single streaming delta) are now in the shared file as ``test_array_with_json_bool`` and ``test_two_tool_calls_in_one_streaming_chunk``, both running against both parsers. Net effect: 209 -> 183 tests, 0 failures, identical coverage. 
Signed-off-by: CNE Pierre FICHEPOIL --- tests/tool_parsers/test_qwen36_bugs.py | 189 ------ .../test_qwen3_xml_coder_shared.py | 606 ++++++++++++++++++ .../test_qwen3coder_tool_parser.py | 430 ------------- .../tool_parsers/test_qwen3xml_tool_parser.py | 496 +------------- 4 files changed, 607 insertions(+), 1114 deletions(-) delete mode 100644 tests/tool_parsers/test_qwen36_bugs.py create mode 100644 tests/tool_parsers/test_qwen3_xml_coder_shared.py diff --git a/tests/tool_parsers/test_qwen36_bugs.py b/tests/tool_parsers/test_qwen36_bugs.py deleted file mode 100644 index 65671e9fafbe..000000000000 --- a/tests/tool_parsers/test_qwen36_bugs.py +++ /dev/null @@ -1,189 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -""" -Bug-confirmation tests for the merged Qwen 3.5 parser changes. - -Each test is a minimal reproducer of a real issue; they are meant to FAIL -until the corresponding bug is fixed. Each scenario is also contrasted -against the Coder parser (for XML bugs) or the XML parser (for Coder bugs) -when one of the two already behaves correctly, which helps narrow down -where the fix belongs. - -Run with: - .venv/bin/python -m pytest tests/tool_parsers/test_qwen36_bugs.py -v -""" -import json - -import pytest - -from vllm.entrypoints.openai.chat_completion.protocol import ( - ChatCompletionRequest, - ChatCompletionToolsParam, -) -from vllm.tokenizers import get_tokenizer -from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser -from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser - -MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" - - -@pytest.fixture(scope="module") -def qwen3_tokenizer(): - return get_tokenizer(tokenizer_name=MODEL) - - -def _stream(parser, chunks, request): - """Feed pre-shaped string chunks and collect emitted tool-call pieces. - - Returns (content_str, tool_calls_dict_by_index). 
- """ - prev_text = "" - prev_ids: list[int] = [] - content_out = "" - events: list[tuple] = [] - for chunk in chunks: - cur_text = prev_text + chunk - # Approximate: tokenize incrementally. - dt_ids = parser.model_tokenizer.encode(chunk, add_special_tokens=False) - cur_ids = prev_ids + dt_ids - msg = parser.extract_tool_calls_streaming( - prev_text, cur_text, chunk, prev_ids, cur_ids, dt_ids, request - ) - if msg is not None: - if msg.content: - content_out += msg.content - if msg.tool_calls: - for tc in msg.tool_calls: - events.append(( - tc.index, - tc.function.name if tc.function else None, - tc.function.arguments if tc.function else None, - )) - prev_text, prev_ids = cur_text, cur_ids - tcs: dict[int, dict] = {} - for idx, name, args in events: - tcs.setdefault(idx, {"name": name, "args": ""}) - if args: - tcs[idx]["args"] += args - return content_out, tcs - - -# --------------------------------------------------------------------------- -# BUG 1: XML parser -- array parameter containing JSON true/false/null is -# emitted as a JSON string instead of being parsed as a JSON array. -# -# Root cause: in _end_element the deferred parser calls ast.literal_eval on -# the raw text. ast.literal_eval does NOT understand JSON tokens `true`, -# `false`, `null` (Python uses True/False/None), so it raises and the fallback -# path emits the raw string wrapped with json.dumps. -# -# The Coder parser uses json.loads first, so it gets this scenario right -- -# the test contrasts the two parsers to prove the bug is XML-specific. 
-# --------------------------------------------------------------------------- - -_ARRAY_TOOLS = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "pick", - "parameters": { - "type": "object", - "properties": {"items": {"type": "array"}}, - }, - }, - ) -] - -_ARRAY_WITH_JSON_BOOL_OUTPUT = ( - "\n\n" - '\n["a", "b", 1, true]\n\n' - "\n" -) - - -def test_xml_array_with_json_bool_nonstreaming(qwen3_tokenizer): - """XML non-streaming: array containing `true` must be parsed as a list.""" - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_ARRAY_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS) - result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request) - - assert result.tools_called - args = json.loads(result.tool_calls[0].function.arguments) - assert isinstance(args["items"], list), ( - f"XML parser emitted items as {type(args['items']).__name__} " - f"({args['items']!r}). ast.literal_eval cannot parse JSON `true` and " - "the exception fallback wraps the raw string with json.dumps. " - "Use json.loads first (see the Coder parser)." - ) - assert args["items"] == ["a", "b", 1, True] - - -def test_coder_array_with_json_bool_nonstreaming(qwen3_tokenizer): - """Contrast: Coder parser handles the same input correctly.""" - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_ARRAY_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS) - result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request) - - assert result.tools_called - args = json.loads(result.tool_calls[0].function.arguments) - assert args["items"] == ["a", "b", 1, True] - - -# --------------------------------------------------------------------------- -# BUG 2: Coder parser -- when two complete ... -# blocks arrive in a SINGLE streaming delta (typical for speculative -# decoding), only the first tool call is emitted, the second is dropped. 
-# -# Root cause: extract_tool_calls_streaming advances current_tool_index by -# one per delta. When a delta flushes two complete tool calls the parser -# processes call #0, sees tool_ends > current_tool_index, advances to #1, -# and returns None without re-processing the same delta. The XML parser -# processes all complete elements in a loop and does not drop the second. -# --------------------------------------------------------------------------- - -_WEATHER_TOOLS = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "get_weather", - "parameters": { - "type": "object", - "properties": {"city": {"type": "string"}}, - }, - }, - ) -] - -_TWO_TOOL_CALLS_IN_ONE_CHUNK = ( - "\n\n\nParis\n\n" - "\n\n" - "\n\n\nLondon\n\n" - "\n" -) - - -def test_coder_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer): - """Coder streaming: a single delta that contains TWO complete tool calls - must emit both, not just the first.""" - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WEATHER_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) - _, tcs = _stream(parser, [_TWO_TOOL_CALLS_IN_ONE_CHUNK], request) - assert len(tcs) == 2, ( - f"Expected 2 tool calls, got {len(tcs)}. " - "The Coder parser drops the second tool call when both complete in " - "the same delta (speculative decoding scenario)." 
- ) - args0 = json.loads(tcs[0]["args"]) - args1 = json.loads(tcs[1]["args"]) - assert args0 == {"city": "Paris"} - assert args1 == {"city": "London"} - - -def test_xml_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer): - """Contrast: XML parser already handles this case correctly.""" - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WEATHER_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) - _, tcs = _stream(parser, [_TWO_TOOL_CALLS_IN_ONE_CHUNK], request) - assert len(tcs) == 2 - assert json.loads(tcs[0]["args"]) == {"city": "Paris"} - assert json.loads(tcs[1]["args"]) == {"city": "London"} diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py new file mode 100644 index 000000000000..76b71e5eaa75 --- /dev/null +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -0,0 +1,606 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Shared regression tests for the Qwen3 XML and Coder tool parsers. + +These tests cover behaviour that BOTH parsers must implement identically. +Each test runs twice — once against ``Qwen3XMLToolParser`` and once against +``Qwen3CoderToolParser`` — via the ``parser_cls`` fixture. Tests that +target streaming-mode-specific quirks of one parser only stay in their +parser-specific file (``test_qwen3xml_tool_parser.py`` or +``test_qwen3coder_tool_parser.py``). 
+""" +import json + +import pytest + +from tests.tool_parsers.utils import run_tool_extraction_streaming +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) +from vllm.tokenizers import get_tokenizer +from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser +from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser + +MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" + + +@pytest.fixture(scope="module") +def qwen3_tokenizer(): + return get_tokenizer(tokenizer_name=MODEL) + + +@pytest.fixture( + params=[Qwen3XMLToolParser, Qwen3CoderToolParser], + ids=["xml", "coder"], +) +def parser_cls(request): + return request.param + + +# --------------------------------------------------------------------------- +# Value conversion: string "null" must NOT become JSON null +# --------------------------------------------------------------------------- + + +def test_string_null_value_preserved(qwen3_tokenizer, parser_cls): + """A string-typed parameter with literal value "null" must be preserved + as the string "null" (not converted to Python None / JSON null). + + Root cause: _convert_param_value must check the schema's ``string`` + type BEFORE the "null" shortcut — otherwise any param whose raw text + is "null" becomes None regardless of declared type. + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "search", + "parameters": { + "type": "object", + "properties": {"query": {"type": "string"}}, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "null\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["query"] == "null", ( + f"String parameter 'null' was converted incorrectly. 
" + f"Got: {args.get('query')!r}" + ) + + +# --------------------------------------------------------------------------- +# anyOf nullable schema — type detection +# --------------------------------------------------------------------------- + + +def test_anyof_string_null_keeps_value_as_string(qwen3_tokenizer, parser_cls): + """anyOf [{type: string}, {type: null}] with a numeric-looking value + must keep the value as a string (the schema declares ``string``). + + Root cause: anyOf was previously treated as ``object`` (for the Coder + parser) or fell back to ``string`` only when no object/array option + was present (for the XML parser). The correct behaviour is to pick + the FIRST non-null type from the anyOf list. + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_code", + "parameters": { + "type": "object", + "properties": { + "code": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "42\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["code"] == "42", ( + f"anyOf string|null param '42' was parsed as " + f"{type(args['code']).__name__}: {args['code']!r}" + ) + + +def test_anyof_integer_null_parses_as_int(qwen3_tokenizer, parser_cls): + """anyOf [{type: integer}, {type: null}] must parse a numeric value as + an int. Previously the XML parser ignored anyOf for non-container + types and silently treated the param as ``string``. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_count", + "parameters": { + "type": "object", + "properties": { + "count": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "42\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["count"] == 42, ( + f"anyOf integer|null: expected int 42, got {args['count']!r}" + ) + + +# --------------------------------------------------------------------------- +# anyOf object schema — value not double-encoded +# --------------------------------------------------------------------------- + +_ANYOF_OBJECT_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "update_record", + "parameters": { + "type": "object", + "properties": { + "data": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + }, + }, + }, + ) +] + +_ANYOF_OBJECT_OUTPUT = ( + "\n" + "\n" + '{"key": "value", "count": 42}\n' + "\n" + "" +) + + +def test_anyof_object_param_not_double_encoded_nonstreaming( + qwen3_tokenizer, parser_cls +): + parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_OBJECT_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS + ) + result = parser.extract_tool_calls(_ANYOF_OBJECT_OUTPUT, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert isinstance(args["data"], dict), ( + f"anyOf object param was double-encoded: data={args['data']!r}" + ) + assert args["data"] == {"key": "value", "count": 42} + + +def test_anyof_object_param_not_double_encoded_streaming( + qwen3_tokenizer, parser_cls +): + parser = parser_cls(qwen3_tokenizer, 
tools=_ANYOF_OBJECT_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS + ) + deltas = [ + "", + "\n", + '\n{"key": "value", "count": 42}', + "\n", + "\n", + ] + reconstructor = run_tool_extraction_streaming( + parser, deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert isinstance(args["data"], dict), ( + f"anyOf object param was double-encoded in streaming: " + f"data={args['data']!r}" + ) + + +# --------------------------------------------------------------------------- +# Object param double-encoded as JSON-encoded Python repr +# --------------------------------------------------------------------------- + +_DOUBLE_ENCODED_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "process", + "parameters": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "data": {"type": "object"}, + }, + }, + }, + ) +] + +_DOUBLE_ENCODED_OUTPUT = ( + "\n" + "\n" + "\nhello\n\n" + "\n\"{'key': 'value', 'n': 1}\"\n\n" + "\n" + "\n" +) + + +def test_double_encoded_object_param_nonstreaming(qwen3_tokenizer, parser_cls): + """A model trained with a buggy template (json.dumps(str(dict))) emits + object args as a JSON-encoded Python repr string. The parser must + double-decode it back to a dict. 
+ """ + parser = parser_cls(qwen3_tokenizer, tools=_DOUBLE_ENCODED_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_DOUBLE_ENCODED_TOOLS + ) + result = parser.extract_tool_calls(_DOUBLE_ENCODED_OUTPUT, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} + + +def test_double_encoded_object_param_streaming(qwen3_tokenizer, parser_cls): + parser = parser_cls(qwen3_tokenizer, tools=_DOUBLE_ENCODED_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_DOUBLE_ENCODED_TOOLS + ) + reconstructor = run_tool_extraction_streaming( + parser, _DOUBLE_ENCODED_OUTPUT, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["name"] == "hello" + assert isinstance(args["data"], dict), ( + f"Expected dict, got {type(args['data'])}: {args['data']!r}" + ) + assert args["data"] == {"key": "value", "n": 1} + + +# --------------------------------------------------------------------------- +# Parameter value containing XML structural tags as literal text. +# Expected: the value is preserved intact, no spurious extra parameters +# are created from the embedded tags. +# --------------------------------------------------------------------------- + +_WRITE_FILE_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) +] + +# Content with all four structural tags as literal strings (a Python file +# that documents the tool-call format). 
+_XML_TAGS_IN_CONTENT = ( + 'char_deltas = [\n' + ' "\\n",\n' + ' "\\n",\n' + ' "\\n\\n",\n' + ' "\\n",\n' + ']\n' +) + +_WRITE_FILE_XML_TAGS_OUTPUT = ( + "\n" + "\n" + "\ntest.py\n\n" + f"\n{_XML_TAGS_IN_CONTENT}\n" + "\n" + "\n" +) + + +def test_content_with_xml_structural_tags_nonstreaming( + qwen3_tokenizer, parser_cls +): + """Non-streaming: a string param whose value embeds , + , , as literal text must be + extracted intact, with no spurious extra params being created from + the embedded tags. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + result = parser.extract_tool_calls( + _WRITE_FILE_XML_TAGS_OUTPUT, request=request + ) + + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "write_file" + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params from embedded tags: {list(args.keys())}" + ) + assert args["path"] == "test.py" + expected = _XML_TAGS_IN_CONTENT.rstrip("\n") + assert args["content"] == expected, ( + f"content was truncated/corrupted. 
Got: {args.get('content')!r}" + ) + + +def test_content_with_xml_structural_tags_streaming( + qwen3_tokenizer, parser_cls +): + """Streaming variant: pre-formed chunks, full content in one delta.""" + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + char_deltas = [ + "\n", + "\n", + "\ntest.py\n\n", + f"\n{_XML_TAGS_IN_CONTENT}\n", + "\n", + "\n", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + assert reconstructor.tool_calls[0].function.name == "write_file" + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params from embedded tags: {list(args.keys())}" + ) + assert args["path"] == "test.py" + expected = _XML_TAGS_IN_CONTENT.rstrip("\n") + assert args["content"] == expected + + +# --------------------------------------------------------------------------- +# Parameter value containing and on their +# OWN lines (Jinja2 templates, parser fixtures, etc.). Schema filtering +# must prevent the unknown name from being treated as structural. +# --------------------------------------------------------------------------- + +_CONTENT_WITH_PARAM_LIKE_LINES = ( + 'TOOL_CALL_TEMPLATE = """\n' + "\n" + "\n" + "#!/usr/bin/env python3\n" + "\n" + '"""\n' +) + +_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT = ( + "\n" + "\n" + "\ntest_template.py\n\n" + f"\n{_CONTENT_WITH_PARAM_LIKE_LINES}\n" + "\n" + "\n" +) + + +def test_content_with_param_like_lines_nonstreaming( + qwen3_tokenizer, parser_cls +): + """Non-streaming: ```` and ```` on their + own lines inside a string value must not terminate the parameter + early. Requires schema-based filtering so that ``new_string`` (not a + real parameter of write_file) is treated as literal text. 
+ """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + result = parser.extract_tool_calls( + _WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, request=request + ) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params: {list(args.keys())}" + ) + assert args["path"] == "test_template.py" + expected = _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n") + assert args["content"] == expected, ( + f"content truncated/wrong: {args.get('content')!r}" + ) + + +def test_content_with_param_like_lines_streaming(qwen3_tokenizer, parser_cls): + """Streaming variant: each structural-looking literal line arrives in + its own delta — the critical case is when ``\\n`` appears + alone with empty lookahead, which must NOT be treated as a real + structural close. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + char_deltas = [ + "\n", + "\n", + "\ntest_template.py\n\n", + '\nTOOL_CALL_TEMPLATE = """\n', + "\n", # literal close — alone in its delta + "\n", # literal new-param line + "#!/usr/bin/env python3\n", + "\n", # second literal close + '"""\n', + "\n", # REAL close of content + "\n", + "\n", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params: {list(args.keys())}" + ) + assert args["path"] == "test_template.py" + expected = _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n") + assert args["content"] == expected + + +# --------------------------------------------------------------------------- +# Array param containing JSON 
true/false/null +# --------------------------------------------------------------------------- + +_ARRAY_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "pick", + "parameters": { + "type": "object", + "properties": {"items": {"type": "array"}}, + }, + }, + ) +] + +_ARRAY_WITH_JSON_BOOL_OUTPUT = ( + "\n\n" + '\n["a", "b", 1, true]\n\n' + "\n" +) + + +def test_array_with_json_bool(qwen3_tokenizer, parser_cls): + """An array param containing a JSON literal (``true``/``false``/``null``) + must be parsed as a real Python list, not wrapped as a string. + + Root cause for the XML parser: the deferred path used + ``ast.literal_eval`` first, which doesn't understand JSON tokens. + Both parsers must try ``json.loads`` before falling back to + ``ast.literal_eval``. + """ + parser = parser_cls(qwen3_tokenizer, tools=_ARRAY_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_ARRAY_TOOLS + ) + result = parser.extract_tool_calls( + _ARRAY_WITH_JSON_BOOL_OUTPUT, request=request + ) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert isinstance(args["items"], list), ( + f"Array with JSON bool was not parsed as list: " + f"{type(args['items']).__name__} = {args['items']!r}" + ) + assert args["items"] == ["a", "b", 1, True] + + +# --------------------------------------------------------------------------- +# Speculative decoding: two complete tool calls in a single streaming delta. +# Both parsers must emit both tool calls, not drop the second. 
+# --------------------------------------------------------------------------- + +_WEATHER_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + }, + }, + ) +] + +_TWO_TOOL_CALLS_IN_ONE_CHUNK = ( + "\n\n" + "\nParis\n\n" + "\n\n" + "\n\n" + "\nLondon\n\n" + "\n" +) + + +def test_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer, parser_cls): + """Speculative decoding flushes can deliver several full + ``...`` blocks in a single delta. Both must be + emitted; dropping the second one is a regression. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WEATHER_TOOLS + ) + reconstructor = run_tool_extraction_streaming( + parser, + [_TWO_TOOL_CALLS_IN_ONE_CHUNK], + request, + assert_one_tool_per_delta=False, + ) + assert len(reconstructor.tool_calls) == 2, ( + f"Expected 2 tool calls in one delta, got " + f"{len(reconstructor.tool_calls)}" + ) + args0 = json.loads(reconstructor.tool_calls[0].function.arguments) + args1 = json.loads(reconstructor.tool_calls[1].function.arguments) + assert args0 == {"city": "Paris"} + assert args1 == {"city": "London"} diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 32d5a238914a..3f982acacac3 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1037,45 +1037,6 @@ def test_none_tool_calls_filtered(qwen3_tool_parser): assert args["state"] == "TX" -def test_anyof_parameter_not_double_encoded(qwen3_tokenizer): - """Regression: anyOf parameters must not be double-encoded (PR #36032).""" - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "update_record", - "parameters": { - "type": "object", - "properties": { - "data": { - "anyOf": [{"type": "object"}, {"type": 
"null"}], - }, - }, - }, - }, - ) - ] - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) - - model_output = ( - "\n" - "\n" - '{"key": "value", "count": 42}\n' - "\n" - "" - ) - - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - assert isinstance(args["data"], dict) - assert args["data"] == {"key": "value", "count": 42} - - def test_streaming_multi_param_single_chunk(qwen3_tool_parser, qwen3_tokenizer): """Regression: speculative decode delivering multiple params at once (PR #35615).""" request = ChatCompletionRequest(model=MODEL, messages=[]) @@ -1297,394 +1258,3 @@ def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): assert args["example_parameter_2"] == "This is the value for the second parameter\nthat can span\nmultiple lines" -def test_coder_string_null_value_not_converted_to_none(qwen3_tokenizer): - """Regression: string param with literal value 'null' must not become JSON null. - - The null-before-type-check in _convert_param_value returns Python None for - ANY parameter whose raw text is 'null', even when the schema says 'string'. - That turns {"param": "null"} into {"param": null}, which is wrong. 
- """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "set_value", - "parameters": { - "type": "object", - "properties": { - "key": {"type": "string"}, - }, - }, - }, - ) - ] - - model_output = ( - "\n" - "\n" - "null\n" - "\n" - "" - ) - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - args = json.loads(result.tool_calls[0].function.arguments) - # The value is the string "null", NOT JSON null - assert args["key"] == "null", ( - f"String param 'null' was converted to JSON null. Got: {args['key']!r}" - ) - - -def test_coder_anyof_string_null_numeric_value_stays_string(qwen3_tokenizer): - """Regression: anyOf with string+null must keep numeric-looking values as strings. - - When anyOf is treated as 'object', json.loads('42') returns int 42 even - though the schema declares the type as 'string'. The correct behaviour is - to use the first non-null type from anyOf; for string, the raw text is - returned unchanged. 
- """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "set_code", - "parameters": { - "type": "object", - "properties": { - "code": { - "anyOf": [{"type": "string"}, {"type": "null"}], - }, - }, - }, - }, - ) - ] - - model_output = ( - "\n" - "\n" - "42\n" - "\n" - "" - ) - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - args = json.loads(result.tool_calls[0].function.arguments) - # "42" is a string in the schema — must NOT become integer 42 - assert args["code"] == "42", ( - f"anyOf string param '42' was parsed as {type(args['code']).__name__}: {args['code']!r}" - ) - assert isinstance(args["code"], str) - - -_WRITE_FILE_TOOLS = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "write_file", - "parameters": { - "type": "object", - "properties": { - "path": {"type": "string"}, - "content": {"type": "string"}, - }, - }, - }, - ) -] - -# Tool with an object-type parameter to test double-encoded values. -_OBJECT_PARAM_TOOLS = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "process", - "parameters": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "data": {"type": "object"}, - }, - }, - }, - ) -] - -# Model output as produced by a template with json.dumps(str(value)) bug: -# the dict argument is rendered as a JSON-encoded Python repr string. 
-_DOUBLE_ENCODED_OBJECT_OUTPUT = ( - "\n" - "\n" - "\nhello\n\n" - "\n\"{'key': 'value', 'n': 1}\"\n\n" - "\n" - "\n" -) - -# File content that contains and \\n\\n"""' -) - -_WRITE_FILE_OUTPUT = ( - "\n" - "\n" - "\ntest.py\n\n" - f"\n{_FILE_CONTENT_WITH_TOOL_CALL_TAG}\n\n" - "\n" - "\n" # trailing newline ensures a delta arrives after -) - - -def test_nonstreaming_content_param_with_tool_call_tag(qwen3_tokenizer): - """Non-streaming: literal inside a string param must not split it. - - When writing a file whose content contains '' as plain text, - extract_tool_calls must still produce exactly one tool call with the - correct path and full content. - """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - result = parser.extract_tool_calls(_WRITE_FILE_OUTPUT, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "write_file" - args = json.loads(result.tool_calls[0].function.arguments) - assert args["path"] == "test.py" - assert args["content"] == _FILE_CONTENT_WITH_TOOL_CALL_TAG - - -def test_streaming_content_param_with_tool_call_tag(qwen3_tokenizer): - """Streaming: literal inside a string param must not be mistaken - for a second tool call. - - The streaming parser counted ALL occurrences in current_text, - including those inside parameter values. After completing the first tool - call it would set is_tool_call_started=True again and attempt to process - the embedded as a second invocation — producing garbage or an - extra spurious tool call. 
- """ - - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - - reconstructor = run_tool_extraction_streaming( - parser, - _WRITE_FILE_OUTPUT, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1, ( - f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " - f"{[tc.function.name for tc in reconstructor.tool_calls]}" - ) - assert reconstructor.tool_calls[0].function.name == "write_file" - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert args["path"] == "test.py", f"path wrong: {args.get('path')!r}" - assert args["content"] == _FILE_CONTENT_WITH_TOOL_CALL_TAG, ( - f"content wrong: {args.get('content')!r}" - ) - - -# Python content containing ALL XML structural tags as literal strings. -# This is the hardest case: the parameter value looks like it could end at -# any of the embedded closing tags. -_XML_TAGS_IN_CONTENT = ( - 'char_deltas = [\n' - ' "\\n",\n' - ' "\\n",\n' - ' "\\n\\n",\n' - ' "\\n",\n' - ']\n' -) - -_WRITE_FILE_XML_TAGS_OUTPUT = ( - "\n" - "\n" - "\ntest.py\n\n" - f"\n{_XML_TAGS_IN_CONTENT}\n" - "\n" - "\n" -) - - -def test_nonstreaming_content_with_xml_structural_tags(qwen3_tokenizer): - """Non-streaming: parameter value containing , , - as literal text must be extracted intact without spurious - extra parameters being created. 
- """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "write_file" - args = json.loads(result.tool_calls[0].function.arguments) - assert list(args.keys()) == ["file_path", "content"], ( - f"Unexpected keys (spurious params?): {list(args.keys())}" - ) - assert args["file_path"] == "test.py" - assert args["content"] == _XML_TAGS_IN_CONTENT.rstrip("\n"), ( - f"content wrong: {args.get('content')!r}" - ) - - -def test_streaming_content_with_xml_structural_tags(qwen3_tokenizer): - """Streaming: parameter value containing , , - as literal text must not terminate the parameter early and - must not create spurious extra parameters. - """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - - reconstructor = run_tool_extraction_streaming( - parser, - _WRITE_FILE_XML_TAGS_OUTPUT, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1, ( - f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " - f"{[tc.function.name for tc in reconstructor.tool_calls]}" - ) - assert reconstructor.tool_calls[0].function.name == "write_file" - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert list(args.keys()) == ["file_path", "content"], ( - f"Unexpected keys (spurious params?): {list(args.keys())}" - ) - assert args["file_path"] == "test.py" - assert args["content"] == _XML_TAGS_IN_CONTENT.rstrip("\n"), ( - f"content wrong: {args.get('content')!r}" - ) - - -# File content that contains and on their OWN -# LINES (preceded by \n). 
This occurs when writing a Jinja2 template, a test -# fixture for the parser itself, or any file that documents the tool-call -# format. "new_string" is intentionally NOT a parameter of write_file, so the -# schema filter must prevent it from being treated as a structural boundary. -_CONTENT_WITH_PARAM_LIKE_LINES = ( - 'TOOL_CALL_TEMPLATE = """\n' - "\n" - "\n" - "#!/usr/bin/env python3\n" - "\n" - '"""\n' -) - -_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT = ( - "\n" - "\n" - "\ntest_template.py\n\n" - f"\n{_CONTENT_WITH_PARAM_LIKE_LINES}\n" - "\n" - "\n" -) - - -def test_nonstreaming_content_with_param_like_lines(qwen3_tokenizer): - """Non-streaming: file content containing and - on their own lines must not be truncated at the first or - create spurious extra parameters. Requires schema-based filtering so that - "new_string" (not a real parameter of write_file) is ignored. - """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - result = parser.extract_tool_calls(_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "write_file" - args = json.loads(result.tool_calls[0].function.arguments) - assert list(args.keys()) == ["path", "content"], ( - f"Spurious parameters created: {list(args.keys())}" - ) - assert args["path"] == "test_template.py" - assert args["content"] == _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n"), ( - f"content truncated or wrong: {args.get('content')!r}" - ) - - -def test_streaming_content_with_param_like_lines(qwen3_tokenizer): - """Streaming: file content containing and on - their own lines must not emit spurious extra tool calls or parameters. 
- """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) - - reconstructor = run_tool_extraction_streaming( - parser, - _WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1, ( - f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " - f"{[tc.function.name for tc in reconstructor.tool_calls]}" - ) - assert reconstructor.tool_calls[0].function.name == "write_file" - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert list(args.keys()) == ["path", "content"], ( - f"Spurious parameters created: {list(args.keys())}" - ) - assert args["path"] == "test_template.py" - assert args["content"] == _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n"), ( - f"content truncated or wrong: {args.get('content')!r}" - ) - - -def test_nonstreaming_double_encoded_object_param(qwen3_tokenizer): - """Non-streaming: a model trained with a buggy template (json.dumps(str(dict))) - outputs object args as a JSON-encoded Python repr string like \"{'k': 'v'}\". - The parser must double-decode it back to a real dict. 
- """ - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS - ) - result = parser.extract_tool_calls(_DOUBLE_ENCODED_OBJECT_OUTPUT, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - assert args["name"] == "hello" - assert isinstance(args["data"], dict), ( - f"Expected dict, got {type(args['data'])}: {args['data']!r}" - ) - assert args["data"] == {"key": "value", "n": 1} - - -def test_streaming_double_encoded_object_param(qwen3_tokenizer): - """Streaming: same double-encoded object parameter scenario.""" - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS - ) - reconstructor = run_tool_extraction_streaming( - parser, - _DOUBLE_ENCODED_OBJECT_OUTPUT, - request, - assert_one_tool_per_delta=False, - ) - assert len(reconstructor.tool_calls) == 1 - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert args["name"] == "hello" - assert isinstance(args["data"], dict), ( - f"Expected dict, got {type(args['data'])}: {args['data']!r}" - ) - assert args["data"] == {"key": "value", "n": 1} diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 03b2d879c16f..f7977218b4f1 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -172,93 +172,6 @@ def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): assert "I hope this helps!" in all_content, "Free text after the last tool call should be emitted." 
-def test_qwen36_anyof_parameter_xml_not_double_encoded(qwen3_tokenizer): - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "update_record", - "parameters": { - "type": "object", - "properties": { - # anyOf schema — no top-level "type" key - "data": { - "anyOf": [{"type": "object"}, {"type": "null"}], - }, - }, - }, - }, - ) - ] - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - model_output = ( - "\n" - "\n" - '{"key": "value", "count": 42}\n' - "\n" - "" - ) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - - assert isinstance(args["data"], dict), ( - f"anyOf parameter was double-encoded: data={args['data']!r}. " - "StreamingXMLToolCallParser._get_param_type ignores anyOf schemas." - ) - assert args["data"] == {"key": "value", "count": 42} - - -def test_qwen36_anyof_parameter_xml_streaming_not_double_encoded(qwen3_tokenizer): - - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "update_record", - "parameters": { - "type": "object", - "properties": { - "data": { - "anyOf": [{"type": "object"}, {"type": "null"}], - }, - }, - }, - }, - ) - ] - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - - # Deltas are pre-formed XML element chunks (one element per delta), - # which is the same pattern used by speculative decoding. 
- deltas = [ - "", - "\n", - '\n{"key": "value", "count": 42}', - "\n", - "\n", - ] - - reconstructor = run_tool_extraction_streaming( - parser, - deltas, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1 - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert isinstance(args["data"], dict), ( - f"anyOf parameter was double-encoded in streaming: data={args['data']!r}" - ) - - def test_qwen36_xml_streaming_double_close_brace(qwen3_tokenizer): tools = [ ChatCompletionToolsParam( @@ -360,60 +273,10 @@ def test_xml_streaming_parallel_tool_calls_preformed_chunks(qwen3_tokenizer): # --------------------------------------------------------------------------- -# Bug-confirmation tests (regressions to FIX) +# XML-specific streaming bugs (Coder parser is not affected) # --------------------------------------------------------------------------- -def test_xml_string_null_value_not_emptied(qwen3_tokenizer): - """ - Bug A: _convert_param_value intercepts "null" before the type check. - For a STRING parameter with value "null", the parser should output - the JSON string "null", not an empty string "". - - Root cause: `if param_value.lower() == "null": return None` runs first, - then _convert_for_json_streaming(None, "string") returns "", so the - closing-quote _end_element emits "" instead of "null". 
- """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "search", - "parameters": { - "type": "object", - "properties": { - "query": {"type": "string"}, - }, - }, - }, - ) - ] - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - model_output = ( - "\n" - "\n" - "null\n" - "\n" - "" - ) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - - assert "query" in args, f"Parameter 'query' missing from args: {args!r}" - assert args["query"] == "null", ( - f"String parameter with literal value 'null' was incorrectly converted. " - f"Got: {args['query']!r}. " - f"Expected: 'null' (the string). " - f"_convert_param_value returns None before checking type, " - f"then _convert_for_json_streaming(None, 'string') returns ''." - ) - - def test_xml_streaming_boolean_true_not_false(qwen3_tokenizer): """ Bug B: In streaming mode, a boolean parameter with value "true" is @@ -534,360 +397,3 @@ def test_xml_streaming_string_null_last_char_not_dropped(qwen3_tokenizer): f"and _convert_for_json_streaming(None, 'string') returns '', " f"so the final delta is empty and the 'l' is never emitted." ) - - -def test_xml_anyof_integer_null_type_detected(qwen3_tokenizer): - """ - Bug C: _get_param_type only returns non-string for anyOf schemas that - contain "object" or "array". For anyOf: [{type: "integer"}, {type: "null"}] - it falls through and returns "string", so integer parameters with - nullable schemas are incorrectly quoted and not converted. 
- """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "set_count", - "parameters": { - "type": "object", - "properties": { - "count": { - "anyOf": [{"type": "integer"}, {"type": "null"}], - }, - }, - }, - }, - ) - ] - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - model_output = ( - "\n" - "\n" - "42\n" - "\n" - "" - ) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - - assert args["count"] == 42, ( - f"anyOf integer+null: expected int 42, got {args['count']!r}. " - f"_get_param_type only checks for object/array in anyOf schemas, " - f"so integer anyOf schemas fall back to 'string', causing '42' " - f"to be returned as the JSON string '\"42\"' instead of the number 42." - ) - - -# --------------------------------------------------------------------------- -# Regression: XML structural tags as literal text inside string parameters -# --------------------------------------------------------------------------- - -_WRITE_FILE_TOOLS_XML = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "write_file", - "parameters": { - "type": "object", - "properties": { - "file_path": {"type": "string"}, - "content": {"type": "string"}, - }, - }, - }, - ) -] - -# Python content that contains all four XML structural tags as literal strings. -# When qwen3xml encounters "" inside the content value it -# currently treats it as the structural end of the element, -# truncating the value and creating a spurious "query" parameter from the text -# that follows the fake . 
-_XML_TAGS_IN_CONTENT_XML = ( - 'char_deltas = [\n' - ' "\\n",\n' - ' "\\n",\n' - ' "\\n\\n",\n' - ' "\\n",\n' - ']\n' -) - -_WRITE_FILE_XML_TAGS_OUTPUT_XML = ( - "\n" - "\n" - "\ntest.py\n\n" - f"\n{_XML_TAGS_IN_CONTENT_XML}\n" - "\n" - "\n" -) - - -def test_xml_streaming_content_with_structural_xml_tags(qwen3_tokenizer): - """Streaming variant: pre-formed chunks, full content in one delta.""" - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML - ) - - char_deltas = [ - "\n", - "\n", - "\ntest.py\n\n", - f"\n{_XML_TAGS_IN_CONTENT_XML}\n", - "\n", - "\n", - ] - - reconstructor = run_tool_extraction_streaming( - parser, - char_deltas, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1 - assert reconstructor.tool_calls[0].function.name == "write_file" - - args = json.loads(reconstructor.tool_calls[0].function.arguments) - - assert list(args.keys()) == ["file_path", "content"], ( - f"Unexpected parameter keys (spurious params from embedded tags?): " - f"{list(args.keys())}" - ) - assert args["file_path"] == "test.py" - expected_content = _XML_TAGS_IN_CONTENT_XML.rstrip("\n") - assert args["content"] == expected_content, ( - f"content was truncated or corrupted by embedded XML tags.\n" - f"Got: {args.get('content')!r}\n" - f"Expected: {expected_content!r}" - ) - - -def test_xml_nonstreaming_content_with_structural_xml_tags(qwen3_tokenizer): - """Regression: string parameter containing , , - as literal text must be extracted intact. - - Bug: the SAX pre-processor (_preprocess_xml_chunk) returns - ``safe_text + ""`` when it sees ```` inside the - accumulated parameter buffer, terminating the current parameter too early. - The text that follows the spurious closing tag is then misinterpreted as a - new parameter named "query", creating a ghost parameter and truncating - the real "content" value. 
- - Expected: exactly two parameters -- file_path and content -- with content - equal to the full Python snippet including the embedded XML tags. - """ - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML - ) - result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT_XML, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "write_file" - - args = json.loads(result.tool_calls[0].function.arguments) - - assert list(args.keys()) == ["file_path", "content"], ( - f"Unexpected parameter keys (spurious params created from embedded tags?): " - f"{list(args.keys())}. " - f"_preprocess_xml_chunk sees '' inside the accumulated " - f"_pre_param_buffer and terminates the parameter early; the text after " - f"'' becomes a ghost 'query' parameter." - ) - assert args["file_path"] == "test.py" - expected_content = _XML_TAGS_IN_CONTENT_XML.rstrip("\n") - assert args["content"] == expected_content, ( - f"content was truncated or corrupted by embedded XML tags. " - f"Got: {args.get('content')!r}\n" - f"Expected: {expected_content!r}" - ) - - -# File content whose lines ARE standalone and -# tokens (preceded by \n). This simulates writing a Jinja2 template, a test -# fixture for the parser, or any file that references the tool-call format. -# "new_string" is intentionally NOT a parameter of write_file (schema has -# "file_path" and "content"), so the schema filter must prevent it from being -# treated as a structural boundary. 
-_CONTENT_WITH_PARAM_LIKE_LINES_XML = ( - 'TOOL_CALL_TEMPLATE = """\n' - "\n" - "\n" - "#!/usr/bin/env python3\n" - "\n" - '"""\n' -) - -_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT_XML = ( - "\n" - "\n" - "\ntest_template.py\n\n" - f"\n{_CONTENT_WITH_PARAM_LIKE_LINES_XML}\n" - "\n" - "\n" -) - - -def test_xml_nonstreaming_content_with_param_like_lines(qwen3_tokenizer): - """Non-streaming: file content containing and - on their own lines must not be truncated at the first or - create spurious extra parameters. Requires schema-based filtering so that - "new_string" (not a real parameter of write_file) is ignored. - """ - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML - ) - result = parser.extract_tool_calls(_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT_XML, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "write_file" - - args = json.loads(result.tool_calls[0].function.arguments) - assert list(args.keys()) == ["file_path", "content"], ( - f"Spurious parameters created: {list(args.keys())}" - ) - assert args["file_path"] == "test_template.py" - expected = _CONTENT_WITH_PARAM_LIKE_LINES_XML.rstrip("\n") - assert args["content"] == expected, ( - f"content truncated or wrong: {args.get('content')!r}" - ) - - -def test_xml_streaming_content_with_param_like_lines(qwen3_tokenizer): - """Streaming: file content containing and on - their own lines — split into one chunk per structural token — must not - cause spurious extra parameters. - - The critical scenario: chunk 5 is '\\n' arriving ALONE so - the streaming buffer has nothing after it (rest='') which previously - triggered the 'not rest → structural' fallback, ending the 'content' - parameter prematurely. After the schema fix, the subsequent - '' is recognised as non-structural and the full - content is preserved. 
- """ - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS_XML - ) - - char_deltas = [ - "\n", - "\n", - "\ntest_template.py\n\n", - '\nTOOL_CALL_TEMPLATE = """\n', - "\n", # first literal close — alone in its delta - "\n", # literal new-param line - "#!/usr/bin/env python3\n", - "\n", # second literal close - '"""\n', - "\n", # REAL close of content - "\n", - "\n", - ] - - reconstructor = run_tool_extraction_streaming( - parser, - char_deltas, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1, ( - f"Expected 1 tool call, got {len(reconstructor.tool_calls)}: " - f"{[tc.function.name for tc in reconstructor.tool_calls]}" - ) - assert reconstructor.tool_calls[0].function.name == "write_file" - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert list(args.keys()) == ["file_path", "content"], ( - f"Spurious parameters created: {list(args.keys())}" - ) - assert args["file_path"] == "test_template.py" - expected = _CONTENT_WITH_PARAM_LIKE_LINES_XML.rstrip("\n") - assert args["content"] == expected, ( - f"content truncated or wrong: {args.get('content')!r}" - ) - - -_OBJECT_PARAM_TOOLS_XML = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "process", - "parameters": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "data": {"type": "object"}, - }, - }, - }, - ) -] - -_DOUBLE_ENCODED_OBJECT_OUTPUT_XML = ( - "\n" - "\n" - "\nhello\n\n" - "\n\"{'key': 'value', 'n': 1}\"\n\n" - "\n" - "\n" -) - - -def test_xml_nonstreaming_double_encoded_object_param(qwen3_tokenizer): - """Non-streaming: model trained with buggy template (json.dumps(str(dict))) - outputs object args as a JSON-encoded Python repr. Parser must recover - the real dict via double-decode. 
- """ - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS_XML - ) - result = parser.extract_tool_calls( - _DOUBLE_ENCODED_OBJECT_OUTPUT_XML, request=request - ) - - assert result.tools_called - assert len(result.tool_calls) == 1 - args = json.loads(result.tool_calls[0].function.arguments) - assert args["name"] == "hello" - assert isinstance(args["data"], dict), ( - f"Expected dict, got {type(args['data'])}: {args['data']!r}" - ) - assert args["data"] == {"key": "value", "n": 1} - - -def test_xml_streaming_double_encoded_object_param(qwen3_tokenizer): - """Streaming: same double-encoded object parameter scenario.""" - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=_OBJECT_PARAM_TOOLS_XML) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_OBJECT_PARAM_TOOLS_XML - ) - reconstructor = run_tool_extraction_streaming( - parser, - _DOUBLE_ENCODED_OBJECT_OUTPUT_XML, - request, - assert_one_tool_per_delta=False, - ) - assert len(reconstructor.tool_calls) == 1 - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert args["name"] == "hello" - assert isinstance(args["data"], dict), ( - f"Expected dict, got {type(args['data'])}: {args['data']!r}" - ) - assert args["data"] == {"key": "value", "n": 1} - - -# ============================================================================ -# Qwen 3.6 Bug Confirmations (placeholder, truncated test removed) -# ============================================================================ - From 7036e5f6c263e6f2865bc143618389fd3dd9a8d9 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 06:33:29 +0200 Subject: [PATCH 14/21] test: maximize shared coverage between Qwen3 XML and Coder parsers Move all generic regression tests (basic extraction, type conversion, streaming variants, robustness) from the Coder-specific file into the shared parametrized file so each 
test runs against both parsers. Only behaviour that genuinely differs between the two parsers stays parser-specific: - Coder-only: ``streaming_split_tag`` (relies on ``is_tool_call_started``) and ``streaming_various_chunk_sizes`` (XML SAX cannot tolerate single-character chunks). - XML-only: ``streaming_missing_opening_tool_call_tag`` (Coder does not recover from a missing ```` opener in streaming mode). Two assertions were relaxed in the shared file to accept both legitimate behaviours: content between parallel tool calls (``None`` vs ``"\\n"``) and the streaming header arguments value (``""`` vs ``"{"``). Test count rises from 99 to 138 (+39 from cross-parser parametrization) while ``test_qwen3coder_tool_parser.py`` shrinks from 1260 to 162 lines. Co-Authored-By: Claude Opus 4.7 Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3_xml_coder_shared.py | 853 +++++++++++- .../test_qwen3coder_tool_parser.py | 1196 +---------------- .../tool_parsers/test_qwen3xml_tool_parser.py | 72 +- 3 files changed, 969 insertions(+), 1152 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index 76b71e5eaa75..b53e8a067d55 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """ -Shared regression tests for the Qwen3 XML and Coder tool parsers. +Shared tests for the Qwen3 XML and Coder tool parsers. These tests cover behaviour that BOTH parsers must implement identically. Each test runs twice — once against ``Qwen3XMLToolParser`` and once against @@ -11,15 +11,23 @@ ``test_qwen3coder_tool_parser.py``). 
""" import json +from collections.abc import Generator import pytest +from openai.types.responses.function_tool import FunctionTool from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, ) -from vllm.tokenizers import get_tokenizer +from vllm.entrypoints.openai.engine.protocol import ( + DeltaMessage, + FunctionCall, + ToolCall, +) +from vllm.tokenizers import TokenizerLike, get_tokenizer +from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser @@ -39,6 +47,845 @@ def parser_cls(request): return request.param +WEATHER_PARAMS = { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], +} + +AREA_PARAMS = { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, +} + + +@pytest.fixture(params=["chat_completion", "responses_api"]) +def sample_tools(request): + if request.param == "chat_completion": + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": WEATHER_PARAMS, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": AREA_PARAMS, + }, + ), + ] + else: + return [ + FunctionTool( + type="function", + name="get_current_weather", + description="Get the current weather", + parameters=WEATHER_PARAMS, + ), + FunctionTool( + type="function", + name="calculate_area", + 
description="Calculate area of a shape", + parameters=AREA_PARAMS, + ), + ] + + +@pytest.fixture +def parser(parser_cls, qwen3_tokenizer, sample_tools): + return parser_cls(qwen3_tokenizer, tools=sample_tools) + + +def assert_tool_calls( + actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall] +): + assert len(actual_tool_calls) == len(expected_tool_calls) + for actual_tool_call, expected_tool_call in zip( + actual_tool_calls, expected_tool_calls + ): + assert actual_tool_call.type == "function" + assert actual_tool_call.function.name == expected_tool_call.function.name + assert json.loads(actual_tool_call.function.arguments) == json.loads( + expected_tool_call.function.arguments + ) + + +def stream_delta_message_generator( + parser, + tokenizer: TokenizerLike, + model_output: str, + request: ChatCompletionRequest | None = None, +) -> Generator[DeltaMessage, None, None]: + all_token_ids = tokenizer.encode(model_output, add_special_tokens=False) + + previous_text = "" + previous_tokens = None + prefix_offset = 0 + read_offset = 0 + for i, delta_token in enumerate(all_token_ids): + delta_token_ids = [delta_token] + previous_token_ids = all_token_ids[:i] + current_token_ids = all_token_ids[: i + 1] + + (new_tokens, delta_text, new_prefix_offset, new_read_offset) = ( + detokenize_incrementally( + tokenizer=tokenizer, + all_input_ids=current_token_ids, + prev_tokens=previous_tokens, + prefix_offset=prefix_offset, + read_offset=read_offset, + skip_special_tokens=False, + spaces_between_special_tokens=True, + ) + ) + + current_text = previous_text + delta_text + + delta_message = parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request=request, + ) + if delta_message: + yield delta_message + + previous_text = current_text + previous_tokens = ( + previous_tokens + new_tokens if previous_tokens else new_tokens + ) + prefix_offset = new_prefix_offset + 
read_offset = new_read_offset + + +# --------------------------------------------------------------------------- +# Basic extraction +# --------------------------------------------------------------------------- + + +def test_extract_tool_calls_no_tools(parser): + model_output = "This is a test response without any tool calls" + extracted_tool_calls = parser.extract_tool_calls( + model_output, request=None + ) + assert not extracted_tool_calls.tools_called + assert extracted_tool_calls.tool_calls == [] + assert extracted_tool_calls.content == model_output + + +_EXTRACT_CASES = [ + ( + """ + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + None, + ), + ( + """Sure! Let me check the weather for you. + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + "Sure! Let me check the weather for you.", + ), + ( + """ + + +rectangle + + +{"width": 10, + "height": 20} + + +2 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "rectangle", + "dimensions": {"width": 10, "height": 20}, + "precision": 2, + } + ), + ) + ) + ], + None, + ), + ( + """ + + +Dallas + + +TX + + +fahrenheit + + + + + + +Orlando + + +FL + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ), + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Orlando", "state": "FL", "unit": "fahrenheit"} + ), + ) + ), + ], + "\n", + ), + ( + """Let me calculate that area for you. 
+ + +circle + + +{"radius": 15.5} + + +3 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "circle", + "dimensions": {"radius": 15.5}, + "precision": 3, + } + ), + ) + ) + ], + "Let me calculate that area for you.", + ), +] + +_EXTRACT_IDS = [ + "single_tool", + "single_tool_with_content", + "single_tool_multiline_param", + "parallel_tools", + "tool_with_typed_params", +] + + +@pytest.mark.parametrize( + ids=_EXTRACT_IDS, + argnames=["model_output", "expected_tool_calls", "expected_content"], + argvalues=_EXTRACT_CASES, +) +def test_extract_tool_calls( + parser, model_output, expected_tool_calls, expected_content +): + request = ChatCompletionRequest(model=MODEL, messages=[]) + extracted_tool_calls = parser.extract_tool_calls( + model_output, request=request + ) + assert extracted_tool_calls.tools_called + assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls) + # Both ``None`` and ``""`` are acceptable when the expected content is + # only whitespace — the two parsers differ on whether they preserve the + # newline that separates parallel tool-call blocks. 
+ actual_content = extracted_tool_calls.content + if expected_content and expected_content.strip(): + assert actual_content == expected_content + else: + assert (actual_content or "").strip() == (expected_content or "").strip() + + +def test_extract_tool_calls_fallback_no_tags(parser): + """Test fallback parsing when XML tags are missing.""" + model_output = """ + +Dallas + + +TX + +""" + request = ChatCompletionRequest(model=MODEL, messages=[]) + extracted_tool_calls = parser.extract_tool_calls( + model_output, request=request + ) + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 1 + assert ( + extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" + ) + + +# --------------------------------------------------------------------------- +# Type conversion +# --------------------------------------------------------------------------- + + +def test_extract_tool_calls_type_conversion(qwen3_tokenizer, parser_cls): + """Test parameter type conversion based on tool schema.""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "test_types", + "parameters": { + "type": "object", + "properties": { + "int_param": {"type": "integer"}, + "float_param": {"type": "float"}, + "bool_param": {"type": "boolean"}, + "str_param": {"type": "string"}, + "obj_param": {"type": "object"}, + }, + }, + }, + ) + ] + + model_output = """ + + +42 + + +3.14 + + +true + + +hello world + + +{"key": "value"} + + +""" + + parser_inst = parser_cls(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + extracted_tool_calls = parser_inst.extract_tool_calls( + model_output, request=request + ) + + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args["int_param"] == 42 + assert args["float_param"] == 3.14 + assert args["bool_param"] is True + assert args["str_param"] == "hello world" + assert args["obj_param"] == {"key": "value"} + + 
+def test_extract_tool_calls_complex_type_with_single_quote( + qwen3_tokenizer, parser_cls +): + """Object parameter expressed as a Python repr (single quotes).""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "test_types", + "parameters": { + "type": "object", + "properties": { + "int_param": {"type": "integer"}, + "float_param": {"type": "float"}, + "bool_param": {"type": "boolean"}, + "str_param": {"type": "string"}, + "obj_param": {"type": "object"}, + }, + }, + }, + ) + ] + + model_output = """ + + +{'key': 'value'} + + +""" + + parser_inst = parser_cls(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + extracted_tool_calls = parser_inst.extract_tool_calls( + model_output, request=request + ) + + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args["obj_param"] == {"key": "value"} + + +# --------------------------------------------------------------------------- +# Streaming extraction +# --------------------------------------------------------------------------- + + +_STREAMING_CASES = [ + ("This is a test without tools", [], "This is a test without tools"), +] + _EXTRACT_CASES + +_STREAMING_IDS = ["no_tools"] + _EXTRACT_IDS + + +@pytest.mark.parametrize( + ids=_STREAMING_IDS, + argnames=["model_output", "expected_tool_calls", "expected_content"], + argvalues=_STREAMING_CASES, +) +def test_extract_tool_calls_streaming( + parser, + qwen3_tokenizer, + model_output, + expected_tool_calls, + expected_content, +): + """Test incremental streaming behavior including typed parameters.""" + request = ChatCompletionRequest(model=MODEL, messages=[]) + + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + parser, qwen3_tokenizer, model_output, request + ): + assert not delta_message.role + + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call 
in delta_message.tool_calls: + idx = tool_call.index + + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + assert tool_states[idx]["name"] is None + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += ( + tool_call.function.arguments + ) + + # Be tolerant about whitespace-only deltas between parallel tool calls; + # see ``test_extract_tool_calls`` for the same reasoning. + if expected_content and expected_content.strip(): + assert other_content == expected_content + else: + assert other_content.strip() == (expected_content or "").strip() + assert len(tool_states) == len(expected_tool_calls) + assert len(parser.prev_tool_call_arr) == len(expected_tool_calls) + + for idx, expected_tool in enumerate(expected_tool_calls): + state = tool_states[idx] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == expected_tool.function.name + + arguments_str = state["arguments"] + assert arguments_str is not None + actual_args = json.loads(arguments_str) + expected_args = json.loads(expected_tool.function.arguments) + assert actual_args == expected_args + + +def test_extract_tool_calls_missing_closing_parameter_tag(parser): + """Test handling of missing closing tag.""" + model_output = """Let me check the weather for you: + + + +Dallas + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[]) + extracted_tool_calls = parser.extract_tool_calls( + model_output, request=request + ) + + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 1 + assert ( + extracted_tool_calls.tool_calls[0].function.name 
== "get_current_weather" + ) + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert "city" in args + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + assert "Let me check the weather for you:" in extracted_tool_calls.content + + +def test_extract_tool_calls_streaming_missing_closing_tag( + parser, qwen3_tokenizer +): + """Streaming with missing closing tag.""" + model_output = """Let me check the weather for you: + + + +Dallas + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[]) + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + parser, qwen3_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += ( + tool_call.function.arguments + ) + + assert "Let me check the weather for you:" in other_content + assert len(tool_states) == 1 + assert len(parser.prev_tool_call_arr) == 1 + + state = tool_states[0] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == "get_current_weather" + args = json.loads(state["arguments"]) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + +def test_extract_tool_calls_streaming_incremental(parser, qwen3_tokenizer): + """Test that streaming is truly incremental.""" + 
model_output = """I'll check the weather. + + +Dallas + + +TX + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[]) + chunks = [] + for delta_message in stream_delta_message_generator( + parser, qwen3_tokenizer, model_output, request + ): + chunks.append(delta_message) + + assert len(chunks) > 3 + assert chunks[0].content is not None + assert chunks[0].tool_calls is None or chunks[0].tool_calls == [] + + header_found = False + for chunk in chunks: + if chunk.tool_calls and chunk.tool_calls[0].id: + header_found = True + assert chunk.tool_calls[0].function.name == "get_current_weather" + assert chunk.tool_calls[0].type == "function" + # XML emits an empty arguments string with the header; Coder + # emits the opening "{" with the header. Both are valid. + assert chunk.tool_calls[0].function.arguments in ("", "{") + break + assert header_found + + arg_chunks = [] + for chunk in chunks: + if chunk.tool_calls and chunk.tool_calls[0].function.arguments: + arg_chunks.append(chunk.tool_calls[0].function.arguments) + + assert len(arg_chunks) > 1 + full_args = "".join(arg_chunks) + parsed_args = json.loads(full_args) + assert parsed_args["city"] == "Dallas" + assert parsed_args["state"] == "TX" + + +# --------------------------------------------------------------------------- +# Robustness regressions +# --------------------------------------------------------------------------- + + +def test_malformed_xml_no_gt_delimiter(parser): + """Regression: malformed XML without '>' must not crash (PR #36774).""" + model_output = ( + "\n" + "Dallas\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[]) + result = parser.extract_tool_calls(model_output, request=request) + assert result is not None + assert isinstance(result.tool_calls, list) + assert all(tc is not None for tc in result.tool_calls) + + +def test_none_tool_calls_filtered(parser): + """Regression: None tool calls filtered from output (PR #36774).""" + model_output = ( + "\n" + "\n" 
+ "\n" + "\n" + "\n" + "Dallas\n" + "TX\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[]) + result = parser.extract_tool_calls(model_output, request=request) + assert all(tc is not None for tc in result.tool_calls) + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "get_current_weather" + args = json.loads(result.tool_calls[0].function.arguments) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + + +def test_streaming_multi_param_single_chunk(parser): + """Regression: speculative decode delivering multiple params at once + (PR #35615).""" + request = ChatCompletionRequest(model=MODEL, messages=[]) + + deltas = [ + "", + "\n", + "\n", + # This single delta delivers all three parameters at once + "\nDallas\n" + "\n\nTX\n" + "\n\nfahrenheit\n", + "\n", + "\n", + ] + + reconstructor = run_tool_extraction_streaming( + parser, + deltas, + request, + assert_one_tool_per_delta=False, + ) + + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + +def test_no_double_serialization_string_args(qwen3_tokenizer, parser_cls): + """Regression: string arguments must not be double-serialized + (PR #35615).""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "greet", + "parameters": { + "type": "object", + "properties": { + "message": {"type": "string"}, + }, + }, + }, + ) + ] + + model_output = ( + "\n" + "\n" + "hello world\n" + "\n" + "" + ) + + parser_inst = parser_cls(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser_inst.extract_tool_calls(model_output, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1 + raw_arguments = result.tool_calls[0].function.arguments + args = 
json.loads(raw_arguments) + assert args["message"] == "hello world" + assert '\\"hello world\\"' not in raw_arguments + + +def test_extract_tool_calls_streaming_speculative_decode_loss(parser): + """If the parser hasn't started JSON yet and the delta contains the + parameters AND the end of the tool call, the parser should not just + return '{' and lose the parameters. + """ + request = ChatCompletionRequest(model="test", messages=[]) + + text1 = "\n\n" + parser.extract_tool_calls_streaming( + "", text1, text1, [], [1], [1], request + ) + + delta_str = ( + "\nParis\n\n\n" + ) + text2 = text1 + delta_str + delta2 = parser.extract_tool_calls_streaming( + text1, text2, delta_str, [1], [1, 2], [2], request + ) + + assert delta2 is not None + assert delta2.tool_calls is not None + assert len(delta2.tool_calls) == 1 + args = delta2.tool_calls[0].function.arguments + assert "Paris" in args, f"Arguments lost! Got: {args}" + + # --------------------------------------------------------------------------- # Value conversion: string "null" must NOT become JSON null # --------------------------------------------------------------------------- @@ -332,8 +1179,6 @@ def test_double_encoded_object_param_streaming(qwen3_tokenizer, parser_cls): ) ] -# Content with all four structural tags as literal strings (a Python file -# that documents the tool-call format). _XML_TAGS_IN_CONTENT = ( 'char_deltas = [\n' ' "\\n",\n' diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 3f982acacac3..c3865e8e3935 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -1,28 +1,25 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Coder-parser-specific tests. + +Tests that exercise behaviour shared with the XML parser live in +``tests/tool_parsers/test_qwen3_xml_coder_shared.py``. 
Only tests that +depend on Coder-only API (e.g. ``is_tool_call_started``) or on Coder-only +streaming behaviour (e.g. character-by-character chunking) belong here. +""" + import json -from collections.abc import Generator import pytest -from openai.types.responses.function_tool import FunctionTool from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, -) -from vllm.entrypoints.openai.engine.protocol import ( - DeltaMessage, - FunctionCall, - ToolCall, ) -from vllm.tokenizers import TokenizerLike, get_tokenizer -from vllm.tokenizers.detokenizer_utils import detokenize_incrementally -from vllm.tool_parsers.qwen3coder_tool_parser import ( - Qwen3CoderToolParser, -) -from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser -from tests.tool_parsers.utils import run_tool_extraction_streaming +from vllm.tokenizers import get_tokenizer +from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser + MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" @@ -32,1085 +29,21 @@ def qwen3_tokenizer(): @pytest.fixture -def qwen3_tool_parser(qwen3_tokenizer, sample_tools): - return Qwen3CoderToolParser(qwen3_tokenizer, tools=sample_tools) - - -@pytest.fixture -def qwen3_xml_tool_parser(qwen3_tokenizer, sample_tools): - return Qwen3XMLToolParser(qwen3_tokenizer, tools=sample_tools) - - -@pytest.fixture(params=["xml"]) -def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser, request): - """Parameterized fixture that provides both parser types for testing""" - if request.param == "original": - return qwen3_tool_parser - else: - return qwen3_xml_tool_parser - - -WEATHER_PARAMS = { - "type": "object", - "properties": { - "city": {"type": "string", "description": "The city name"}, - "state": {"type": "string", "description": "The state code"}, - "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, - }, - "required": ["city", "state"], -} - -AREA_PARAMS = { - "type": "object", - 
"properties": { - "shape": {"type": "string"}, - "dimensions": {"type": "object"}, - "precision": {"type": "integer"}, - }, -} - - -@pytest.fixture(params=["chat_completion", "responses_api"]) -def sample_tools(request): - if request.param == "chat_completion": - return [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": WEATHER_PARAMS, - }, - ), - ChatCompletionToolsParam( - type="function", - function={ - "name": "calculate_area", - "description": "Calculate area of a shape", - "parameters": AREA_PARAMS, - }, - ), - ] - else: - return [ - FunctionTool( - type="function", - name="get_current_weather", - description="Get the current weather", - parameters=WEATHER_PARAMS, - ), - FunctionTool( - type="function", - name="calculate_area", - description="Calculate area of a shape", - parameters=AREA_PARAMS, - ), - ] - - -def assert_tool_calls( - actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall] -): - assert len(actual_tool_calls) == len(expected_tool_calls) - - for actual_tool_call, expected_tool_call in zip( - actual_tool_calls, expected_tool_calls - ): - # Qwen3 parser doesn't generate IDs during extraction - assert actual_tool_call.type == "function" - assert actual_tool_call.function.name == expected_tool_call.function.name - assert json.loads(actual_tool_call.function.arguments) == json.loads( - expected_tool_call.function.arguments - ) - - -def stream_delta_message_generator( - qwen3_tool_parser, - qwen3_tokenizer: TokenizerLike, - model_output: str, - request: ChatCompletionRequest | None = None, -) -> Generator[DeltaMessage, None, None]: - all_token_ids = qwen3_tokenizer.encode(model_output, add_special_tokens=False) - - previous_text = "" - previous_tokens = None - prefix_offset = 0 - read_offset = 0 - for i, delta_token in enumerate(all_token_ids): - delta_token_ids = [delta_token] - previous_token_ids = all_token_ids[:i] - 
current_token_ids = all_token_ids[: i + 1] - - (new_tokens, delta_text, new_prefix_offset, new_read_offset) = ( - detokenize_incrementally( - tokenizer=qwen3_tokenizer, - all_input_ids=current_token_ids, - prev_tokens=previous_tokens, - prefix_offset=prefix_offset, - read_offset=read_offset, - skip_special_tokens=False, - spaces_between_special_tokens=True, - ) - ) - - current_text = previous_text + delta_text - - delta_message = qwen3_tool_parser.extract_tool_calls_streaming( - previous_text, - current_text, - delta_text, - previous_token_ids, - current_token_ids, - delta_token_ids, - request=request, - ) - if delta_message: - yield delta_message - - previous_text = current_text - previous_tokens = ( - previous_tokens + new_tokens if previous_tokens else new_tokens - ) - prefix_offset = new_prefix_offset - read_offset = new_read_offset - - -def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized): - model_output = "This is a test response without any tool calls" - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=None - ) # type: ignore[arg-type] - assert not extracted_tool_calls.tools_called - assert extracted_tool_calls.tool_calls == [] - assert extracted_tool_calls.content == model_output - - -@pytest.mark.parametrize( - ids=[ - "single_tool", - "single_tool_with_content", - "single_tool_multiline_param", - "parallel_tools", - "tool_with_typed_params", - ], - argnames=["model_output", "expected_tool_calls", "expected_content"], - argvalues=[ - ( - """ - - -Dallas - - -TX - - -fahrenheit - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ) - ], - None, - ), - ( - """Sure! Let me check the weather for you. 
- - -Dallas - - -TX - - -fahrenheit - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ) - ], - "Sure! Let me check the weather for you.", - ), - ( - """ - - -rectangle - - -{"width": 10, - "height": 20} - - -2 - - -""", - [ - ToolCall( - function=FunctionCall( - name="calculate_area", - arguments=json.dumps( - { - "shape": "rectangle", - "dimensions": {"width": 10, "height": 20}, - "precision": 2, - } - ), - ) - ) - ], - None, - ), - ( - """ - - -Dallas - - -TX - - -fahrenheit - - - - - - -Orlando - - -FL - - -fahrenheit - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ), - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Orlando", "state": "FL", "unit": "fahrenheit"} - ), - ) - ), - ], - "\n", - ), ( - """Let me calculate that area for you. 
- - -circle - - -{"radius": 15.5} - - -3 - - -""", - [ - ToolCall( - function=FunctionCall( - name="calculate_area", - arguments=json.dumps( - { - "shape": "circle", - "dimensions": {"radius": 15.5}, - "precision": 3, - } - ), - ) - ) - ], - "Let me calculate that area for you.", - ), - ], -) -def test_extract_tool_calls( - qwen3_tool_parser_parametrized, - model_output, - expected_tool_calls, - expected_content, -): - request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=request - ) - assert extracted_tool_calls.tools_called - - assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls) - - assert extracted_tool_calls.content == expected_content - - -def test_extract_tool_calls_fallback_no_tags( - qwen3_tool_parser_parametrized, -): - """Test fallback parsing when XML tags are missing""" - model_output = """ - -Dallas - - -TX - -""" - - request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=request - ) - - assert extracted_tool_calls.tools_called - assert len(extracted_tool_calls.tool_calls) == 1 - assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" - - -def test_extract_tool_calls_type_conversion(qwen3_tokenizer): - """Test parameter type conversion based on tool schema""" - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "test_types", - "parameters": { - "type": "object", - "properties": { - "int_param": {"type": "integer"}, - "float_param": {"type": "float"}, - "bool_param": {"type": "boolean"}, - "str_param": {"type": "string"}, - "obj_param": {"type": "object"}, - }, - }, - }, - ) - ] - - model_output = """ - - -42 - - -3.14 - - -true - - -hello world - - -{"key": "value"} - - -""" - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - request = 
ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) - - args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) - assert args["int_param"] == 42 - assert args["float_param"] == 3.14 - assert args["bool_param"] is True - assert args["str_param"] == "hello world" - assert args["obj_param"] == {"key": "value"} - - -@pytest.mark.parametrize( - ids=[ - "no_tools", - "single_tool", - "single_tool_with_content", - "single_tool_multiline_param", - "parallel_tools", - "tool_with_typed_params", # Added this test case - ], - argnames=["model_output", "expected_tool_calls", "expected_content"], - argvalues=[ - ("This is a test without tools", [], "This is a test without tools"), - ( - """ - - -Dallas - - -TX - - -fahrenheit - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ) - ], - None, - ), - ( - """Sure! Let me check the weather for you. - - -Dallas - - -TX - - -fahrenheit - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ) - ], - "Sure! 
Let me check the weather for you.", - ), - ( - """ - - -rectangle - - -{"width": 10, - "height": 20} - - -2 - - -""", - [ - ToolCall( - function=FunctionCall( - name="calculate_area", - arguments=json.dumps( - { - "shape": "rectangle", - "dimensions": {"width": 10, "height": 20}, - "precision": 2, - } - ), - ) - ) - ], - None, - ), - ( - """ - - -Dallas - - -TX - - -fahrenheit - - - - - - -Orlando - - -FL - - -celsius - - -""", - [ - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} - ), - ) - ), - ToolCall( - function=FunctionCall( - name="get_current_weather", - arguments=json.dumps( - {"city": "Orlando", "state": "FL", "unit": "celsius"} - ), - ) - ), - ], - "\n", - ), # Added tool_with_typed_params test case - ( - """Let me calculate that area for you. - - -circle - - -{"radius": 15.5} - - -3 - - -""", - [ - ToolCall( - function=FunctionCall( - name="calculate_area", - arguments=json.dumps( - { - "shape": "circle", - "dimensions": {"radius": 15.5}, - "precision": 3, - } - ), - ) - ) - ], - "Let me calculate that area for you.", - ), - ], -) -def test_extract_tool_calls_streaming( - qwen3_tool_parser_parametrized, - qwen3_tokenizer, - model_output, - expected_tool_calls, - expected_content, -): - """Test incremental streaming behavior including typed parameters""" - request = ChatCompletionRequest(model=MODEL, messages=[]) - - other_content = "" - tool_states = {} # Track state per tool index - - for delta_message in stream_delta_message_generator( - qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request - ): - # role should never be streamed from tool parser - assert not delta_message.role - - if delta_message.content: - other_content += delta_message.content - - if delta_message.tool_calls: - for tool_call in delta_message.tool_calls: - idx = tool_call.index - - # Initialize state for new tool - if idx not in tool_states: - tool_states[idx] = { - 
"id": None, - "name": None, - "arguments": "", - "type": None, - } - - # First chunk should have id, name, and type - if tool_call.id: - tool_states[idx]["id"] = tool_call.id - - if tool_call.type: - assert tool_call.type == "function" - tool_states[idx]["type"] = tool_call.type - - if tool_call.function: - if tool_call.function.name: - # Should only be set once - assert tool_states[idx]["name"] is None - tool_states[idx]["name"] = tool_call.function.name - - if tool_call.function.arguments is not None: - # Accumulate arguments incrementally - tool_states[idx]["arguments"] += tool_call.function.arguments - - # Verify final content - assert other_content == (expected_content or "") # Handle None case - - # Verify we got all expected tool calls - assert len(tool_states) == len(expected_tool_calls) - assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == len( - expected_tool_calls - ) - - # Verify each tool call - for idx, expected_tool in enumerate(expected_tool_calls): - state = tool_states[idx] - assert state["id"] is not None - assert state["type"] == "function" - assert state["name"] == expected_tool.function.name - - # Parse accumulated arguments - arguments_str = state["arguments"] - assert arguments_str is not None - actual_args = json.loads(arguments_str) - expected_args = json.loads(expected_tool.function.arguments) - assert actual_args == expected_args - - -def test_extract_tool_calls_missing_closing_parameter_tag( - qwen3_tool_parser_parametrized, -): - """Test handling of missing closing tag""" - # Using get_current_weather from sample_tools but with malformed XML - model_output = """Let me check the weather for you: - - - -Dallas - -TX - - -fahrenheit - - -""" - - request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=request - ) - - # The parser should handle the malformed XML gracefully - assert extracted_tool_calls.tools_called - assert 
len(extracted_tool_calls.tool_calls) == 1 - - # Verify the function name is correct - assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" - - # Verify the arguments are parsed despite the missing closing tag - args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) - assert "city" in args - assert args["city"] == "Dallas" - assert args["state"] == "TX" - assert args["unit"] == "fahrenheit" - - # Check that content before the tool call is preserved - assert "Let me check the weather for you:" in extracted_tool_calls.content - - -def test_extract_tool_calls_streaming_missing_closing_tag( - qwen3_tool_parser_parametrized, qwen3_tokenizer -): - """Test streaming with missing closing tag""" - # Using get_current_weather from sample_tools but with malformed XML - model_output = """Let me check the weather for you: - - - -Dallas - -TX - - -fahrenheit - - -""" - - request = ChatCompletionRequest(model=MODEL, messages=[]) - - other_content = "" - tool_states = {} - - for delta_message in stream_delta_message_generator( - qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request - ): - if delta_message.content: - other_content += delta_message.content - - if delta_message.tool_calls: - for tool_call in delta_message.tool_calls: - idx = tool_call.index - - if idx not in tool_states: - tool_states[idx] = { - "id": None, - "name": None, - "arguments": "", - "type": None, - } - - if tool_call.id: - tool_states[idx]["id"] = tool_call.id - - if tool_call.type: - assert tool_call.type == "function" - tool_states[idx]["type"] = tool_call.type - - if tool_call.function: - if tool_call.function.name: - tool_states[idx]["name"] = tool_call.function.name - - if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += tool_call.function.arguments - - # Verify content was streamed - assert "Let me check the weather for you:" in other_content - # Verify we got the tool call - assert len(tool_states) == 1 - 
assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1 - - state = tool_states[0] - assert state["id"] is not None - assert state["type"] == "function" - assert state["name"] == "get_current_weather" - - # Verify arguments were parsed correctly despite missing closing tag - assert state["arguments"] is not None - args = json.loads(state["arguments"]) - assert args["city"] == "Dallas" - assert args["state"] == "TX" - assert args["unit"] == "fahrenheit" - - -def test_extract_tool_calls_streaming_incremental( - qwen3_tool_parser_parametrized, qwen3_tokenizer -): - """Test that streaming is truly incremental""" - model_output = """I'll check the weather. - - -Dallas - - -TX - - -""" - - request = ChatCompletionRequest(model=MODEL, messages=[]) - - chunks = [] - for delta_message in stream_delta_message_generator( - qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request - ): - chunks.append(delta_message) - - # Should have multiple chunks - assert len(chunks) > 3 - - # First chunk(s) should be content - assert chunks[0].content is not None - assert chunks[0].tool_calls is None or chunks[0].tool_calls == [] - - # Should have a chunk with tool header (id, name, type) - header_found = False - for chunk in chunks: - if chunk.tool_calls and chunk.tool_calls[0].id: - header_found = True - assert chunk.tool_calls[0].function.name == "get_current_weather" - assert chunk.tool_calls[0].type == "function" - # Empty initially - assert chunk.tool_calls[0].function.arguments == "" - break - assert header_found - - # Should have chunks with incremental arguments - arg_chunks = [] - for chunk in chunks: - if chunk.tool_calls and chunk.tool_calls[0].function.arguments: - arg_chunks.append(chunk.tool_calls[0].function.arguments) - - # Arguments should be streamed incrementally - assert len(arg_chunks) > 1 - - # Concatenated arguments should form valid JSON - full_args = "".join(arg_chunks) - parsed_args = json.loads(full_args) - assert parsed_args["city"] == 
"Dallas" - assert parsed_args["state"] == "TX" - - -def test_extract_tool_calls_complex_type_with_single_quote( - qwen3_tokenizer, -): - """Test parameter type conversion based on tool schema""" - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "test_types", - "parameters": { - "type": "object", - "properties": { - "int_param": {"type": "integer"}, - "float_param": {"type": "float"}, - "bool_param": {"type": "boolean"}, - "str_param": {"type": "string"}, - "obj_param": {"type": "object"}, - }, - }, - }, - ) - ] - - model_output = """ - - -{'key': 'value'} - - -""" - - parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) - - args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) - assert args["obj_param"] == {"key": "value"} - - -def test_extract_tool_calls_streaming_missing_opening_tag( - qwen3_tool_parser_parametrized, qwen3_tokenizer -): - """Test streaming with missing opening tag - - This tests that the streaming parser correctly handles - tool calls that start directly with - """ - model_output = """I'll check the weather for you. 
- - - -Dallas - - -TX - - -fahrenheit - - -""" - - request = ChatCompletionRequest(model=MODEL, messages=[]) - - other_content = "" - tool_states = {} - - for delta_message in stream_delta_message_generator( - qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request - ): - if delta_message.content: - other_content += delta_message.content - - if delta_message.tool_calls: - for tool_call in delta_message.tool_calls: - idx = tool_call.index - - if idx not in tool_states: - tool_states[idx] = { - "id": None, - "name": None, - "arguments": "", - "type": None, - } - - if tool_call.id: - tool_states[idx]["id"] = tool_call.id - - if tool_call.type: - assert tool_call.type == "function" - tool_states[idx]["type"] = tool_call.type - - if tool_call.function: - if tool_call.function.name: - tool_states[idx]["name"] = tool_call.function.name - - if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += tool_call.function.arguments - - # Verify content was streamed - assert "I'll check the weather for you." 
in other_content - - # Verify we got the tool call - assert len(tool_states) == 1 - assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1 - - state = tool_states[0] - assert state["id"] is not None - assert state["type"] == "function" - assert state["name"] == "get_current_weather" - - # Verify arguments were parsed correctly despite missing opening tag - assert state["arguments"] is not None - args = json.loads(state["arguments"]) - assert args["city"] == "Dallas" - assert args["state"] == "TX" - assert args["unit"] == "fahrenheit" - - -def test_malformed_xml_no_gt_delimiter(qwen3_tool_parser): - """Regression: malformed XML without '>' must not crash (PR #36774).""" - model_output = ( - "\n" - "Dallas\n" - "\n" - "" - ) - - request = ChatCompletionRequest(model=MODEL, messages=[]) - result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) - assert result is not None - assert isinstance(result.tool_calls, list) - assert all(tc is not None for tc in result.tool_calls) - - -def test_none_tool_calls_filtered(qwen3_tool_parser): - """Regression: None tool calls filtered from output (PR #36774).""" - model_output = ( - "\n" - "\n" - "\n" - "\n" - "\n" - "Dallas\n" - "TX\n" - "\n" - "" - ) - - request = ChatCompletionRequest(model=MODEL, messages=[]) - result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) - assert all(tc is not None for tc in result.tool_calls) - assert result.tools_called - assert len(result.tool_calls) == 1 - assert result.tool_calls[0].function.name == "get_current_weather" - args = json.loads(result.tool_calls[0].function.arguments) - assert args["city"] == "Dallas" - assert args["state"] == "TX" - - -def test_streaming_multi_param_single_chunk(qwen3_tool_parser, qwen3_tokenizer): - """Regression: speculative decode delivering multiple params at once (PR #35615).""" - request = ChatCompletionRequest(model=MODEL, messages=[]) - - deltas = [ - "", - "\n", - "\n", # triggers json_started -> sends 
"{" - # This single delta delivers all three parameters at once - "\nDallas\n" - "\n\nTX\n" - "\n\nfahrenheit\n", - "\n", - "\n", - ] - - - reconstructor = run_tool_extraction_streaming( - qwen3_tool_parser, - deltas, - request, - assert_one_tool_per_delta=False, - ) - - assert len(reconstructor.tool_calls) == 1 - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert args["city"] == "Dallas" - assert args["state"] == "TX" - assert args["unit"] == "fahrenheit" - - -def test_no_double_serialization_string_args(qwen3_tool_parser): - """Regression: string arguments must not be double-serialized (PR #35615).""" - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "greet", - "parameters": { - "type": "object", - "properties": { - "message": {"type": "string"}, - }, - }, - }, - ) - ] - - model_output = ( - "\n" - "\n" - "hello world\n" - "\n" - "" - ) - - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - result = qwen3_tool_parser.extract_tool_calls(model_output, request=request) - - assert result.tools_called - assert len(result.tool_calls) == 1 - raw_arguments = result.tool_calls[0].function.arguments - args = json.loads(raw_arguments) - assert args["message"] == "hello world" - assert '\\"hello world\\"' not in raw_arguments +def qwen3_tool_parser(qwen3_tokenizer): + return Qwen3CoderToolParser(qwen3_tokenizer, tools=None) def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): - """ - This highlights the need to use current_text instead of delta_text. + """```` arrives split across two deltas (````). ``is_tool_call_started`` must flip to ``True`` once the + full tag exists in ``current_text``, and the partial tag must not leak + into ``DeltaMessage.content``. + + This relies on the Coder parser's ``is_tool_call_started`` attribute, + which has no equivalent on the XML parser. 
""" request = ChatCompletionRequest(model=MODEL, messages=[]) - # Iteration 1: "" prev_text_2 = curr_text_1 delta_text_2 = "_call>" curr_text_2 = prev_text_2 + delta_text_2 @@ -1137,56 +69,29 @@ def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): previous_token_ids=[1, 2, 3, 4], current_token_ids=[1, 2, 3, 4, 5], delta_token_ids=[5], - request=request + request=request, ) - # The assertion must verify that the is_tool_call_started variable correctly switches to True - assert qwen3_tool_parser.is_tool_call_started is True, "is_tool_call_started should be True when '' is completed in current_text." + assert qwen3_tool_parser.is_tool_call_started is True - # and that the function does not return fragments of the tag in DeltaMessage(content=...) if msg1 and msg1.content: assert "" not in msg2.content +def test_extract_tool_calls_streaming_various_chunk_sizes( + qwen3_tokenizer, +): + """Coder streaming must reconstruct arguments correctly even when the + deltas arrive a single character at a time. -def test_extract_tool_calls_streaming_speculative_decode_loss(qwen3_tool_parser): + The XML parser's SAX-based streaming cannot tolerate ``chunk_size=1`` + by design (an XML tag is not parseable until ``>`` arrives), so this + robustness test stays Coder-only. """ - if json_started=False, and the delta contains the parameters AND the end of the tool call, - the parser should not just return '{' and lose the parameters. - """ - request = ChatCompletionRequest(model="test", messages=[]) - - text1 = "\n\n" - qwen3_tool_parser.extract_tool_calls_streaming( - "", text1, text1, [], [1], [1], request - ) - - # Delta 2 has the rest of the tool call - delta_str = "\nParis\n\n\n" - text2 = text1 + delta_str - delta2 = qwen3_tool_parser.extract_tool_calls_streaming( - text1, text2, delta_str, [1], [1,2], [2], request - ) - - # The parameters should be in delta2! 
- assert delta2 is not None - assert delta2.tool_calls is not None - assert len(delta2.tool_calls) == 1 - args = delta2.tool_calls[0].function.arguments - assert "Paris" in args, f"Arguments lost! Got: {args}" - -def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): - """ - Test streaming with various chunk sizes using the exact template from Qwen 3.6. - """ - - request = ChatCompletionRequest(model="test", messages=[]) - - # Exact template format from Qwen 3.6 template_text = """ @@ -1200,34 +105,30 @@ def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): """ - # Test with different chunk sizes to simulate different network/speculative decoding behaviors for chunk_size in [1, 3, 15, len(template_text)]: - # Reset parser state - qwen3_tool_parser._reset_streaming_state() - + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=None) + tool_states = {} - - # Simulate custom streaming to precisely control chunk sizes current_text = "" previous_text = "" ptr = 0 - + while ptr < len(template_text): - delta = template_text[ptr:ptr+chunk_size] + delta = template_text[ptr:ptr + chunk_size] previous_text = current_text current_text += delta ptr += chunk_size - - delta_message = qwen3_tool_parser.extract_tool_calls_streaming( + + delta_message = parser.extract_tool_calls_streaming( previous_text=previous_text, current_text=current_text, delta_text=delta, previous_token_ids=[], current_token_ids=[], delta_token_ids=[], - request=request + request=request, ) - + if delta_message and delta_message.tool_calls: for tool_call in delta_message.tool_calls: idx = tool_call.index @@ -1238,7 +139,6 @@ def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): "arguments": "", "type": None, } - if tool_call.id: tool_states[idx]["id"] = tool_call.id if tool_call.type: @@ -1247,14 +147,16 @@ def test_extract_tool_calls_streaming_various_chunk_sizes(qwen3_tool_parser): if tool_call.function.name: tool_states[idx]["name"] = 
tool_call.function.name if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += tool_call.function.arguments + tool_states[idx]["arguments"] += ( + tool_call.function.arguments + ) - assert 0 in tool_states + assert 0 in tool_states, f"chunk_size={chunk_size}" assert tool_states[0]["name"] == "example_function_name" - - import json args = json.loads(tool_states[0]["arguments"]) assert args["example_parameter_1"] == "value_1" - assert args["example_parameter_2"] == "This is the value for the second parameter\nthat can span\nmultiple lines" - - + assert args["example_parameter_2"] == ( + "This is the value for the second parameter\n" + "that can span\n" + "multiple lines" + ) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index f7977218b4f1..9b61b942fa36 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -9,13 +9,16 @@ ToolParserTestConfig, ToolParserTests, ) +from tests.tool_parsers.test_qwen3_xml_coder_shared import ( + stream_delta_message_generator, +) +from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, ) from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser -from tests.tool_parsers.utils import run_tool_extraction_streaming MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" @@ -397,3 +400,70 @@ def test_xml_streaming_string_null_last_char_not_dropped(qwen3_tokenizer): f"and _convert_for_json_streaming(None, 'string') returns '', " f"so the final delta is empty and the 'l' is never emitted." ) + + +def test_xml_streaming_missing_opening_tool_call_tag(qwen3_tokenizer): + """The XML streaming parser must recover when the model emits a tool + call without the leading ```` tag — i.e. directly with + ````. 
The Coder parser does not support this in + streaming mode, so this regression stays XML-specific. + """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=None) + + model_output = """I'll check the weather for you. + + + +Dallas + + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[]) + other_content = "" + tool_states: dict = {} + + for delta_message in stream_delta_message_generator( + parser, qwen3_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += ( + tool_call.function.arguments + ) + + assert "I'll check the weather for you." in other_content + assert len(tool_states) == 1 + state = tool_states[0] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == "get_current_weather" + args = json.loads(state["arguments"]) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" From 1aa80d9f16c2e10bca6750c08ae725162124c869 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 06:49:03 +0200 Subject: [PATCH 15/21] refactor(qwen3coder): extract _advance_to_next_tool and remove duplicate init Remove the duplicated `self.current_tool_index = 0` assignment at the end of `_reset_streaming_state()`. 
Extract `_advance_to_next_tool()` to deduplicate the identical "advance to next tool call" block that was copy-pasted between the normal delta path and the speculative-decoding recursion path in `extract_tool_calls_streaming()`. Signed-off-by: CNE Pierre FICHEPOIL --- vllm/tool_parsers/qwen3coder_tool_parser.py | 92 ++++++++------------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index aaa0d63c1f53..3a8f0e640926 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -110,7 +110,6 @@ def _reset_streaming_state(self): self.accumulated_params = {} self.streaming_request = None self._sent_content_idx = 0 - self.current_tool_index = 0 def _convert_param_value( self, param_value: str, param_name: str, param_config: dict, func_name: str @@ -298,6 +297,36 @@ def _find_true_function_end(self, text: str) -> int: return idx search_pos = idx + len(self.function_end_token) + def _advance_to_next_tool(self, current_text: str) -> None: + """Advance streaming state to the next tool call. + + Updates _sent_content_idx to skip past the completed tool call's + closing tag, then resets per-tool state for the next invocation. + Called both on normal delta boundaries and during speculative- + decoding recursion when multiple complete tool calls arrive in one + delta. 
+ """ + search_idx = 0 + for _ in range(self.current_tool_index + 1): + search_idx = current_text.find(self.tool_call_start_token, search_idx) + if search_idx == -1: + break + end_idx = current_text.find(self.tool_call_end_token, search_idx) + if end_idx != -1: + self._sent_content_idx = max( + self._sent_content_idx, + end_idx + len(self.tool_call_end_token), + ) + search_idx += len(self.tool_call_start_token) + + self.current_tool_index += 1 + self.header_sent = False + self.param_count = 0 + self.json_started = False + self.json_closed = False + self.accumulated_params = {} + self.is_tool_call_started = False + def _find_true_tool_call_end(self, text: str) -> int: """Return the index of the real structural ```` in text (followed with optional whitespace by another ```` @@ -601,35 +630,9 @@ def extract_tool_calls_streaming( # Check if this tool call has ended tool_ends = current_text.count(self.tool_call_end_token) if tool_ends > self.current_tool_index: - # Find the end of the tool call that just finished and update - # _sent_content_idx to prevent it from leaking into content. - search_idx = 0 - for _ in range(self.current_tool_index + 1): - search_idx = current_text.find(self.tool_call_start_token, - search_idx) - if search_idx == -1: - break - end_idx = current_text.find(self.tool_call_end_token, - search_idx) - if end_idx != -1: - self._sent_content_idx = max( - self._sent_content_idx, - end_idx + len(self.tool_call_end_token)) - search_idx += len(self.tool_call_start_token) - - # This tool has ended, advance to next - self.current_tool_index += 1 - self.header_sent = False - self.param_count = 0 - self.json_started = False - self.json_closed = False - self.accumulated_params = {} - - # Always reset is_tool_call_started when a tool call ends. - # This allows correctly sending any content between or after - # tool calls. 
- self.is_tool_call_started = False - # Continue processing next tool + # Advance to next tool; is_tool_call_started is reset so + # content between or after tool calls is emitted correctly. + self._advance_to_next_tool(current_text) return None content_message = None @@ -1001,32 +1004,9 @@ def extract_tool_calls_streaming( and current_text.count(self.tool_call_end_token) > self.current_tool_index + 1 ): - # Manually advance to the next tool: this mirrors the - # "advance to next tool" block executed at the top of - # this method on the next delta arrival. - search_idx = 0 - for _ in range(self.current_tool_index + 1): - search_idx = current_text.find( - self.tool_call_start_token, search_idx - ) - if search_idx == -1: - break - end_idx = current_text.find( - self.tool_call_end_token, search_idx - ) - if end_idx != -1: - self._sent_content_idx = max( - self._sent_content_idx, - end_idx + len(self.tool_call_end_token), - ) - search_idx += len(self.tool_call_start_token) - self.current_tool_index += 1 - self.header_sent = False - self.param_count = 0 - self.json_started = False - self.json_closed = False - self.accumulated_params = {} - self.is_tool_call_started = False + # Speculative decoding delivered multiple complete tool + # calls in one delta; advance and recurse for the next. + self._advance_to_next_tool(current_text) # Recurse with a sentinel previous_text so the entry # check `if not previous_text` does NOT reset the state. From 700da8a47123c591818925cb9ad4185475232c46 Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Sat, 25 Apr 2026 13:09:50 +0200 Subject: [PATCH 16/21] fix(qwen3-tool-parsers): six MTP / spec-decode and template edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coder parser ------------ 1. 
Trailing free text after the final </tool_call> in the SAME delta The early "advance to next tool" check fired when the previous delta closed a tool, then ``return None`` consumed the new delta without ever emitting the trailing free text it carried (MTP / speculative-decoding bursts that bundle </tool_call> + content into one chunk). Removing the early return lets the rest of the function emit that content via the not-started branch. 2. Recognise Python "None" as JSON null for non-string params Qwen3.5's chat template renders nullable args via ``| string`` so a previous turn's null value becomes the literal "None" in the prompt. Models trained on this template generate "None" verbatim. Both parsers now accept "None" alongside "null" in ``_convert_param_value``. 3. Don't drop recursion content when the outer call already has content In MTP bursts that deliver "before <tool_call>...</tool_call> between <tool_call>...</tool_call>" in one delta, the recursion correctly produced ``DeltaMessage(content= "between", tool_calls=[tool 2])`` but the outer merger guarded the assignment with ``not result.content``, silently discarding the "between" fragment whenever the outer had its own "before" content. Concatenate instead of guarding. 4. Use STRUCTURAL </tool_call> positions everywhere A parameter value can legitimately contain the literal string ``</tool_call>`` (e.g. a write_file tool whose content is a code snippet documenting tool-call format). The naive ``current_text.find("</tool_call>")`` and ``count("</tool_call>")`` used by the early-advance check, ``_advance_to_next_tool``, and the per-tool ``tool_text`` truncation matched those literals and landed ``_sent_content_idx`` inside an earlier tool's content, silently dropping every subsequent emission. A new helper ``_structural_tool_call_end_positions`` walks each </tool_call> and accepts it only when (a) preceded by </function> after optional whitespace, or (b) followed structurally by another <tool_call> or end of string. All naive lookups are migrated. 5.
Confirm the spurious-start case before flipping is_tool_call_started ``current_text.count("<tool_call>")`` over-counts when a previous tool's value contains the literal ``<tool_call>``. The starting branch now checks for a real opener past ``_sent_content_idx`` before flipping the flag — otherwise the trailing-text emission in the else branch is reached and the content surfaces as expected. 6. Emit trailing free text after the LAST </tool_call> in MTP bursts that bundle N tool calls + trailing content The recursion processed every additional tool call but did not advance ``_sent_content_idx`` past the final ``</tool_call>``, so trailing free text in the same delta was buffered indefinitely and lost when the EOS-style empty delta arrived. After the recursion completes, emit any text past the last structural </tool_call>. XML parser ---------- 7. Flush text_content_buffer at the end of _process_complete_xml_elements When </tool_call> and trailing free text arrive in the same delta, the trailing text was buffered in ``text_content_buffer`` but never emitted in that batch — and was lost entirely if EOS came before any subsequent delta. Flush the buffer when the batch completes outside any tool call. 8. Same Python "None" recognition as the Coder parser (``_convert_param_value``). 9. Defer numeric (int/float) parameter conversion in the streaming path Boolean params were already deferred to avoid the partial-string to JSON-literal flip producing invalid output. Numeric params suffered the same flip when the value was the literal "None" (Qwen3.5) or "null" (Qwen3.6): the diff-based char emission produced "Non" then a "l" delta against the new "null" output, yielding the cumulative string "Nonl" — invalid JSON. Add the numeric type families to ``need_defer`` so the full value is parsed once at </parameter>. Each fix is covered by a focused regression test in tests/tool_parsers/test_qwen3coder_tool_parser.py, tests/tool_parsers/test_qwen3xml_tool_parser.py, or the shared suite tests/tool_parsers/test_qwen3_xml_coder_shared.py.
Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> --- .../test_qwen3_xml_coder_shared.py | 111 ++++++++++ .../test_qwen3coder_tool_parser.py | 199 ++++++++++++++++++ .../tool_parsers/test_qwen3xml_tool_parser.py | 96 +++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 170 ++++++++++++--- vllm/tool_parsers/qwen3xml_tool_parser.py | 40 +++- 5 files changed, 579 insertions(+), 37 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index b53e8a067d55..7f3302a6cad0 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -1449,3 +1449,114 @@ def test_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer, parser_cls): args1 = json.loads(reconstructor.tool_calls[1].function.arguments) assert args0 == {"city": "Paris"} assert args1 == {"city": "London"} + + +# --------------------------------------------------------------------------- +# Trailing free text after the LAST in the SAME delta (MTP / +# speculative decoding). The text must be emitted as content; dropping it +# silently is a regression. +# --------------------------------------------------------------------------- + + +def test_python_none_value_for_nullable_int(qwen3_tokenizer, parser_cls): + """A Qwen3.5-trained model emits Python ``None`` (not ``null``) for a + nullable non-string parameter, because the Qwen3.5 chat template + renders ``args_value | string`` for non-container types — turning a + null arg from a previous tool call into the literal "None" in the + prompt. The model then learns to generate the same "None" verbatim. + + The parser must recognise this and convert "None" to JSON null, + just like it already does for the literal "null" emitted by + Qwen3.6-trained models. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_count", + "parameters": { + "type": "object", + "properties": { + "count": { + "anyOf": [ + {"type": "integer"}, + {"type": "null"}, + ], + }, + }, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + "None\n" + "\n" + "" + ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(model_output, request=request) + + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["count"] is None, ( + f"Python repr None was not converted to JSON null. " + f"Got: {args['count']!r}" + ) + + +def test_streaming_two_tool_calls_plus_trailing_text_one_delta( + qwen3_tokenizer, parser_cls +): + """MTP: a single delta delivers tool 1 + tool 2 + trailing free text. + Both tool calls must be emitted AND the trailing text must surface as + content in the same delta — not be silently dropped. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WEATHER_TOOLS + ) + deltas = [ + _TWO_TOOL_CALLS_IN_ONE_CHUNK + "\nAll done!", + ] + reconstructor = run_tool_extraction_streaming( + parser, deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 2, ( + f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}" + ) + assert "All done!" in reconstructor.other_content, ( + f"Trailing text after the second tool call was dropped. " + f"Got content: {reconstructor.other_content!r}" + ) + + +def test_streaming_trailing_text_with_final_close_in_same_delta( + qwen3_tokenizer, parser_cls +): + """MTP / speculative decoding can deliver the closing ```` + together with trailing free text in a single delta. The text after + the close must be emitted as content rather than being silently + consumed by the parser's "advance to next tool" logic. 
+ """ + parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WEATHER_TOOLS + ) + deltas = [ + # Build up the tool call up to and including . + "\n\n" + "Paris\n", + # Then deliver + trailing text in ONE delta. + "\n\nI hope this helps!", + ] + reconstructor = run_tool_extraction_streaming( + parser, deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + assert "I hope this helps!" in reconstructor.other_content, ( + f"Trailing text after was dropped. " + f"Got content: {reconstructor.other_content!r}" + ) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index c3865e8e3935..0cba144f0864 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -33,6 +33,205 @@ def qwen3_tool_parser(qwen3_tokenizer): return Qwen3CoderToolParser(qwen3_tokenizer, tools=None) +def test_streaming_trailing_text_after_tool_with_literal_close_tag_in_value( + qwen3_tokenizer, +): + """A tool call's parameter value contains a literal ```` + string. After the real tool call closes, trailing free text must + still be emitted as content. + + The naive ``current_text.count()`` and + ``current_text.find()`` used by the early-advance and + ``_advance_to_next_tool`` logic don't distinguish literal text from + structural delimiters. This can cause ``_sent_content_idx`` to land + INSIDE the tool's parameter value, after which the trailing text + fails to be emitted. 
+ """ + from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionToolsParam, + ) + + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) + ] + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + # The parameter value contains a literal ```` string. + # The real ```` follows after ````. + delta_1 = ( + "\n\n" + "foo.py\n" + "\n" + "doc = 'example'\n" + "\n\n" + ) + parser.extract_tool_calls_streaming( + previous_text="", + current_text=delta_1, + delta_text=delta_1, + previous_token_ids=[], + current_token_ids=[1], + delta_token_ids=[1], + request=request, + ) + + delta_2 = "\nDone, file written!" + text2 = delta_1 + delta_2 + msg2 = parser.extract_tool_calls_streaming( + previous_text=delta_1, + current_text=text2, + delta_text=delta_2, + previous_token_ids=[1], + current_token_ids=[1, 2], + delta_token_ids=[2], + request=request, + ) + contents = [] + if msg2 and msg2.content: + contents.append(msg2.content) + # EOS-style empty delta to flush + msg3 = parser.extract_tool_calls_streaming( + previous_text=text2, + current_text=text2, + delta_text="", + previous_token_ids=[1, 2], + current_token_ids=[1, 2, 3], + delta_token_ids=[3], + request=request, + ) + if msg3 and msg3.content: + contents.append(msg3.content) + + full = "".join(contents) + assert "Done, file written!" in full, ( + f"Trailing text after a tool call whose parameter value contains " + f"a literal was dropped. Got content: {full!r}" + ) + + +def test_streaming_second_tool_after_first_with_literal_close_tag_in_value( + qwen3_tokenizer, +): + """A first tool call's parameter value contains a literal + ````. A SECOND structural tool call follows after the + real ````. 
Both tool calls and any inter-call content + must be emitted correctly. + """ + from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionToolsParam, + ) + + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "log", + "parameters": { + "type": "object", + "properties": {"msg": {"type": "string"}}, + }, + }, + ), + ] + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + full = ( + "\n\n" + "foo.py\n" + "\n" + "doc = 'example'\n" + "\n\n" + "\n" + "\n\n" + "done\n" + "\n" + ) + + msg = parser.extract_tool_calls_streaming( + previous_text="", + current_text=full, + delta_text=full, + previous_token_ids=[], + current_token_ids=[1], + delta_token_ids=[1], + request=request, + ) + assert msg is not None + assert msg.tool_calls is not None + assert len(msg.tool_calls) == 2, ( + f"Expected 2 tool calls, got {len(msg.tool_calls)}: {msg.tool_calls}" + ) + names = [tc.function.name for tc in msg.tool_calls] + assert names == ["write_file", "log"], f"Wrong tool names: {names}" + + +def test_streaming_content_before_and_between_two_tool_calls_one_delta( + qwen3_tool_parser, +): + """MTP / spec-decode: a single delta delivers free text BEFORE tool 1 + AND free text BETWEEN tool 1 and tool 2. Both content fragments must + be emitted; the recursion path used to drop the second one because of a + ``not result.content`` guard that discarded the recursion's content + when the outer call already had content of its own. 
+ """ + request = ChatCompletionRequest(model=MODEL, messages=[]) + delta = ( + "before text " + "\n\n" + "\n1\n\n" + "\n" + "between text " + "\n\n" + "\n2\n\n" + "\n" + ) + msg = qwen3_tool_parser.extract_tool_calls_streaming( + previous_text="", + current_text=delta, + delta_text=delta, + previous_token_ids=[], + current_token_ids=[1], + delta_token_ids=[1], + request=request, + ) + assert msg is not None + assert msg.content is not None, "outer content lost" + assert "before text " in msg.content, ( + f"missing 'before text' content: {msg.content!r}" + ) + assert "between text " in msg.content, ( + f"recursion content 'between text' was dropped because the outer " + f"already had content. Got: {msg.content!r}" + ) + + def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): """```` arrives split across two deltas (````). ``is_tool_call_started`` must flip to ``True`` once the diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 9b61b942fa36..83aeffdb27b7 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ -175,6 +175,102 @@ def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): assert "I hope this helps!" in all_content, "Free text after the last tool call should be emitted." +def test_qwen3xml_streaming_trailing_text_after_literal_close_in_value( + qwen3_tokenizer, +): + """XML parser: a tool_call's parameter value contains a literal + ````. After the real ````, trailing free + text must still be emitted. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) + ] + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + deltas = [ + # Tool 1 with literal embedded in 'content'. + "\n\n" + "foo.py\n" + "\n" + "doc = 'example'\n" + "\n\n", + # Trailing text in a separate delta. + "\nDone, file written!", + ] + + reconstructor = run_tool_extraction_streaming( + parser, deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)}" + ) + assert "Done, file written!" in reconstructor.other_content, ( + f"Trailing text after a tool with literal in its " + f"value was dropped. Got content: {reconstructor.other_content!r}" + ) + + +def test_qwen3xml_streaming_python_none_int_char_by_char(qwen3_tokenizer): + """Streaming a nullable INTEGER param value of "None" (Qwen3.5 style) + char-by-char must produce VALID JSON. The XML parser's incremental + char path used to emit "Non" then a "l" delta computed from the diff + between "Non" and "null", giving the cumulative invalid string + "Nonl". The fix defers int/float conversion just like bool/object + so the full value is parsed at close. + """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_count", + "parameters": { + "type": "object", + "properties": { + "count": { + "anyOf": [ + {"type": "integer"}, + {"type": "null"}, + ], + }, + }, + }, + }, + ) + ] + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + # Char-by-char deltas emulate worst-case slow streaming. 
+ char_deltas = [ + "\n", "\n", "", + "\n", "N", "o", "n", "e", "\n", "\n", + "\n", "", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + raw = reconstructor.tool_calls[0].function.arguments + args = json.loads(raw) # must be valid JSON + assert args["count"] is None, ( + f"streaming nullable int 'None' produced invalid JSON or wrong " + f"value. Raw: {raw!r}" + ) + + def test_qwen36_xml_streaming_double_close_brace(qwen3_tokenizer): tools = [ ChatCompletionToolsParam( diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 3a8f0e640926..57b6b4343db4 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -151,8 +151,12 @@ def _convert_param_value( # the literal "null") rather than converting it to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: return param_value - # For non-string types, "null" maps to JSON null. - if param_value.lower() == "null": + # For non-string types, "null" maps to JSON null. Also accept + # the Python literal "None" so that Qwen3.5-trained models — whose + # chat template renders null args via ``| string`` (yielding the + # literal "None" in the prompt) — round-trip nullable values + # correctly. + if param_value.lower() in ("null", "none"): return None if ( param_type.startswith("int") @@ -305,19 +309,18 @@ def _advance_to_next_tool(self, current_text: str) -> None: Called both on normal delta boundaries and during speculative- decoding recursion when multiple complete tool calls arrive in one delta. + + Uses STRUCTURAL ```` positions so a literal + ```` embedded in a parameter value (e.g. a code + snippet) does not move ``_sent_content_idx`` to the wrong place. 
""" - search_idx = 0 - for _ in range(self.current_tool_index + 1): - search_idx = current_text.find(self.tool_call_start_token, search_idx) - if search_idx == -1: - break - end_idx = current_text.find(self.tool_call_end_token, search_idx) - if end_idx != -1: - self._sent_content_idx = max( - self._sent_content_idx, - end_idx + len(self.tool_call_end_token), - ) - search_idx += len(self.tool_call_start_token) + end_positions = self._structural_tool_call_end_positions(current_text) + target = self.current_tool_index + if target < len(end_positions): + self._sent_content_idx = max( + self._sent_content_idx, + end_positions[target] + len(self.tool_call_end_token), + ) self.current_tool_index += 1 self.header_sent = False @@ -343,6 +346,38 @@ def _find_true_tool_call_end(self, text: str) -> int: return idx search_pos = idx + len(self.tool_call_end_token) + def _structural_tool_call_end_positions(self, text: str) -> list[int]: + """Return positions of every STRUCTURAL ```` in text. + + A naive ``text.count()`` over-counts when a parameter + value embeds the literal string ```` (e.g. a code-write + tool whose argument contains a tool-call example). A real + ```` matches at least one of: + - it is preceded (after optional whitespace) by ````, + i.e. it follows the standard tool-call template; or + - it is followed (after optional whitespace) by another + ```` opener or end of string. + Either condition rules out a literal embedded in parameter content. 
+ """ + positions: list[int] = [] + search_pos = 0 + while True: + idx = text.find(self.tool_call_end_token, search_pos) + if idx == -1: + break + before = text[:idx].rstrip() + preceded_by_func = before.endswith(self.function_end_token) + after = text[idx + len(self.tool_call_end_token):] + stripped = after.lstrip() + followed_structurally = ( + stripped == "" + or stripped.startswith(self.tool_call_start_token) + ) + if preceded_by_func or followed_structurally: + positions.append(idx) + search_pos = idx + len(self.tool_call_end_token) + return positions + def _find_true_param_end( self, value_text: str, @@ -627,44 +662,63 @@ def extract_tool_calls_streaming( # Check if we need to advance to next tool if self.json_closed and not self.in_function: - # Check if this tool call has ended - tool_ends = current_text.count(self.tool_call_end_token) + # Use structural count: a literal + # embedded in a parameter value must not trigger spurious + # advance. + tool_ends = len( + self._structural_tool_call_end_positions(current_text) + ) if tool_ends > self.current_tool_index: # Advance to next tool; is_tool_call_started is reset so # content between or after tool calls is emitted correctly. + # We deliberately fall through (no early ``return None``): + # the rest of this delta may carry trailing free text after + # the closed or even an entire next tool call + # (MTP / speculative decoding). The downstream code handles + # both — emitting trailing content via the not-started + # branch, or starting the next tool via tool_starts_count. 
self._advance_to_next_tool(current_text) - return None content_message = None # Handle normal content before tool calls if not self.is_tool_call_started: - # Check if tool call is starting tool_starts_count = current_text.count(self.tool_call_start_token) - if ( + start_signal = ( self.tool_call_start_token_id in delta_token_ids or tool_starts_count > self.current_tool_index - ): + ) + # ``tool_starts_count`` is naive and over-counts when an + # earlier tool's parameter value contains a literal + # ````. Confirm a REAL next tool by locating an + # opener past ``_sent_content_idx`` (which sits after the last + # processed tool's structural ````). + last_start = -1 + if start_signal: + last_start = current_text.find( + self.tool_call_start_token, self._sent_content_idx + ) + if start_signal and last_start != -1: self.is_tool_call_started = True # Return any content before the tool call - last_start = current_text.find(self.tool_call_start_token, self._sent_content_idx) - if last_start != -1 and last_start > self._sent_content_idx: + if last_start > self._sent_content_idx: content_before = current_text[self._sent_content_idx:last_start] self._sent_content_idx = last_start if content_before: content_message = DeltaMessage(content=content_before) else: + # No real new tool starting in this delta — emit any + # trailing/inter-call content. overlap = partial_tag_overlap(current_text, self.tool_call_start_token) sendable_idx = len(current_text) - overlap - - # Check if we're between tool calls - skip whitespace + + # Skip whitespace-only deltas right after a closed tool. 
if ( current_text.rstrip().endswith(self.tool_call_end_token) and delta_text.strip() == "" ): - # We just ended a tool call, skip whitespace self._sent_content_idx = len(current_text) return None - + if sendable_idx > self._sent_content_idx: content = current_text[self._sent_content_idx:sendable_idx] self._sent_content_idx = sendable_idx @@ -685,6 +739,11 @@ def extract_tool_calls_streaming( # calls (past each ), so that tokens # embedded in parameter values of completed calls are never # included. + # Use STRUCTURAL positions when jumping past + # completed tool calls — naive ``current_text.find()`` + # matches a literal ```` embedded in a parameter + # value and would land inside an earlier tool's content. + structural_ends = self._structural_tool_call_end_positions(current_text) tool_start_positions: list[int] = [] search_pos = 0 for i in range(self.current_tool_index + 1): @@ -693,10 +752,12 @@ def extract_tool_calls_streaming( break tool_start_positions.append(idx) if i < self.current_tool_index: - # Completed tool call: jump past its so the - # next search starts after it, skipping any content - # inside (including literal ). - end_idx = current_text.find(self.tool_call_end_token, idx) + # Completed tool call: jump past its STRUCTURAL . + end_idx = -1 + for end_pos in structural_ends: + if end_pos > idx: + end_idx = end_pos + break if end_idx == -1: break search_pos = end_idx + len(self.tool_call_end_token) @@ -705,8 +766,14 @@ def extract_tool_calls_streaming( return content_message tool_start_idx = tool_start_positions[self.current_tool_index] - # Find where this tool call ends (or current position if not ended yet) - tool_end_idx = current_text.find(self.tool_call_end_token, tool_start_idx) + # Find this tool call's STRUCTURAL end (or use rest of text if + # the tool isn't closed yet). A naive find would truncate at a + # literal inside a parameter value. 
+ tool_end_idx = -1 + for end_pos in structural_ends: + if end_pos > tool_start_idx: + tool_end_idx = end_pos + break if tool_end_idx == -1: tool_text = current_text[tool_start_idx:] else: @@ -1001,7 +1068,7 @@ def extract_tool_calls_streaming( if ( self.json_closed and not self.in_function - and current_text.count(self.tool_call_end_token) + and len(self._structural_tool_call_end_positions(current_text)) > self.current_tool_index + 1 ): # Speculative decoding delivered multiple complete tool @@ -1023,8 +1090,41 @@ def extract_tool_calls_streaming( if result.tool_calls is None: result.tool_calls = [] result.tool_calls.extend(next_delta.tool_calls) - if next_delta.content and not result.content: - result.content = next_delta.content + # Concatenate the recursion's content (e.g. text + # BETWEEN tool 1 and tool 2) with the outer's content + # (e.g. text BEFORE tool 1). Without this, the "between" + # fragment is silently dropped whenever the outer + # already produced its own content. + if next_delta.content: + result.content = ( + (result.content or "") + next_delta.content + ) + + # Emit trailing free text that follows the LAST structural + # in this delta (MTP / spec-decoding bursts that + # bundle N tool calls + trailing content into one chunk). + # Without this the trailing text is buffered indefinitely: + # the per-tool processing never advances ``_sent_content_idx`` + # past its tool's ````, and an EOS-style empty + # delta cannot recover content that was never emitted. 
+ if self.json_closed and not self.in_function: + end_positions = self._structural_tool_call_end_positions( + current_text + ) + if end_positions: + last_end = ( + end_positions[-1] + len(self.tool_call_end_token) + ) + if ( + last_end < len(current_text) + and last_end > self._sent_content_idx + ): + trailing = current_text[last_end:] + if trailing: + self._sent_content_idx = len(current_text) + result.content = ( + (result.content or "") + trailing + ) return result return content_message diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index 20b361320375..7de79a54002b 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -407,6 +407,20 @@ def _process_complete_xml_elements(self) -> bool: # Update processed position self.last_processed_pos = end_pos + # Flush any text accumulated AFTER the last processed + # in this batch. Without this, trailing free text that arrives in + # the SAME delta as the closing (MTP / speculative + # decoding) is buffered but never emitted — and is lost entirely + # if EOS comes before any subsequent delta. + if ( + found_any + and self.text_content_buffer + and self.current_call_id is None + ): + text_delta = DeltaMessage(content=self.text_content_buffer) + self._emit_delta(text_delta) + self.text_content_buffer = "" + return found_any def _should_skip_element(self, element: str) -> bool: @@ -650,11 +664,29 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: # first char would otherwise be converted to False and # emit "false", shadowing the real "true" that follows. is_bool_type = param_type in ["boolean", "bool", "binary"] + # Numeric types need deferral too: a nullable + # parameter rendered as the literal "None" (Qwen3.5 + # template) or "null" (Qwen3.6 template) flips from + # the partial-string fallback to JSON ``null`` only + # when the FULL value is in. 
Without deferral the + # diff-based char emission would interleave the + # partial string ("Non") with the JSON literal + # ("null") and produce invalid output ("Nonl"). + is_numeric_type = ( + param_type.startswith("int") + or param_type.startswith("uint") + or param_type.startswith("long") + or param_type.startswith("short") + or param_type.startswith("unsigned") + or param_type.startswith("num") + or param_type.startswith("float") + ) need_defer = ( is_complex_type or is_object_type or is_bool_type + or is_numeric_type ) if not need_defer: @@ -1257,8 +1289,12 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: # the string "null" instead of being converted to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: return param_value - # Non-string: "null" → Python None → JSON null. - if param_value.lower() == "null": + # Non-string: "null" → Python None → JSON null. Also accept the + # Python literal "None" so that Qwen3.5-trained models — whose + # chat template renders null args via ``| string`` (yielding the + # literal "None" in the prompt) — round-trip nullable values + # correctly. + if param_value.lower() in ("null", "none"): return None if ( param_type.startswith("int") From 2dd0719e593bcd4115218385025254ee04902fa7 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 14:36:06 +0200 Subject: [PATCH 17/21] fix(qwen3-tool-parsers): handle every literal balise inside parameter values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a tool's parameter value embeds the Qwen tool-call format verbatim (qwen-code WriteFile writing a parser fixture, a Jinja2 template, a chat template documentation file, etc.), every structural-looking balise inside the value — ``, ``, ``, ``, ``, `` — was being mistaken for a real structural delimiter. 
The classes of failure were: - `<parameter=NAME>` literal where NAME is a real param of the same tool: the schema-name filter accepted it, truncating the outer value and overwriting the sibling param. - `</parameter>` literal whose lookahead happens to be a structural token (e.g. `</function>` literal that itself sits inside another literal nested call): the lookahead heuristic accepted it. - A complete literal `<tool_call>...</function>\n</tool_call>` block: the legacy "preceded by `</function>`" / "followed by `<tool_call>` or EOS" heuristics matched the literal nested close, so the outer param was truncated and a phantom second tool call was produced. - Streaming fallback (`</function>` followed by EOS = end of buffer) matching the literal `</function>` of a nested call delivered in the same delta as its synthetic EOS, prematurely closing the outer param. For the Coder parser: - `_scan_to_structural_function_end` walks the function body parameter-by-parameter via `_find_true_param_end` and returns the index of the structural `</function>`; literal `</function>` / `</tool_call>` strings inside parameter values are skipped, and the walker also recovers from "missing `</parameter>`" malformations via the next unseen-NAME `<parameter=NAME>`. - `_structural_tool_call_end_positions` and `_get_function_calls` use the walker instead of a heuristic-based scan. - The streaming param-scan loop and the inner param fallback now filter `<parameter=NAME>` candidates by "NAME not already emitted / not the param being scanned"; a same-name literal can no longer hijack the outer value. - The streaming function-close decision (`json_closed` flip) also goes through the walker so a literal `</function>\n</tool_call>` inside a still-open content value cannot trip it. For the XML parser: - `_is_structural_closing_tag` and the `<parameter=NAME>` chunk preprocess now reject NAMEs already emitted (or equal to the param being parsed) — a strictly stronger rule than schema membership. - A new `_literal_tag_depth` counter tracks LITERAL nested `<tool_call>` / `<function=NAME>` opens encountered in the current parameter's value; while depth > 0, every closing tag pairs with a literal opener and is itself escaped. 
The post-processing structural-close fallback in `parse_single_streaming_chunks` is suppressed when the chunk produced any literal-tag event, so a literal `` / `` cannot trigger spurious `_end_element` calls that would prematurely close the outer param. Tests: - `tests/tool_parsers/test_qwen3_xml_coder_shared.py`: * `test_content_with_real_param_name_literal_{nonstreaming,streaming}` — `` literal where `path` is a real schema param. * `test_content_with_full_nested_tool_call_{nonstreaming,streaming}` — a complete literal nested `...` whose function/parameter names match the outer schema. - `tests/tool_parsers/test_qwen3coder_tool_parser.py`: * `test_streaming_char_by_char_literal_balises_in_value` — the qwen-code char-by-char delivery with all six balise types nested. 242 Qwen3 parser tests pass; the rest of the tool_parsers suite is unchanged. Co-authored-by: Claude Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3_xml_coder_shared.py | 215 +++++++++++++ .../test_qwen3coder_tool_parser.py | 101 ++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 302 +++++++++++++++--- vllm/tool_parsers/qwen3xml_tool_parser.py | 137 ++++++-- 4 files changed, 683 insertions(+), 72 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index 7f3302a6cad0..72d8695e13f6 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -1560,3 +1560,218 @@ def test_streaming_trailing_text_with_final_close_in_same_delta( f"Trailing text after was dropped. " f"Got content: {reconstructor.other_content!r}" ) + + +# --------------------------------------------------------------------------- +# Parameter value containing a literal ```` whose NAME IS +# itself a real parameter of the same tool. The schema-based filter cannot +# rule the literal out by name, so a stronger heuristic is required (e.g. 
+# the literal does not pair with a structural ```` followed by +# another structural delimiter). This is the exact pattern that breaks +# qwen-code WriteFile when the file being written is itself a parser test +# fixture. +# --------------------------------------------------------------------------- + +_CONTENT_WITH_REAL_PARAM_NAME_LITERAL = ( + 'doc = """\n' + '\n' + 'literal/value\n' + '\n' + '"""\n' +) + +_REAL_PARAM_NAME_LITERAL_OUTPUT = ( + "\n" + "\n" + "\nfixture.py\n\n" + f"\n{_CONTENT_WITH_REAL_PARAM_NAME_LITERAL}\n" + "\n" + "" +) + + +def test_content_with_real_param_name_literal_nonstreaming( + qwen3_tokenizer, parser_cls +): + """Non-streaming: parameter ``content`` value embeds + ``...`` where ``path`` IS the other real + parameter of the same ``write_file`` tool. Schema name filtering alone + cannot disambiguate — the parser must use a stronger rule (e.g. the + embedded ```` must be followed by a structural delimiter + that closes the OUTER param, not the inner literal). + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + result = parser.extract_tool_calls( + _REAL_PARAM_NAME_LITERAL_OUTPUT, request=request + ) + + assert result.tools_called + assert len(result.tool_calls) == 1 + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params from embedded same-name literal: " + f"{list(args.keys())}" + ) + assert args["path"] == "fixture.py", ( + f"Outer ``path`` was overwritten by embedded literal: " + f"{args.get('path')!r}" + ) + expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n") + assert args["content"] == expected, ( + f"content was truncated at the embedded . " + f"Got: {args.get('content')!r}" + ) + + +def test_content_with_real_param_name_literal_streaming( + qwen3_tokenizer, parser_cls +): + """Streaming variant of the same case. 
Each meaningful structural- + looking line arrives in its own delta — the parser cannot wait for the + full text to disambiguate. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + char_deltas = [ + "\n", + "\n", + "\nfixture.py\n\n", + '\ndoc = """\n', + "\n", + "literal/value\n", + "\n", + '"""\n', + "\n", + "\n", + "", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1 + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params from embedded same-name literal: " + f"{list(args.keys())}" + ) + assert args["path"] == "fixture.py" + expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n") + assert args["content"] == expected, ( + f"content was truncated at the embedded . " + f"Got: {args.get('content')!r}" + ) + + +# --------------------------------------------------------------------------- +# Parameter value containing a COMPLETE nested tool_call (all four balise +# types: , , , , +# , ) — the qwen-code WriteFile pattern when the +# file being written is itself a parser fixture or a chat-template +# example. Every literal must stay inside the value; no spurious extra +# tool calls or params should be generated. 
+# --------------------------------------------------------------------------- + +_CONTENT_WITH_FULL_NESTED_CALL = ( + 'doc = """\n' + "\n" + "\n" + "\n" + "literal/value.txt\n" + "\n" + "\n" + "hello\n" + "\n" + "\n" + "\n" + '"""\n' +) + +_FULL_NESTED_CALL_OUTPUT = ( + "\n" + "\n" + "\nfixture.py\n\n" + f"\n{_CONTENT_WITH_FULL_NESTED_CALL}\n" + "\n" + "" +) + + +def test_content_with_full_nested_tool_call_nonstreaming( + qwen3_tokenizer, parser_cls +): + """Non-streaming: parameter ``content`` contains a complete literal + ``...`` whose function/parameter names match + the OUTER tool's schema. Every literal must stay inside the value; + no extra tool call must be generated. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + result = parser.extract_tool_calls(_FULL_NESTED_CALL_OUTPUT, request=request) + + assert result.tools_called + assert len(result.tool_calls) == 1, ( + f"Expected 1 tool call (the outer one), got " + f"{len(result.tool_calls)} — embedded literal tool_call was " + f"incorrectly promoted to a real call." + ) + args = json.loads(result.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"] + assert args["path"] == "fixture.py" + expected = _CONTENT_WITH_FULL_NESTED_CALL.rstrip("\n") + assert args["content"] == expected, ( + f"content truncated/corrupted: {args.get('content')!r}" + ) + + +def test_content_with_full_nested_tool_call_streaming( + qwen3_tokenizer, parser_cls +): + """Streaming variant: the literal nested ``...`` + crosses many delta boundaries; the parser must not start a second + tool call. 
+ """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + char_deltas = [ + "\n", + "\n", + "\nfixture.py\n\n", + '\ndoc = """\n', + "\n", + "\n", + "\n", + "literal/value.txt\n", + "\n", + "\n", + "hello\n", + "\n", + "\n", + "\n", + '"""\n', + "\n", + "\n", + "", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 1, ( + f"Expected 1 tool call, got {len(reconstructor.tool_calls)} — " + f"a literal nested was promoted to a real call." + ) + args = json.loads(reconstructor.tool_calls[0].function.arguments) + assert list(args.keys()) == ["path", "content"] + assert args["path"] == "fixture.py" + expected = _CONTENT_WITH_FULL_NESTED_CALL.rstrip("\n") + assert args["content"] == expected, ( + f"content truncated/corrupted: {args.get('content')!r}" + ) diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 0cba144f0864..5c450de2cdaf 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -279,6 +279,107 @@ def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser): assert "_call>" not in msg2.content +def test_streaming_char_by_char_literal_balises_in_value(qwen3_tokenizer): + """Stress test: a WriteFile tool call whose ``content`` value embeds a + complete literal ``...`` block — including + ``...`` and ``... + `` with names that match the OUTER tool's schema — + streamed one character at a time. + + Reproduces the qwen-code scenario where the model writes a parser + fixture file: every literal ````, ````, + ````, ````, ```` and + ```` inside the ``content`` value must stay inside the + value; no spurious second tool call, no value truncation. 
+ """ + from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionToolsParam, + ) + + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) + ] + parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + nested_content = ( + 'doc = """\n' + "\n" + "\n" + "\nliteral/value.txt\n\n" + "\nhello\n\n" + "\n" + "\n" + '"""\n' + ) + + full_output = ( + "\n" + "\n" + "\nfixture.py\n\n" + f"\n{nested_content}\n" + "\n" + "" + ) + + tool_states: dict[int, dict] = {} + current_text = "" + previous_text = "" + for ch in full_output: + previous_text = current_text + current_text += ch + delta_message = parser.extract_tool_calls_streaming( + previous_text=previous_text, + current_text=current_text, + delta_text=ch, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=request, + ) + if delta_message and delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + state = tool_states.setdefault( + idx, {"id": None, "name": None, "arguments": ""} + ) + if tool_call.id: + state["id"] = tool_call.id + if tool_call.function: + if tool_call.function.name: + state["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + state["arguments"] += tool_call.function.arguments + + assert list(tool_states.keys()) == [0], ( + f"Expected exactly one tool call; got indices " + f"{list(tool_states.keys())} — a literal nested " + f"was promoted to a real call." 
+ ) + state = tool_states[0] + assert state["name"] == "write_file" + args = json.loads(state["arguments"]) + assert list(args.keys()) == ["path", "content"], ( + f"Spurious params from embedded literals: {list(args.keys())}" + ) + assert args["path"] == "fixture.py" + assert args["content"] == nested_content.rstrip("\n"), ( + f"content was truncated/corrupted: {args.get('content')!r}" + ) + + def test_extract_tool_calls_streaming_various_chunk_sizes( qwen3_tokenizer, ): diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 57b6b4343db4..d287f2085aa3 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -301,6 +301,79 @@ def _find_true_function_end(self, text: str) -> int: return idx search_pos = idx + len(self.function_end_token) + def _scan_to_structural_function_end( + self, + after_func_open: str, + valid_param_names: set[str] | None = None, + ) -> int: + """Scan a function body — text immediately following the closing + ``>`` of ```` — by walking through structural + ``...`` blocks and return the index of + the structural ```` in ``after_func_open``. + + This is more robust than ``_find_true_function_end`` when the + parameter value embeds a complete literal ``... + \\n`` block: that nested ```` + is followed by ```` and would pass the lookahead + heuristic, but it is INSIDE a parameter and must be skipped. + + Handles a "missing " malformation by treating the + next structural ```` (with NAME unseen so far) + as an implicit end. + + Returns -1 if the body is incomplete or malformed. 
+ """ + pos = 0 + n = len(after_func_open) + seen: set[str] = set() + while pos < n: + # Skip whitespace between params + while pos < n and after_func_open[pos] in " \t\n\r": + pos += 1 + if pos >= n: + return -1 + if after_func_open[pos:].startswith(self.function_end_token): + return pos + if not after_func_open[pos:].startswith(self.parameter_prefix): + # Unexpected token before ; fall back to the + # legacy heuristic on the rest of the text. + rest_offset = self._find_true_function_end(after_func_open[pos:]) + return pos + rest_offset if rest_offset != -1 else -1 + name_end = after_func_open.find( + ">", pos + len(self.parameter_prefix) + ) + if name_end == -1: + return -1 + param_name = after_func_open[pos + len(self.parameter_prefix):name_end] + value_start = name_end + 1 + if value_start < n and after_func_open[value_start] == "\n": + value_start += 1 + param_end = self._find_true_param_end( + after_func_open[value_start:], + valid_param_names, + require_lookahead=True, + ) + if param_end == -1: + # Missing malformation: try the next + # structural with NAME unseen so far + # as the implicit end. + unseen: set[str] | None = ( + (valid_param_names - seen - {param_name}) + if valid_param_names is not None + else None + ) + implicit_end = self._next_structural_param_start( + after_func_open[value_start:], 0, unseen + ) + if implicit_end == -1: + return -1 + pos = value_start + implicit_end + seen.add(param_name) + continue + seen.add(param_name) + pos = value_start + param_end + len(self.parameter_end_token) + return -1 + def _advance_to_next_tool(self, current_text: str) -> None: """Advance streaming state to the next tool call. @@ -349,33 +422,59 @@ def _find_true_tool_call_end(self, text: str) -> int: def _structural_tool_call_end_positions(self, text: str) -> list[int]: """Return positions of every STRUCTURAL ```` in text. - A naive ``text.count()`` over-counts when a parameter - value embeds the literal string ```` (e.g. 
a code-write - tool whose argument contains a tool-call example). A real - ```` matches at least one of: - - it is preceded (after optional whitespace) by ````, - i.e. it follows the standard tool-call template; or - - it is followed (after optional whitespace) by another - ```` opener or end of string. - Either condition rules out a literal embedded in parameter content. + Walks each ``...`` top-level block by + following ````, scanning the body via + ``_scan_to_structural_function_end`` (which steps over parameter + values that may contain literal ````, ````, + ```` or ```` strings), then matching the + trailing ````. + + Falls back to a lookahead heuristic when the walker cannot + determine a structural close (incomplete body, malformed XML). """ positions: list[int] = [] - search_pos = 0 - while True: - idx = text.find(self.tool_call_end_token, search_pos) - if idx == -1: + pos = 0 + n = len(text) + while pos < n: + tc_start = text.find(self.tool_call_start_token, pos) + if tc_start == -1: break - before = text[:idx].rstrip() - preceded_by_func = before.endswith(self.function_end_token) - after = text[idx + len(self.tool_call_end_token):] - stripped = after.lstrip() - followed_structurally = ( - stripped == "" - or stripped.startswith(self.tool_call_start_token) + body_start = tc_start + len(self.tool_call_start_token) + func_open = text.find(self.tool_call_prefix, body_start) + if func_open == -1: + break + name_end = text.find( + ">", func_open + len(self.tool_call_prefix) ) - if preceded_by_func or followed_structurally: - positions.append(idx) - search_pos = idx + len(self.tool_call_end_token) + if name_end == -1: + break + func_name = text[func_open + len(self.tool_call_prefix):name_end] + valid_params: set[str] | None = None + if self.tools: + cfg = find_tool_properties(self.tools, func_name) + if cfg: + valid_params = set(cfg.keys()) + body_after_name = text[name_end + 1:] + func_end_rel = self._scan_to_structural_function_end( + body_after_name, 
valid_params + ) + if func_end_rel == -1: + # Body incomplete; the structural is not + # yet known. Stop walking — DO NOT fall back to the + # legacy heuristic for the rest of the text, because a + # literal embedded in an unfinished + # parameter would be erroneously treated as structural. + break + func_end_abs = (name_end + 1) + func_end_rel + after = text[func_end_abs + len(self.function_end_token):] + i = 0 + while i < len(after) and after[i] in " \t\n\r": + i += 1 + if not after[i:].startswith(self.tool_call_end_token): + break + tc_end_pos = func_end_abs + len(self.function_end_token) + i + positions.append(tc_end_pos) + pos = tc_end_pos + len(self.tool_call_end_token) return positions def _find_true_param_end( @@ -549,15 +648,35 @@ def _get_function_calls(self, model_output: str) -> list[str]: if len(raw_tool_calls) == 0: raw_tool_calls = [model_output] - # Use structural boundary instead of a greedy regex so - # that '' appearing as literal text inside a parameter - # value does not truncate the function body. + # Use a parameter-aware walk to find the structural : + # when the value of a parameter embeds a complete literal + # ``...\n`` block, the nested + # ```` is followed by ```` and would pass + # the simple "followed by " lookahead. Walking the + # body parameter-by-parameter with ``_find_true_param_end`` + # correctly steps over the literal. 
function_calls: list[str] = [] for tool_call in raw_tool_calls: func_start = tool_call.find(self.tool_call_prefix) if func_start == -1: continue after_func_open = tool_call[func_start + len(self.tool_call_prefix):] + name_end = after_func_open.find(">") + valid_param_names: set[str] | None = None + body_start = 0 + if name_end != -1: + func_name = after_func_open[:name_end] + cfg = find_tool_properties(self.tools, func_name) + if cfg: + valid_param_names = set(cfg.keys()) + body_start = name_end + 1 + scan_end = self._scan_to_structural_function_end( + after_func_open[body_start:], valid_param_names + ) + if scan_end != -1: + function_calls.append(after_func_open[:body_start + scan_end]) + continue + # Fallback to legacy heuristic. func_end = self._find_true_function_end(after_func_open) if func_end == -1: function_calls.append(after_func_open) @@ -880,8 +999,34 @@ def extract_tool_calls_streaming( require_lookahead=True, ) if end_in_after == -1: + # No structural ```` close yet. A + # legitimate "missing " malformation — + # the model jumps from ```` straight to + # ```` — is recoverable: treat the + # next structural ```` as implicit + # end of the current param. But only if NAME has + # NOT already been parsed as a sibling param of this + # tool call (and is not the param currently being + # scanned). A repeated NAME is almost always a + # literal embedded in the unfinished value, not a + # real next parameter. 
+ cand_name = ( + tool_text[ + param_start_pos + len(self.parameter_prefix) + : name_end_pos + ] + ) + already_seen = ( + set(self.accumulated_params.keys()) + | ({cand_name} if cand_name else set()) + ) + unseen_valid: set[str] | None = ( + (valid_param_names - already_seen) + if valid_param_names is not None + else None + ) implicit_end = self._next_structural_param_start( - after_name_stripped, 0, valid_param_names + after_name_stripped, 0, unseen_valid ) if implicit_end != -1: search_idx = ( @@ -890,6 +1035,7 @@ def extract_tool_calls_streaming( + implicit_end ) else: + # Wait for more data. break else: search_idx = ( @@ -927,27 +1073,67 @@ def extract_tool_calls_streaming( value_text, valid_param_names, require_lookahead=True ) if param_end_idx == -1: - # Fallback for malformed/incomplete XML: a structural - # and - # to avoid cutting the parameter at XML - # tags that appear as literal text inside the - # parameter value. - func_end_idx = self._find_true_function_end(value_text) - tool_end_in_value = self._find_true_tool_call_end(value_text) - - if next_param_idx != -1 and ( - func_end_idx == -1 or next_param_idx < func_end_idx - ): - param_end_idx = next_param_idx - elif func_end_idx != -1: - param_end_idx = func_end_idx - elif tool_end_in_value != -1: - param_end_idx = tool_end_in_value + # Confirm via the parameter-aware walker that the + # function body is truly complete. The legacy + # ``_find_true_function_end`` matches a ```` + # at end-of-buffer (lstripped lookahead == ""), which + # is wrong in streaming when the literal close of a + # nested tool_call inside a parameter value sits at + # the buffer's end. Walking the body via + # ``_scan_to_structural_function_end`` correctly + # steps over literal tags inside parameter values + # and returns -1 if any param is still open. 
+ tc_open_in_tool = tool_text.find(self.tool_call_prefix) + body_func_end_in_value = -1 + if tc_open_in_tool != -1: + name_end_in_tool = tool_text.find( + ">", tc_open_in_tool + len(self.tool_call_prefix) + ) + if name_end_in_tool != -1: + body_after_name = tool_text[name_end_in_tool + 1:] + body_func_end_rel = ( + self._scan_to_structural_function_end( + body_after_name, valid_param_names + ) + ) + if body_func_end_rel != -1: + body_func_end_abs = ( + name_end_in_tool + 1 + body_func_end_rel + ) + body_func_end_in_value = ( + body_func_end_abs - value_start + ) + + if body_func_end_in_value > 0: + # Function body is structurally complete; the + # current param has missing . Use + # the next legitimate (NAME + # unseen) before the structural as + # the implicit end. + already_seen = ( + set(self.accumulated_params.keys()) + | ({current_param_name} if current_param_name else set()) + ) + unseen_valid: set[str] | None = ( + (valid_param_names - already_seen) + if valid_param_names is not None + else None + ) + next_param_idx = self._next_structural_param_start( + value_text, 0, unseen_valid + ) + if ( + next_param_idx != -1 + and next_param_idx < body_func_end_in_value + ): + param_end_idx = next_param_idx + else: + param_end_idx = body_func_end_in_value else: + # Body not yet complete — wait for more data. + # Do NOT truncate at a literal or + # that may sit inside a still-open + # parameter value. break if param_end_idx == -1: @@ -1000,10 +1186,26 @@ def extract_tool_calls_streaming( # . If the close check ran first it would emit # "}" and set in_function=False before the parameter loop # ever ran, causing the parameter to be silently dropped. - # Use structural-aware search so a literal '' - # inside a parameter value does not trigger a premature + # Use the parameter-aware walker so a literal '' + # inside a parameter value (e.g. a content arg embedding a + # complete nested tool_call) does not trigger a premature # close. 
- true_func_end = self._find_true_function_end(tool_text) + true_func_end = -1 + tc_open_in_tool_for_close = tool_text.find(self.tool_call_prefix) + if tc_open_in_tool_for_close != -1: + name_end_in_tool = tool_text.find( + ">", + tc_open_in_tool_for_close + len(self.tool_call_prefix), + ) + if name_end_in_tool != -1: + body_after_name = tool_text[name_end_in_tool + 1:] + body_func_end_rel = self._scan_to_structural_function_end( + body_after_name, valid_param_names + ) + if body_func_end_rel != -1: + true_func_end = ( + name_end_in_tool + 1 + body_func_end_rel + ) if not self.json_closed and true_func_end != -1: self.json_closed = True diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index 7de79a54002b..fb0db2e741ca 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -86,6 +86,21 @@ def reset_streaming_state(self): self.defer_current_parameter = False self.deferred_param_raw_value = "" + # Depth of LITERAL nested ````/```` opens + # encountered inside the current parameter's value. Each literal + # opener bumps the depth; each ````/```` + # encountered while depth > 0 is also literal (decrements the + # depth) and must not be treated as a structural close. Reset + # to 0 when leaving a parameter. + self._literal_tag_depth = 0 + # Number of literal tool_call/function open or close events seen + # in the current ``parse_single_streaming_chunks`` call. Used to + # suppress the post-processing structural-close fallback when + # the chunk contained literal nested-tag events: those events + # are already handled (escaped) by the preprocess pass and must + # not trigger ``_end_element`` calls. 
+ self._literal_events_this_chunk = 0 + # recreate parser self.parser = ParserCreate() self.setup_parser() @@ -105,6 +120,12 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # Record delta count before processing initial_delta_count = len(self.deltas) + # Reset literal-event counter for this chunk: it will be + # incremented by the preprocess pass whenever it encounters a + # literal nested ````/```` open or + # the matching close inside a parameter value. + self._literal_events_this_chunk = 0 + self.streaming_buffer += xml_chunk found_elements = self._process_complete_xml_elements() @@ -115,9 +136,24 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # checks so that / appearing as literal # text inside a parameter value (e.g. file content) does NOT # trigger a spurious close that emits a duplicate '}' or ''. + # When ``_literal_tag_depth > 0`` we are still inside a + # literal nested ````/```` block in + # the current parameter's value — the chunk's `` + # or `` matches a literal opener, not a real + # structural close, so skip the fallback close events. try: + # Skip the fallback close events when this chunk + # contained any literal nested-tag event: those + # ````/```` strings are matched + # to literal openers in the param value and have + # already been escaped — firing ``_end_element`` here + # would prematurely close the OUTER parameter and + # truncate its value. 
+ literals_in_chunk = self._literal_events_this_chunk > 0 if ( self.current_call_id is not None + and not literals_in_chunk + and self._literal_tag_depth == 0 and self._chunk_has_structural_function_end(xml_chunk) and self.current_function_open ): @@ -127,6 +163,8 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: self._end_element("function") if ( self.current_call_id is not None + and not literals_in_chunk + and self._literal_tag_depth == 0 and self._chunk_has_structural_tool_call_end(xml_chunk) ): if self.current_param_name: @@ -155,10 +193,16 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # triggered by parser, manually complete end events. Only # execute when still on the same call as when entered, to # prevent accidentally closing new calls in multi- - # scenarios. - if self.current_call_id is not None and ( - self._chunk_has_structural_function_end(xml_chunk) - or self._chunk_has_structural_tool_call_end(xml_chunk) + # scenarios. Also skip when ``_literal_tag_depth > 0``: the + # chunk's ``/`` matches a literal + # opener inside the current parameter's value. + if ( + self.current_call_id is not None + and self._literal_tag_depth == 0 + and ( + self._chunk_has_structural_function_end(xml_chunk) + or self._chunk_has_structural_tool_call_end(xml_chunk) + ) ): if self.current_param_name: self._end_element("parameter") @@ -227,6 +271,20 @@ def _get_valid_param_names(self) -> set[str] | None: props = find_tool_properties(self.tools, self.current_function_name) return set(props.keys()) if props else None + def _is_already_emitted_param(self, name: str) -> bool: + """Return True when ``name`` has already appeared as a parameter + of the current tool call (either fully closed or currently open). + + A ```` whose NAME is already used for the same + tool is almost always literal text inside another parameter's + value (e.g. a parser fixture or a file that documents the + tool-call format). 
Treating it as a real structural opening + causes silent value truncation and spurious extra params. + """ + if name == self.current_param_name: + return True + return name in self.parameters + def _is_structural_closing_tag(self, chunk: str) -> bool: """Return True when a closing tag at the current buffer position is a real structural delimiter rather than literal text content. @@ -246,15 +304,22 @@ def _is_structural_closing_tag(self, chunk: str) -> bool: structural_param_follows = False if rest.startswith(self.parameter_start_token): valid_names = self._get_valid_param_names() - if valid_names is not None: - name_start = len(self.parameter_start_token) - name_end = rest.find(">", name_start) - if name_end != -1: + name_start = len(self.parameter_start_token) + name_end = rest.find(">", name_start) + if name_end != -1: + candidate = rest[name_start:name_end] + if valid_names is not None: structural_param_follows = ( - rest[name_start:name_end] in valid_names + candidate in valid_names + and not self._is_already_emitted_param(candidate) + ) + else: + # Fallback (no schema): trust the name unless it is a + # repeat of the current/already-emitted param, which + # is almost always a literal in a parser fixture. + structural_param_follows = ( + not self._is_already_emitted_param(candidate) ) - else: - structural_param_follows = True # fallback: trust all # Return True when rest is an incomplete prefix of a structural # closing token (e.g. rest="" hasn't fully @@ -708,30 +773,56 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: or chunk.startswith(self.function_start_token) ): # Opening tool_call/function tags are always literal inside - # a parameter value. + # a parameter value. Track nesting depth so that the + # matching ```` / ```` is also + # treated as literal even when its lookahead would + # otherwise satisfy the structural heuristic. 
+ self._literal_tag_depth += 1 + self._literal_events_this_chunk += 1 return self._escape_xml_special_chars(chunk) if chunk.startswith(self.parameter_start_token): # A structural always follows a newline in # the buffer. When a schema is available, also require # NAME to be a known parameter of the current function so # that literal ```` inside file - # content is treated as text. + # content is treated as text. A NAME already emitted + # for this tool (or equal to the param currently being + # parsed) is also literal text — a parser fixture or a + # file that documents the tool-call format. if not self._is_structural_tag_position(): return self._escape_xml_special_chars(chunk) - valid_names = self._get_valid_param_names() - if valid_names is not None: - name_start = len(self.parameter_start_token) - name_end = chunk.find(">", name_start) - if ( - name_end != -1 - and chunk[name_start:name_end] not in valid_names - ): + name_start = len(self.parameter_start_token) + name_end = chunk.find(">", name_start) + if name_end != -1: + candidate = chunk[name_start:name_end] + if self._is_already_emitted_param(candidate): + return self._escape_xml_special_chars(chunk) + valid_names = self._get_valid_param_names() + if valid_names is not None and candidate not in valid_names: return self._escape_xml_special_chars(chunk) if ( chunk.startswith(self.parameter_end_token) or chunk.startswith(self.function_end_token) or chunk.startswith(self.tool_call_end_token) ): + # Inside a literal nested tool_call/function (depth > 0), + # any closing tag pairs with the literal opener and is + # itself literal — regardless of what the lookahead says. + # ```` does not affect depth (parameters do + # not nest in the Qwen format). 
+ if self._literal_tag_depth > 0: + if chunk.startswith(self.function_end_token) or ( + chunk.startswith(self.tool_call_end_token) + ): + self._literal_tag_depth -= 1 + self._literal_events_this_chunk += 1 + else: + # Literal `` inside a nested literal + # block — count it as a literal event so the + # post-processing fallback knows the chunk + # contained literals and skips spurious closes. + self._literal_events_this_chunk += 1 + return self._escape_xml_special_chars(chunk) if not self._is_structural_closing_tag(chunk): return self._escape_xml_special_chars(chunk) @@ -967,7 +1058,9 @@ def _end_element(self, name: str): if ( name.startswith("parameter") or name == "parameter" ) and self.current_param_name: - # End current parameter + # End current parameter; reset literal-tag depth tracker + # since we are leaving the param's value scope. + self._literal_tag_depth = 0 param_name = self.current_param_name param_value = self.current_param_value From 6c0824e7298b120d45659627d19611b488e52c9d Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 16:58:56 +0200 Subject: [PATCH 18/21] fix(qwen3-tool-parsers): out-of-schema literal nesting + Jinja phantom rejection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two more bug classes observed against a real Qwen 3.6 server when the model writes a parser fixture or a chat-template description into a parameter value: 1. **Out-of-schema literal nested ````** — when the inner ``NAME`` is NOT in the outer tool's schema, the previous depth tracker filtered it out and the literal ```` that closes the nested literal appeared unmatched. Combined with a structural-looking ```` lookahead, the OUTER content value got truncated mid-literal. Fixed by tracking depth via UNFILTERED structural opens (every ```` preceded by ``\n`` increments depth, regardless of NAME). 2. 
**Phantom Jinja-template tool calls** — when the model writes an unrendered template like ``<function={{ tc.name }}>``, the function name contains ``{``/``}`` characters that cannot belong to a real identifier. Surfacing such names as real tool calls makes the client fail with "tool not found" and causes the agent to loop. Fixed by rejecting names that contain template-syntax characters (``{``, ``}``, ``<``, ``>``), whitespace, or quotes. Applied in ``_parse_xml_function_call`` (Coder, both streaming and non-streaming since this is the validator at function-end) and at the ``extract_tool_calls`` boundary of the XML parser (non-streaming). Tests: - ``test_two_tools_second_with_out_of_schema_nested_literal_{nonstreaming,streaming}`` - ``test_jinja_template_phantom_tool_call_is_rejected_nonstreaming`` The XML / Coder streaming paths are not yet phantom-aware: filtering phantoms in streaming requires a separate "client-visible index" counter, since bumping ``current_tool_index`` to skip the phantom desyncs ``streamed_args_for_tool`` and the client-visible delta index. A note in the test file documents the deferral. 
Co-authored-by: Claude Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3_xml_coder_shared.py | 181 ++++++++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 37 +++- vllm/tool_parsers/qwen3xml_tool_parser.py | 20 ++ 3 files changed, 235 insertions(+), 3 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index 72d8695e13f6..0cd2194f5d8f 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -1775,3 +1775,184 @@ def test_content_with_full_nested_tool_call_streaming( assert args["content"] == expected, ( f"content truncated/corrupted: {args.get('content')!r}" ) + + +# --------------------------------------------------------------------------- +# Two consecutive tool calls, where the SECOND embeds a literal nested +# tool_call whose ```` uses a NAME that is NOT in the +# OUTER tool's schema (e.g. a description of a different tool's format). +# Reproduces the qwen-code Qwen 3.6 freeze scenario: the depth tracker +# in ``_find_true_param_end`` filters opens by schema, so the literal +# ```` that closes the unknown-NAME literal open appears +# unmatched and matches the structural lookahead of the trailing +# ````, truncating the OUTER content value. +# --------------------------------------------------------------------------- + +_OUT_OF_SCHEMA_NESTED_CONTENT = ( + 'template = """\n' + "\n\n" + "baz\n" + "\n\n" + '"""\n' +) + +_TWO_TOOLS_OUT_OF_SCHEMA_NESTED_OUTPUT = ( + "\n\n" + "baz\n" + "\n" + "\n\n" + "\n\n" + "\nfixture.py\n\n" + f"\n{_OUT_OF_SCHEMA_NESTED_CONTENT}\n" + "\n" +) + + +def test_two_tools_second_with_out_of_schema_nested_literal_nonstreaming( + qwen3_tokenizer, parser_cls +): + """Two structural tool calls; the second's ``content`` value embeds a + literal nested ```` block whose inner ```` + uses a NAME not in the outer tool's schema (``write_file`` only knows + ``path`` and ``content``). 
+ + The walker must still match the outer ```` of ``content``, + not the literal ```` of the unknown-NAME nested open. + """ + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + result = parser.extract_tool_calls( + _TWO_TOOLS_OUT_OF_SCHEMA_NESTED_OUTPUT, request=request + ) + assert result.tools_called + assert len(result.tool_calls) == 2, ( + f"Expected 2 tool calls, got {len(result.tool_calls)}: " + f"{[tc.function.name for tc in result.tool_calls]}" + ) + args0 = json.loads(result.tool_calls[0].function.arguments) + args1 = json.loads(result.tool_calls[1].function.arguments) + assert args0 == {"bar": "baz"}, f"first tool args wrong: {args0!r}" + assert result.tool_calls[1].function.name == "write_file" + assert list(args1.keys()) == ["path", "content"], ( + f"Spurious params on outer tool: {list(args1.keys())}" + ) + assert args1["path"] == "fixture.py" + expected = _OUT_OF_SCHEMA_NESTED_CONTENT.rstrip("\n") + assert args1["content"] == expected, ( + f"outer content truncated at literal : " + f"{args1.get('content')!r}" + ) + + +def test_two_tools_second_with_out_of_schema_nested_literal_streaming( + qwen3_tokenizer, parser_cls +): + """Streaming variant of the same scenario.""" + parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) + request = ChatCompletionRequest( + model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS + ) + char_deltas = [ + "\n\n", + "baz\n", + "\n", + "\n\n", + "\n\n", + "\nfixture.py\n\n", + '\ntemplate = """\n', + "\n\n", + "baz\n", + "\n\n", + '"""\n', + "\n", + "\n", + "", + ] + reconstructor = run_tool_extraction_streaming( + parser, char_deltas, request, assert_one_tool_per_delta=False + ) + assert len(reconstructor.tool_calls) == 2, ( + f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}" + ) + args0 = json.loads(reconstructor.tool_calls[0].function.arguments) + args1 = 
json.loads(reconstructor.tool_calls[1].function.arguments) + assert args0 == {"bar": "baz"} + assert reconstructor.tool_calls[1].function.name == "write_file" + assert list(args1.keys()) == ["path", "content"] + assert args1["path"] == "fixture.py" + expected = _OUT_OF_SCHEMA_NESTED_CONTENT.rstrip("\n") + assert args1["content"] == expected, ( + f"outer content truncated/corrupted: {args1.get('content')!r}" + ) + + +# --------------------------------------------------------------------------- +# Phantom tool calls produced when the model writes an UNRENDERED Jinja +# template literally in its response: ``\n\n +# ...``. The function name ``{{ x }}`` contains +# template-syntax characters and CANNOT be a real function — the parser +# must reject these tool calls (or render them as content) rather than +# emit them as real ones, since the client will then raise "tool not +# found" errors and cause the agent to loop. +# --------------------------------------------------------------------------- + +_JINJA_PHANTOM_OUTPUT = ( + "\n\n" + "\n{{ v }}\n\n" + "\n" + "\n\n" + "\n\n" + "\nout.txt\n\n" + "\nhello\n\n" + "\n" +) + + +def test_jinja_template_phantom_tool_call_is_rejected_nonstreaming( + qwen3_tokenizer, parser_cls +): + """A ```` block (unrendered Jinja) emits a + function name that is not a valid identifier. It must NOT be + surfaced as a real tool call — the client would fail with "tool not + found" and the agent would loop. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + result = parser.extract_tool_calls(_JINJA_PHANTOM_OUTPUT, request=request) + assert result.tools_called + names = [tc.function.name for tc in result.tool_calls] + assert "{{ tc.name }}" not in names, ( + f"Phantom Jinja-template tool call surfaced as real: {names}" + ) + assert names == ["write_file"], ( + f"Expected only the real ``write_file`` tool call, got: {names}" + ) + + +# NOTE: a streaming counterpart of the above test is intentionally not +# added. Filtering phantoms in streaming requires a separate +# "client-visible index" counter (the existing ``current_tool_index`` is +# also used for internal position bookkeeping). Until that refactor +# lands, the streaming path may still surface phantoms and the client +# is expected to drop unknown function names. The non-streaming path +# is the one consumed by the offline tools-extraction code and by the +# ``_parse_xml_function_call`` helper invoked at function-end during +# streaming, so production users still see the filtered result for +# completed tool calls. diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index d287f2085aa3..bdb07c699abf 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -488,8 +488,12 @@ def _find_true_param_end( A ```` is structural only when it is followed by another structural delimiter (schema-known ````, ````, ````) or — in non-streaming mode — - end-of-string. Nested structural ```` tags - decrement depth like matched openings. + end-of-string. 
Nested ```` opens are tracked + for depth REGARDLESS of whether NAME is in the schema: a + literal nested tool_call may use NAMEs that are not in the + outer tool's schema, but its literal ```` still + pairs with the literal open and must not be mistaken for a + structural close. Returns the index of the true ```` in value_text, or -1 if incomplete. @@ -500,8 +504,13 @@ def _find_true_param_end( param_end_len = len(self.parameter_end_token) while pos < len(value_text): + # Use UNFILTERED structural opens for depth tracking so that + # a literal ```` (NAME not in the outer + # schema) still increments depth and its matching literal + # ```` is balanced — otherwise that close would + # appear unmatched and pass the structural lookahead. next_open = self._next_structural_param_start( - value_text, pos, valid_param_names + value_text, pos, None ) next_close = value_text.find(self.parameter_end_token, pos) if next_close == -1: @@ -539,6 +548,21 @@ def _find_true_param_end( return -1 + @staticmethod + def _is_valid_function_name(name: str) -> bool: + """Return True when ``name`` looks like a real function identifier + and not a stray template token, malformed tag, or freeform text. + + Rejects names that contain template-syntax characters (``{``, + ``}``, ``<``, ``>``), whitespace, quotes, or are empty. Permits + identifiers, dashes (``max-retries``), dots (``user.name``), + slashes (``namespace/tool``), and Unicode letters. 
+ """ + if not name: + return False + forbidden = set("{}<>\"' \t\n\r") + return not any(c in forbidden for c in name) + def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: # Extract function name end_index = function_call_str.find(">") @@ -546,6 +570,13 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: if end_index == -1: return None function_name = function_call_str[:end_index] + # Reject phantom tool calls produced when the model writes an + # unrendered Jinja template or pseudo-XML in its response (e.g. + # ````). Surfacing such names as real + # tool calls causes "tool not found" errors at the client and + # makes agents loop. + if not self._is_valid_function_name(function_name): + return None param_config = find_tool_properties(self.tools, function_name) valid_param_names: set[str] | None = ( set(param_config.keys()) if param_config else None diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index fb0db2e741ca..d22d6a4ea939 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -31,6 +31,19 @@ logger = init_logger(__name__) +def _is_valid_function_name(name: str) -> bool: + """Return True when ``name`` looks like a real function identifier and + not a stray template token, malformed tag, or freeform text. + + Rejects names that contain template-syntax characters (``{``, ``}``, + ``<``, ``>``), whitespace, quotes, or are empty. 
+ """ + if not name: + return False + forbidden = set("{}<>\"' \t\n\r") + return not any(c in forbidden for c in name) + + class StreamingXMLToolCallParser: """ Simplified streaming XML tool call parser @@ -1520,6 +1533,13 @@ def extract_tool_calls( tool_calls = [] for tool_call in result.tool_calls: if tool_call.function and tool_call.function.name: + # Reject phantom tool calls produced when the model + # writes an unrendered Jinja template or pseudo-XML + # in its response (e.g. ````). + # Surfacing such names as real tool calls causes + # "tool not found" errors at the client. + if not _is_valid_function_name(tool_call.function.name): + continue tool_calls.append( ToolCall( id=tool_call.id, From a5b39494adbd0e0c423e508214bd4497c23170ba Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 17:05:14 +0200 Subject: [PATCH 19/21] fix(qwen3coder): preserve content around inline empty ... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the model writes a bare ``example`` in its narrative text — no ```` inside — the previous content extraction anchored at the FIRST ```` token regardless of whether that block contained a real tool call. This dropped: - the inline empty block itself, AND - all narrative text BETWEEN the inline block and the real ```` that follows. 
Fix: walk ``...`` blocks and anchor at the FIRST one that actually contains a `` --- .../test_qwen3_xml_coder_shared.py | 58 +++++++++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 42 ++++++++++++-- 2 files changed, 95 insertions(+), 5 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index 0cd2194f5d8f..0085af49c230 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -1956,3 +1956,61 @@ def test_jinja_template_phantom_tool_call_is_rejected_nonstreaming( # ``_parse_xml_function_call`` helper invoked at function-end during # streaming, so production users still see the filtered result for # completed tool calls. + + +# --------------------------------------------------------------------------- +# Inline empty ``...`` (no ````) before a +# real tool call: the content text BETWEEN the inline literal and the real +# tool call must be preserved. Previously the content was truncated at the +# position of the FIRST ```` token regardless of whether that +# block contained a real ````. +# --------------------------------------------------------------------------- + + +def test_inline_empty_tool_call_preserves_content_before_real_call( + qwen3_tokenizer, parser_cls +): + """A bare ``example`` in the model's narrative + text (no ```` inside) must NOT consume the surrounding + content; only the real ```` block that contains a valid + function call should anchor ``content_index``. + + The XML parser's SAX-based pipeline consumes the inline empty + block's body as XML text (so ``example`` is dropped), but the + surrounding narrative ("I'll show:" and "Now real:") must still be + preserved — both parsers are checked. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "log", + "parameters": { + "type": "object", + "properties": {"msg": {"type": "string"}}, + }, + }, + ) + ] + parser = parser_cls(qwen3_tokenizer, tools=tools) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + + text = ( + "I'll show: example. Now real:\n" + "\n\n\nhi\n\n" + "\n" + ) + result = parser.extract_tool_calls(text, request=request) + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "log" + # Content between the inline empty tool_call and the real one MUST be + # preserved — dropping it loses the model's contextual narrative. + assert result.content is not None + assert "I'll show:" in result.content, ( + f"Pre-inline narrative lost from content: {result.content!r}" + ) + assert "Now real:" in result.content, ( + f"Content between inline literal and real tool_call lost: " + f"{result.content!r}" + ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index bdb07c699abf..13323eaf828c 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -748,11 +748,43 @@ def extract_tool_calls( } ) - # Extract content before tool calls - content_index = model_output.find(self.tool_call_start_token) - idx = model_output.find(self.tool_call_prefix) - content_index = content_index if content_index >= 0 else idx - content = model_output[:content_index] # .rstrip() + # Extract content before tool calls. Anchor at the FIRST + # ```` that contains a real ```` + # opener — a bare ``...`` written by + # the model in its narrative text (no function inside) is + # NOT a real tool call and the surrounding text MUST stay + # in ``content``. 
+ content_index = -1 + search_pos = 0 + tc_start_token = self.tool_call_start_token + tc_end_token = self.tool_call_end_token + while True: + tc_pos = model_output.find(tc_start_token, search_pos) + if tc_pos == -1: + break + tc_close = model_output.find( + tc_end_token, tc_pos + len(tc_start_token) + ) + # Look for a ```` block contains a + # ``= 0 else model_output + ) valid_tool_calls = [tc for tc in tool_calls if tc is not None] return ExtractedToolCallInformation( tools_called=(len(valid_tool_calls) > 0), From 00d0a333ba9b8e40eb35fc0b54b4ae96114f30b3 Mon Sep 17 00:00:00 2001 From: CNE Pierre FICHEPOIL Date: Sat, 25 Apr 2026 17:16:52 +0200 Subject: [PATCH 20/21] fix(qwen3-tool-parsers): nullable string param converts "null"/"None" to JSON null Observed against a real Qwen 3.6 server: a parameter declared as ``anyOf: [{type: string}, {type: null}]`` and emitted by the model as the literal ``"None"`` (Qwen3.5 ``| string`` template) or ``"null"`` (Qwen3.6 ``| tojson`` template) was kept as the literal string instead of being converted to JSON null. Downstream type checks then reject the value because they expect either ``string`` or ``null``. The previous fix for nullable types only touched non-string primary types; nullable strings still went down the string-preserving path. Coder parser: schema-extraction path now sets ``allows_null`` and short-circuits ``"null"``/``"None"`` BEFORE returning the raw string. XML parser: - new ``_param_allows_null`` helper (no signature change to ``_convert_param_value``); - streaming ``need_defer`` now includes nullable strings so that the full value is in before deciding null vs. string; - the deferred path short-circuits nullable strings so that ``"null"``/``"None"`` map to JSON null and any other value is kept verbatim (no spurious ``json.loads`` of ``"42"`` to int 42). Test: ``test_anyof_string_null_with_null_literal_returns_none`` (parametrised xml/coder). 
Co-authored-by: Claude Signed-off-by: CNE Pierre FICHEPOIL --- .../test_qwen3_xml_coder_shared.py | 53 ++++++++++++ vllm/tool_parsers/qwen3coder_tool_parser.py | 20 ++++- vllm/tool_parsers/qwen3xml_tool_parser.py | 86 +++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index 0085af49c230..07602499daf7 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -2014,3 +2014,56 @@ def test_inline_empty_tool_call_preserves_content_before_real_call( f"Content between inline literal and real tool_call lost: " f"{result.content!r}" ) + + +# --------------------------------------------------------------------------- +# anyOf [{type: string}, {type: null}] with the literal "null" or "None" +# value must convert to JSON null, NOT preserve as the string "null"/"None". +# Observed against a real Qwen 3.6 server: the model emits ``None`` for a +# nullable optional parameter and the parser kept it as the string "None", +# breaking nullable-typed clients. +# --------------------------------------------------------------------------- + + +def test_anyof_string_null_with_null_literal_returns_none( + qwen3_tokenizer, parser_cls +): + """anyOf [{type: string}, {type: null}] with value "null" or "None" + must convert to JSON null. String-typed paths preserve the literal, + but a nullable schema MUST recognise the null sentinel — otherwise + the client receives the literal "null" / "None" string and downstream + type checks fail. 
+ """ + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "set_value", + "parameters": { + "type": "object", + "properties": { + "optional": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + }, + }, + }, + ) + ] + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + for literal in ("null", "None"): + parser = parser_cls(qwen3_tokenizer, tools=tools) + model_output = ( + "\n" + "\n" + f"{literal}\n" + "\n" + "" + ) + result = parser.extract_tool_calls(model_output, request=request) + assert result.tools_called + args = json.loads(result.tool_calls[0].function.arguments) + assert args["optional"] is None, ( + f"anyOf string|null with value {literal!r} was kept as " + f"{type(args['optional']).__name__}: {args['optional']!r}" + ) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 13323eaf828c..195c6c924ee2 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -126,11 +126,20 @@ def _convert_param_value( ) return param_value + # ``allows_null`` is True when the schema explicitly admits a + # null value (either via ``"type": "null"`` or in an ``anyOf`` + # union). A nullable parameter must convert the literal + # ``"null"`` / ``"None"`` to JSON null even when the primary + # type is ``string`` — otherwise a Qwen3.5-trained model that + # emits the Python ``None`` literal leaves the client with the + # string ``"None"`` for a nullable optional. + allows_null = False if ( isinstance(param_config[param_name], dict) and "type" in param_config[param_name] ): param_type = str(param_config[param_name]["type"]).strip().lower() + allows_null = param_type == "null" elif ( isinstance(param_config[param_name], dict) and "anyOf" in param_config[param_name] @@ -139,14 +148,21 @@ def _convert_param_value( # nullable schemas like {"anyOf": [{"type": "string"}, # {"type": "null"}]} behave as "string", not "object". 
param_type = "string" + picked = False for option in param_config[param_name]["anyOf"]: if isinstance(option, dict) and "type" in option: opt_type = str(option["type"]).strip().lower() - if opt_type != "null": + if opt_type == "null": + allows_null = True + elif not picked: param_type = opt_type - break + picked = True else: param_type = "string" + # Nullable schemas: recognise "null" / "None" up front so a + # string-typed nullable still maps to JSON null. + if allows_null and param_value.lower() in ("null", "none"): + return None # String type takes precedence: preserve the raw value (including # the literal "null") rather than converting it to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index d22d6a4ea939..302f9d5513aa 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -760,11 +760,24 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: or param_type.startswith("float") ) + # Nullable string params (``anyOf: [string, null]``) + # must defer too: the literal ``null`` / ``None`` is + # only recognisable when the full value is in. + # Without deferral, the streaming string path emits + # ``"`` + chars + ``"`` and the literal stays + # quoted. + is_nullable_string = ( + param_type in [ + "string", "str", "text", "varchar", "char", "enum", + ] + and self._param_allows_null(self._pre_current_param_name) + ) need_defer = ( is_complex_type or is_object_type or is_bool_type or is_numeric_type + or is_nullable_string ) if not need_defer: @@ -1092,6 +1105,48 @@ def _end_element(self, name: str): raw_for_parse = raw_text + "\n" else: raw_for_parse = raw_text + # Nullable-string short-circuit: when the schema is + # ``anyOf: [string, null]``, ``"null"`` and Python's + # ``"None"`` map to JSON null. 
Any other value is + # kept verbatim as a string — never parsed as int, + # float, JSON, etc., even if it LOOKS like one. + _param_type_for_check = self._get_param_type(param_name) + if ( + _param_type_for_check in [ + "string", "str", "text", "varchar", "char", "enum", + ] + and self._param_allows_null(param_name) + ): + if raw_for_parse.strip().lower() in ("null", "none"): + parsed_value = None + output_arguments = "null" + else: + parsed_value = raw_for_parse + output_arguments = json.dumps( + raw_for_parse, ensure_ascii=False + ) + delta = DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=self.tool_call_index - 1, + id=self._get_call_id_for_delta(), + type="function", + function=DeltaFunctionCall( + name=None, arguments=output_arguments + ), + ) + ] + ) + self._emit_delta(delta) + self.parameters[param_name] = parsed_value + self.current_param_name = None + self.current_param_value = "" + self.current_param_value_converted = "" + self.start_quote_emitted = False + self.should_emit_end_newline = False + self.defer_current_parameter = False + self.deferred_param_raw_value = "" + return raw_lower = raw_for_parse.strip().lower() # Handle JSON literals that ast.literal_eval cannot parse # (true/false/null are JSON, not Python). @@ -1353,6 +1408,27 @@ def _get_param_type(self, param_name: str) -> str: return self.repair_param_type(str(param_type or "string")) return "string" + def _param_allows_null(self, param_name: str | None) -> bool: + """Return True when the schema for ``param_name`` admits a null + value — either via ``"type": "null"`` or as one alternative in + an ``anyOf`` union. Used to recognise the literal ``"null"`` / + ``"None"`` as JSON null even when the primary type is string. 
+ """ + if not self.tools or not self.current_function_name or not param_name: + return False + properties = find_tool_properties(self.tools, self.current_function_name) + if param_name not in properties or not isinstance( + properties[param_name], dict + ): + return False + prop = properties[param_name] + if str(prop.get("type", "")).lower() == "null": + return True + for option in prop.get("anyOf", []) or []: + if isinstance(option, dict) and str(option.get("type", "")).lower() == "null": + return True + return False + def repair_param_type(self, param_type: str) -> str: """Repair unknown parameter types by treating them as string Args: @@ -1391,6 +1467,16 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: Converted value """ param_type = param_type.strip().lower() + # Nullable schemas (``anyOf: [string, null]`` or similar): the + # primary type may be string but the literal ``"null"`` / + # ``"None"`` must still convert to JSON null. Caller passes the + # current parameter name via the parser state so we can query + # the schema. + if ( + self._param_allows_null(self.current_param_name) + and param_value.lower() in ("null", "none") + ): + return None # String type takes precedence: the literal value "null" must remain # the string "null" instead of being converted to Python None. if param_type in ["string", "str", "text", "varchar", "char", "enum"]: From e8313093fabdb835f7e352d750e9b23c0813b483 Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Wed, 27 May 2026 16:44:16 +0200 Subject: [PATCH 21/21] test: move anyOf type-conversion tests into shared qwen3 suite Relocate the anyOf / nullable type-resolution tests (originally added by #38973 to the Coder-only file) into the shared XML/Coder suite, parametrized over both parsers, so the coverage applies to both. 
To make the JSON-Schema list-form type {"type": ["integer", "null"]} resolve consistently across parsers, teach the XML parser's _get_param_type to pick the first non-null entry of a list-form type (it already did this for anyOf). Both parsers now coerce it to int. Ruff: replace try/except/pass with contextlib.suppress in both parsers and run ruff format on the touched qwen3 files. Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) --- .../test_qwen3_xml_coder_shared.py | 347 +++++++++--------- .../test_qwen3coder_tool_parser.py | 183 +-------- .../tool_parsers/test_qwen3xml_tool_parser.py | 82 +++-- vllm/tool_parsers/qwen3coder_tool_parser.py | 137 +++---- vllm/tool_parsers/qwen3xml_tool_parser.py | 119 +++--- 5 files changed, 321 insertions(+), 547 deletions(-) diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py index f2f8a8b7899f..6c56c6d47063 100644 --- a/tests/tool_parsers/test_qwen3_xml_coder_shared.py +++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py @@ -10,6 +10,7 @@ parser-specific file (``test_qwen3xml_tool_parser.py`` or ``test_qwen3coder_tool_parser.py``). 
""" + import json from collections.abc import Generator @@ -206,9 +207,7 @@ def stream_delta_message_generator( def test_extract_tool_calls_no_tools(parser): model_output = "This is a test response without any tool calls" - extracted_tool_calls = parser.extract_tool_calls( - model_output, request=None - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=None) assert not extracted_tool_calls.tools_called assert extracted_tool_calls.tool_calls == [] assert extracted_tool_calls.content == model_output @@ -395,9 +394,7 @@ def test_extract_tool_calls( parser, model_output, expected_tool_calls, expected_content ): request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = parser.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) assert extracted_tool_calls.tools_called assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls) # Both ``None`` and ``""`` are acceptable when the expected content is @@ -421,14 +418,10 @@ def test_extract_tool_calls_fallback_no_tags(parser): """ request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = parser.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) assert extracted_tool_calls.tools_called assert len(extracted_tool_calls.tool_calls) == 1 - assert ( - extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" - ) + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" # --------------------------------------------------------------------------- @@ -479,9 +472,7 @@ def test_extract_tool_calls_type_conversion(qwen3_tokenizer, parser_cls): parser_inst = parser_cls(qwen3_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = parser_inst.extract_tool_calls( - model_output, 
request=request - ) + extracted_tool_calls = parser_inst.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["int_param"] == 42 @@ -491,9 +482,7 @@ def test_extract_tool_calls_type_conversion(qwen3_tokenizer, parser_cls): assert args["obj_param"] == {"key": "value"} -def test_extract_tool_calls_complex_type_with_single_quote( - qwen3_tokenizer, parser_cls -): +def test_extract_tool_calls_complex_type_with_single_quote(qwen3_tokenizer, parser_cls): """Object parameter expressed as a Python repr (single quotes).""" tools = [ ChatCompletionToolsParam( @@ -524,9 +513,7 @@ def test_extract_tool_calls_complex_type_with_single_quote( parser_inst = parser_cls(qwen3_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = parser_inst.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser_inst.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["obj_param"] == {"key": "value"} @@ -595,9 +582,7 @@ def test_extract_tool_calls_streaming( tool_states[idx]["name"] = tool_call.function.name if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += ( - tool_call.function.arguments - ) + tool_states[idx]["arguments"] += tool_call.function.arguments # Be tolerant about whitespace-only deltas between parallel tool calls; # see ``test_extract_tool_calls`` for the same reasoning. 
@@ -638,15 +623,11 @@ def test_extract_tool_calls_missing_closing_parameter_tag(parser): """ request = ChatCompletionRequest(model=MODEL, messages=[]) - extracted_tool_calls = parser.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) assert extracted_tool_calls.tools_called assert len(extracted_tool_calls.tool_calls) == 1 - assert ( - extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" - ) + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert "city" in args assert args["city"] == "Dallas" @@ -655,9 +636,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag(parser): assert "Let me check the weather for you:" in extracted_tool_calls.content -def test_extract_tool_calls_streaming_missing_closing_tag( - parser, qwen3_tokenizer -): +def test_extract_tool_calls_streaming_missing_closing_tag(parser, qwen3_tokenizer): """Streaming with missing closing tag.""" model_output = """Let me check the weather for you: @@ -702,9 +681,7 @@ def test_extract_tool_calls_streaming_missing_closing_tag( if tool_call.function.name: tool_states[idx]["name"] = tool_call.function.name if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += ( - tool_call.function.arguments - ) + tool_states[idx]["arguments"] += tool_call.function.arguments assert "Let me check the weather for you:" in other_content assert len(tool_states) == 1 @@ -891,13 +868,9 @@ def test_extract_tool_calls_streaming_speculative_decode_loss(parser): request = ChatCompletionRequest(model="test", messages=[]) text1 = "\n\n" - parser.extract_tool_calls_streaming( - "", text1, text1, [], [1], [1], request - ) + parser.extract_tool_calls_streaming("", text1, text1, [], [1], [1], request) - delta_str = ( - "\nParis\n\n\n" - ) + delta_str = "\nParis\n\n\n" text2 = text1 + delta_str 
delta2 = parser.extract_tool_calls_streaming( text1, text2, delta_str, [1], [1, 2], [2], request @@ -949,8 +922,7 @@ def test_string_null_value_preserved(qwen3_tokenizer, parser_cls): assert result.tools_called args = json.loads(result.tool_calls[0].function.arguments) assert args["query"] == "null", ( - f"String parameter 'null' was converted incorrectly. " - f"Got: {args.get('query')!r}" + f"String parameter 'null' was converted incorrectly. Got: {args.get('query')!r}" ) @@ -1076,9 +1048,7 @@ def test_anyof_object_param_not_double_encoded_nonstreaming( qwen3_tokenizer, parser_cls ): parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_OBJECT_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS) result = parser.extract_tool_calls(_ANYOF_OBJECT_OUTPUT, request=request) assert result.tools_called @@ -1089,13 +1059,9 @@ def test_anyof_object_param_not_double_encoded_nonstreaming( assert args["data"] == {"key": "value", "count": 42} -def test_anyof_object_param_not_double_encoded_streaming( - qwen3_tokenizer, parser_cls -): +def test_anyof_object_param_not_double_encoded_streaming(qwen3_tokenizer, parser_cls): parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_OBJECT_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS) deltas = [ "", "\n", @@ -1109,78 +1075,156 @@ def test_anyof_object_param_not_double_encoded_streaming( assert len(reconstructor.tool_calls) == 1 args = json.loads(reconstructor.tool_calls[0].function.arguments) assert isinstance(args["data"], dict), ( - f"anyOf object param was double-encoded in streaming: " - f"data={args['data']!r}" + f"anyOf object param was double-encoded in streaming: data={args['data']!r}" ) # --------------------------------------------------------------------------- -# 
anyOf array schema — value parsed as a list +# anyOf / nullable (Pydantic v2 Optional[T]) type resolution. +# Both parsers extract the first non-null type from the anyOf union. # --------------------------------------------------------------------------- -_ANYOF_ARRAY_TOOLS = [ +_ANYOF_TYPES_TOOLS = [ ChatCompletionToolsParam( type="function", function={ - "name": "set_items", + "name": "test_anyof", "parameters": { "type": "object", "properties": { - "items": { + "anyof_int": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + "default": 5, + }, + "anyof_str": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + "anyof_array": { "anyOf": [ {"type": "array", "items": {"type": "string"}}, {"type": "null"}, ], }, + "anyof_obj": { + "anyOf": [{"type": "object"}, {"type": "null"}], + }, + "type_as_array": { + "type": ["integer", "null"], + }, + "multi_non_null": { + "anyOf": [ + {"type": "string"}, + {"type": "integer"}, + {"type": "null"}, + ], + }, }, }, }, ) ] -_ANYOF_ARRAY_OUTPUT = ( +_ANYOF_TYPES_OUTPUT = ( "\n" - "\n" - '["a", "b", "c"]\n' + "\n" + "5\n" + "hello\n" + '["a", "b", "c"]\n' + '{"key": "value"}\n' + "42\n" + "some text\n" "\n" "" ) -def test_anyof_array_null_parses_as_list_nonstreaming( - qwen3_tokenizer, parser_cls -): - """anyOf [{type: array}, {type: null}] must parse a JSON array value as - a list (the first non-null type is ``array``), not as a raw string. +def test_extract_tool_calls_anyof_type_conversion(qwen3_tokenizer, parser_cls): + """anyOf nullable schemas (Pydantic v2 ``Optional[T]``) must resolve to + the first non-null type and apply the matching conversion: int(), + list/dict via json, string passthrough. 
""" - parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_ARRAY_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_ANYOF_ARRAY_TOOLS - ) - result = parser.extract_tool_calls(_ANYOF_ARRAY_OUTPUT, request=request) + parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_TYPES_TOOLS) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_TYPES_TOOLS) + result = parser.extract_tool_calls(_ANYOF_TYPES_OUTPUT, request=request) assert result.tools_called args = json.loads(result.tool_calls[0].function.arguments) - assert isinstance(args["items"], list), ( - f"anyOf array|null was not parsed as a list: {args['items']!r}" + assert args["anyof_int"] == 5 + assert isinstance(args["anyof_int"], int) + assert args["anyof_str"] == "hello" + assert isinstance(args["anyof_str"], str) + assert args["anyof_array"] == ["a", "b", "c"] + assert isinstance(args["anyof_array"], list) + assert args["anyof_obj"] == {"key": "value"} + assert isinstance(args["anyof_obj"], dict) + # JSON-Schema list-form type {"type": ["integer", "null"]} → int + assert args["type_as_array"] == 42 + assert isinstance(args["type_as_array"], int) + # anyOf[string, integer, null] → first non-null type is string + assert args["multi_non_null"] == "some text" + assert isinstance(args["multi_non_null"], str) + + +_ANYOF_STREAMING_TOOLS = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "search_web", + "parameters": { + "type": "object", + "properties": { + "query": { + "anyOf": [{"type": "string"}, {"type": "null"}], + }, + "count": { + "anyOf": [{"type": "integer"}, {"type": "null"}], + "default": 5, + }, + "verbose": { + "anyOf": [{"type": "boolean"}, {"type": "null"}], + }, + }, + }, + }, ) - assert args["items"] == ["a", "b", "c"] +] + +_ANYOF_STREAMING_OUTPUT = ( + "\n" + "\n" + "vllm tool parser\n" + "10\n" + "true\n" + "\n" + "" +) -def test_anyof_array_null_parses_as_list_streaming(qwen3_tokenizer, parser_cls): - parser = 
parser_cls(qwen3_tokenizer, tools=_ANYOF_ARRAY_TOOLS) +def test_extract_tool_calls_anyof_type_conversion_streaming( + qwen3_tokenizer, parser_cls +): + """Streaming e2e for anyOf nullable schemas: string/int/bool types must + be resolved through the incremental pipeline for both parsers. + """ + parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_STREAMING_TOOLS) request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_ANYOF_ARRAY_TOOLS + model=MODEL, messages=[], tools=_ANYOF_STREAMING_TOOLS ) reconstructor = run_tool_extraction_streaming( - parser, _ANYOF_ARRAY_OUTPUT, request, assert_one_tool_per_delta=False + parser, + _ANYOF_STREAMING_OUTPUT, + request, + assert_one_tool_per_delta=False, ) assert len(reconstructor.tool_calls) == 1 + assert reconstructor.tool_calls[0].function.name == "search_web" args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert isinstance(args["items"], list), ( - f"anyOf array|null was not a list in streaming: {args['items']!r}" - ) - assert args["items"] == ["a", "b", "c"] + assert args["query"] == "vllm tool parser" + assert isinstance(args["query"], str) + assert args["count"] == 10 + assert isinstance(args["count"], int) + assert args["verbose"] is True + assert isinstance(args["verbose"], bool) # --------------------------------------------------------------------------- @@ -1273,12 +1317,12 @@ def test_double_encoded_object_param_streaming(qwen3_tokenizer, parser_cls): ] _XML_TAGS_IN_CONTENT = ( - 'char_deltas = [\n' + "char_deltas = [\n" ' "\\n",\n' ' "\\n",\n' ' "\\n\\n",\n' ' "\\n",\n' - ']\n' + "]\n" ) _WRITE_FILE_XML_TAGS_OUTPUT = ( @@ -1291,21 +1335,15 @@ def test_double_encoded_object_param_streaming(qwen3_tokenizer, parser_cls): ) -def test_content_with_xml_structural_tags_nonstreaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_xml_structural_tags_nonstreaming(qwen3_tokenizer, parser_cls): """Non-streaming: a string param whose value embeds , , , as literal text must be extracted 
intact, with no spurious extra params being created from the embedded tags. """ parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) - result = parser.extract_tool_calls( - _WRITE_FILE_XML_TAGS_OUTPUT, request=request - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT, request=request) assert result.tools_called assert len(result.tool_calls) == 1 @@ -1321,14 +1359,10 @@ def test_content_with_xml_structural_tags_nonstreaming( ) -def test_content_with_xml_structural_tags_streaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_xml_structural_tags_streaming(qwen3_tokenizer, parser_cls): """Streaming variant: pre-formed chunks, full content in one delta.""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) char_deltas = [ "\n", "\n", @@ -1376,18 +1410,14 @@ def test_content_with_xml_structural_tags_streaming( ) -def test_content_with_param_like_lines_nonstreaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_param_like_lines_nonstreaming(qwen3_tokenizer, parser_cls): """Non-streaming: ```` and ```` on their own lines inside a string value must not terminate the parameter early. Requires schema-based filtering so that ``new_string`` (not a real parameter of write_file) is treated as literal text. 
""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) result = parser.extract_tool_calls( _WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, request=request ) @@ -1411,20 +1441,18 @@ def test_content_with_param_like_lines_streaming(qwen3_tokenizer, parser_cls): structural close. """ parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) char_deltas = [ "\n", "\n", "\ntest_template.py\n\n", '\nTOOL_CALL_TEMPLATE = """\n', - "\n", # literal close — alone in its delta + "\n", # literal close — alone in its delta "\n", # literal new-param line "#!/usr/bin/env python3\n", - "\n", # second literal close + "\n", # second literal close '"""\n', - "\n", # REAL close of content + "\n", # REAL close of content "\n", "\n", ] @@ -1475,12 +1503,8 @@ def test_array_with_json_bool(qwen3_tokenizer, parser_cls): ``ast.literal_eval``. """ parser = parser_cls(qwen3_tokenizer, tools=_ARRAY_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_ARRAY_TOOLS - ) - result = parser.extract_tool_calls( - _ARRAY_WITH_JSON_BOOL_OUTPUT, request=request - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS) + result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request) assert result.tools_called args = json.loads(result.tool_calls[0].function.arguments) @@ -1525,9 +1549,7 @@ def test_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer, parser_cls): emitted; dropping the second one is a regression. 
""" parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WEATHER_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) reconstructor = run_tool_extraction_streaming( parser, [_TWO_TOOL_CALLS_IN_ONE_CHUNK], @@ -1535,8 +1557,7 @@ def test_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer, parser_cls): assert_one_tool_per_delta=False, ) assert len(reconstructor.tool_calls) == 2, ( - f"Expected 2 tool calls in one delta, got " - f"{len(reconstructor.tool_calls)}" + f"Expected 2 tool calls in one delta, got {len(reconstructor.tool_calls)}" ) args0 = json.loads(reconstructor.tool_calls[0].function.arguments) args1 = json.loads(reconstructor.tool_calls[1].function.arguments) @@ -1595,8 +1616,7 @@ def test_python_none_value_for_nullable_int(qwen3_tokenizer, parser_cls): assert result.tools_called args = json.loads(result.tool_calls[0].function.arguments) assert args["count"] is None, ( - f"Python repr None was not converted to JSON null. " - f"Got: {args['count']!r}" + f"Python repr None was not converted to JSON null. Got: {args['count']!r}" ) @@ -1608,9 +1628,7 @@ def test_streaming_two_tool_calls_plus_trailing_text_one_delta( content in the same delta — not be silently dropped. """ parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WEATHER_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) deltas = [ _TWO_TOOL_CALLS_IN_ONE_CHUNK + "\nAll done!", ] @@ -1635,9 +1653,7 @@ def test_streaming_trailing_text_with_final_close_in_same_delta( consumed by the parser's "advance to next tool" logic. 
""" parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WEATHER_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS) deltas = [ # Build up the tool call up to and including . "\n\n" @@ -1666,11 +1682,7 @@ def test_streaming_trailing_text_with_final_close_in_same_delta( # --------------------------------------------------------------------------- _CONTENT_WITH_REAL_PARAM_NAME_LITERAL = ( - 'doc = """\n' - '\n' - 'literal/value\n' - '\n' - '"""\n' + 'doc = """\n\nliteral/value\n\n"""\n' ) _REAL_PARAM_NAME_LITERAL_OUTPUT = ( @@ -1683,9 +1695,7 @@ def test_streaming_trailing_text_with_final_close_in_same_delta( ) -def test_content_with_real_param_name_literal_nonstreaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_real_param_name_literal_nonstreaming(qwen3_tokenizer, parser_cls): """Non-streaming: parameter ``content`` value embeds ``...`` where ``path`` IS the other real parameter of the same ``write_file`` tool. Schema name filtering alone @@ -1694,23 +1704,17 @@ def test_content_with_real_param_name_literal_nonstreaming( that closes the OUTER param, not the inner literal). 
""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) - result = parser.extract_tool_calls( - _REAL_PARAM_NAME_LITERAL_OUTPUT, request=request - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) + result = parser.extract_tool_calls(_REAL_PARAM_NAME_LITERAL_OUTPUT, request=request) assert result.tools_called assert len(result.tool_calls) == 1 args = json.loads(result.tool_calls[0].function.arguments) assert list(args.keys()) == ["path", "content"], ( - f"Spurious params from embedded same-name literal: " - f"{list(args.keys())}" + f"Spurious params from embedded same-name literal: {list(args.keys())}" ) assert args["path"] == "fixture.py", ( - f"Outer ``path`` was overwritten by embedded literal: " - f"{args.get('path')!r}" + f"Outer ``path`` was overwritten by embedded literal: {args.get('path')!r}" ) expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n") assert args["content"] == expected, ( @@ -1719,17 +1723,13 @@ def test_content_with_real_param_name_literal_nonstreaming( ) -def test_content_with_real_param_name_literal_streaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_real_param_name_literal_streaming(qwen3_tokenizer, parser_cls): """Streaming variant of the same case. Each meaningful structural- looking line arrives in its own delta — the parser cannot wait for the full text to disambiguate. 
""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) char_deltas = [ "\n", "\n", @@ -1749,8 +1749,7 @@ def test_content_with_real_param_name_literal_streaming( assert len(reconstructor.tool_calls) == 1 args = json.loads(reconstructor.tool_calls[0].function.arguments) assert list(args.keys()) == ["path", "content"], ( - f"Spurious params from embedded same-name literal: " - f"{list(args.keys())}" + f"Spurious params from embedded same-name literal: {list(args.keys())}" ) assert args["path"] == "fixture.py" expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n") @@ -1794,18 +1793,14 @@ def test_content_with_real_param_name_literal_streaming( ) -def test_content_with_full_nested_tool_call_nonstreaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_full_nested_tool_call_nonstreaming(qwen3_tokenizer, parser_cls): """Non-streaming: parameter ``content`` contains a complete literal ``...`` whose function/parameter names match the OUTER tool's schema. Every literal must stay inside the value; no extra tool call must be generated. """ parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) result = parser.extract_tool_calls(_FULL_NESTED_CALL_OUTPUT, request=request) assert result.tools_called @@ -1823,17 +1818,13 @@ def test_content_with_full_nested_tool_call_nonstreaming( ) -def test_content_with_full_nested_tool_call_streaming( - qwen3_tokenizer, parser_cls -): +def test_content_with_full_nested_tool_call_streaming(qwen3_tokenizer, parser_cls): """Streaming variant: the literal nested ``...`` crosses many delta boundaries; the parser must not start a second tool call. 
""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) char_deltas = [ "\n", "\n", @@ -1913,9 +1904,7 @@ def test_two_tools_second_with_out_of_schema_nested_literal_nonstreaming( not the literal ```` of the unknown-NAME nested open. """ parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) result = parser.extract_tool_calls( _TWO_TOOLS_OUT_OF_SCHEMA_NESTED_OUTPUT, request=request ) @@ -1934,8 +1923,7 @@ def test_two_tools_second_with_out_of_schema_nested_literal_nonstreaming( assert args1["path"] == "fixture.py" expected = _OUT_OF_SCHEMA_NESTED_CONTENT.rstrip("\n") assert args1["content"] == expected, ( - f"outer content truncated at literal : " - f"{args1.get('content')!r}" + f"outer content truncated at literal : {args1.get('content')!r}" ) @@ -1944,9 +1932,7 @@ def test_two_tools_second_with_out_of_schema_nested_literal_streaming( ): """Streaming variant of the same scenario.""" parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS) - request = ChatCompletionRequest( - model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS - ) + request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS) char_deltas = [ "\n\n", "baz\n", @@ -2104,8 +2090,7 @@ def test_inline_empty_tool_call_preserves_content_before_real_call( f"Pre-inline narrative lost from content: {result.content!r}" ) assert "Now real:" in result.content, ( - f"Content between inline literal and real tool_call lost: " - f"{result.content!r}" + f"Content between inline literal and real tool_call lost: {result.content!r}" ) @@ -2118,9 +2103,7 @@ def test_inline_empty_tool_call_preserves_content_before_real_call( # 
--------------------------------------------------------------------------- -def test_anyof_string_null_with_null_literal_returns_none( - qwen3_tokenizer, parser_cls -): +def test_anyof_string_null_with_null_literal_returns_none(qwen3_tokenizer, parser_cls): """anyOf [{type: string}, {type: null}] with value "null" or "None" must convert to JSON null. String-typed paths preserve the literal, but a nullable schema MUST recognise the null sentinel — otherwise diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 782eb951ad47..9ff5a933a515 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -14,10 +14,8 @@ import pytest -from tests.tool_parsers.utils import run_tool_extraction_streaming from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, - ChatCompletionToolsParam, ) from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser @@ -416,7 +414,7 @@ def test_extract_tool_calls_streaming_various_chunk_sizes( ptr = 0 while ptr < len(template_text): - delta = template_text[ptr:ptr + chunk_size] + delta = template_text[ptr : ptr + chunk_size] previous_text = current_text current_text += delta ptr += chunk_size @@ -458,182 +456,5 @@ def test_extract_tool_calls_streaming_various_chunk_sizes( args = json.loads(tool_states[0]["arguments"]) assert args["example_parameter_1"] == "value_1" assert args["example_parameter_2"] == ( - "This is the value for the second parameter\n" - "that can span\n" - "multiple lines" + "This is the value for the second parameter\nthat can span\nmultiple lines" ) - - -def test_extract_tool_calls_anyof_type_conversion(qwen3_tokenizer): - """Test type conversion for anyOf/oneOf nullable schemas (Pydantic v2). 
- - Pydantic v2 emits anyOf for Optional[T] fields, e.g.: - Optional[int] -> {"anyOf": [{"type": "integer"}, {"type": "null"}]} - The parser must extract the non-null type and apply the correct - conversion (int(), float(), etc.) instead of returning a raw string. - - Coder-specific: this also exercises the JSON-Schema list-form - ``{"type": ["integer", "null"]}`` which the Coder parser coerces to an - int (the XML parser keeps it as a string), so the assertions only hold - for the Coder parser and the test stays out of the shared suite. - """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "test_anyof", - "parameters": { - "type": "object", - "properties": { - "anyof_int": { - "anyOf": [ - {"type": "integer"}, - {"type": "null"}, - ], - "default": 5, - }, - "anyof_str": { - "anyOf": [ - {"type": "string"}, - {"type": "null"}, - ], - }, - "anyof_array": { - "anyOf": [ - {"type": "array", "items": {"type": "string"}}, - {"type": "null"}, - ], - }, - "anyof_obj": { - "anyOf": [ - {"type": "object"}, - {"type": "null"}, - ], - }, - "type_as_array": { - "type": ["integer", "null"], - }, - "multi_non_null": { - "anyOf": [ - {"type": "string"}, - {"type": "integer"}, - {"type": "null"}, - ], - }, - }, - }, - }, - ) - ] - - model_output = """ - - -5 - - -hello - - -["a", "b", "c"] - - -{"key": "value"} - - -42 - - -some text - - -""" - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted = parser.extract_tool_calls(model_output, request=request) - - args = json.loads(extracted.tool_calls[0].function.arguments) - assert args["anyof_int"] == 5 - assert isinstance(args["anyof_int"], int) - assert args["anyof_str"] == "hello" - assert isinstance(args["anyof_str"], str) - assert args["anyof_array"] == ["a", "b", "c"] - assert isinstance(args["anyof_array"], list) - assert args["anyof_obj"] == {"key": "value"} - assert isinstance(args["anyof_obj"], 
dict) - assert args["type_as_array"] == 42 - assert isinstance(args["type_as_array"], int) - # Multi non-null: anyOf[string, integer, null] → first non-null is string - assert args["multi_non_null"] == "some text" - assert isinstance(args["multi_non_null"], str) - - -def test_extract_tool_calls_anyof_type_conversion_streaming(qwen3_tokenizer): - """Test streaming e2e for anyOf/oneOf nullable schemas (Pydantic v2). - - Verifies that the full streaming pipeline correctly resolves types from - anyOf schemas and produces valid JSON with properly typed values. - """ - tools = [ - ChatCompletionToolsParam( - type="function", - function={ - "name": "search_web", - "parameters": { - "type": "object", - "properties": { - "query": { - "anyOf": [ - {"type": "string"}, - {"type": "null"}, - ], - }, - "count": { - "anyOf": [ - {"type": "integer"}, - {"type": "null"}, - ], - "default": 5, - }, - "verbose": { - "anyOf": [ - {"type": "boolean"}, - {"type": "null"}, - ], - }, - }, - }, - }, - ) - ] - - model_output = """ - - -vllm tool parser - - -10 - - -true - - -""" - - parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools) - request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - reconstructor = run_tool_extraction_streaming( - parser, model_output, request, assert_one_tool_per_delta=False - ) - - assert len(reconstructor.tool_calls) == 1 - assert reconstructor.tool_calls[0].function.name == "search_web" - args = json.loads(reconstructor.tool_calls[0].function.arguments) - assert args["query"] == "vllm tool parser" - assert isinstance(args["query"], str) - assert args["count"] == 10 - assert isinstance(args["count"], int) - assert args["verbose"] is True - assert isinstance(args["verbose"], bool) diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py index 83aeffdb27b7..c38268c62ec9 100644 --- a/tests/tool_parsers/test_qwen3xml_tool_parser.py +++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py @@ 
-27,6 +27,7 @@ def qwen3_tokenizer(): return get_tokenizer(tokenizer_name=MODEL) + class TestQwen3xmlToolParser(ToolParserTests): @pytest.fixture def test_config(self) -> ToolParserTestConfig: @@ -76,7 +77,7 @@ def test_config(self) -> ToolParserTestConfig: def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): parser = Qwen3XMLToolParser(qwen3_tokenizer) - + # 1. First tool call # 2. Free text # 3. Second tool call @@ -85,19 +86,19 @@ def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): "\nNext, I will check the weather for London:\n" "\n\nLondon\n\n" ) - + request = ChatCompletionRequest(messages=[], model="test") emitted_messages = [] previous_text = "" previous_tokens = [] token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False) - + for i in range(1, len(token_ids) + 1): current_token_ids = token_ids[:i] current_text = qwen3_tokenizer.decode(current_token_ids) - delta_text = current_text[len(previous_text):] - token_delta = current_token_ids[len(previous_tokens):] - + delta_text = current_text[len(previous_text) :] + token_delta = current_token_ids[len(previous_tokens) :] + delta = parser.extract_tool_calls_streaming( previous_text, current_text, @@ -105,11 +106,11 @@ def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): previous_tokens, current_token_ids, token_delta, - request + request, ) if delta is not None: emitted_messages.append(delta) - + previous_text = current_text previous_tokens = current_token_ids @@ -119,41 +120,46 @@ def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer): for i, msg in enumerate(emitted_messages): if msg.content: accumulated_content += msg.content - + if "Next, I will check the weather for London" in accumulated_content: - # Check if we already saw "London" in any previous or current tool call arguments + # Check if we already saw "London" in any previous or + # current tool call arguments is_london_emitted = any( - tc.function.arguments and "London" in 
tc.function.arguments - for m in emitted_messages[:i+1] if m.tool_calls + tc.function.arguments and "London" in tc.function.arguments + for m in emitted_messages[: i + 1] + if m.tool_calls for tc in m.tool_calls ) if not is_london_emitted: found_early = True break - - assert found_early, "Free text between tool calls should be emitted as soon as the second tool call starts, not delayed." + + assert found_early, ( + "Free text between tool calls should be emitted as soon as the " + "second tool call starts, not delayed." + ) def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): parser = Qwen3XMLToolParser(qwen3_tokenizer) - + # Tool call followed by free text text_to_stream = ( "\n\nParis\n\n" "\nI hope this helps!" ) - + request = ChatCompletionRequest(messages=[], model="test") emitted_messages = [] previous_text = "" previous_tokens = [] token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False) - + for i in range(1, len(token_ids) + 1): current_token_ids = token_ids[:i] current_text = qwen3_tokenizer.decode(current_token_ids) - delta_text = current_text[len(previous_text):] - token_delta = current_token_ids[len(previous_tokens):] - + delta_text = current_text[len(previous_text) :] + token_delta = current_token_ids[len(previous_tokens) :] + delta = parser.extract_tool_calls_streaming( previous_text, current_text, @@ -161,18 +167,20 @@ def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer): previous_tokens, current_token_ids, token_delta, - request + request, ) if delta is not None: emitted_messages.append(delta) - + previous_text = current_text previous_tokens = current_token_ids # Aggregate all emitted content all_content = "".join([m.content for m in emitted_messages if m.content]) - - assert "I hope this helps!" in all_content, "Free text after the last tool call should be emitted." + + assert "I hope this helps!" in all_content, ( + "Free text after the last tool call should be emitted." 
+ ) def test_qwen3xml_streaming_trailing_text_after_literal_close_in_value( @@ -255,9 +263,18 @@ def test_qwen3xml_streaming_python_none_int_char_by_char(qwen3_tokenizer): # Char-by-char deltas emulate worst-case slow streaming. char_deltas = [ - "\n", "\n", "", - "\n", "N", "o", "n", "e", "\n", "\n", - "\n", "", + "\n", + "\n", + "", + "\n", + "N", + "o", + "n", + "e", + "\n", + "\n", + "\n", + "", ] reconstructor = run_tool_extraction_streaming( parser, char_deltas, request, assert_one_tool_per_delta=False @@ -320,7 +337,6 @@ def test_xml_streaming_parallel_tool_calls_preformed_chunks(qwen3_tokenizer): the tokenizer splits XML tags across multiple tokens. It CAN trigger with speculative decoding multi-token flushes. """ - tools = [ ChatCompletionToolsParam( @@ -410,10 +426,10 @@ def test_xml_streaming_boolean_true_not_false(qwen3_tokenizer): "", "\n", "\n", - "t", # ← first char triggers False → emits "false" + "t", # ← first char triggers False → emits "false" "r", "u", - "e", # ← full "true" but delta = "true"[5:] = "" + "e", # ← full "true" but delta = "true"[5:] = "" "", "\n", "\n", @@ -472,7 +488,7 @@ def test_xml_streaming_string_null_last_char_not_dropped(qwen3_tokenizer): "n", "u", "l", - "l", # ← triggers _convert_param_value("null",…) = None → nothing emitted + "l", # ← triggers _convert_param_value("null",…) = None → nothing emitted "", "\n", "\n", @@ -549,9 +565,7 @@ def test_xml_streaming_missing_opening_tool_call_tag(qwen3_tokenizer): if tool_call.function.name: tool_states[idx]["name"] = tool_call.function.name if tool_call.function.arguments is not None: - tool_states[idx]["arguments"] += ( - tool_call.function.arguments - ) + tool_states[idx]["arguments"] += tool_call.function.arguments assert "I'll check the weather for you." 
in other_content assert len(tool_states) == 1 diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index 3226de582921..a3875118861d 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import ast +import contextlib import json import uuid from collections.abc import Sequence @@ -242,10 +243,8 @@ def _convert_param_value( # Python repr like "{'k': 'v'}". json.loads returns a # string in that case — try one more parse. if isinstance(parsed, str): - try: + with contextlib.suppress(ValueError, SyntaxError, TypeError): parsed = ast.literal_eval(parsed) - except (ValueError, SyntaxError, TypeError): - pass return parsed except (json.JSONDecodeError, TypeError, ValueError): logger.debug( @@ -262,10 +261,8 @@ def _convert_param_value( # had no JSON outer layer (e.g. bare Python repr # "{'k': 'v'}"). if is_container_type and isinstance(param_value, str): - try: + with contextlib.suppress(ValueError, SyntaxError, TypeError): param_value = ast.literal_eval(param_value) - except (ValueError, SyntaxError, TypeError): - pass except (ValueError, SyntaxError, TypeError): logger.debug( "Parsed value '%s' of parameter '%s' cannot be " @@ -318,7 +315,7 @@ def _find_true_function_end(self, text: str) -> int: idx = text.find(self.function_end_token, search_pos) if idx == -1: return -1 - after = text[idx + len(self.function_end_token):] + after = text[idx + len(self.function_end_token) :] stripped = after.lstrip() if stripped == "" or stripped.startswith(self.tool_call_end_token): return idx @@ -362,12 +359,10 @@ def _scan_to_structural_function_end( # legacy heuristic on the rest of the text. 
rest_offset = self._find_true_function_end(after_func_open[pos:]) return pos + rest_offset if rest_offset != -1 else -1 - name_end = after_func_open.find( - ">", pos + len(self.parameter_prefix) - ) + name_end = after_func_open.find(">", pos + len(self.parameter_prefix)) if name_end == -1: return -1 - param_name = after_func_open[pos + len(self.parameter_prefix):name_end] + param_name = after_func_open[pos + len(self.parameter_prefix) : name_end] value_start = name_end + 1 if value_start < n and after_func_open[value_start] == "\n": value_start += 1 @@ -436,7 +431,7 @@ def _find_true_tool_call_end(self, text: str) -> int: idx = text.find(self.tool_call_end_token, search_pos) if idx == -1: return -1 - after = text[idx + len(self.tool_call_end_token):] + after = text[idx + len(self.tool_call_end_token) :] stripped = after.lstrip() if stripped == "" or stripped.startswith(self.tool_call_start_token): return idx @@ -466,18 +461,16 @@ def _structural_tool_call_end_positions(self, text: str) -> list[int]: func_open = text.find(self.tool_call_prefix, body_start) if func_open == -1: break - name_end = text.find( - ">", func_open + len(self.tool_call_prefix) - ) + name_end = text.find(">", func_open + len(self.tool_call_prefix)) if name_end == -1: break - func_name = text[func_open + len(self.tool_call_prefix):name_end] + func_name = text[func_open + len(self.tool_call_prefix) : name_end] valid_params: set[str] | None = None if self.tools: cfg = find_tool_properties(self.tools, func_name) if cfg: valid_params = set(cfg.keys()) - body_after_name = text[name_end + 1:] + body_after_name = text[name_end + 1 :] func_end_rel = self._scan_to_structural_function_end( body_after_name, valid_params ) @@ -489,7 +482,7 @@ def _structural_tool_call_end_positions(self, text: str) -> list[int]: # parameter would be erroneously treated as structural. 
break func_end_abs = (name_end + 1) + func_end_rel - after = text[func_end_abs + len(self.function_end_token):] + after = text[func_end_abs + len(self.function_end_token) :] i = 0 while i < len(after) and after[i] in " \t\n\r": i += 1 @@ -532,9 +525,7 @@ def _find_true_param_end( # schema) still increments depth and its matching literal # ```` is balanced — otherwise that close would # appear unmatched and pass the structural lookahead. - next_open = self._next_structural_param_start( - value_text, pos, None - ) + next_open = self._next_structural_param_start(value_text, pos, None) next_close = value_text.find(self.parameter_end_token, pos) if next_close == -1: return -1 @@ -543,7 +534,7 @@ def _find_true_param_end( depth += 1 pos = next_open + param_prefix_len elif depth == 0: - after = value_text[next_close + param_end_len:] + after = value_text[next_close + param_end_len :] stripped = after.lstrip() structural_next_param = False if stripped.startswith(self.parameter_prefix): @@ -552,8 +543,7 @@ def _find_true_param_end( name_end = stripped.find(">", name_start) if name_end != -1: structural_next_param = ( - stripped[name_start:name_end] - in valid_param_names + stripped[name_start:name_end] in valid_param_names ) else: structural_next_param = True @@ -614,9 +604,7 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None: # in the schema (e.g. renamed fields). Schema filtering is # applied only when scanning INSIDE a parameter value, to # disambiguate real nested delimiters from literal text. 
- param_start = self._next_structural_param_start( - parameters, pos, None - ) + param_start = self._next_structural_param_start(parameters, pos, None) if param_start == -1: break name_start = param_start + len(self.parameter_prefix) @@ -675,7 +663,7 @@ def _get_function_calls(self, model_output: str) -> list[str]: tc_start = model_output.find(self.tool_call_start_token, search_pos) if tc_start == -1: break - after_open = model_output[tc_start + len(self.tool_call_start_token):] + after_open = model_output[tc_start + len(self.tool_call_start_token) :] tc_end = -1 inner_search = 0 while True: @@ -683,7 +671,7 @@ def _get_function_calls(self, model_output: str) -> list[str]: if idx == -1: tc_end = -1 break - after_close = after_open[idx + len(self.tool_call_end_token):] + after_close = after_open[idx + len(self.tool_call_end_token) :] stripped = after_close.lstrip() if stripped == "" or stripped.startswith(self.tool_call_start_token): tc_end = idx @@ -694,8 +682,10 @@ def _get_function_calls(self, model_output: str) -> list[str]: break raw_tool_calls.append(after_open[:tc_end]) search_pos = ( - tc_start + len(self.tool_call_start_token) - + tc_end + len(self.tool_call_end_token) + tc_start + + len(self.tool_call_start_token) + + tc_end + + len(self.tool_call_end_token) ) # Back-off strategy if no tool_call tags found @@ -714,7 +704,7 @@ def _get_function_calls(self, model_output: str) -> list[str]: func_start = tool_call.find(self.tool_call_prefix) if func_start == -1: continue - after_func_open = tool_call[func_start + len(self.tool_call_prefix):] + after_func_open = tool_call[func_start + len(self.tool_call_prefix) :] name_end = after_func_open.find(">") valid_param_names: set[str] | None = None body_start = 0 @@ -728,7 +718,7 @@ def _get_function_calls(self, model_output: str) -> list[str]: after_func_open[body_start:], valid_param_names ) if scan_end != -1: - function_calls.append(after_func_open[:body_start + scan_end]) + function_calls.append(after_func_open[: 
body_start + scan_end]) continue # Fallback to legacy heuristic. func_end = self._find_true_function_end(after_func_open) @@ -785,9 +775,7 @@ def extract_tool_calls( tc_pos = model_output.find(tc_start_token, search_pos) if tc_pos == -1: break - tc_close = model_output.find( - tc_end_token, tc_pos + len(tc_start_token) - ) + tc_close = model_output.find(tc_end_token, tc_pos + len(tc_start_token)) # Look for a ```` block contains a # `` count: a literal # embedded in a parameter value must not trigger spurious # advance. - tool_ends = len( - self._structural_tool_call_end_positions(current_text) - ) + tool_ends = len(self._structural_tool_call_end_positions(current_text)) if tool_ends > self.current_tool_index: # Advance to next tool; is_tool_call_started is reset so # content between or after tool calls is emitted correctly. @@ -906,7 +890,7 @@ def extract_tool_calls_streaming( self.is_tool_call_started = True # Return any content before the tool call if last_start > self._sent_content_idx: - content_before = current_text[self._sent_content_idx:last_start] + content_before = current_text[self._sent_content_idx : last_start] self._sent_content_idx = last_start if content_before: content_message = DeltaMessage(content=content_before) @@ -925,7 +909,7 @@ def extract_tool_calls_streaming( return None if sendable_idx > self._sent_content_idx: - content = current_text[self._sent_content_idx:sendable_idx] + content = current_text[self._sent_content_idx : sendable_idx] self._sent_content_idx = sendable_idx if content: return DeltaMessage(content=content) @@ -936,7 +920,7 @@ def extract_tool_calls_streaming( # of completed calls) so that tokens # embedded in a parameter value of a completed call are not # counted as spurious new tool calls. 
- if self.tool_call_start_token not in current_text[self._sent_content_idx:]: + if self.tool_call_start_token not in current_text[self._sent_content_idx :]: return content_message # We're in a tool call, find the current tool call portion. @@ -1022,7 +1006,9 @@ def extract_tool_calls_streaming( tool_call_fragments = DeltaToolCall( index=self.current_tool_index, id=self.current_tool_id, - function=DeltaFunctionCall(name=self.current_function_name, arguments=""), + function=DeltaFunctionCall( + name=self.current_function_name, arguments="" + ), type="function", ) if not self.header_sent: @@ -1051,9 +1037,7 @@ def extract_tool_calls_streaming( self.tools, self.current_function_name or "" ) valid_param_names: set[str] | None = ( - set(streaming_param_config.keys()) - if streaming_param_config - else None + set(streaming_param_config.keys()) if streaming_param_config else None ) param_starts: list[int] = [] search_idx = 0 @@ -1075,7 +1059,7 @@ def extract_tool_calls_streaming( ) if name_end_pos == -1: break - after_name = tool_text[name_end_pos + 1:] + after_name = tool_text[name_end_pos + 1 :] after_name_stripped = ( after_name[1:] if after_name.startswith("\n") else after_name ) @@ -1096,15 +1080,11 @@ def extract_tool_calls_streaming( # scanned). A repeated NAME is almost always a # literal embedded in the unfinished value, not a # real next parameter. 
- cand_name = ( - tool_text[ - param_start_pos + len(self.parameter_prefix) - : name_end_pos - ] - ) - already_seen = ( - set(self.accumulated_params.keys()) - | ({cand_name} if cand_name else set()) + cand_name = tool_text[ + param_start_pos + len(self.parameter_prefix) : name_end_pos + ] + already_seen = set(self.accumulated_params.keys()) | ( + {cand_name} if cand_name else set() ) unseen_valid: set[str] | None = ( (valid_param_names - already_seen) @@ -1176,19 +1156,15 @@ def extract_tool_calls_streaming( ">", tc_open_in_tool + len(self.tool_call_prefix) ) if name_end_in_tool != -1: - body_after_name = tool_text[name_end_in_tool + 1:] - body_func_end_rel = ( - self._scan_to_structural_function_end( - body_after_name, valid_param_names - ) + body_after_name = tool_text[name_end_in_tool + 1 :] + body_func_end_rel = self._scan_to_structural_function_end( + body_after_name, valid_param_names ) if body_func_end_rel != -1: body_func_end_abs = ( name_end_in_tool + 1 + body_func_end_rel ) - body_func_end_in_value = ( - body_func_end_abs - value_start - ) + body_func_end_in_value = body_func_end_abs - value_start if body_func_end_in_value > 0: # Function body is structurally complete; the @@ -1196,9 +1172,8 @@ def extract_tool_calls_streaming( # the next legitimate (NAME # unseen) before the structural as # the implicit end. 
- already_seen = ( - set(self.accumulated_params.keys()) - | ({current_param_name} if current_param_name else set()) + already_seen = set(self.accumulated_params.keys()) | ( + {current_param_name} if current_param_name else set() ) unseen_valid: set[str] | None = ( (valid_param_names - already_seen) @@ -1284,14 +1259,12 @@ def extract_tool_calls_streaming( tc_open_in_tool_for_close + len(self.tool_call_prefix), ) if name_end_in_tool != -1: - body_after_name = tool_text[name_end_in_tool + 1:] + body_after_name = tool_text[name_end_in_tool + 1 :] body_func_end_rel = self._scan_to_structural_function_end( body_after_name, valid_param_names ) if body_func_end_rel != -1: - true_func_end = ( - name_end_in_tool + 1 + body_func_end_rel - ) + true_func_end = name_end_in_tool + 1 + body_func_end_rel if not self.json_closed and true_func_end != -1: self.json_closed = True @@ -1384,9 +1357,7 @@ def extract_tool_calls_streaming( # fragment is silently dropped whenever the outer # already produced its own content. if next_delta.content: - result.content = ( - (result.content or "") + next_delta.content - ) + result.content = (result.content or "") + next_delta.content # Emit trailing free text that follows the LAST structural # in this delta (MTP / spec-decoding bursts that @@ -1396,13 +1367,9 @@ def extract_tool_calls_streaming( # past its tool's ````, and an EOS-style empty # delta cannot recover content that was never emitted. 
if self.json_closed and not self.in_function: - end_positions = self._structural_tool_call_end_positions( - current_text - ) + end_positions = self._structural_tool_call_end_positions(current_text) if end_positions: - last_end = ( - end_positions[-1] + len(self.tool_call_end_token) - ) + last_end = end_positions[-1] + len(self.tool_call_end_token) if ( last_end < len(current_text) and last_end > self._sent_content_idx @@ -1410,9 +1377,7 @@ def extract_tool_calls_streaming( trailing = current_text[last_end:] if trailing: self._sent_content_idx = len(current_text) - result.content = ( - (result.content or "") + trailing - ) + result.content = (result.content or "") + trailing return result return content_message diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index f2ed5aa7c038..3f2ae4d253bf 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import ast +import contextlib import json from collections.abc import Sequence from typing import Any @@ -334,8 +335,8 @@ def _is_structural_closing_tag(self, chunk: str) -> bool: # Fallback (no schema): trust the name unless it is a # repeat of the current/already-emitted param, which # is almost always a literal in a parser fixture. 
- structural_param_follows = ( - not self._is_already_emitted_param(candidate) + structural_param_follows = not self._is_already_emitted_param( + candidate ) # Return True when rest is an incomplete prefix of a structural @@ -376,7 +377,7 @@ def _chunk_has_structural_function_end(self, chunk: str) -> bool: idx = chunk.find(token, search) if idx == -1: return False - rest = chunk[idx + len(token):].lstrip() + rest = chunk[idx + len(token) :].lstrip() if not rest or rest.startswith(end_token): return True search = idx + len(token) @@ -390,7 +391,7 @@ def _chunk_has_structural_tool_call_end(self, chunk: str) -> bool: idx = chunk.find(token, search) if idx == -1: return False - rest = chunk[idx + len(token):].lstrip() + rest = chunk[idx + len(token) :].lstrip() if not rest or rest.startswith(start_token): return True search = idx + len(token) @@ -438,12 +439,9 @@ def _process_complete_xml_elements(self) -> bool: preprocessed_element = self._preprocess_xml_chunk(element) # Check if a new tool_call starts and we have buffered text content if ( - ( - preprocessed_element.strip().startswith("") - or preprocessed_element.strip().startswith("") + or preprocessed_element.strip().startswith(" str: # Without deferral, the streaming string path emits # ``"`` + chars + ``"`` and the literal stays # quoted. - is_nullable_string = ( - param_type in [ - "string", "str", "text", "varchar", "char", "enum", - ] - and self._param_allows_null(self._pre_current_param_name) - ) + is_nullable_string = param_type in [ + "string", + "str", + "text", + "varchar", + "char", + "enum", + ] and self._param_allows_null(self._pre_current_param_name) need_defer = ( is_complex_type or is_object_type @@ -798,9 +794,8 @@ def _preprocess_xml_chunk(self, chunk: str) -> str: # describes the tool-call format. Escape them unless they are # genuine structural delimiters. 
if self.current_param_name is not None: - if ( - chunk.startswith(self.tool_call_start_token) - or chunk.startswith(self.function_start_token) + if chunk.startswith(self.tool_call_start_token) or chunk.startswith( + self.function_start_token ): # Opening tool_call/function tags are always literal inside # a parameter value. Track nesting depth so that the @@ -1115,20 +1110,20 @@ def _end_element(self, name: str): # kept verbatim as a string — never parsed as int, # float, JSON, etc., even if it LOOKS like one. _param_type_for_check = self._get_param_type(param_name) - if ( - _param_type_for_check in [ - "string", "str", "text", "varchar", "char", "enum", - ] - and self._param_allows_null(param_name) - ): + if _param_type_for_check in [ + "string", + "str", + "text", + "varchar", + "char", + "enum", + ] and self._param_allows_null(param_name): if raw_for_parse.strip().lower() in ("null", "none"): parsed_value = None output_arguments = "null" else: parsed_value = raw_for_parse - output_arguments = json.dumps( - raw_for_parse, ensure_ascii=False - ) + output_arguments = json.dumps(raw_for_parse, ensure_ascii=False) delta = DeltaMessage( tool_calls=[ DeltaToolCall( @@ -1177,13 +1172,11 @@ def _end_element(self, name: str): try: parsed_value = ast.literal_eval(parsed_value) except (ValueError, SyntaxError, TypeError): - try: + with contextlib.suppress( + json.JSONDecodeError, ValueError + ): parsed_value = json.loads(parsed_value) - except (json.JSONDecodeError, ValueError): - pass - output_arguments = json.dumps( - parsed_value, ensure_ascii=False - ) + output_arguments = json.dumps(parsed_value, ensure_ascii=False) except (json.JSONDecodeError, ValueError): try: parsed_value = ast.literal_eval(raw_for_parse) @@ -1194,31 +1187,22 @@ def _end_element(self, name: str): # case — try one more level. 
if isinstance(parsed_value, str): try: - parsed_value = ast.literal_eval( - parsed_value - ) + parsed_value = ast.literal_eval(parsed_value) except ( ValueError, SyntaxError, TypeError, ): - try: - parsed_value = json.loads( - parsed_value - ) - except ( - json.JSONDecodeError, - ValueError, + with contextlib.suppress( + json.JSONDecodeError, ValueError ): - pass + parsed_value = json.loads(parsed_value) output_arguments = json.dumps( parsed_value, ensure_ascii=False ) except (ValueError, SyntaxError, TypeError): # Fallback: output as string as-is - output_arguments = json.dumps( - raw_text, ensure_ascii=False - ) + output_arguments = json.dumps(raw_text, ensure_ascii=False) parsed_value = raw_text delta = DeltaMessage( @@ -1398,6 +1382,14 @@ def _get_param_type(self, param_name: str) -> str: if param_name in properties and isinstance(properties[param_name], dict): prop = properties[param_name] param_type = prop.get("type") + if isinstance(param_type, list): + # JSON-Schema list-form type, e.g. + # {"type": ["integer", "null"]}. Pick the first non-null + # type, mirroring the anyOf handling below. + for option_type in param_type: + if str(option_type).lower() != "null": + return self.repair_param_type(str(option_type)) + return "string" if param_type is None and "anyOf" in prop: # Handle anyOf schemas (e.g. nullable types like # anyOf: [{type: "integer"}, {type: "null"}]). 
@@ -1421,15 +1413,16 @@ def _param_allows_null(self, param_name: str | None) -> bool: if not self.tools or not self.current_function_name or not param_name: return False properties = find_tool_properties(self.tools, self.current_function_name) - if param_name not in properties or not isinstance( - properties[param_name], dict - ): + if param_name not in properties or not isinstance(properties[param_name], dict): return False prop = properties[param_name] if str(prop.get("type", "")).lower() == "null": return True for option in prop.get("anyOf", []) or []: - if isinstance(option, dict) and str(option.get("type", "")).lower() == "null": + if ( + isinstance(option, dict) + and str(option.get("type", "")).lower() == "null" + ): return True return False @@ -1476,9 +1469,9 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: # ``"None"`` must still convert to JSON null. Caller passes the # current parameter name via the parser state so we can query # the schema. - if ( - self._param_allows_null(self.current_param_name) - and param_value.lower() in ("null", "none") + if self._param_allows_null(self.current_param_name) and param_value.lower() in ( + "null", + "none", ): return None # String type takes precedence: the literal value "null" must remain @@ -1503,8 +1496,7 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: return int(param_value) except (ValueError, TypeError): logger.warning( - "Parsed value '%s' is not an integer, " - "degenerating to string.", + "Parsed value '%s' is not an integer, degenerating to string.", param_value, ) return param_value @@ -1518,8 +1510,7 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any: ) except (ValueError, TypeError): logger.warning( - "Parsed value '%s' is not a float, " - "degenerating to string.", + "Parsed value '%s' is not a float, degenerating to string.", param_value, ) return param_value