From 80e5da43e7bb378f17fd4292fbd5c9f25c449128 Mon Sep 17 00:00:00 2001 From: Karan Bansal Date: Sun, 18 Jan 2026 11:48:38 +0530 Subject: [PATCH] [Bugfix][Tool Parser] Fix Qwen3 Coder parser to stream tool call arguments When using Qwen3 Coder for tool calls, the parser previously waited until the entire parameter value was complete before sending any stream chunks. This caused the stream to halt for long parameters like code blocks, with no indication of progress. This change enables incremental streaming of tool call arguments by: - Setting `self.in_param = True` when a parameter starts but isn't complete - Streaming the parameter key header and partial values as they arrive - Properly handling multiple end markers (, ) - JSON-escaping streamed values correctly Fixes vllm-project/vllm#30439 Signed-off-by: Karan Bansal --- vllm/tool_parsers/qwen3coder_tool_parser.py | 136 ++++++++++++++------ 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index a3c79f865b15..9ee11d752b77 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -631,8 +631,53 @@ def extract_tool_calls_streaming( # remaining text before function end param_end_idx = len(value_text) else: - # Still streaming, wait for more content - return None + # Parameter is incomplete - start streaming + # mode instead of waiting for complete param + self.in_param = True + self.current_param_value = "" + + # Build and send the parameter key header + if self.param_count == 0: + key_fragment = f'"{self.current_param_name}": "' + else: + key_fragment = f', "{self.current_param_name}": "' + + self.param_count += 1 + + # Stream the partial value we have so far + partial_value = value_text + if partial_value.endswith("\n"): + partial_value = partial_value[:-1] + + if partial_value: + # JSON escape the partial value + escaped_value = json.dumps( + partial_value, ensure_ascii=False + )[1:-1] # Remove surrounding quotes + self.current_param_value = partial_value + args = key_fragment + escaped_value + return DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=self.current_tool_index, + function=DeltaFunctionCall( + arguments=args + ), + ) + ] + ) + else: + # No value yet, just send the key header + return DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=self.current_tool_index, + function=DeltaFunctionCall( + arguments=key_fragment + ), + ) + ] + ) if param_end_idx != -1: # Complete parameter found @@ -685,12 +730,25 @@ def extract_tool_calls_streaming( ] ) - # Continue parameter value - Not used in the current implementation - # since we process complete parameters above + # Continue streaming parameter value when in_param is True if self.in_param: - if self.parameter_end_token in delta_text: - # End of parameter - end_idx = delta_text.find(self.parameter_end_token) + # Check for parameter end markers: , + # + end_marker = None + end_idx = -1 + + for marker in [ + self.parameter_end_token, + self.parameter_prefix, + self.function_end_token, + ]: + idx = delta_text.find(marker) + if idx != -1 and (end_idx == -1 or idx < end_idx): + end_idx = idx + end_marker = marker + + if end_marker is not None: + # End of parameter found value_chunk = delta_text[:end_idx] # Skip past > if at start @@ -701,46 +759,44 @@ def extract_tool_calls_streaming( if not self.current_param_value and value_chunk.startswith("\n"): value_chunk = value_chunk[1:] + # Remove trailing newline if present + if value_chunk.endswith("\n"): + value_chunk = value_chunk[:-1] + # Store complete value full_value = self.current_param_value + value_chunk - self.accumulated_params[self.current_param_name] = full_value - - # Get parameter configuration for type conversion - param_config = self._get_arguments_config( - self.current_function_name or "", - self.streaming_request.tools - if self.streaming_request - else None, - ) + if self.current_param_name: + self.accumulated_params[self.current_param_name] = full_value - # Convert the parameter value to the appropriate type - converted_value = self._convert_param_value( - full_value, - self.current_param_name or "", - param_config, - self.current_function_name or "", - ) - - # Serialize the converted value - serialized_value = json.dumps(converted_value, ensure_ascii=False) - - # Since we've been streaming the quoted version, - # we need to close it properly - # This is complex - for now just complete the value + # Reset streaming state self.in_param = False self.current_param_value = "" - # Just close the current parameter string - return DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.current_tool_index, - function=DeltaFunctionCall( - arguments='"' - ), # Close the string quote - ) + # Stream the final chunk (escaped) and close the string + if value_chunk: + escaped_chunk = json.dumps(value_chunk, ensure_ascii=False)[ + 1:-1 ] - ) + return DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=self.current_tool_index, + function=DeltaFunctionCall( + arguments=escaped_chunk + '"' + ), + ) + ] + ) + else: + # Just close the string quote + return DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=self.current_tool_index, + function=DeltaFunctionCall(arguments='"'), + ) + ] + ) else: # Continue accumulating value value_chunk = delta_text