diff --git a/tests/tool_parsers/test_deepseekv32_tool_parser.py b/tests/tool_parsers/test_deepseekv32_tool_parser.py index 0adffa7ec824..3a166e8daa84 100644 --- a/tests/tool_parsers/test_deepseekv32_tool_parser.py +++ b/tests/tool_parsers/test_deepseekv32_tool_parser.py @@ -11,6 +11,10 @@ import pytest +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionToolsParam, + FunctionDefinition, +) from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser @@ -24,8 +28,8 @@ MOCK_TOKENIZER.get_vocab.return_value = {} -def make_parser() -> DeepSeekV32ToolParser: - return DeepSeekV32ToolParser(MOCK_TOKENIZER) +def make_parser(tools=None) -> DeepSeekV32ToolParser: + return DeepSeekV32ToolParser(MOCK_TOKENIZER, tools=tools) def make_tool_param(name: str, params: dict) -> MagicMock: @@ -275,20 +279,22 @@ def test_content_before_tool_call_streaming(self, parser): content = "".join(d.content for d in deltas if d.content is not None) assert "Thinking" in content - def test_type_conversion_in_streaming(self, parser): - tool = make_tool_param( - "add", - { - "type": "object", - "properties": { - "x": {"type": "integer"}, - "y": {"type": "integer"}, + def test_type_conversion_in_streaming(self): + tool = ChatCompletionToolsParam( + function=FunctionDefinition( + name="add", + parameters={ + "type": "object", + "properties": { + "x": {"type": "integer"}, + "y": {"type": "integer"}, + }, }, - }, + ), ) - request = make_request(tools=[tool]) + parser = make_parser(tools=[tool]) full_text = build_tool_call("add", {"x": "3", "y": "4"}) - deltas = self._stream(parser, full_text, request=request) + deltas = self._stream(parser, full_text) args_str = self._reconstruct_args(deltas) assert json.loads(args_str) == {"x": 3, "y": 4} diff --git a/tests/tool_parsers/test_glm47_moe_tool_parser.py b/tests/tool_parsers/test_glm47_moe_tool_parser.py index c7170e67500f..ebcd4e8d42e0 100644 --- a/tests/tool_parsers/test_glm47_moe_tool_parser.py +++ b/tests/tool_parsers/test_glm47_moe_tool_parser.py @@ -25,14 +25,8 @@ def glm47_tokenizer(): @pytest.fixture -def glm47_tool_parser(glm47_tokenizer): - return Glm47MoeModelToolParser(glm47_tokenizer) - - -@pytest.fixture -def mock_request() -> ChatCompletionRequest: - request = Mock(spec=ChatCompletionRequest) - request.tools = [ +def sample_tools(): + return [ ChatCompletionToolsParam( function=FunctionDefinition(name="get_current_date", parameters={}), ), @@ -49,6 +43,17 @@ def mock_request() -> ChatCompletionRequest: ), ), ] + + +@pytest.fixture +def glm47_tool_parser(glm47_tokenizer, sample_tools): + return Glm47MoeModelToolParser(glm47_tokenizer, tools=sample_tools) + + +@pytest.fixture +def mock_request(sample_tools) -> ChatCompletionRequest: + request = Mock(spec=ChatCompletionRequest) + request.tools = sample_tools request.tool_choice = "auto" return request diff --git a/tests/tool_parsers/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py index 213cc75db7ea..ab6b4f150d31 100644 --- a/tests/tool_parsers/test_glm4_moe_tool_parser.py +++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py @@ -27,14 +27,8 @@ def glm4_moe_tokenizer(): @pytest.fixture -def glm4_moe_tool_parser(glm4_moe_tokenizer): - return Glm4MoeModelToolParser(glm4_moe_tokenizer) - - -@pytest.fixture -def mock_request() -> ChatCompletionRequest: - request = Mock(spec=ChatCompletionRequest) - request.tools = [ # GLM45 parser needs this attribute to enable tool parsing. +def sample_tools(): + return [ ChatCompletionToolsParam( function=FunctionDefinition( name="get_weather", @@ -42,6 +36,17 @@ def mock_request() -> ChatCompletionRequest: ), ), ] + + +@pytest.fixture +def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools): + return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools) + + +@pytest.fixture +def mock_request(sample_tools) -> ChatCompletionRequest: + request = Mock(spec=ChatCompletionRequest) + request.tools = sample_tools return request @@ -671,14 +676,13 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request): assert '"' in parsed["message"] or "world" in parsed["message"] -def test_streaming_long_content_incremental(glm4_moe_tool_parser): +def test_streaming_long_content_incremental(glm4_moe_tokenizer): """Test incremental streaming of long content (Issue #32829). This is the core fix: for long string values like code (4000+ chars), the parser should stream incrementally rather than buffering until complete. This test verifies we get many fragments, not just 1-3. """ - _reset_streaming_state(glm4_moe_tool_parser) # Bubble sort example from Issue #32829 - realistic long content bubble_sort_code = '''#!/usr/bin/env python3 @@ -705,27 +709,28 @@ def bubble_sort(arr): sorted_arr = bubble_sort(test_arr.copy()) print(f"Sorted: {sorted_arr}")''' - # Create a request with tool schema to enable string type detection + # Create tools with schema to enable string type detection # This is required for incremental streaming of string values + tools = [ + ChatCompletionToolsParam( + function=FunctionDefinition( + name="write_to_file", + parameters={ + "type": "object", + "properties": { + "file_path": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + ), + ), + ] + glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools) request = ChatCompletionRequest( model=MODEL, messages=[], - tools=[ - { - "type": "function", - "function": { - "name": "write_to_file", - "parameters": { - "type": "object", - "properties": { - "file_path": {"type": "string"}, - "content": {"type": "string"}, - }, - }, - }, - } - ], - ) # type: ignore + tools=tools, + ) # Simulate token-based streaming (special tags as single tokens) chunks = [ diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py index 3d46f73de612..2b0b6d3bfefc 100644 --- a/tests/tool_parsers/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -31,13 +31,13 @@ def qwen3_tokenizer(): @pytest.fixture -def qwen3_tool_parser(qwen3_tokenizer): - return Qwen3CoderToolParser(qwen3_tokenizer) +def qwen3_tool_parser(qwen3_tokenizer, sample_tools): + return Qwen3CoderToolParser(qwen3_tokenizer, tools=sample_tools) @pytest.fixture -def qwen3_xml_tool_parser(qwen3_tokenizer): - return Qwen3XMLToolParser(qwen3_tokenizer) +def qwen3_xml_tool_parser(qwen3_tokenizer, sample_tools): + return Qwen3XMLToolParser(qwen3_tokenizer, tools=sample_tools) @pytest.fixture(params=["xml"]) @@ -376,7 +376,7 @@ def test_extract_tool_calls_fallback_no_tags( assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" -def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized): +def test_extract_tool_calls_type_conversion(qwen3_tokenizer): """Test parameter type conversion based on tool schema""" tools = [ ChatCompletionToolsParam( @@ -417,10 +417,9 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized): """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["int_param"] == 42 @@ -859,7 +858,7 @@ def test_extract_tool_calls_streaming_incremental( def test_extract_tool_calls_complex_type_with_single_quote( - qwen3_tool_parser_parametrized, + qwen3_tokenizer, ): """Test parameter type conversion based on tool schema""" tools = [ @@ -889,10 +888,9 @@ def test_extract_tool_calls_complex_type_with_single_quote( """ + parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["obj_param"] == {"key": "value"} diff --git a/tests/tool_parsers/test_seed_oss_tool_parser.py b/tests/tool_parsers/test_seed_oss_tool_parser.py index 87e71a12faa2..9dd13afe01e3 100644 --- a/tests/tool_parsers/test_seed_oss_tool_parser.py +++ b/tests/tool_parsers/test_seed_oss_tool_parser.py @@ -30,8 +30,8 @@ def seed_oss_tokenizer(): @pytest.fixture -def seed_oss_tool_parser(seed_oss_tokenizer): - return SeedOssToolParser(seed_oss_tokenizer) +def seed_oss_tool_parser(seed_oss_tokenizer, sample_tools): + return SeedOssToolParser(seed_oss_tokenizer, tools=sample_tools) @pytest.fixture diff --git a/tests/tool_parsers/test_step3p5_tool_parser.py b/tests/tool_parsers/test_step3p5_tool_parser.py index b3cb4e20fb9c..45fc33d8d579 100644 --- a/tests/tool_parsers/test_step3p5_tool_parser.py +++ b/tests/tool_parsers/test_step3p5_tool_parser.py @@ -28,8 +28,8 @@ def step3p5_tokenizer(): @pytest.fixture -def step3p5_tool_parser(step3p5_tokenizer): - return Step3p5ToolParser(step3p5_tokenizer) +def step3p5_tool_parser(step3p5_tokenizer, sample_tools): + return Step3p5ToolParser(step3p5_tokenizer, tools=sample_tools) @pytest.fixture @@ -386,7 +386,7 @@ def test_extract_tool_calls_fallback_no_tags(step3p5_tool_parser, sample_tools): assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" -def test_extract_tool_calls_type_conversion(step3p5_tool_parser): +def test_extract_tool_calls_type_conversion(step3p5_tokenizer): """Test parameter type conversion based on tool schema""" tools = [ ChatCompletionToolsParam( @@ -427,10 +427,9 @@ def test_extract_tool_calls_type_conversion(step3p5_tool_parser): """ + parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["int_param"] == 42 @@ -864,7 +863,7 @@ def test_extract_tool_calls_streaming_incremental( assert parsed_args["state"] == "TX" -def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser): +def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tokenizer): """Test parameter type conversion based on tool schema""" tools = [ ChatCompletionToolsParam( @@ -893,10 +892,9 @@ def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser): """ + parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools) request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( - model_output, request=request - ) + extracted_tool_calls = parser.extract_tool_calls(model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["obj_param"] == {"key": "value"} diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index 3f578b1b64f6..b8783163d846 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -10,9 +10,11 @@ ResponseFormatTextJSONSchemaConfig, ResponseTextConfig, ) +from openai.types.responses.function_tool import FunctionTool from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, + ChatCompletionToolsParam, ) from vllm.entrypoints.openai.engine.protocol import ( DeltaMessage, @@ -54,7 +56,14 @@ def __init__( self.streamed_args_for_tool: list[str] = [] self.model_tokenizer = tokenizer - self.tools = tools + if tools: + self.tools: list[ChatCompletionToolsParam | FunctionTool] = [ + tool + for tool in tools + if isinstance(tool, (ChatCompletionToolsParam, FunctionTool)) + ] + else: + self.tools = [] @cached_property def vocab(self) -> dict[str, int]: diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py index a9772b753134..fe44be6e17cb 100644 --- a/vllm/tool_parsers/deepseekv32_tool_parser.py +++ b/vllm/tool_parsers/deepseekv32_tool_parser.py @@ -139,12 +139,11 @@ def _convert_params_with_schema( self, function_name: str, param_dict: dict[str, str], - request: ChatCompletionRequest | None, ) -> dict[str, Any]: """Convert raw string param values using the tool schema types.""" param_config: dict = {} - if request and request.tools: - for tool in request.tools: + if self.tools: + for tool in self.tools: if ( hasattr(tool, "function") and tool.function.name == function_name @@ -238,9 +237,7 @@ def _extract_delta_tool_calls( invoke_name, invoke_body = complete_invokes[self.current_tool_index] param_dict = self._parse_invoke_params(invoke_body) - converted = self._convert_params_with_schema( - invoke_name, param_dict, request - ) + converted = self._convert_params_with_schema(invoke_name, param_dict) args_json = json.dumps(converted, ensure_ascii=False) idx = self.current_tool_index self.current_tool_index += 1 diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py index fc718921d5ce..53541aaba79a 100644 --- a/vllm/tool_parsers/glm4_moe_tool_parser.py +++ b/vllm/tool_parsers/glm4_moe_tool_parser.py @@ -186,7 +186,7 @@ def extract_tool_calls( for key, value in pairs: arg_key = key.strip() arg_val = value.strip() - if not self._is_string_type(tc_name, arg_key, request.tools): + if not self._is_string_type(tc_name, arg_key, self.tools): arg_val = self._deserialize(arg_val) logger.debug("arg_key = %s, arg_val = %s", arg_key, arg_val) arg_dct[arg_key] = arg_val @@ -327,7 +327,7 @@ def extract_tool_calls_streaming( key = (self._pending_key or "").strip() is_string = self._is_string_type( - self._current_tool_name, key, request.tools + self._current_tool_name, key, self.tools ) if is_string: diff --git a/vllm/tool_parsers/internlm2_tool_parser.py b/vllm/tool_parsers/internlm2_tool_parser.py index fc7c44cff9ef..f24d97289be7 100644 --- a/vllm/tool_parsers/internlm2_tool_parser.py +++ b/vllm/tool_parsers/internlm2_tool_parser.py @@ -197,7 +197,7 @@ def extract_tool_calls( request: ChatCompletionRequest, ) -> ExtractedToolCallInformation: text = model_output - tools = request.tools + tools = self.tools if "<|action_start|><|plugin|>" in text: text, action = text.split("<|action_start|><|plugin|>") action = action.split("<|action_end|>".strip())[0] diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py index 6c75e009947a..028468fe5121 100644 --- a/vllm/tool_parsers/minimax_m2_tool_parser.py +++ b/vllm/tool_parsers/minimax_m2_tool_parser.py @@ -308,7 +308,7 @@ def _extract_delta_tool_calls( invoke_str = complete_invokes[self.current_tool_index] tool_call = self._parse_single_invoke( invoke_str, - request.tools if request else None, + self.tools, ) if not tool_call: self.current_tool_index += 1 @@ -358,9 +358,7 @@ def extract_tool_calls( for tool_call_match in self.tool_call_complete_regex.findall(model_output): # Find all invokes within this tool_call for invoke_match in self.invoke_complete_regex.findall(tool_call_match): - tool_call = self._parse_single_invoke( - invoke_match, request.tools if request else None - ) + tool_call = self._parse_single_invoke(invoke_match, self.tools) if tool_call: tool_calls.append(tool_call) diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py index f9b406b53ec3..ea25ea2be923 100644 --- a/vllm/tool_parsers/qwen3coder_tool_parser.py +++ b/vllm/tool_parsers/qwen3coder_tool_parser.py @@ -314,7 +314,7 @@ def extract_tool_calls( ) tool_calls = [ - self._parse_xml_function_call(function_call_str, request.tools) + self._parse_xml_function_call(function_call_str, self.tools) for function_call_str in function_calls ] # Populate prev_tool_call_arr for serving layer to set finish_reason @@ -607,7 +607,7 @@ def extract_tool_calls_streaming( param_config = self._get_arguments_config( self.current_function_name or "", - self.streaming_request.tools if self.streaming_request else None, + self.tools, ) converted_value = self._convert_param_value( @@ -666,9 +666,7 @@ def extract_tool_calls_streaming( try: parsed_tool = self._parse_xml_function_call( func_content, - self.streaming_request.tools - if self.streaming_request - else None, + self.tools, ) if parsed_tool and self.current_tool_index < len( self.prev_tool_call_arr diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py index 23778091ee98..6e28c82b13d9 100644 --- a/vllm/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/tool_parsers/qwen3xml_tool_parser.py @@ -1188,8 +1188,7 @@ def extract_tool_calls( # Reset tool call tracking arrays for new extraction self.prev_tool_call_arr = [] self.streamed_args_for_tool = [] - if request: - self.parser.set_tools(request.tools) + self.parser.set_tools(self.tools) result = self.parser.parse_single_streaming_chunks(model_output) if not result.tool_calls: return ExtractedToolCallInformation( @@ -1260,8 +1259,7 @@ def extract_tool_calls_streaming( # Reset tool call tracking arrays for new streaming session self.prev_tool_call_arr = [] self.streamed_args_for_tool = [] - if request: - self.parser.set_tools(request.tools) + self.parser.set_tools(self.tools) # Model sometimes outputs separately causing delta_text to be empty. # If there were tool_calls before and all current tool_calls have ended, diff --git a/vllm/tool_parsers/seed_oss_tool_parser.py b/vllm/tool_parsers/seed_oss_tool_parser.py index 48cee6a6951f..7dbc6aa49ea1 100644 --- a/vllm/tool_parsers/seed_oss_tool_parser.py +++ b/vllm/tool_parsers/seed_oss_tool_parser.py @@ -312,7 +312,7 @@ def extract_tool_calls( ) tool_calls = [ - self._parse_xml_function_call(function_call_str, request.tools) + self._parse_xml_function_call(function_call_str, self.tools) for function_call_str in function_calls ] @@ -566,7 +566,7 @@ def extract_tool_calls_streaming( # Parse to get the complete arguments try: parsed_tool = self._parse_xml_function_call( - func_content, request.tools if request else None + func_content, self.tools ) if parsed_tool: # Update existing entry in prev_tool_call_arr with complete arguments diff --git a/vllm/tool_parsers/step3_tool_parser.py b/vllm/tool_parsers/step3_tool_parser.py index a9c5695876f7..ad03ed20ee00 100644 --- a/vllm/tool_parsers/step3_tool_parser.py +++ b/vllm/tool_parsers/step3_tool_parser.py @@ -79,9 +79,8 @@ def _cast_arguments( self, func_name: str, params: dict[str, Any], - request: ChatCompletionRequest, ) -> dict[str, Any]: - for tool in request.tools or []: + for tool in self.tools or []: if tool.function.name == func_name: schema = tool.function.parameters or {} properties = schema.get("properties", {}) @@ -231,7 +230,6 @@ def extract_tool_calls_streaming( final_args = self._cast_arguments( function_name, tool_call_arr.get("parameters", {}), # type: ignore - request, ) if final_args: final_args_json = json.dumps(final_args, ensure_ascii=False) @@ -288,7 +286,7 @@ def extract_tool_calls( function_name, params_dict = self._parse_steptml_invoke(invoke_part) if function_name and params_dict is not None: - params_dict = self._cast_arguments(function_name, params_dict, request) + params_dict = self._cast_arguments(function_name, params_dict) params_str = json.dumps(params_dict, ensure_ascii=False) tool_calls.append( ToolCall( diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py index 25b310f2af6c..b46f899ce2ca 100644 --- a/vllm/tool_parsers/step3p5_tool_parser.py +++ b/vllm/tool_parsers/step3p5_tool_parser.py @@ -1385,8 +1385,7 @@ def extract_tool_calls( # Reset tool call tracking arrays for new extraction self.prev_tool_call_arr = [] self.streamed_args_for_tool = [] - if request: - self.parser.set_tools(request.tools) + self.parser.set_tools(self.tools) result = self.parser.parse_single_streaming_chunks(model_output) if not result.tool_calls: return ExtractedToolCallInformation( @@ -1457,8 +1456,7 @@ def extract_tool_calls_streaming( # Reset tool call tracking arrays for new streaming session self.prev_tool_call_arr = [] self.streamed_args_for_tool = [] - if request: - self.parser.set_tools(request.tools) + self.parser.set_tools(self.tools) # Model sometimes outputs separately causing delta_text to be empty. # If there were tool_calls before and all current tool_calls have ended,