From 79c56304d8d1a54e724f3e03c28187f49a359530 Mon Sep 17 00:00:00 2001 From: mariohong Date: Tue, 3 Feb 2026 18:00:38 +0800 Subject: [PATCH 1/3] fix step3p5 parsers Signed-off-by: mariohong --- vllm/reasoning/step3p5_reasoning_parser.py | 23 ++++++-------------- vllm/tool_parsers/step3p5_tool_parser.py | 25 +++++++++++++++++----- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/vllm/reasoning/step3p5_reasoning_parser.py b/vllm/reasoning/step3p5_reasoning_parser.py index b93f551426fb..610851e73bac 100644 --- a/vllm/reasoning/step3p5_reasoning_parser.py +++ b/vllm/reasoning/step3p5_reasoning_parser.py @@ -39,24 +39,18 @@ def __init__(self, tokenizer: TokenizerLike, *args, **kwargs): # whether it is immediately before . self._pending_reasoning_newline = False - # Used to delay the reasoning end detection. - # This is necessary to remove the newline appears immediately after , - # which may cause the end detection to be delayed by one round. - self.end_offset = 1 - def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: - if self.end_token_id in input_ids and self.end_offset > 0: - self.end_offset -= 1 - return False - return self.end_offset < 1 + # Stateless gating: reasoning ends only after and at least + # one token appears after the last in the same sequence. + for idx in range(len(input_ids) - 1, -1, -1): + if input_ids[idx] == self.end_token_id: + return idx < len(input_ids) - 1 + return False def is_reasoning_end_streaming( self, input_ids: Sequence[int], delta_ids: Sequence[int] ) -> bool: - if self.end_token_id in input_ids and self.end_offset > 0: - self.end_offset -= 1 - return False - return self.end_offset < 1 + return self.is_reasoning_end(input_ids) def extract_reasoning( self, @@ -136,9 +130,6 @@ def extract_reasoning_streaming( # Content: handle the newline immediately after . if content_to_output is not None: - # No need to get into parser again to remove newline after . - self.end_offset -= 1 - # If we have content, reasoning must have ended. self._pending_reasoning_newline = False diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py index b7c8699a03db..e52c0a706da0 100644 --- a/vllm/tool_parsers/step3p5_tool_parser.py +++ b/vllm/tool_parsers/step3p5_tool_parser.py @@ -97,11 +97,26 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: """ # Record delta count before processing initial_delta_count = len(self.deltas) + entry_call_id = self.current_call_id + entry_tool_call_index = self.tool_call_index self.streaming_buffer += xml_chunk found_elements = self._process_complete_xml_elements() + fallback_call_id = None + if entry_call_id is not None: + if ( + self.current_call_id == entry_call_id + and self.tool_call_index == entry_tool_call_index + ): + fallback_call_id = entry_call_id + elif ( + self.current_call_id is not None + and self.tool_call_index == entry_tool_call_index + 1 + ): + fallback_call_id = self.current_call_id + if found_elements: # If complete elements found, check if end events were missed # some tags may not have been triggered @@ -110,7 +125,7 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # If this chunk contains # but didn't generate '}', then complete it if ( - self.current_call_id is not None + fallback_call_id is not None and self.function_end_token in xml_chunk ): # - Added '}' (non-empty parameter ending) @@ -121,7 +136,7 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: and any( ( tc.function - and tc.id == self.current_call_id + and tc.id == fallback_call_id and isinstance(tc.function.arguments, str) and (tc.function.arguments in ("}", "{}")) ) @@ -139,7 +154,7 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # If this chunk contains # but didn't generate final empty delta, then complete it if ( - self.current_call_id is not None + fallback_call_id is not None and self.tool_call_end_token in xml_chunk ): has_toolcall_close = any( @@ -150,7 +165,7 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: tc.type == "function" and tc.function and tc.function.arguments == "" - and tc.id == self.current_call_id + and tc.id == fallback_call_id ) for tc in td.tool_calls ) @@ -186,7 +201,7 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage: # Only execute when still on the same call as when entered, # to prevent accidentally closing new calls # in multi scenarios - if self.current_call_id is not None and ( + if fallback_call_id is not None and ( self.function_end_token in xml_chunk or self.tool_call_end_token in xml_chunk ): From dbb693940eb48f3ec9d7e2a6b0079c99f56ece93 Mon Sep 17 00:00:00 2001 From: mariohong Date: Tue, 3 Feb 2026 19:40:43 +0800 Subject: [PATCH 2/3] revert reasoning parser Signed-off-by: mariohong --- vllm/reasoning/step3p5_reasoning_parser.py | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/vllm/reasoning/step3p5_reasoning_parser.py b/vllm/reasoning/step3p5_reasoning_parser.py index 610851e73bac..b93f551426fb 100644 --- a/vllm/reasoning/step3p5_reasoning_parser.py +++ b/vllm/reasoning/step3p5_reasoning_parser.py @@ -39,18 +39,24 @@ def __init__(self, tokenizer: TokenizerLike, *args, **kwargs): # whether it is immediately before . self._pending_reasoning_newline = False + # Used to delay the reasoning end detection. + # This is necessary to remove the newline appears immediately after , + # which may cause the end detection to be delayed by one round. + self.end_offset = 1 + def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: - # Stateless gating: reasoning ends only after and at least - # one token appears after the last in the same sequence. - for idx in range(len(input_ids) - 1, -1, -1): - if input_ids[idx] == self.end_token_id: - return idx < len(input_ids) - 1 - return False + if self.end_token_id in input_ids and self.end_offset > 0: + self.end_offset -= 1 + return False + return self.end_offset < 1 def is_reasoning_end_streaming( self, input_ids: Sequence[int], delta_ids: Sequence[int] ) -> bool: - return self.is_reasoning_end(input_ids) + if self.end_token_id in input_ids and self.end_offset > 0: + self.end_offset -= 1 + return False + return self.end_offset < 1 def extract_reasoning( self, @@ -130,6 +136,9 @@ def extract_reasoning_streaming( # Content: handle the newline immediately after . if content_to_output is not None: + # No need to get into parser again to remove newline after . + self.end_offset -= 1 + # If we have content, reasoning must have ended. self._pending_reasoning_newline = False From 32f2c00b5f038a8b5b96a9437f359e5f71808d54 Mon Sep 17 00:00:00 2001 From: mariohong Date: Tue, 3 Feb 2026 19:58:39 +0800 Subject: [PATCH 3/3] add test Signed-off-by: mariohong --- .../tool_parsers/test_step3p5_tool_parser.py | 1435 +++++++++++++++++ 1 file changed, 1435 insertions(+) create mode 100644 tests/tool_parsers/test_step3p5_tool_parser.py diff --git a/tests/tool_parsers/test_step3p5_tool_parser.py b/tests/tool_parsers/test_step3p5_tool_parser.py new file mode 100644 index 000000000000..6da1e08550a2 --- /dev/null +++ b/tests/tool_parsers/test_step3p5_tool_parser.py @@ -0,0 +1,1435 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json +from collections.abc import Generator + +import pytest + +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionRequest, + ChatCompletionToolsParam, +) +from vllm.entrypoints.openai.engine.protocol import ( + DeltaMessage, + FunctionCall, + ToolCall, +) +from vllm.tokenizers import TokenizerLike, get_tokenizer +from vllm.tokenizers.detokenizer_utils import detokenize_incrementally +from vllm.tool_parsers.step3p5_tool_parser import Step3p5ToolParser + +MODEL = "stepfun-ai/Step-3.5-Flash" + + +@pytest.fixture(scope="module") +def step3p5_tokenizer(): + return get_tokenizer(tokenizer_name=MODEL) + + +@pytest.fixture +def step3p5_tool_parser(step3p5_tokenizer): + return Step3p5ToolParser(step3p5_tokenizer) + + +@pytest.fixture +def sample_tools(): + return [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city name"}, + "state": {"type": "string", "description": "The state code"}, + "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]}, + }, + "required": ["city", "state"], + }, + }, + ), + ChatCompletionToolsParam( + type="function", + function={ + "name": "calculate_area", + "description": "Calculate area of a shape", + "parameters": { + "type": "object", + "properties": { + "shape": {"type": "string"}, + "dimensions": {"type": "object"}, + "precision": {"type": "integer"}, + }, + }, + }, + ), + ] + + +def assert_tool_calls( + actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall] +): + assert len(actual_tool_calls) == len(expected_tool_calls) + + for actual_tool_call, expected_tool_call in zip( + actual_tool_calls, expected_tool_calls + ): + assert actual_tool_call.type == "function" + assert actual_tool_call.function.name == expected_tool_call.function.name + assert json.loads(actual_tool_call.function.arguments) == json.loads( + expected_tool_call.function.arguments + ) + + +def stream_delta_message_generator( + step3p5_tool_parser, + step3p5_tokenizer: TokenizerLike, + model_output: str, + request: ChatCompletionRequest | None = None, +) -> Generator[DeltaMessage, None, None]: + all_token_ids = step3p5_tokenizer.encode(model_output, add_special_tokens=False) + + previous_text = "" + previous_tokens = None + prefix_offset = 0 + read_offset = 0 + for i, delta_token in enumerate(all_token_ids): + delta_token_ids = [delta_token] + previous_token_ids = all_token_ids[:i] + current_token_ids = all_token_ids[: i + 1] + + (new_tokens, delta_text, new_prefix_offset, new_read_offset) = ( + detokenize_incrementally( + tokenizer=step3p5_tokenizer, + all_input_ids=current_token_ids, + prev_tokens=previous_tokens, + prefix_offset=prefix_offset, + read_offset=read_offset, + skip_special_tokens=False, + spaces_between_special_tokens=True, + ) + ) + + current_text = previous_text + delta_text + + delta_message = step3p5_tool_parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request=request, + ) + if delta_message: + yield delta_message + + previous_text = current_text + previous_tokens = ( + previous_tokens + new_tokens if previous_tokens else new_tokens + ) + prefix_offset = new_prefix_offset + read_offset = new_read_offset + + +def stream_delta_message_generator_from_chunks( + step3p5_tool_parser, + step3p5_tokenizer: TokenizerLike, + delta_text_chunks: list[str], + request: ChatCompletionRequest | None = None, +) -> Generator[DeltaMessage, None, None]: + previous_text = "" + previous_token_ids: list[int] = [] + + for delta_text in delta_text_chunks: + delta_token_ids = step3p5_tokenizer.encode(delta_text, add_special_tokens=False) + current_text = previous_text + delta_text + current_token_ids = previous_token_ids + delta_token_ids + + delta_message = step3p5_tool_parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request=request, + ) + if delta_message: + yield delta_message + + previous_text = current_text + previous_token_ids = current_token_ids + + +def test_extract_tool_calls_no_tools(step3p5_tool_parser): + model_output = "This is a test response without any tool calls" + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=None + ) # type: ignore[arg-type] + assert not extracted_tool_calls.tools_called + assert extracted_tool_calls.tool_calls == [] + assert extracted_tool_calls.content == model_output + + +@pytest.mark.parametrize( + ids=[ + "single_tool", + "single_tool_with_content", + "single_tool_multiline_param", + "parallel_tools", + "tool_with_typed_params", + ], + argnames=["model_output", "expected_tool_calls", "expected_content"], + argvalues=[ + ( + """ + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + None, + ), + ( + """Sure! Let me check the weather for you. + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + "Sure! Let me check the weather for you.", + ), + ( + """ + + +rectangle + + +{"width": 10, + "height": 20} + + +2 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "rectangle", + "dimensions": {"width": 10, "height": 20}, + "precision": 2, + } + ), + ) + ) + ], + None, + ), + ( + """ + + +Dallas + + +TX + + +fahrenheit + + + + + + +Orlando + + +FL + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ), + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Orlando", "state": "FL", "unit": "fahrenheit"} + ), + ) + ), + ], + None, + ), + ( + """Let me calculate that area for you. + + +circle + + +{"radius": 15.5} + + +3 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "circle", + "dimensions": {"radius": 15.5}, + "precision": 3, + } + ), + ) + ) + ], + "Let me calculate that area for you.", + ), + ], +) +def test_extract_tool_calls( + step3p5_tool_parser, + sample_tools, + model_output, + expected_tool_calls, + expected_content, +): + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + assert extracted_tool_calls.tools_called + + assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls) + + assert extracted_tool_calls.content == expected_content + + +def test_extract_tool_calls_fallback_no_tags(step3p5_tool_parser, sample_tools): + """Test fallback parsing when XML tags are missing""" + model_output = """ + +Dallas + + +TX + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 1 + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" + + +def test_extract_tool_calls_type_conversion(step3p5_tool_parser): + """Test parameter type conversion based on tool schema""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "test_types", + "parameters": { + "type": "object", + "properties": { + "int_param": {"type": "integer"}, + "float_param": {"type": "float"}, + "bool_param": {"type": "boolean"}, + "str_param": {"type": "string"}, + "obj_param": {"type": "object"}, + }, + }, + }, + ) + ] + + model_output = """ + + +42 + + +3.14 + + +true + + +hello world + + +{"key": "value"} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args["int_param"] == 42 + assert args["float_param"] == 3.14 + assert args["bool_param"] is True + assert args["str_param"] == "hello world" + assert args["obj_param"] == {"key": "value"} + + +@pytest.mark.parametrize( + ids=[ + "no_tools", + "single_tool", + "single_tool_with_content", + "single_tool_multiline_param", + "parallel_tools", + "tool_with_typed_params", # Added this test case + ], + argnames=["model_output", "expected_tool_calls", "expected_content"], + argvalues=[ + ("This is a test without tools", [], "This is a test without tools"), + ( + """ + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + None, + ), + ( + """Sure! Let me check the weather for you. + + +Dallas + + +TX + + +fahrenheit + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ) + ], + "Sure! Let me check the weather for you.", + ), + ( + """ + + +rectangle + + +{"width": 10, + "height": 20} + + +2 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "rectangle", + "dimensions": {"width": 10, "height": 20}, + "precision": 2, + } + ), + ) + ) + ], + None, + ), + ( + """ + + +Dallas + + +TX + + +fahrenheit + + + + + + +Orlando + + +FL + + +celsius + + +""", + [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Dallas", "state": "TX", "unit": "fahrenheit"} + ), + ) + ), + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps( + {"city": "Orlando", "state": "FL", "unit": "celsius"} + ), + ) + ), + ], + None, + ), + # Added tool_with_typed_params test case + ( + """Let me calculate that area for you. + + +circle + + +{"radius": 15.5} + + +3 + + +""", + [ + ToolCall( + function=FunctionCall( + name="calculate_area", + arguments=json.dumps( + { + "shape": "circle", + "dimensions": {"radius": 15.5}, + "precision": 3, + } + ), + ) + ) + ], + "Let me calculate that area for you.", + ), + ], +) +def test_extract_tool_calls_streaming( + step3p5_tool_parser, + step3p5_tokenizer, + sample_tools, + model_output, + expected_tool_calls, + expected_content, +): + """Test incremental streaming behavior including typed parameters""" + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} # Track state per tool index + + for delta_message in stream_delta_message_generator( + step3p5_tool_parser, step3p5_tokenizer, model_output, request + ): + # role should never be streamed from tool parser + assert not delta_message.role + + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + + # Initialize state for new tool + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + # First chunk should have id, name, and type + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + # Should only be set once + assert tool_states[idx]["name"] is None + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + # Accumulate arguments incrementally + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Verify final content + assert other_content == (expected_content or "") # Handle None case + + # Verify we got all expected tool calls + assert len(tool_states) == len(expected_tool_calls) + + # Verify each tool call + for idx, expected_tool in enumerate(expected_tool_calls): + state = tool_states[idx] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == expected_tool.function.name + + # Parse accumulated arguments + arguments_str = state["arguments"] + assert arguments_str is not None + actual_args = json.loads(arguments_str) + expected_args = json.loads(expected_tool.function.arguments) + assert actual_args == expected_args + + +def test_extract_tool_calls_missing_closing_parameter_tag( + step3p5_tool_parser, sample_tools +): + """Test handling of missing closing tag""" + # Using get_current_weather from sample_tools but with malformed XML + model_output = """Let me check the weather for you: + + + +Dallas + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + # The parser should handle the malformed XML gracefully + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 1 + + # Verify the function name is correct + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" + + # Verify the arguments are parsed despite the missing closing tag + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert "city" in args + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + # Check that content before the tool call is preserved + assert "Let me check the weather for you:" in extracted_tool_calls.content + + +def test_extract_tool_calls_streaming_missing_closing_tag( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Test streaming with missing closing tag""" + # Using get_current_weather from sample_tools but with malformed XML + model_output = """Let me check the weather for you: + + + +Dallas + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + step3p5_tool_parser, step3p5_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Verify content was streamed + assert "Let me check the weather for you:" in other_content + + # Verify we got the tool call + assert len(tool_states) == 1 + state = tool_states[0] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == "get_current_weather" + + # Verify arguments were parsed correctly despite missing closing tag + assert state["arguments"] is not None + args = json.loads(state["arguments"]) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" + + +def test_extract_tool_calls_streaming_incremental( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Test that streaming is truly incremental""" + model_output = """I'll check the weather. + + +Dallas + + +TX + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + chunks = [] + for delta_message in stream_delta_message_generator( + step3p5_tool_parser, step3p5_tokenizer, model_output, request + ): + chunks.append(delta_message) + + # Should have multiple chunks + assert len(chunks) > 3 + + # First chunk(s) should be content + assert chunks[0].content is not None + assert chunks[0].tool_calls is None or chunks[0].tool_calls == [] + + # Should have a chunk with tool header (id, name, type) + header_found = False + for chunk in chunks: + if chunk.tool_calls and chunk.tool_calls[0].id: + header_found = True + assert chunk.tool_calls[0].function.name == "get_current_weather" + assert chunk.tool_calls[0].type == "function" + # Empty initially + assert chunk.tool_calls[0].function.arguments == "" + break + assert header_found + + # Should have chunks with incremental arguments + arg_chunks = [] + for chunk in chunks: + if chunk.tool_calls and chunk.tool_calls[0].function.arguments: + arg_chunks.append(chunk.tool_calls[0].function.arguments) + + # Arguments should be streamed incrementally + assert len(arg_chunks) > 1 + + # Concatenated arguments should form valid JSON + full_args = "".join(arg_chunks) + parsed_args = json.loads(full_args) + assert parsed_args["city"] == "Dallas" + assert parsed_args["state"] == "TX" + + +def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser): + """Test parameter type conversion based on tool schema""" + tools = [ + ChatCompletionToolsParam( + type="function", + function={ + "name": "test_types", + "parameters": { + "type": "object", + "properties": { + "int_param": {"type": "integer"}, + "float_param": {"type": "float"}, + "bool_param": {"type": "boolean"}, + "str_param": {"type": "string"}, + "obj_param": {"type": "object"}, + }, + }, + }, + ) + ] + + model_output = """ + + +{'key': 'value'} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args["obj_param"] == {"key": "value"} + + +def test_extract_tool_calls_streaming_mixed_content_and_multiple_tool_calls( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Test mixed content with multiple complete tool calls. + + Scenario: Model outputs "hello" + complete tool call + "hi" + complete tool call. + Expected: "hello" as content, first tool call parsed (index=0), "hi" as content, + second tool call parsed (index=1). + """ + # Model output: hello + complete tool call + hi + complete tool call + model_output = """hello + + +Dallas + + +TX + + +hi + + +rectangle + + +{"width": 10, "height": 5} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + step3p5_tool_parser, step3p5_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Should have exactly two complete tool calls + assert len(tool_states) == 2, "Should have exactly two complete tool calls" + + # Verify the first tool call (index=0) + assert tool_states[0]["name"] == "get_current_weather" + assert tool_states[0]["arguments"] + args_dict_0 = json.loads(tool_states[0]["arguments"]) + assert args_dict_0["city"] == "Dallas" + assert args_dict_0["state"] == "TX" + + # Verify the second tool call (index=1) + assert tool_states[1]["name"] == "calculate_area" + assert tool_states[1]["arguments"] + args_dict_1 = json.loads(tool_states[1]["arguments"]) + assert args_dict_1["shape"] == "rectangle" + assert isinstance(args_dict_1["dimensions"], dict), "dimensions should be a dict" + assert args_dict_1["dimensions"]["width"] == 10 + assert args_dict_1["dimensions"]["height"] == 5 + # Verify content: should contain "hello", "hi" + assert "hello" in other_content, "Should contain 'hello' as content" + assert "hi" in other_content, "Should contain 'hi' as content" + + # Verify the order: hello should come first, then hi + hello_index = other_content.find("hello") + hi_index = other_content.find("hi") + + assert hello_index >= 0, "'hello' should be in content" + assert hi_index > hello_index, "'hi' should come after 'hello'" + + # Verify that tool call tags are NOT in the content + # We should not see complete tool call structures in content + assert "" not in other_content, ( + "First tool call should not be in content" + ) + assert "" not in other_content, ( + "Second tool call should not be in content" + ) + + +def test_extract_tool_calls_non_streaming_mixed_content_and_multiple_tool_calls( + step3p5_tool_parser, sample_tools +): + """Test non-streaming extraction with mixed content and multiple tool calls. + + Scenario: Model outputs "hello" + complete tool call + "hi" + complete tool call. + Expected: "hello" as content, first tool call parsed (index=0), "hi" as content, + second tool call parsed (index=1) + """ + # Model output: hello + complete tool call + hi + complete tool call + model_output = """hello + + +Dallas + + +TX + + +hi + + +rectangle + + +{"width": 10, "height": 5} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + # Should have exactly two complete tool calls + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 2, ( + "Should have exactly two complete tool calls" + ) + + # Verify the first tool call (index=0) + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" + args_dict_0 = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args_dict_0["city"] == "Dallas" + assert args_dict_0["state"] == "TX" + + # Verify the second tool call (index=1) + assert extracted_tool_calls.tool_calls[1].function.name == "calculate_area" + args_dict_1 = json.loads(extracted_tool_calls.tool_calls[1].function.arguments) + assert args_dict_1["shape"] == "rectangle" + assert isinstance(args_dict_1["dimensions"], dict), "dimensions should be a dict" + assert args_dict_1["dimensions"]["width"] == 10 + assert args_dict_1["dimensions"]["height"] == 5 + + # Verify content: should contain "hello", "hi" + assert extracted_tool_calls.content is not None + assert "hello" in extracted_tool_calls.content, "Should contain 'hello' as content" + assert "hi" in extracted_tool_calls.content, "Should contain 'hi' as content" + + # Verify the order: hello should come first, then hi + hello_index = extracted_tool_calls.content.find("hello") + hi_index = extracted_tool_calls.content.find("hi") + + assert hello_index >= 0, "'hello' should be in content" + assert hi_index > hello_index, "'hi' should come after 'hello'" + + # Verify that tool call tags are NOT in the content + assert "" not in extracted_tool_calls.content, ( + "First tool call should not be in content" + ) + assert "" not in extracted_tool_calls.content, ( + "Second tool call should not be in content" + ) + + +def test_extract_tool_calls_streaming_full_input_mixed_content_and_multiple_tool_calls( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Test streaming with entire input as single delta_text. + + Scenario: Model outputs "hello" + complete tool call + "hi" + complete tool call. + This test simulates the case where the entire input is sent as a single delta_text. + Expected: "hello" as content, first tool call parsed (index=0), "hi" as content, + second tool call parsed (index=1). + """ + # Model output: hello + complete tool call + hi + complete tool call + model_output = """hello + + +Dallas + + +TX + + +hi + + +rectangle + + +{"width": 10, "height": 5} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} + + # Encode all content tokens at once + all_token_ids = step3p5_tokenizer.encode(model_output, add_special_tokens=False) + eos_token_id = getattr(step3p5_tokenizer, "eos_token_id", None) + + # Include EOS token in delta_token_ids if available + if eos_token_id is not None: + delta_token_ids = all_token_ids + [eos_token_id] + else: + delta_token_ids = all_token_ids + + # current_token_ids includes all content tokens (EOS is not part of the text) + current_token_ids = all_token_ids + previous_token_ids: list[int] = [] + + # Decode all tokens to get the full text + current_text = step3p5_tokenizer.decode( + current_token_ids, skip_special_tokens=False + ) + previous_text = "" + delta_text = current_text + + # Call parser once with all tokens including EOS + delta_result = step3p5_tool_parser.extract_tool_calls_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + request=request, + ) + + # Process delta result + if delta_result: + if delta_result.content: + other_content += delta_result.content + if delta_result.tool_calls: + for tool_call in delta_result.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + if tool_call.type: + tool_states[idx]["type"] = tool_call.type + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Should have exactly two complete tool calls + assert len(tool_states) == 2, "Should have exactly two complete tool calls" + + # Verify the first tool call (index=0) + assert tool_states[0]["name"] == "get_current_weather" + assert tool_states[0]["arguments"] + args_dict_0 = json.loads(tool_states[0]["arguments"]) + assert args_dict_0["city"] == "Dallas" + assert args_dict_0["state"] == "TX" + + # Verify the second tool call (index=1) + assert tool_states[1]["name"] == "calculate_area" + assert tool_states[1]["arguments"] + args_dict_1 = json.loads(tool_states[1]["arguments"]) + assert args_dict_1["shape"] == "rectangle" + assert isinstance(args_dict_1["dimensions"], dict), "dimensions should be a dict" + assert args_dict_1["dimensions"]["width"] == 10 + assert args_dict_1["dimensions"]["height"] == 5 + + # Verify content: should contain "hello", "hi" + assert "hello" in other_content, "Should contain 'hello' as content" + assert "hi" in other_content, "Should contain 'hi' as content" + + # Verify the order: hello should come first, then hi + hello_index = other_content.find("hello") + hi_index = other_content.find("hi") + + assert hello_index >= 0, "'hello' should be in content" + assert hi_index > hello_index, "'hi' should come after 'hello'" + + # Verify that tool call tags are NOT in the content + assert "" not in other_content, ( + "First tool call should not be in content" + ) + assert "" not in other_content, ( + "Second tool call should not be in content" + ) + + +def test_extract_tool_calls_streaming_multiple_tool_calls_no_content_between( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Test multiple tool calls with no content between them. + + Scenario: Model outputs "hello" + tool call + tool call + Expected: "hello" as content, first tool call parsed (index=0), + second tool call parsed (index=1). + No content should appear between the two tool calls. + """ + # Model output: hello + tool call + tool call (no content between tool calls) + model_output = """hello + + +Dallas + + +TX + + + + + +rectangle + + +{"width": 10, "height": 5} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + step3p5_tool_parser, step3p5_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Should have exactly two complete tool calls + assert len(tool_states) == 2, "Should have exactly two complete tool calls" + + # Verify the first tool call (index=0) + assert tool_states[0]["name"] == "get_current_weather" + assert tool_states[0]["arguments"] + args_dict_0 = json.loads(tool_states[0]["arguments"]) + assert args_dict_0["city"] == "Dallas" + assert args_dict_0["state"] == "TX" + + # Verify the second tool call (index=1) + assert tool_states[1]["name"] == "calculate_area" + assert tool_states[1]["arguments"] + args_dict_1 = json.loads(tool_states[1]["arguments"]) + assert args_dict_1["shape"] == "rectangle" + assert isinstance(args_dict_1["dimensions"], dict), "dimensions should be a dict" + assert args_dict_1["dimensions"]["width"] == 10 + assert args_dict_1["dimensions"]["height"] == 5 + + assert "hello" in other_content, "Should contain 'hello' as content" + + # Verify that tool call tags are NOT in the content + assert "" not in other_content, ( + "First tool call should not be in content" + ) + assert "" not in other_content, ( + "Second tool call should not be in content" + ) + + +def test_extract_tool_calls_streaming_multi_token_chunk_boundary( + step3p5_tool_parser, step3p5_tokenizer, sample_tools +): + """Ensure fallback doesn't close a new tool_call when boundary is in one chunk.""" + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + delta_text_chunks = [ + """ + + +Sys""", + """ + + +""", + """ +<""", + """function=calculate_area> + +rectangle""", + """ + +""", + ] + boundary_chunk = delta_text_chunks[1] + assert len(step3p5_tokenizer.encode(boundary_chunk, add_special_tokens=False)) > 1 + + tool_states = {} + for delta_message in stream_delta_message_generator_from_chunks( + step3p5_tool_parser, step3p5_tokenizer, delta_text_chunks, request + ): + print(delta_message) + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + if idx not in tool_states: + tool_states[idx] = { + "name": None, + "arguments": "", + } + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + assert len(tool_states) == 2 + assert all(state["name"] for state in tool_states.values()) + assert tool_states[0]["name"] == "get_current_weather" + assert tool_states[1]["name"] == "calculate_area" + + +def test_extract_tool_calls_non_streaming_multiple_tool_calls_no_content_between( + step3p5_tool_parser, sample_tools +): + """Test non-streaming extraction with tool calls and no content between them. + + Scenario: Model outputs "hello" + tool call + tool call. + Expected: "hello" as content, first tool call parsed (index=0), + second tool call parsed (index=1). + No content should appear between the two tool calls. + """ + # Model output: hello + tool call + tool call (no content between tool calls) + model_output = """hello + + +Dallas + + +TX + + + + + +rectangle + + +{"width": 10, "height": 5} + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + extracted_tool_calls = step3p5_tool_parser.extract_tool_calls( + model_output, request=request + ) + + # Should have exactly two complete tool calls + assert extracted_tool_calls.tools_called + assert len(extracted_tool_calls.tool_calls) == 2, ( + "Should have exactly two complete tool calls" + ) + + # Verify the first tool call (index=0) + assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather" + args_dict_0 = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args_dict_0["city"] == "Dallas" + assert args_dict_0["state"] == "TX" + + # Verify the second tool call (index=1) + assert extracted_tool_calls.tool_calls[1].function.name == "calculate_area" + args_dict_1 = json.loads(extracted_tool_calls.tool_calls[1].function.arguments) + assert args_dict_1["shape"] == "rectangle" + assert isinstance(args_dict_1["dimensions"], dict), "dimensions should be a dict" + assert args_dict_1["dimensions"]["width"] == 10 + assert args_dict_1["dimensions"]["height"] == 5 + + # Verify content: should contain "hello" + assert extracted_tool_calls.content is not None + assert "hello" in extracted_tool_calls.content, "Should contain 'hello' as content" + + # Verify that tool call tags are NOT in the content + assert "" not in extracted_tool_calls.content, ( + "First tool call should not be in content" + ) + assert "" not in extracted_tool_calls.content, ( + "Second tool call should not be in content" + )