Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions tests/tool_parsers/test_deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

import pytest

from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam,
FunctionDefinition,
)
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser

Expand All @@ -24,8 +28,8 @@
MOCK_TOKENIZER.get_vocab.return_value = {}


def make_parser(tools=None) -> DeepSeekV32ToolParser:
    """Build a DeepSeekV32ToolParser around the shared mock tokenizer.

    Args:
        tools: Optional tool definitions forwarded to the parser's
            ``tools`` keyword. Defaults to ``None`` so existing
            zero-argument callers keep working.

    Returns:
        A new parser instance constructed with ``MOCK_TOKENIZER``.
    """
    return DeepSeekV32ToolParser(MOCK_TOKENIZER, tools=tools)


def make_tool_param(name: str, params: dict) -> MagicMock:
Expand Down Expand Up @@ -275,20 +279,22 @@ def test_content_before_tool_call_streaming(self, parser):
content = "".join(d.content for d in deltas if d.content is not None)
assert "Thinking" in content

def test_type_conversion_in_streaming(self, parser):
tool = make_tool_param(
"add",
{
"type": "object",
"properties": {
"x": {"type": "integer"},
"y": {"type": "integer"},
def test_type_conversion_in_streaming(self):
tool = ChatCompletionToolsParam(
function=FunctionDefinition(
name="add",
parameters={
"type": "object",
"properties": {
"x": {"type": "integer"},
"y": {"type": "integer"},
},
},
},
),
)
request = make_request(tools=[tool])
parser = make_parser(tools=[tool])
full_text = build_tool_call("add", {"x": "3", "y": "4"})
deltas = self._stream(parser, full_text, request=request)
deltas = self._stream(parser, full_text)
args_str = self._reconstruct_args(deltas)
assert json.loads(args_str) == {"x": 3, "y": 4}

Expand Down
21 changes: 13 additions & 8 deletions tests/tool_parsers/test_glm47_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,8 @@ def glm47_tokenizer():


@pytest.fixture
def glm47_tool_parser(glm47_tokenizer):
return Glm47MoeModelToolParser(glm47_tokenizer)


@pytest.fixture
def mock_request() -> ChatCompletionRequest:
request = Mock(spec=ChatCompletionRequest)
request.tools = [
def sample_tools():
return [
ChatCompletionToolsParam(
function=FunctionDefinition(name="get_current_date", parameters={}),
),
Expand All @@ -49,6 +43,17 @@ def mock_request() -> ChatCompletionRequest:
),
),
]


@pytest.fixture
def glm47_tool_parser(glm47_tokenizer, sample_tools):
    """Provide a Glm47MoeModelToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return Glm47MoeModelToolParser(glm47_tokenizer, tools=sample_tools)


@pytest.fixture
def mock_request(sample_tools) -> ChatCompletionRequest:
    """Provide a mock chat-completion request carrying the shared tools.

    The mock is spec'd against ``ChatCompletionRequest``; its ``tools``
    attribute is the ``sample_tools`` fixture value and ``tool_choice``
    is set to ``"auto"``.
    """
    request = Mock(spec=ChatCompletionRequest)
    request.tools = sample_tools
    request.tool_choice = "auto"
    return request

Expand Down
59 changes: 32 additions & 27 deletions tests/tool_parsers/test_glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,26 @@ def glm4_moe_tokenizer():


@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer):
return Glm4MoeModelToolParser(glm4_moe_tokenizer)


@pytest.fixture
def mock_request() -> ChatCompletionRequest:
request = Mock(spec=ChatCompletionRequest)
request.tools = [ # GLM45 parser needs this attribute to enable tool parsing.
def sample_tools():
return [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="get_weather",
parameters={"city": {"type": "string"}},
),
),
]


@pytest.fixture
def glm4_moe_tool_parser(glm4_moe_tokenizer, sample_tools):
    """Provide a Glm4MoeModelToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=sample_tools)


@pytest.fixture
def mock_request(sample_tools) -> ChatCompletionRequest:
    """Provide a mock chat-completion request carrying the shared tools.

    The mock is spec'd against ``ChatCompletionRequest`` and its
    ``tools`` attribute is the ``sample_tools`` fixture value.
    """
    request = Mock(spec=ChatCompletionRequest)
    request.tools = sample_tools
    return request


Expand Down Expand Up @@ -671,14 +676,13 @@ def test_streaming_json_escape_in_string(glm4_moe_tool_parser, mock_request):
assert '"' in parsed["message"] or "world" in parsed["message"]


def test_streaming_long_content_incremental(glm4_moe_tool_parser):
def test_streaming_long_content_incremental(glm4_moe_tokenizer):
"""Test incremental streaming of long content (Issue #32829).

This is the core fix: for long string values like code (4000+ chars),
the parser should stream incrementally rather than buffering until
complete. This test verifies we get many fragments, not just 1-3.
"""
_reset_streaming_state(glm4_moe_tool_parser)

# Bubble sort example from Issue #32829 - realistic long content
bubble_sort_code = '''#!/usr/bin/env python3
Expand All @@ -705,27 +709,28 @@ def bubble_sort(arr):
sorted_arr = bubble_sort(test_arr.copy())
print(f"Sorted: {sorted_arr}")'''

# Create a request with tool schema to enable string type detection
# Create tools with schema to enable string type detection
# This is required for incremental streaming of string values
tools = [
ChatCompletionToolsParam(
function=FunctionDefinition(
name="write_to_file",
parameters={
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
),
),
]
glm4_moe_tool_parser = Glm4MoeModelToolParser(glm4_moe_tokenizer, tools=tools)
request = ChatCompletionRequest(
model=MODEL,
messages=[],
tools=[
{
"type": "function",
"function": {
"name": "write_to_file",
"parameters": {
"type": "object",
"properties": {
"file_path": {"type": "string"},
"content": {"type": "string"},
},
},
},
}
],
) # type: ignore
tools=tools,
)

# Simulate token-based streaming (special tags as single tokens)
chunks = [
Expand Down
22 changes: 10 additions & 12 deletions tests/tool_parsers/test_qwen3coder_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def qwen3_tokenizer():


@pytest.fixture
def qwen3_tool_parser(qwen3_tokenizer, sample_tools):
    """Provide a Qwen3CoderToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return Qwen3CoderToolParser(qwen3_tokenizer, tools=sample_tools)


@pytest.fixture
def qwen3_xml_tool_parser(qwen3_tokenizer, sample_tools):
    """Provide a Qwen3XMLToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return Qwen3XMLToolParser(qwen3_tokenizer, tools=sample_tools)


@pytest.fixture(params=["xml"])
Expand Down Expand Up @@ -376,7 +376,7 @@ def test_extract_tool_calls_fallback_no_tags(
assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"


def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
def test_extract_tool_calls_type_conversion(qwen3_tokenizer):
"""Test parameter type conversion based on tool schema"""
tools = [
ChatCompletionToolsParam(
Expand Down Expand Up @@ -417,10 +417,9 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
</function>
</tool_call>"""

parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request
)
extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)

args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
assert args["int_param"] == 42
Expand Down Expand Up @@ -859,7 +858,7 @@ def test_extract_tool_calls_streaming_incremental(


def test_extract_tool_calls_complex_type_with_single_quote(
qwen3_tool_parser_parametrized,
qwen3_tokenizer,
):
"""Test parameter type conversion based on tool schema"""
tools = [
Expand Down Expand Up @@ -889,10 +888,9 @@ def test_extract_tool_calls_complex_type_with_single_quote(
</function>
</tool_call>"""

parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request
)
extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)

args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
assert args["obj_param"] == {"key": "value"}
Expand Down
4 changes: 2 additions & 2 deletions tests/tool_parsers/test_seed_oss_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def seed_oss_tokenizer():


@pytest.fixture
def seed_oss_tool_parser(seed_oss_tokenizer, sample_tools):
    """Provide a SeedOssToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return SeedOssToolParser(seed_oss_tokenizer, tools=sample_tools)


@pytest.fixture
Expand Down
18 changes: 8 additions & 10 deletions tests/tool_parsers/test_step3p5_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def step3p5_tokenizer():


@pytest.fixture
def step3p5_tool_parser(step3p5_tokenizer, sample_tools):
    """Provide a Step3p5ToolParser built with the shared tool list.

    The parser receives the ``sample_tools`` fixture value through its
    ``tools`` keyword at construction time.
    """
    return Step3p5ToolParser(step3p5_tokenizer, tools=sample_tools)


@pytest.fixture
Expand Down Expand Up @@ -386,7 +386,7 @@ def test_extract_tool_calls_fallback_no_tags(step3p5_tool_parser, sample_tools):
assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"


def test_extract_tool_calls_type_conversion(step3p5_tool_parser):
def test_extract_tool_calls_type_conversion(step3p5_tokenizer):
"""Test parameter type conversion based on tool schema"""
tools = [
ChatCompletionToolsParam(
Expand Down Expand Up @@ -427,10 +427,9 @@ def test_extract_tool_calls_type_conversion(step3p5_tool_parser):
</function>
</tool_call>"""

parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools)
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = step3p5_tool_parser.extract_tool_calls(
model_output, request=request
)
extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)

args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
assert args["int_param"] == 42
Expand Down Expand Up @@ -864,7 +863,7 @@ def test_extract_tool_calls_streaming_incremental(
assert parsed_args["state"] == "TX"


def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser):
def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tokenizer):
"""Test parameter type conversion based on tool schema"""
tools = [
ChatCompletionToolsParam(
Expand Down Expand Up @@ -893,10 +892,9 @@ def test_extract_tool_calls_complex_type_with_single_quote(step3p5_tool_parser):
</function>
</tool_call>"""

parser = Step3p5ToolParser(step3p5_tokenizer, tools=tools)
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = step3p5_tool_parser.extract_tool_calls(
model_output, request=request
)
extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)

args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
assert args["obj_param"] == {"key": "value"}
Expand Down
11 changes: 10 additions & 1 deletion vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
ResponseFormatTextJSONSchemaConfig,
ResponseTextConfig,
)
from openai.types.responses.function_tool import FunctionTool

from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
Expand Down Expand Up @@ -54,7 +56,14 @@ def __init__(
self.streamed_args_for_tool: list[str] = []

self.model_tokenizer = tokenizer
self.tools = tools
if tools:
self.tools: list[ChatCompletionToolsParam | FunctionTool] = [
tool
for tool in tools
if isinstance(tool, (ChatCompletionToolsParam, FunctionTool))
]
else:
self.tools = []

@cached_property
def vocab(self) -> dict[str, int]:
Expand Down
9 changes: 3 additions & 6 deletions vllm/tool_parsers/deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,11 @@ def _convert_params_with_schema(
self,
function_name: str,
param_dict: dict[str, str],
request: ChatCompletionRequest | None,
) -> dict[str, Any]:
"""Convert raw string param values using the tool schema types."""
param_config: dict = {}
if request and request.tools:
for tool in request.tools:
if self.tools:
for tool in self.tools:
if (
hasattr(tool, "function")
and tool.function.name == function_name
Expand Down Expand Up @@ -238,9 +237,7 @@ def _extract_delta_tool_calls(
invoke_name, invoke_body = complete_invokes[self.current_tool_index]
param_dict = self._parse_invoke_params(invoke_body)

converted = self._convert_params_with_schema(
invoke_name, param_dict, request
)
converted = self._convert_params_with_schema(invoke_name, param_dict)
args_json = json.dumps(converted, ensure_ascii=False)
idx = self.current_tool_index
self.current_tool_index += 1
Expand Down
4 changes: 2 additions & 2 deletions vllm/tool_parsers/glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def extract_tool_calls(
for key, value in pairs:
arg_key = key.strip()
arg_val = value.strip()
if not self._is_string_type(tc_name, arg_key, request.tools):
if not self._is_string_type(tc_name, arg_key, self.tools):
arg_val = self._deserialize(arg_val)
logger.debug("arg_key = %s, arg_val = %s", arg_key, arg_val)
arg_dct[arg_key] = arg_val
Expand Down Expand Up @@ -327,7 +327,7 @@ def extract_tool_calls_streaming(
key = (self._pending_key or "").strip()

is_string = self._is_string_type(
self._current_tool_name, key, request.tools
self._current_tool_name, key, self.tools
)

if is_string:
Expand Down
2 changes: 1 addition & 1 deletion vllm/tool_parsers/internlm2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def extract_tool_calls(
request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
text = model_output
tools = request.tools
tools = self.tools
if "<|action_start|><|plugin|>" in text:
text, action = text.split("<|action_start|><|plugin|>")
action = action.split("<|action_end|>".strip())[0]
Expand Down
Loading
Loading