Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions tests/tool_parsers/test_qwen3coder_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,208 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
assert args["obj_param"] == {"key": "value"}


def test_extract_tool_calls_anyof_type_conversion(qwen3_tool_parser):
    """Check non-streaming type conversion for nullable (Pydantic v2) schemas.

    Pydantic v2 describes ``Optional[T]`` fields with ``anyOf``, e.g.
    ``Optional[int] -> {"anyOf": [{"type": "integer"}, {"type": "null"}]}``.
    The parser is expected to pick the non-null type and convert the raw
    parameter text accordingly rather than handing back a plain string.

    Schema shapes exercised here: ``anyOf`` with a single non-null variant,
    ``anyOf`` with several non-null variants, ``type`` given as an array,
    and a bare ``$ref``.
    """

    def nullable(*variants):
        # Build {"anyOf": [<variants...>, {"type": "null"}]}.
        return {"anyOf": [*variants, {"type": "null"}]}

    properties = {
        "anyof_int": {**nullable({"type": "integer"}), "default": 5},
        "anyof_str": nullable({"type": "string"}),
        "anyof_array": nullable({"type": "array", "items": {"type": "string"}}),
        "anyof_obj": nullable({"type": "object"}),
        "type_as_array": {"type": ["integer", "null"]},
        "multi_non_null": nullable({"type": "string"}, {"type": "integer"}),
        "ref_param": {"$ref": "#/$defs/ToolInput"},
    }
    tools = [
        ChatCompletionToolsParam(
            type="function",
            function={
                "name": "test_anyof",
                "parameters": {"type": "object", "properties": properties},
            },
        )
    ]

    model_output = """<tool_call>
<function=test_anyof>
<parameter=anyof_int>
5
</parameter>
<parameter=anyof_str>
hello
</parameter>
<parameter=anyof_array>
["a", "b", "c"]
</parameter>
<parameter=anyof_obj>
{"key": "value"}
</parameter>
<parameter=type_as_array>
42
</parameter>
<parameter=multi_non_null>
some text
</parameter>
<parameter=ref_param>
{"city": "Paris"}
</parameter>
</function>
</tool_call>"""

    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
    extracted = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
    args = json.loads(extracted.tool_calls[0].function.arguments)

    # Parameter name -> (expected value, expected Python type after conversion).
    expected = {
        "anyof_int": (5, int),
        "anyof_str": ("hello", str),
        "anyof_array": (["a", "b", "c"], list),
        "anyof_obj": ({"key": "value"}, dict),
        "type_as_array": (42, int),
        # Multi non-null: anyOf[string, integer, null] -> first non-null is string.
        "multi_non_null": ("some text", str),
        # $ref: treated as object, parsed via json.loads.
        "ref_param": ({"city": "Paris"}, dict),
    }
    for name, (value, python_type) in expected.items():
        assert args[name] == value
        assert isinstance(args[name], python_type)


def test_extract_tool_calls_anyof_type_conversion_streaming(
    qwen3_tool_parser, qwen3_tokenizer
):
    """Check streaming type conversion for nullable (Pydantic v2) schemas.

    The model output is fed through ``stream_delta_message_generator`` and
    the emitted tool-call deltas are stitched back together per tool index.
    The reassembled arguments must be valid JSON whose values carry the
    types resolved from the ``anyOf`` / ``$ref`` parameter schemas.
    """
    tools = [
        ChatCompletionToolsParam(
            type="function",
            function={
                "name": "search_web",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "anyOf": [
                                {"type": "string"},
                                {"type": "null"},
                            ],
                        },
                        "count": {
                            "anyOf": [
                                {"type": "integer"},
                                {"type": "null"},
                            ],
                            "default": 5,
                        },
                        "verbose": {
                            "anyOf": [
                                {"type": "boolean"},
                                {"type": "null"},
                            ],
                        },
                        "filters": {
                            "$ref": "#/$defs/SearchFilters",
                        },
                    },
                },
            },
        )
    ]

    model_output = """<tool_call>
<function=search_web>
<parameter=query>
vllm tool parser
</parameter>
<parameter=count>
10
</parameter>
<parameter=verbose>
true
</parameter>
<parameter=filters>
{"lang": "en", "year": 2025}
</parameter>
</function>
</tool_call>"""

    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)

    # Accumulate the streamed fragments per tool-call index.
    tool_states = {}
    for delta_message in stream_delta_message_generator(
        qwen3_tool_parser, qwen3_tokenizer, model_output, request
    ):
        if not delta_message.tool_calls:
            continue
        for tool_call in delta_message.tool_calls:
            state = tool_states.setdefault(
                tool_call.index, {"name": None, "arguments": ""}
            )
            function = tool_call.function
            if not function:
                continue
            if function.name:
                state["name"] = function.name
            if function.arguments is not None:
                state["arguments"] += function.arguments

    assert len(tool_states) == 1
    assert tool_states[0]["name"] == "search_web"
    # "arguments" starts as "" and is only ever appended to, so an
    # ``is not None`` check could never fail; require streamed content instead.
    assert tool_states[0]["arguments"]
    args = json.loads(tool_states[0]["arguments"])
    assert args["query"] == "vllm tool parser"
    assert isinstance(args["query"], str)
    assert args["count"] == 10
    assert isinstance(args["count"], int)
    assert args["verbose"] is True
    assert isinstance(args["verbose"], bool)
    # $ref: treated as object, parsed via json.loads
    assert args["filters"] == {"lang": "en", "year": 2025}
    assert isinstance(args["filters"], dict)


@pytest.mark.parametrize(
ids=[
"no_tools",
Expand Down
66 changes: 52 additions & 14 deletions vllm/tool_parsers/qwen3coder_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,58 @@ def _get_arguments_config(
logger.debug("Tool '%s' is not defined in the tools list.", func_name)
return {}

@staticmethod
def _first_non_null_type(type_value: Any) -> str | None:
"""Extract the first non-null type from a type value.

Handles both scalar types ("integer") and type-as-array
(["integer", "null"]) per JSON Schema spec.
"""
if isinstance(type_value, list):
return next(
(
str(t).strip().lower()
for t in type_value
if t is not None and str(t).lower() != "null"
),
None,
)
if type_value is not None and str(type_value).lower() != "null":
return str(type_value).strip().lower()
return None

def _resolve_param_type(self, param_def: dict) -> str:
"""Resolve the effective type string from a parameter definition.

Handles direct "type" fields (including type-as-array),
anyOf/oneOf schemas emitted by Pydantic v2 for Optional[T],
and $ref schemas from Pydantic model inputs.
"""
if "type" in param_def:
resolved = self._first_non_null_type(param_def["type"])
return resolved or "string"

if "anyOf" in param_def or "oneOf" in param_def:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

Thanks. Could you write an end-to-end test for this?

Copy link
Copy Markdown
Contributor Author

@AAISSJ AAISSJ Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @chaunceyjiang

I've added a streaming e2e test (test_extract_tool_calls_anyof_type_conversion_streaming) that exercises the full pipeline: tokenize → incremental decode → extract_tool_calls_streaming with anyOf nullable schemas. Both non-streaming and streaming paths now have coverage for type resolution.

Update: Also added $ref coverage to the streaming e2e test — the filters parameter uses {"$ref": "#/$defs/SearchFilters"} to verify that $ref schemas are correctly resolved to "object" in the streaming pipeline.

Please let me know if there's anything else you'd like me to address.

  • streaming e2e test commit: 4e076bd
  • $ref streaming coverage commit: 22cdf04

variants = param_def.get("anyOf") or param_def.get("oneOf", [])
for v in variants:
if not isinstance(v, dict):
continue
resolved = self._first_non_null_type(v.get("type"))
if resolved:
return resolved

# $ref points to a schema definition (e.g. a Pydantic model).
# The referenced type is almost always an object, so treat it
# as such to route through json.loads.
if "$ref" in param_def:
return "object"

return "string"

def _convert_param_value(
self, param_value: str, param_name: str, param_config: dict, func_name: str
) -> Any:
"""Convert parameter value based on its type in the schema."""
# Handle null value for any type
if param_value.lower() == "null":
return None

Expand All @@ -152,19 +199,10 @@ def _convert_param_value(
)
return param_value

if (
isinstance(param_config[param_name], dict)
and "type" in param_config[param_name]
):
param_type = str(param_config[param_name]["type"]).strip().lower()
elif (
isinstance(param_config[param_name], dict)
and "anyOf" in param_config[param_name]
):
# anyOf has no top-level "type"; treat as object to trigger json.loads.
param_type = "object"
else:
param_type = "string"
if not isinstance(param_config[param_name], dict):
return param_value

param_type = self._resolve_param_type(param_config[param_name])
if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
return param_value
elif (
Expand Down
Loading