Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
bfe136a
update the logic of tool parser.
Seven-Streams Apr 28, 2026
76899e1
finish the test.
Seven-Streams Apr 5, 2026
5a984a1
update the qwen_coder.
Seven-Streams Apr 28, 2026
95e64e7
update the logic of get stag.
Seven-Streams Apr 29, 2026
7a3bbd0
update the test.
Seven-Streams Apr 29, 2026
3f5e0f3
fix the tool_choice type.
Seven-Streams Apr 29, 2026
db9ccc6
fix the test.
Seven-Streams Apr 29, 2026
8b246f9
Revert "fix the test."
Seven-Streams Apr 29, 2026
a5a5277
fix the validation.
Seven-Streams Apr 29, 2026
2de7bbd
fix the test.
Seven-Streams Apr 29, 2026
e70a720
update the version of xgr.
Seven-Streams Apr 29, 2026
7dfbd4d
fix the tool type.
Seven-Streams Apr 29, 2026
f06ccda
fix the test.
Seven-Streams Apr 29, 2026
f7c8c91
fix the import.
Seven-Streams Apr 29, 2026
5fbb503
fix the import.
Seven-Streams Apr 29, 2026
8a09479
update the api.
Seven-Streams Apr 29, 2026
098b80c
add v4 tests.
Seven-Streams Apr 29, 2026
15c99cb
update.
Seven-Streams Apr 29, 2026
93fc4b4
update hte priority.
Seven-Streams Apr 29, 2026
cbc745e
fix the test.
Seven-Streams Apr 29, 2026
894871f
fix the import.
Seven-Streams Apr 29, 2026
b3bf271
update the version of xgr.
Seven-Streams Apr 29, 2026
2e9478b
Merge branch 'main' into main-dev/2026-03-25/new_stag
mgoin May 1, 2026
1ecff43
Lint
mgoin May 1, 2026
6ca893e
Move structural tag builders into vLLM
Ubospica May 3, 2026
9ae5478
Drop non-target structural tag changes
Ubospica May 3, 2026
d962b80
Centralize structural tag xgrammar imports
Ubospica May 3, 2026
7d90832
Rename Qwen structural tag key
Ubospica May 3, 2026
760e5af
Inline structural tag builders
Ubospica May 3, 2026
4bd7d72
Stop Qwen 3.5 structural tag after first tool call
Ubospica May 3, 2026
1e94b99
Fix Qwen structural tag parsing
Ubospica May 3, 2026
deaf07a
Normalize tool schemas for Qwen structural tags
Ubospica May 3, 2026
e4285c7
Allow multiple Qwen structural tool calls
Ubospica May 3, 2026
e6ec236
format.
Seven-Streams May 3, 2026
260dea2
add the requirement.
Seven-Streams May 3, 2026
c6f98c5
avoid overwriting user's setting.
Seven-Streams May 3, 2026
306d220
a
Ubospica May 4, 2026
5fb2f35
format.
Seven-Streams May 4, 2026
e6fa4e3
set the flag off as default.
Seven-Streams May 4, 2026
dfda37c
update and fix bug
Ubospica May 4, 2026
07eb072
update
Ubospica May 4, 2026
376b84e
update
Ubospica May 4, 2026
45d43b6
format.
Ubospica May 4, 2026
ad4395a
Fix failing qwen3coder test
sfeng33 May 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ outlines_core == 0.2.14
# required for outlines backend disk cache
diskcache == 5.6.3
lark == 1.2.2
xgrammar >= 0.1.32, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
xgrammar >= 0.1.34, < 1.0.0; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
typing_extensions >= 4.10
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser # used for parsing partial JSON outputs
Expand Down
2 changes: 1 addition & 1 deletion requirements/test/rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,7 @@ wrapt==2.1.2
# via smart-open
xarray==2026.2.0
# via rioxarray
xgrammar==0.1.33
xgrammar==0.1.34
# via
# -c requirements/common.txt
# -r requirements/test/../common.txt
Expand Down
118 changes: 118 additions & 0 deletions tests/tool_parsers/test_deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,20 @@
from unittest.mock import MagicMock

import pytest
from xgrammar import StructuralTag

from tests.tool_parsers.utils import run_tool_extraction_streaming
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionToolsParam,
FunctionDefinition,
)
from vllm.tokenizers import get_tokenizer
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
ChatCompletionNamedToolChoiceParam,
ChatCompletionNamedFunction,
)
from vllm.tool_parsers.deepseekv32_tool_parser import DeepSeekV32ToolParser

# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -48,6 +55,43 @@ def make_request(tools=None) -> MagicMock:
return req


@pytest.fixture
def sample_tools() -> list[ChatCompletionToolsParam]:
    """Provide two function-tool definitions for the structural tag tests.

    The first tool (``get_current_weather``) declares a ``required`` list and
    an enum-constrained parameter; the second (``calculate_area``) declares
    no ``required`` list and includes an object-typed parameter.
    """
    weather_parameters = {
        "type": "object",
        "properties": {
            "city": {"type": "string", "description": "The city name"},
            "state": {"type": "string", "description": "The state code"},
            "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
        },
        "required": ["city", "state"],
    }
    area_parameters = {
        "type": "object",
        "properties": {
            "shape": {"type": "string"},
            "dimensions": {"type": "object"},
            "precision": {"type": "integer"},
        },
    }

    weather_tool = ChatCompletionToolsParam(
        type="function",
        function={
            "name": "get_current_weather",
            "description": "Get the current weather",
            "parameters": weather_parameters,
        },
    )
    area_tool = ChatCompletionToolsParam(
        type="function",
        function={
            "name": "calculate_area",
            "description": "Calculate area of a shape",
            "parameters": area_parameters,
        },
    )
    return [weather_tool, area_tool]


# Shorthand for the DSML tokens used throughout
FC_START = "<|DSML|function_calls>"
FC_END = "</|DSML|function_calls>"
Expand Down Expand Up @@ -797,3 +841,77 @@ def test_convert_param_value_checked_helper(parser):
assert parser._convert_param_value("null", "integer") is None
assert parser._convert_param_value("null", "boolean") is None
assert parser._convert_param_value("null", "object") is None


def test_support_builtin_structural_tag():
    """A freshly built parser reports that it supports structural tags."""
    parser = make_parser()
    supported = parser.support_structural_tag()
    assert supported is True


def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
    sample_tools: list[ChatCompletionToolsParam],
) -> None:
    """Check ``get_structural_tag`` yields a ``StructuralTag`` per tool_choice.

    Covers ``"auto"``, ``"required"`` and a named tool choice that targets
    the first tool from ``sample_tools``.
    """
    # Fail loudly on an empty fixture instead of silently skipping the
    # named-tool-choice case behind an ``if`` guard.
    assert sample_tools, "sample_tools fixture must provide at least one tool"

    parser = make_parser()
    named_choice = ChatCompletionNamedToolChoiceParam(
        function=ChatCompletionNamedFunction(name=sample_tools[0].function.name),
    )

    for tool_choice in ("auto", "required", named_choice):
        req = ChatCompletionRequest(
            messages=[],
            model="m",
            tools=sample_tools,
            tool_choice=tool_choice,
        )
        tag = parser.get_structural_tag(req)
        # The message identifies which mode failed, since all three share
        # one test function.
        assert isinstance(tag, StructuralTag), f"tool_choice={tool_choice!r}"


@pytest.mark.parametrize("include_reasoning", [True, False])
def test_adjust_request_auto_structural_tag_is_json_string(
    sample_tools: list[ChatCompletionToolsParam],
    include_reasoning: bool,
) -> None:
    """With ``tool_choice="auto"`` the structural tag is a JSON object string.

    Runs once with ``include_reasoning`` enabled and once disabled.
    """
    parser = make_parser()
    request = ChatCompletionRequest(
        messages=[],
        model="m",
        tools=sample_tools,
        tool_choice="auto",
        include_reasoning=include_reasoning,
    )

    adjusted = parser.adjust_request(request)

    structured = adjusted.structured_outputs
    assert structured is not None
    tag_payload = structured.structural_tag
    assert tag_payload is not None
    assert isinstance(tag_payload, str)
    # The serialized tag must decode to a JSON object, not a list or scalar.
    decoded = json.loads(tag_payload)
    assert isinstance(decoded, dict)


def test_adjust_request_required_uses_json_schema_not_structural_tag(
    sample_tools: list[ChatCompletionToolsParam],
) -> None:
    """With ``tool_choice="required"`` no structural tag is attached.

    ``structured_outputs`` must still be populated, but its
    ``structural_tag`` field stays ``None``.
    """
    parser = make_parser()
    request = ChatCompletionRequest(
        messages=[],
        model="m",
        tools=sample_tools,
        tool_choice="required",
    )

    structured = parser.adjust_request(request).structured_outputs
    assert structured is not None
    assert structured.structural_tag is None
120 changes: 120 additions & 0 deletions tests/tool_parsers/test_kimi_k2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@
from unittest.mock import MagicMock

import pytest
from xgrammar import StructuralTag

from tests.tool_parsers.utils import (
run_tool_extraction,
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
ChatCompletionNamedToolChoiceParam,
ChatCompletionNamedFunction,
)
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
Expand All @@ -20,6 +27,43 @@
MODEL = "moonshotai/Kimi-K2-Instruct"


@pytest.fixture
def sample_tools() -> list[ChatCompletionToolsParam]:
    """Provide two function-tool definitions for the structural tag tests.

    ``get_current_weather`` declares a ``required`` list and an enum
    parameter; ``calculate_area`` declares no ``required`` list.
    """
    function_specs = (
        {
            "name": "get_current_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "The city name"},
                    "state": {"type": "string", "description": "The state code"},
                    "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
                },
                "required": ["city", "state"],
            },
        },
        {
            "name": "calculate_area",
            "description": "Calculate area of a shape",
            "parameters": {
                "type": "object",
                "properties": {
                    "shape": {"type": "string"},
                    "dimensions": {"type": "object"},
                    "precision": {"type": "integer"},
                },
            },
        },
    )
    # Wrap each raw function spec in the request-level tool model.
    return [
        ChatCompletionToolsParam(type="function", function=spec)
        for spec in function_specs
    ]


@pytest.fixture(scope="module")
def kimi_k2_tokenizer():
    """Return the tokenizer for ``MODEL``, built once per test module."""
    tokenizer = get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True)
    return tokenizer
Expand Down Expand Up @@ -463,6 +507,7 @@ def test_sets_skip_special_tokens_false(self, parser):
request = MagicMock(spec=ChatCompletionRequest)
request.tools = [{"type": "function", "function": {"name": "test"}}]
request.tool_choice = "auto"
request.include_reasoning = True
request.skip_special_tokens = True

result = parser.adjust_request(request)
Expand All @@ -472,6 +517,7 @@ def test_no_change_when_tool_choice_none(self, parser):
request = MagicMock(spec=ChatCompletionRequest)
request.tools = [{"type": "function", "function": {"name": "test"}}]
request.tool_choice = "none"
request.include_reasoning = True
request.skip_special_tokens = True

result = parser.adjust_request(request)
Expand All @@ -481,6 +527,7 @@ def test_no_change_when_no_tools(self, parser):
request = MagicMock(spec=ChatCompletionRequest)
request.tools = None
request.tool_choice = "auto"
request.include_reasoning = False
request.skip_special_tokens = True

result = parser.adjust_request(request)
Expand Down Expand Up @@ -580,3 +627,76 @@ def test_content_and_tool_call_in_single_chunk(self, kimi_k2_tokenizer):
assert len(rec.tool_calls) == 1
assert rec.tool_calls[0].function.name == "get_weather"
assert json.loads(rec.tool_calls[0].function.arguments) == {"city": "Beijing"}


def test_support_builtin_structural_tag(parser: KimiK2ToolParser):
    """The Kimi K2 parser reports that it supports structural tags."""
    supported = parser.support_structural_tag()
    assert supported is True


def test_get_xgrammar_builtin_structural_tag_returns_structural_tag(
    parser: KimiK2ToolParser,
    sample_tools: list[ChatCompletionToolsParam],
) -> None:
    """Check ``get_structural_tag`` yields a ``StructuralTag`` per tool_choice.

    Covers ``"auto"``, ``"required"`` and a named tool choice that targets
    the first tool from ``sample_tools``.
    """
    # Fail loudly on an empty fixture instead of silently skipping the
    # named-tool-choice case behind an ``if`` guard.
    assert sample_tools, "sample_tools fixture must provide at least one tool"

    named_choice = ChatCompletionNamedToolChoiceParam(
        function=ChatCompletionNamedFunction(name=sample_tools[0].function.name),
    )

    for tool_choice in ("auto", "required", named_choice):
        req = ChatCompletionRequest(
            messages=[],
            model="m",
            tools=sample_tools,
            tool_choice=tool_choice,
        )
        tag = parser.get_structural_tag(req)
        # The message identifies which mode failed, since all three share
        # one test function.
        assert isinstance(tag, StructuralTag), f"tool_choice={tool_choice!r}"

@pytest.mark.parametrize("include_reasoning", [True, False])
def test_adjust_request_auto_structural_tag_is_json_string(
    parser: KimiK2ToolParser,
    sample_tools: list[ChatCompletionToolsParam],
    include_reasoning: bool,
) -> None:
    """With ``tool_choice="auto"`` the structural tag is a JSON object string.

    Runs once with ``include_reasoning`` enabled and once disabled.
    """
    request = ChatCompletionRequest(
        messages=[],
        model="m",
        tools=sample_tools,
        tool_choice="auto",
        include_reasoning=include_reasoning,
    )

    adjusted = parser.adjust_request(request)

    structured = adjusted.structured_outputs
    assert structured is not None
    tag_payload = structured.structural_tag
    assert tag_payload is not None
    assert isinstance(tag_payload, str)
    # The serialized tag must decode to a JSON object, not a list or scalar.
    decoded = json.loads(tag_payload)
    assert isinstance(decoded, dict)


def test_adjust_request_required_uses_json_schema_not_structural_tag(
    parser: KimiK2ToolParser,
    sample_tools: list[ChatCompletionToolsParam],
) -> None:
    """With ``tool_choice="required"`` no structural tag is attached.

    ``structured_outputs`` must still be populated, but its
    ``structural_tag`` field stays ``None``.
    """
    req = ChatCompletionRequest(
        messages=[],
        model="m",
        tools=sample_tools,
        tool_choice="required",
    )
    out = parser.adjust_request(req)
    # Check the container first so a missing ``structured_outputs`` fails as
    # a clear assertion rather than an AttributeError on ``None``.
    assert out.structured_outputs is not None
    assert out.structured_outputs.structural_tag is None
Loading
Loading