Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions tests/entrypoints/openai/test_cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,32 @@ def test_enable_auto_choice_passes_with_tool_call_parser(serve_parser):
validate_parsed_serve_args(args)


def test_deepseek_v4_agentic_flags_pass_validation(monkeypatch):
    """The full set of deepseek_v4 agentic CLI flags passes serve validation."""
    import vllm.platforms as platforms
    from vllm.platforms.cpu import CpuPlatform

    # Pin the platform to CPU so the test is independent of local accelerators.
    monkeypatch.setattr(platforms, "_current_platform", CpuPlatform())
    parser = _build_vllm_parsers()["vllm serve"]

    cli_flags = [
        "--tokenizer-mode",
        "deepseek_v4",
        "--tool-call-parser",
        "deepseek_v4",
        "--enable-auto-tool-choice",
        "--reasoning-parser",
        "deepseek_v4",
    ]
    args = parser.parse_args(args=cli_flags)

    validate_parsed_serve_args(args)
    assert args.tokenizer_mode == "deepseek_v4"
    assert args.tool_call_parser == "deepseek_v4"
    assert args.enable_auto_tool_choice
    assert args.reasoning_parser == "deepseek_v4"


def test_enable_auto_choice_fails_with_enable_reasoning(serve_parser):
"""Ensure validation fails if reasoning is enabled with auto tool choice"""
args = serve_parser.parse_args(
Expand Down
53 changes: 53 additions & 0 deletions tests/tokenizers_/test_deepseek_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,59 @@ def test_deepseek_v4_renders_parsed_history_tool_arguments():
assert 'parameter name="arguments"' not in prompt


def test_deepseek_v4_escapes_arguments_tool_schema_name():
    """A schema property literally named "arguments" is escaped in the prompt."""
    echo_tool = {
        "type": "function",
        "function": {
            "name": "echo_args",
            "description": "Echo arguments",
            "parameters": {
                "type": "object",
                "properties": {"arguments": {"type": "string"}},
                "required": ["arguments"],
            },
        },
    }

    prompt = _tokenizer().apply_chat_template(
        [{"role": "user", "content": "Echo this"}],
        tools=[echo_tool],
        tokenize=False,
    )

    # The escaped alias must appear everywhere the raw name would have.
    assert "__vllm_param_arguments__" in prompt
    assert '"required": ["__vllm_param_arguments__"]' in prompt
    assert '"arguments": {"type": "string"}' not in prompt


def test_deepseek_v4_escapes_arguments_history_tool_call_name():
    """History tool calls with an "arguments" key render the escaped name."""
    history = [
        {"role": "user", "content": "Echo this"},
        {
            "role": "assistant",
            "tool_calls": [
                {
                    "type": "function",
                    "function": {
                        "name": "echo_args",
                        "arguments": '{"arguments": "hello"}',
                    },
                }
            ],
        },
    ]

    prompt = _tokenizer().apply_chat_template(history, tokenize=False)

    assert 'parameter name="__vllm_param_arguments__" string="true">hello' in prompt
    assert 'parameter name="arguments"' not in prompt


@pytest.mark.parametrize("reasoning_effort", ["minimal", "low", "medium", "high"])
def test_deepseek_v4_accepts_openai_reasoning_effort_values(reasoning_effort):
prompt = _tokenizer().apply_chat_template(
Expand Down
137 changes: 136 additions & 1 deletion tests/tool_parsers/test_deepseekv4_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ def make_request(tools=None) -> MagicMock:
return req


def make_tool(name: str, properties: dict[str, dict]) -> MagicMock:
    """Build a mock tool whose function exposes *name* and an object schema."""
    mock_tool = MagicMock()
    mock_tool.function.name = name
    mock_tool.function.parameters = {"type": "object", "properties": properties}
    return mock_tool


def build_tool_call(func_name: str, params: dict[str, str]) -> str:
param_strs = "".join(
f'{PARAM_START}{k}" string="true">{v}{PARAM_END}\n' for k, v in params.items()
Expand All @@ -86,6 +96,7 @@ def build_tool_call(func_name: str, params: dict[str, str]) -> str:
def stream(parser: DeepSeekV4ToolParser, full_text: str, chunk_size: int = 7):
deltas = []
previous_text = ""
request = make_request()
for start in range(0, len(full_text), chunk_size):
delta_text = full_text[start : start + chunk_size]
current_text = previous_text + delta_text
Expand All @@ -96,7 +107,7 @@ def stream(parser: DeepSeekV4ToolParser, full_text: str, chunk_size: int = 7):
previous_token_ids=[],
current_token_ids=[],
delta_token_ids=[1],
request=make_request(),
request=request,
)
previous_text = current_text
if delta is not None:
Expand Down Expand Up @@ -203,3 +214,127 @@ def test_get_vllm_registry_structural_tag_returns_structural_tag(
)
tag = parser.get_structural_tag(req)
assert isinstance(tag, StructuralTag)


def test_streaming_split_start_token_does_not_leak_dsml_markers():
    """Streaming one character at a time must not leak DSML markers as content."""
    parser = make_parser()
    text = "I will check." + build_tool_call("search", {"query": "vllm"})

    deltas = stream(parser, text, chunk_size=1)

    streamed = "".join(d.content or "" for d in deltas)
    assert streamed == "I will check."
    assert "DSML" not in streamed
    # The tool-call arguments still reassemble correctly from the deltas.
    assert json.loads(reconstruct_args(deltas)) == {"query": "vllm"}


def test_streaming_plain_text_trailing_angle_bracket_is_flushed():
    """A buffered trailing "<" in plain text is emitted when the stream ends."""
    parser = make_parser()
    req = make_request()
    buffered = "2 <"

    # Empty delta with identical previous/current text simulates end-of-stream.
    delta = parser.extract_tool_calls_streaming(
        previous_text=buffered,
        current_text=buffered,
        delta_text="",
        previous_token_ids=[],
        current_token_ids=[],
        delta_token_ids=[1],
        request=req,
    )

    assert delta is not None
    assert delta.content == "2 <"
    assert not delta.tool_calls


def test_extract_tool_calls_non_streaming_preserves_typed_arguments():
    """Non-string DSML parameters keep their JSON types in the parsed args."""
    parser = make_parser()
    trip_tool = make_tool(
        "plan_trip",
        {
            "days": {"type": "integer"},
            "flexible": {"type": "boolean"},
            "cities": {"type": "array"},
            "notes": {"type": "string"},
        },
    )
    request = make_request([trip_tool])
    pieces = [
        TC_START,
        f'{INV_START}plan_trip">',
        f'{PARAM_START}days" string="false">3{PARAM_END}',
        f'{PARAM_START}flexible" string="false">false{PARAM_END}',
        f'{PARAM_START}cities" string="false">["Beijing", "Shanghai"]{PARAM_END}',
        f'{PARAM_START}notes" string="true">window seat{PARAM_END}',
        INV_END,
        TC_END,
    ]

    result = parser.extract_tool_calls("".join(pieces), request)

    assert result.tools_called
    parsed = json.loads(result.tool_calls[0].function.arguments)
    assert parsed == {
        "days": 3,
        "flexible": False,
        "cities": ["Beijing", "Shanghai"],
        "notes": "window seat",
    }


def test_extract_tool_calls_repairs_arguments_wrapper_object():
    """An object payload wrapped in "arguments" is unwrapped to the real args."""
    parser = make_parser()
    request = make_request([make_tool("get_weather", {"location": {"type": "string"}})])
    model_output = "".join(
        [
            TC_START,
            f'{INV_START}get_weather">',
            PARAM_START,
            'arguments" string="false">{"location": "Beijing"}',
            PARAM_END,
            INV_END,
            TC_END,
        ]
    )

    result = parser.extract_tool_calls(model_output, request)

    assert result.tools_called
    assert json.loads(result.tool_calls[0].function.arguments) == {
        "location": "Beijing"
    }


def test_extract_tool_calls_repairs_input_wrapper_string():
    """A JSON string wrapped in an "input" parameter is unwrapped to real args."""
    parser = make_parser()
    request = make_request([make_tool("get_weather", {"location": {"type": "string"}})])
    model_output = "".join(
        [
            TC_START,
            f'{INV_START}get_weather">',
            PARAM_START,
            'input" string="true">{"location": "Beijing"}',
            PARAM_END,
            INV_END,
            TC_END,
        ]
    )

    result = parser.extract_tool_calls(model_output, request)

    assert result.tools_called
    assert json.loads(result.tool_calls[0].function.arguments) == {
        "location": "Beijing"
    }


def test_extract_tool_calls_unescapes_arguments_field_name():
    """The escaped parameter alias is mapped back to "arguments" on decode."""
    parser = make_parser()
    request = make_request([make_tool("echo_args", {"arguments": {"type": "string"}})])
    model_output = "".join(
        [
            TC_START,
            f'{INV_START}echo_args">',
            f'{PARAM_START}__vllm_param_arguments__" string="true">hello{PARAM_END}',
            INV_END,
            TC_END,
        ]
    )

    result = parser.extract_tool_calls(model_output, request)

    assert result.tools_called
    assert json.loads(result.tool_calls[0].function.arguments) == {"arguments": "hello"}
81 changes: 74 additions & 7 deletions vllm/tokenizers/deepseek_v4_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"<{dsml_token}{tc_block_name}>\n{tool_calls}\n</{dsml_token}{tc_block_name}>"
)
tool_calls_block_name: str = "tool_calls"
ESCAPED_ARGUMENTS_PARAM_NAME = "__vllm_param_arguments__"

tool_output_template: str = (
"<tool_result>{content}</tool_result>"
Expand Down Expand Up @@ -117,6 +118,40 @@ def tools_from_openai_format(tools):
return [tool["function"] for tool in tools]


def _escape_param_name(name: str) -> str:
    """Map the reserved parameter name "arguments" to its escaped alias."""
    return ESCAPED_ARGUMENTS_PARAM_NAME if name == "arguments" else name


def _unescape_param_name(name: str) -> str:
    """Invert _escape_param_name, restoring the literal "arguments" name."""
    return "arguments" if name == ESCAPED_ARGUMENTS_PARAM_NAME else name


def _escape_tool_schema(tool: Dict[str, Any]) -> Dict[str, Any]:
    """Return a deep copy of *tool* with reserved property names escaped.

    Both the ``properties`` mapping and the ``required`` list inside the
    tool's ``parameters`` schema are rewritten; the input is never mutated.
    Tools without a dict-valued ``parameters`` schema are returned unchanged
    (as a copy).
    """
    escaped = copy.deepcopy(tool)
    params = escaped.get("parameters")
    if not isinstance(params, dict):
        return escaped

    props = params.get("properties")
    if isinstance(props, dict):
        params["properties"] = {_escape_param_name(k): v for k, v in props.items()}

    req = params.get("required")
    if isinstance(req, list):
        # Non-string entries are left untouched; only names are escaped.
        params["required"] = [
            _escape_param_name(entry) if isinstance(entry, str) else entry
            for entry in req
        ]

    return escaped


def tool_calls_from_openai_format(tool_calls):
"""Convert OpenAI-format tool calls to internal format."""
return [
Expand Down Expand Up @@ -155,15 +190,14 @@ def encode_arguments_to_dsml(tool_call: Dict[str, Any]) -> str:
p_dsml_template = '<{dsml_token}parameter name="{key}" string="{is_str}">{value}</{dsml_token}parameter>'
P_dsml_strs = []

if isinstance(tool_call["arguments"], str):
arguments = json.loads(tool_call["arguments"])
else:
arguments = tool_call["arguments"]
arguments = _normalize_tool_call_arguments(tool_call["arguments"])
if not isinstance(arguments, dict):
return ""

for k, v in arguments.items():
p_dsml_str = p_dsml_template.format(
dsml_token=dsml_token,
key=k,
key=_escape_param_name(k),
is_str="true" if isinstance(v, str) else "false",
value=v if isinstance(v, str) else to_json(v),
)
Expand All @@ -172,6 +206,39 @@ def encode_arguments_to_dsml(tool_call: Dict[str, Any]) -> str:
return "\n".join(P_dsml_strs)


def _normalize_tool_call_arguments(arguments: Any) -> Dict[str, Any] | None:
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except json.JSONDecodeError:
return None

if not isinstance(arguments, dict):
return None

if set(arguments.keys()) == {"input"}:
inner = arguments["input"]
if isinstance(inner, str):
try:
inner = json.loads(inner)
except json.JSONDecodeError:
return arguments
if isinstance(inner, dict):
arguments = inner

if set(arguments.keys()) == {"arguments"}:
inner = arguments["arguments"]
if isinstance(inner, str):
try:
inner = json.loads(inner)
except json.JSONDecodeError:
return arguments
if isinstance(inner, dict):
return inner

return arguments


def decode_dsml_to_arguments(tool_name: str, tool_args: Dict[str, Tuple[str, str]]) -> Dict[str, str]:
"""
Decode DSML parameters back to a tool call dict.
Expand All @@ -188,7 +255,7 @@ def _decode_value(key: str, value: str, string: str):
value = to_json(value)
return f"{to_json(key)}: {value}"

tool_args_json = "{" + ", ".join([_decode_value(k, v, string=is_str) for k, (v, is_str) in tool_args.items()]) + "}"
tool_args_json = "{" + ", ".join([_decode_value(_unescape_param_name(k), v, string=is_str) for k, (v, is_str) in tool_args.items()]) + "}"
return dict(name=tool_name, arguments=tool_args_json)


Expand All @@ -202,7 +269,7 @@ def render_tools(tools: List[Dict[str, Union[str, Dict[str, Any]]]]) -> str:
Returns:
Formatted tools section string.
"""
tools_json = [to_json(t) for t in tools]
tools_json = [to_json(_escape_tool_schema(t)) for t in tools]

return TOOLS_TEMPLATE.format(
tool_schemas="\n".join(tools_json),
Expand Down
Loading
Loading