Skip to content

Commit 0471aed

Browse files
committed
rebased to solve DCO problem
Signed-off-by: Kai Wu <[email protected]>
1 parent 22481fb commit 0471aed

File tree

4 files changed

+198
-67
lines changed

4 files changed

+198
-67
lines changed

examples/tool_chat_template_llama4_pythonic.jinja

Lines changed: 31 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,47 @@
11
{{- bos_token }}
2-
{%- if custom_tools is defined %}
2+
{%- if custom_tools is defined and custom_tools%}
33
{%- set tools = custom_tools %}
4-
{%- endif %}
5-
{%- if not tools_in_user_message is defined %}
6-
{%- set tools_in_user_message = false %}
4+
{%- set tool_definition = tool_definition ~ (tools | tojson(indent=4)) ~ "\n" %}
75
{%- endif %}
86
{%- if not tools is defined %}
97
{%- set tools = none %}
108
{%- endif %}
119

1210
{#- This block extracts the system message, so we can slot it into the right place. #}
1311
{%- if messages[0]['role'] == 'system' %}
12+
{%- set user_provided_system_message = true %}
1413
{%- if messages[0]['content'] is string %}
1514
{%- set system_message = messages[0]['content']|trim %}
1615
{%- else %}
1716
{%- set system_message = messages[0]['content'][0]['text']|trim %}
1817
{%- endif %}
1918
{%- set messages = messages[1:] %}
2019
{%- else %}
21-
{%- if tools is not none %}
22-
{#- Add default tool system message when tools are provided #}
23-
{%- set system_message = "You are a helpful assistant with tool calling "
24-
"capabilities. Only reply with a tool call if the function exists in the "
25-
"library provided by the user. If it doesn't exist, just reply directly in "
26-
"natural language. When you receive a tool call response, use the output to "
27-
"format an answer to the original user question." %}
20+
{%- if tools is not none %}
21+
{#- Since no system_message was provided by the user, if tools are provided, system_message falls back to the default tool system message #}
22+
{%- set system_message = "You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines:\n\n1. FUNCTION CALLS:\n- ONLY use functions that are EXPLICITLY listed in the function list below\n- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If a function is not in the list, respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s)\n- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\nExamples:\nCORRECT: [get_weather(location=\"Vancouver\"), calculate_route(start=\"Boston\", end=\"New York\")] <- Only if get_weather and calculate_route are in function list\nINCORRECT: get_weather(location=\"New York\")\nINCORRECT: Let me check the weather: [get_weather(location=\"New York\")]\nINCORRECT: [get_events(location=\"Singapore\")] <- If function not in list\n\n2. RESPONSE RULES:\n- For pure function requests matching a listed function: ONLY output the function call(s)\n- For knowledge questions: ONLY output text\n- For missing parameters: ONLY request the specific missing parameters\n- For unavailable services (not in function list): output ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\". Do NOT execute a function call.\n- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations\n- NEVER combine text and function calls in the same response\n- NEVER suggest alternative functions when the requested service is unavailable\n- NEVER create or invent new functions not listed below\n\n3. 
STRICT BOUNDARIES:\n- ONLY use functions from the list below - no exceptions\n- NEVER use a function as an alternative to unavailable information\n- NEVER call functions not present in the function list\n- NEVER add explanatory text to function calls\n- NEVER respond with empty brackets\n- Use proper Python/JSON syntax for function calls\n- Check the function list carefully before responding\n\n4. TOOL RESPONSE HANDLING:\n- When receiving tool responses: provide concise, natural language responses\n- Don't repeat tool response verbatim\n- Don't add supplementary information\n\nHere is a list of functions in JSON format that you can invoke:\n" %}
2823
{%- else %}
29-
{%- set system_message = "" %}
24+
{%- set system_message = "" %}
3025
{%- endif %}
3126
{%- endif %}
32-
33-
{#- System message if the user supplied one, or if tools are used (default tool system message) #}
27+
{#- Now write the system message: use the user-provided system message if user_provided_system_message is set, otherwise the default tool system message if tools are present #}
3428
{%- if system_message %}
3529
{#- always use user provided system message to override default tool system message #}
3630
{{- "<|header_start|>system<|header_end|>\n\n" }}
3731
{{- system_message }}
38-
{%- if tools is not none and not tools_in_user_message %}
39-
{{- "Tools: You have access to the following tools. You might need to use one "
40-
"or more function/tool calls to fulfill the task. \n"
41-
"If none are needed, then proceed to the response.\n\n"
42-
"Tool Call Syntax: You can call tools using the following syntax:\n"
43-
"[func_name1(params_name1=params_value1, params_name2=params_value2, ...), ...]\n"
44-
"Do not include anything else when calling the tools with the syntax above.\n\n"
45-
"Here is a list of functions in JSON format that you can invoke.\n " }}
46-
{%- for t in tools %}
47-
{{- t | tojson(indent=4) }}
48-
{{- "\n\n" }}
49-
{%- endfor %}
50-
{%- endif %}
32+
{%- if user_provided_system_message and tools %}
33+
{{- "\nHere is a list of functions in JSON format that you can invoke. Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\n" }}
34+
{{- tool_definition -}}
35+
{%- elif tool_definition %}
36+
{{- tool_definition -}}
37+
{%- endif %}
5138
{{- "<|eot|>" }}
5239
{%- endif %}
53-
54-
{#- Custom tools are passed in a user message with some extra guidance #}
55-
{%- if tools_in_user_message and tools is not none %}
56-
{#- Extract the first user message so we can plug it in here #}
57-
{%- if messages | length != 0 %}
58-
{%- if messages[0]['content'] is string %}
59-
{%- set first_user_message = messages[0]['content']|trim %}
60-
{%- else %}
61-
{%- set first_user_message = messages[0]['content'] | selectattr('type', 'equalto', 'text') | map(attribute='text') | map('trim') | join('\n') %}
62-
{%- endif %}
63-
{%- set messages = messages[1:] %}
64-
{%- else %}
65-
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
66-
{%- endif %}
67-
{{- '<|header_start|>user<|header_end|>\n\n' -}}
68-
{{- first_user_message}}
69-
{{- "\nHere is a list of functions in JSON format that you can invoke:"}}
70-
{%- for t in tools %}
71-
{{- t | tojson(indent=4) }}
72-
{{- "\n\n" }}
73-
{%- endfor %}
74-
{{- "Should you decide to return the function call(s), put them in the format "
75-
"of [func_name1(params_name1=params_value1, params_name2=params_value2, "
76-
"...), ...]\nDo not include anything else when calling the tools with the "
77-
"syntax above." }}
78-
{%- endif %}
40+
{#- Now deal with all other messages #}
7941

8042
{%- for message in messages %}
81-
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
43+
{#- Base case: messages that are not from the tool role and have an empty tool_calls list #}
44+
{%- if not (message.role == 'ipython' or message.role == 'tool' or ('tool_calls' in message and message.tool_calls|length != 0 )) %}
8245
{{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
8346
{%- if message['content'] is string %}
8447
{{- message['content'] }}
@@ -92,8 +55,10 @@
9255
{%- endfor %}
9356
{%- endif %}
9457
{{- "<|eot|>" }}
95-
{%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
96-
{%- set tool_call = message.tool_calls[0].function %}
58+
{#- Tool case: the message has a non-empty tool_calls list and must come from the assistant #}
59+
{%- elif 'tool_calls' in message%}
60+
{#- assume tool_calls are always coming from assistant #}
61+
{%- if message.role == 'assistant' %}
9762
{{- '<|header_start|>assistant<|header_end|>\n\n' -}}
9863
{%- if message['content'] is string %}
9964
{{- message['content'] }}
@@ -106,32 +71,36 @@
10671
{%- endif %}
10772
{%- endfor %}
10873
{%- endif %}
74+
{{- "[" }}
10975
{%- for tool_call in message.tool_calls %}
11076
{%- if tool_call.function is defined %}
11177
{%- set tool_call = tool_call.function %}
11278
{%- endif %}
113-
{{- tool_call.name + '(' -}}
79+
{{- tool_call.name + '(' -}}
11480
{%- for param in tool_call.arguments %}
115-
{{- param + '=' -}}
81+
{{- param + '="' -}}
11682
{{- "%s" | format(tool_call.arguments[param]) -}}
83+
{{- '"' -}}
11784
{% if not loop.last %}, {% endif %}
11885
{%- endfor %}
11986
{{- ')' -}}
12087
{% if not loop.last %}, {% endif %}
12188
{%- endfor %}
122-
{{- "<|eom|>" }}
89+
{{- "]<|eot|>" }}
90+
{%- endif %}
91+
{#- Tool_response case: messages are from tool_response #}
12392
{%- elif message.role == "tool" or message.role == "ipython" %}
12493
{{- "<|header_start|>ipython<|header_end|>\n\n" }}
12594
{%- if message.content is string %}
126-
{{- message.content | tojson }}
95+
{{- message.content | tojson }}
12796
{%- else %}
12897
{%- for content in message['content'] %}
12998
{%- if content['type'] == 'text' %}
130-
{{- content['text'] | tojson }}
99+
{{- content['text'] | tojson }}
131100
{%- endif %}
132101
{%- endfor %}
133102
{%- endif %}
134-
{{- "<|eom|>" }}
103+
{{- "<|eot|>" }}
135104
{%- endif %}
136105
{%- endfor %}
137106
{%- if add_generation_prompt %}

tests/tool_use/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
8888
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
8989
"arguments": [
9090
"--enforce-eager", "--no-enable-prefix-caching",
91-
"--tool-call-parser", "pythonic", "--chat-template",
91+
"--tool-call-parser", "llama4_pythonic", "--chat-template",
9292
str(VLLM_PATH /
9393
"examples/tool_chat_template_llama4_pythonic.jinja"), "-tp",
9494
"4"
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
from .abstract_tool_parser import ToolParser, ToolParserManager
4+
from .deepseekv3_tool_parser import DeepSeekV3ToolParser
45
from .granite_20b_fc_tool_parser import Granite20bFCToolParser
56
from .granite_tool_parser import GraniteToolParser
67
from .hermes_tool_parser import Hermes2ProToolParser
@@ -9,11 +10,12 @@
910
from .llama_tool_parser import Llama3JsonToolParser
1011
from .mistral_tool_parser import MistralToolParser
1112
from .phi4mini_tool_parser import Phi4MiniJsonToolParser
12-
from .pythonic_tool_parser import PythonicToolParser
13+
from .pythonic_tool_parser import Llama4PythonicToolParser, PythonicToolParser
1314

1415
__all__ = [
1516
"ToolParser", "ToolParserManager", "Granite20bFCToolParser",
1617
"GraniteToolParser", "Hermes2ProToolParser", "MistralToolParser",
1718
"Internlm2ToolParser", "Llama3JsonToolParser", "JambaToolParser",
18-
"PythonicToolParser", "Phi4MiniJsonToolParser"
19+
"Llama4PythonicToolParser", "PythonicToolParser", "Phi4MiniJsonToolParser",
20+
"DeepSeekV3ToolParser"
1921
]

vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py

Lines changed: 162 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,170 @@ class _UnexpectedAstError(Exception):
2424
pass
2525

2626

27+
@ToolParserManager.register_module("llama4_pythonic")
28+
class Llama4PythonicToolParser(ToolParser):
29+
"""
30+
Tool call parser for Llama 4 models that produce tool calls in a pythonic style.
31+
Use --enable-auto-tool-choice --tool-call-parser llama4_pythonic
32+
"""
33+
# TODO(mdepinet): Possible future improvements:
34+
# 1. Support text + tools separated by either <|python_tag|> or \n\n
35+
# 2. Support tools outside of a list (or separated by a semicolon).
36+
# This depends on item 1 for consistent streaming.
37+
# Neither of these are necessary for e.g. ToolACE, but both would help make
38+
# Llama3.2 models more reliable.
39+
40+
TOOL_CALL_REGEX = re.compile(
41+
r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
42+
re.DOTALL)
43+
44+
def __init__(self, tokenizer: PreTrainedTokenizerBase):
45+
super().__init__(tokenizer)
46+
47+
# Rename for readability. This is NOT a tool id.
48+
@property
49+
def current_tool_index(self) -> int:
50+
return self.current_tool_id
51+
52+
@current_tool_index.setter
53+
def current_tool_index(self, value: int) -> None:
54+
self.current_tool_id = value
55+
56+
def extract_tool_calls(
57+
self, model_output: str,
58+
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
59+
"""
60+
Extract the tool calls from a complete model response.
61+
"""
62+
63+
# remove <|python_start|> and <|python_end|>
64+
# as Llama 4 models sometimes output those tokens
65+
if model_output.startswith("<|python_start|>"):
66+
model_output = model_output[len("<|python_start|>"):]
67+
model_output = model_output.replace("<|python_end|>", "")
68+
if not (self.TOOL_CALL_REGEX.match(model_output)):
69+
return ExtractedToolCallInformation(tools_called=False,
70+
tool_calls=[],
71+
content=model_output)
72+
73+
try:
74+
module = ast.parse(model_output)
75+
parsed = getattr(module.body[0], "value", None)
76+
if isinstance(parsed, ast.List) and all(
77+
isinstance(e, ast.Call) for e in parsed.elts):
78+
return ExtractedToolCallInformation(
79+
tools_called=True,
80+
tool_calls=[
81+
_handle_single_tool(e) # type: ignore
82+
for e in parsed.elts
83+
],
84+
content=None)
85+
else:
86+
raise _UnexpectedAstError(
87+
"Tool output must be a list of function calls")
88+
except Exception:
89+
logger.exception("Error in extracting tool call from response.")
90+
# Treat as regular text
91+
return ExtractedToolCallInformation(tools_called=False,
92+
tool_calls=[],
93+
content=model_output)
94+
95+
def extract_tool_calls_streaming(
96+
self,
97+
previous_text: str,
98+
current_text: str,
99+
delta_text: str,
100+
previous_token_ids: Sequence[int],
101+
current_token_ids: Sequence[int],
102+
delta_token_ids: Sequence[int],
103+
request: ChatCompletionRequest,
104+
) -> Union[DeltaMessage, None]:
105+
106+
if not current_text.startswith("[") and not current_text.startswith(
107+
"<|python_start|>"):
108+
return DeltaMessage(content=delta_text)
109+
110+
try:
111+
if current_text.startswith("<|python_start|>"):
112+
current_text = current_text[len("<|python_start|>"):]
113+
current_text = current_text.replace("<|python_end|>", "")
114+
valid_and_added_text = _make_valid_python(current_text)
115+
if valid_and_added_text is None:
116+
return None
117+
valid_text, added_text = valid_and_added_text
118+
119+
module = ast.parse(valid_text)
120+
parsed = getattr(module.body[0], "value", None)
121+
if not isinstance(parsed, ast.List) or not all(
122+
isinstance(e, ast.Call) for e in parsed.elts):
123+
raise _UnexpectedAstError(
124+
"Tool output must be a list of function calls")
125+
tool_calls = [
126+
_handle_single_tool(e) # type: ignore
127+
for e in parsed.elts
128+
]
129+
130+
tool_deltas = []
131+
for index, new_call in enumerate(tool_calls):
132+
if index < self.current_tool_index:
133+
continue
134+
135+
self.current_tool_index = index
136+
if len(self.streamed_args_for_tool) == index:
137+
self.streamed_args_for_tool.append("")
138+
139+
new_call_complete = index < len(
140+
tool_calls) - 1 or ")]" not in added_text
141+
if new_call_complete:
142+
self.current_tool_index += 1
143+
144+
withheld_suffix = (added_text[:-2]
145+
if not new_call_complete else "")
146+
if not new_call_complete and added_text[-2] == ")":
147+
# Function call is incomplete. Withhold the closing bracket.
148+
withheld_suffix = withheld_suffix + "}"
149+
# Strings get single quotes in the model-produced string.
150+
# JSON requires double quotes.
151+
withheld_suffix = withheld_suffix.replace("'", '"')
152+
delta = _compute_tool_delta(self.streamed_args_for_tool[index],
153+
new_call, index, withheld_suffix)
154+
155+
if delta is not None:
156+
tool_deltas.append(delta)
157+
if (delta.function is not None
158+
and delta.function.arguments is not None):
159+
self.streamed_args_for_tool[
160+
index] += delta.function.arguments
161+
162+
# HACK: serving_chat.py inspects the internal state of tool parsers
163+
# when determining its final streaming delta, automatically
164+
# adding autocompleted JSON.
165+
# These two lines avoid that nonsense while ensuring finish_reason
166+
# is set to tool_calls when at least one tool is called.
167+
if tool_deltas and not self.prev_tool_call_arr:
168+
self.prev_tool_call_arr = [{"arguments": {}}]
169+
170+
if tool_deltas:
171+
return DeltaMessage(tool_calls=tool_deltas)
172+
elif not added_text and self.current_tool_id > 0:
173+
# Return an empty DeltaMessage once the tool calls are all done
174+
# so that finish_reason gets set.
175+
return DeltaMessage(content='')
176+
else:
177+
return None
178+
except Exception:
179+
logger.exception("Error trying to handle streaming tool call.")
180+
logger.debug(
181+
"Skipping chunk as a result of tool streaming extraction "
182+
"error")
183+
return None
184+
185+
27186
@ToolParserManager.register_module("pythonic")
28187
class PythonicToolParser(ToolParser):
29188
"""
30189
Tool call parser for models that produce tool calls in a pythonic style,
31-
such as Llama 3.2 and Llama 4 models.
190+
such as Llama 3.2 models.
32191
33192
Used when --enable-auto-tool-choice --tool-call-parser pythonic are all set
34193
"""
@@ -280,6 +439,7 @@ def _compute_tool_delta(previously_sent_args: str, new_call: ToolCall,
280439
new_call_args = new_call_args[:-len(withheld_suffix)]
281440
if not previously_sent_args:
282441
return DeltaToolCall(id=new_call.id,
442+
type="function",
283443
index=index,
284444
function=DeltaFunctionCall(
285445
name=new_call.function.name,
@@ -288,5 +448,5 @@ def _compute_tool_delta(previously_sent_args: str, new_call: ToolCall,
288448

289449
arg_diff = new_call_args[len(previously_sent_args):]
290450
return DeltaToolCall(
291-
id="", index=index, function=DeltaFunctionCall(
451+
id=None, index=index, function=DeltaFunctionCall(
292452
arguments=arg_diff)) if arg_diff else None

0 commit comments

Comments
 (0)