{#- Chat template for GLM-4 style models with tool calling.
    Renders an optional tool-usage instruction block, then the conversation
    as "[Round N]\n问:<user>" / "\n答:<assistant>" turns, and finally an
    optional generation prompt ("\n答:").
    Inputs: messages, tools (optional), add_generation_prompt. -#}
{%- set counter = namespace(index=0) -%}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- Peel a leading system message off the conversation.
    NOTE(review): system_message is assigned but never rendered below;
    confirm whether it should be emitted before changing the prompt. -#}
{%- if messages and messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0]['content']|trim %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = "You are a helpful assistant." %}
{%- endif %}

{%- if tools is not none %}
    {%- set tool_instruction %}
You have access to the following tools. When you need to call a tool, you MUST use the following format:

<tool_call>function_name
<arg_key>parameter_name</arg_key>
<arg_value>parameter_value</arg_value>
</tool_call>

Important rules:
- Always wrap tool calls with <tool_call>...</tool_call> tags
- Put the function name on the first line after <tool_call>
- Use <arg_key> and <arg_value> tags for each parameter
- If a parameter value is a string, keep it as-is. If it's a number or boolean, convert it appropriately
- You can make multiple tool calls if needed
- If no tool is suitable, respond with regular text

Available tools:
{% endset %}
    {{- tool_instruction + "\n\n" }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
{%- endif %}

{%- for message in messages -%}
    {%- if message['role'] == 'user' -%}
        {{- '[Round ' + counter.index|string + ']\n问:' + message['content'] -}}
        {%- set counter.index = counter.index + 1 -%}
    {%- endif -%}
    {%- if message['role'] == 'assistant' -%}
        {#- Assistant turns that only carry tool calls may have no content;
            render them as an empty answer instead of failing on none. -#}
        {{- '\n答:' + message['content'] | default('', true) -}}
        {%- if (loop.last and add_generation_prompt) or not loop.last -%}
            {{- '\n' -}}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}

{#- Guard against an empty conversation (e.g. only a system message was
    supplied): indexing messages[-1] would otherwise be undefined. -#}
{%- if add_generation_prompt and (not messages or messages[-1]['role'] != 'assistant') -%}
    {{- '\n答:' -}}
{%- endif -%}
def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
    """Disable special-token skipping when the request may produce tool calls.

    The request is first passed through the parent class's
    ``adjust_request``. If the resulting request carries tools and its
    ``tool_choice`` is not ``"none"``, ``skip_special_tokens`` is set to
    ``False`` on it; otherwise it is returned as the parent produced it.

    Args:
        request: The incoming chat completion request.

    Returns:
        The request returned by the parent implementation, possibly with
        ``skip_special_tokens`` forced to ``False``.
    """
    adjusted = super().adjust_request(request)

    # No tools supplied, or tool use explicitly disabled: leave untouched.
    if not adjusted.tools or adjusted.tool_choice == "none":
        return adjusted

    # Keep the tool call tokens (<tool_call>, </tool_call>) in the decoded
    # text. They are not marked as special tokens, but turning off
    # skip_special_tokens guards against decoding differences in
    # transformers 5.x (per the original author's note).
    adjusted.skip_special_tokens = False
    return adjusted