vllm-project · vllm-bot · Jan 5, 2026 · Jan 2, 2026
diff --git a/examples/tool_chat_template_glm4.jinja b/examples/tool_chat_template_glm4.jinja
@@ -0,0 +1,66 @@
+{#-
+    Chat template for GLM-4 with tool-call instructions.
+    Renders an optional tool preamble, then the conversation as
+    "[Round N]\n问：<user>" / "\n答：<assistant>" turns.
+    NOTE(review): system_message is computed below but never emitted, and
+    messages with role 'tool' are skipped by the loop -- confirm both are intended.
+-#}
+{%- set counter = namespace(index=0) -%}
+{%- if tools is not defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- Split a leading system message off the conversation (falls back to a default). -#}
+{%- if messages and messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant." %}
+{%- endif %}
+
+{#- Tool preamble: fixed instructions followed by each tool schema as indented JSON. -#}
+{%- if tools is not none %}
+    {%- set tool_instruction %}
+You have access to the following tools. When you need to call a tool, you MUST use the following format:
+
+<tool_call>function_name
+<arg_key>parameter_name</arg_key>
+<arg_value>parameter_value</arg_value>
+</tool_call>
+
+Important rules:
+- Always wrap tool calls with <tool_call>...</tool_call> tags
+- Put the function name on the first line after <tool_call>
+- Use <arg_key> and <arg_value> tags for each parameter
+- If a parameter value is a string, keep it as-is. If it's a number or boolean, convert it appropriately
+- You can make multiple tool calls if needed
+- If no tool is suitable, respond with regular text
+
+Available tools:
+{% endset %}
+    {{- tool_instruction + "\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+
+{#- Conversation turns; the round counter advances once per user message. -#}
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+        {{- '[Round ' + counter.index|string + ']\n问：' + message['content'] -}}
+        {%- set counter.index = counter.index + 1 -%}
+    {%- endif -%}
+    {%- if message['role'] == 'assistant' -%}
+        {#- content may be null (e.g. an assistant turn carrying only tool calls) -#}
+        {{- '\n答：' + (message['content'] or '') -}}
+        {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+            {{- '\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Guard the empty case: messages[-1] cannot be read when no non-system message exists. -#}
+{%- if add_generation_prompt and (not messages or messages[-1]['role'] != 'assistant') -%}
+    {{- '\n答：' -}}
+{%- endif -%}
@@ -56,6 +56,20 @@ def __init__(self, tokenizer: TokenizerLike):
         self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
         self._buffer = ""
 
+    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        """
+        Adjust request parameters to ensure tool call tokens are not skipped
+        during tokenizer decoding.
+        """
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Ensure tool call tokens (<tool_call>, </tool_call>) are not skipped
+            # during decoding. Even though they are not marked as special tokens,
+            # setting skip_special_tokens=False ensures proper handling in
+            # transformers 5.x where decoding behavior may have changed.
+            request.skip_special_tokens = False
+        return request
+
     def extract_tool_calls(
         self,
         model_output: str,