diff --git a/examples/tool_chat_template_glm4.jinja b/examples/tool_chat_template_glm4.jinja
new file mode 100644
index 000000000000..11f76b4d4af4
--- /dev/null
+++ b/examples/tool_chat_template_glm4.jinja
@@ -0,0 +1,54 @@
+{%- set counter = namespace(index=0) -%}
+{%- if tools is not defined %}
+ {%- set tools = none %}
+{%- endif %}
+{#- Split off a leading system message (or use the default prompt) and emit it first; it was previously computed but never rendered. -#}
+{%- if messages and messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content']|trim %}
+ {%- set messages = messages[1:] %}
+{%- else %}
+ {%- set system_message = "You are a helpful assistant." %}
+{%- endif %}
+{{- system_message + "\n\n" }}
+{#- Tool prompt: spell out the tagged tool-call format the parser expects, then list every tool schema as indented JSON. -#}{%- if tools is not none %}
+ {%- set tool_instruction %}
+You have access to the following tools. When you need to call a tool, you MUST use the following format:
+
+<tool_call>function_name
+<arg_key>parameter_name</arg_key>
+<arg_value>parameter_value</arg_value>
+</tool_call>
+
+Important rules:
+- Always wrap tool calls with <tool_call>...</tool_call> tags
+- Put the function name on the first line after <tool_call>
+- Use <arg_key> and <arg_value> tags for each parameter
+- If a parameter value is a string, keep it as-is. If it's a number or boolean, convert it appropriately
+- You can make multiple tool calls if needed
+- If no tool is suitable, respond with regular text
+
+Available tools:
+{% endset %}
+ {{- tool_instruction + "\n\n" }}
+ {%- for t in tools %}
+ {{- t | tojson(indent=4) }}
+ {{- "\n\n" }}
+ {%- endfor %}
+{%- endif %}
+{#- Render the dialogue as numbered user rounds followed by assistant answers; assistant content may be null (e.g. tool-call-only turns), so fall back to ''. -#}
+{%- for message in messages -%}
+ {%- if message['role'] == 'user' -%}
+ {{- '[Round ' + counter.index|string + ']\n问:' + message['content'] -}}
+ {%- set counter.index = counter.index + 1 -%}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' -%}
+ {{- '\n答:' + (message['content'] or '') -}}
+ {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+ {{- '\n' -}}
+ {%- endif -%}
+ {%- endif -%}
+{%- endfor -%}
+{#- Open a new answer slot; "not messages" keeps messages[-1] from failing when only a system message was supplied. -#}
+{%- if add_generation_prompt and (not messages or messages[-1]['role'] != 'assistant') -%}
+ {{- '\n答:' -}}
+{%- endif -%}
diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py
index ebfd91297b41..6ad7d7cb460c 100644
--- a/vllm/tool_parsers/glm4_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm4_moe_tool_parser.py
@@ -56,6 +56,20 @@ def __init__(self, tokenizer: TokenizerLike):
self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
self._buffer = ""
+ def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+ """
+ Adjust request parameters to ensure tool call tokens are not skipped
+ during tokenizer decoding.
+ """
+ request = super().adjust_request(request)
+ if request.tools and request.tool_choice != "none":
+ # Ensure tool call tokens (, ) are not skipped
+ # during decoding. Even though they are not marked as special tokens,
+ # setting skip_special_tokens=False ensures proper handling in
+ # transformers 5.x where decoding behavior may have changed.
+ request.skip_special_tokens = False
+ return request
+
def extract_tool_calls(
self,
model_output: str,