vllm-project · DarkLight1337 · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026
@@ -24,9 +24,9 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
 
-        chat_kwargs = kwargs.pop("chat_template_kwargs", {}) or {}
-        thinking = bool(chat_kwargs.pop("thinking", False))
-        enable_thinking = bool(chat_kwargs.pop("enable_thinking", False))
+        chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {}
+        thinking = bool(chat_kwargs.get("thinking", False))
+        enable_thinking = bool(chat_kwargs.get("enable_thinking", False))
         thinking = thinking or enable_thinking
 
         if thinking:

@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
+from vllm.reasoning.holo2_reasoning_parser import Holo2ReasoningParser
 
 
-class Glm4MoeModelReasoningParser(DeepSeekR1ReasoningParser):
+class Glm4MoeModelReasoningParser(Holo2ReasoningParser):
     """
-    Reasoning parser for the Glm4MoeModel model is same as DeepSeekR1ReasoningParser.
+    Reasoning parser for the Glm4MoeModel model, which inherits from
+    `Holo2ReasoningParser`.
     """
 
     pass
@@ -46,9 +46,10 @@ def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
         # all requests in the structured output manager. So it is important that without
         # user specified chat template args, the default thinking is True.
 
-        enable_thinking = bool(chat_kwargs.get("thinking", True))
-
-        if enable_thinking:
+        thinking = bool(chat_kwargs.get("thinking", True))
+        enable_thinking = bool(chat_kwargs.get("enable_thinking", True))
+        thinking = thinking and enable_thinking
+        if thinking:
             self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs)
         else:
             self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)