diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
index 1871adcd4321..32587cf35404 100644
--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -1,171 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from collections.abc import Sequence
+from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
-from transformers import PreTrainedTokenizerBase
-from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
-from vllm.logger import init_logger
-from vllm.reasoning import ReasoningParser
-
-logger = init_logger(__name__)
-
-
-class Glm4MoeModelReasoningParser(ReasoningParser):
+class Glm4MoeModelReasoningParser(DeepSeekR1ReasoningParser):
"""
- Reasoning parser for the Glm4MoeModel model.
-
- The Glm4MoeModel model uses ... tokens to denote reasoning
- text within its output. The model provides a strict switch to disable
- reasoning output via the 'enable_thinking=False' parameter. This parser
- extracts the reasoning content enclosed by and tokens
- from the model's output.
+ Reasoning parser for Glm4MoeModel; it is the same as DeepSeekR1ReasoningParser.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
- super().__init__(tokenizer, *args, **kwargs)
- self.think_start_token = ""
- self.think_end_token = ""
- self.assistant_token = "<|assistant|>"
-
- if not self.model_tokenizer:
- raise ValueError(
- "The model tokenizer must be passed to the ReasoningParser "
- "constructor during construction."
- )
-
- self.think_start_token_id = self.vocab.get(self.think_start_token)
- self.think_end_token_id = self.vocab.get(self.think_end_token)
- self.assistant_token_id = self.vocab.get(self.assistant_token)
- if (
- self.think_start_token_id is None
- or self.think_end_token_id is None
- or self.assistant_token_id is None
- ):
- raise RuntimeError(
- "Glm4MoeModel reasoning parser could not locate "
- "think start/end or assistant tokens in the tokenizer!"
- )
-
- def is_reasoning_end(self, input_ids: list[int]) -> bool:
- """
- GLM's chat template has tokens after every
- <|assistant|> token. Thus, we need to check if is
- after the most recent <|assistant|> token (if present).
- """
- for token_id in input_ids[::-1]:
- if token_id == self.think_end_token_id:
- return True
- elif token_id == self.assistant_token_id:
- return False
- return False
-
- def extract_content_ids(self, input_ids: list[int]) -> list[int]:
- """
- Extract the content after the end tokens
- """
- if self.think_end_token_id not in input_ids[:-1]:
- return []
- else:
- return input_ids[input_ids.index(self.think_end_token_id) + 1 :]
-
- def extract_reasoning_streaming(
- self,
- previous_text: str,
- current_text: str,
- delta_text: str,
- previous_token_ids: Sequence[int],
- current_token_ids: Sequence[int],
- delta_token_ids: Sequence[int],
- ) -> DeltaMessage | None:
- """
- Extract reasoning content from a delta message.
- Handles streaming output where previous + delta = current.
- Uses token IDs for faster processing.
- For text abcxyz:
- - 'abc' goes to reasoning
- - 'xyz' goes to content
- """
- # Skip single special tokens
- if len(delta_token_ids) == 1 and (
- delta_token_ids[0] in [self.think_start_token_id, self.think_end_token_id]
- ):
- return None
-
- if self.think_start_token_id in previous_token_ids:
- if self.think_end_token_id in delta_token_ids:
- # in previous, in delta,
- # extract reasoning content
- end_index = delta_text.find(self.think_end_token)
- reasoning = delta_text[:end_index]
- content = delta_text[end_index + len(self.think_end_token) :]
- return DeltaMessage(
- reasoning=reasoning,
- content=content if content else None,
- )
- elif self.think_end_token_id in previous_token_ids:
- # in previous, in previous,
- # reasoning content continues
- return DeltaMessage(content=delta_text)
- else:
- # in previous, no in previous or delta,
- # reasoning content continues
- return DeltaMessage(reasoning=delta_text)
- elif self.think_start_token_id in delta_token_ids:
- if self.think_end_token_id in delta_token_ids:
- # in delta, in delta, extract reasoning content
- start_index = delta_text.find(self.think_start_token)
- end_index = delta_text.find(self.think_end_token)
- reasoning = delta_text[
- start_index + len(self.think_start_token) : end_index
- ]
- content = delta_text[end_index + len(self.think_end_token) :]
- return DeltaMessage(
- reasoning=reasoning,
- content=content if content else None,
- )
- else:
- # in delta, no in delta,
- # reasoning content continues
- return DeltaMessage(reasoning=delta_text)
- else:
- # thinking is disabled, just content
- return DeltaMessage(content=delta_text)
-
- def extract_reasoning(
- self, model_output: str, request: ChatCompletionRequest
- ) -> tuple[str | None, str | None]:
- """
- Extract reasoning content from the model output.
-
- For text abcxyz:
- - 'abc' goes to reasoning
- - 'xyz' goes to content
-
- Returns:
- tuple[Optional[str], Optional[str]]: reasoning content and content
- """
-
- # Check if the model output contains the and tokens.
- if (
- self.think_start_token not in model_output
- or self.think_end_token not in model_output
- ):
- return None, model_output
- # Check if the is present in the model output, remove it
- # if it is present.
- model_output_parts = model_output.partition(self.think_start_token)
- model_output = (
- model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
- )
- # Check if the model output contains the tokens.
- # If the end token is not found, return the model output as is.
- if self.think_end_token not in model_output:
- return None, model_output
-
- # Extract reasoning content from the model output.
- reasoning, _, content = model_output.partition(self.think_end_token)
-
- final_content = content or None
- return reasoning, final_content
+ pass