diff --git a/tests/reasoning/test_glm4_moe_reasoning_parser.py b/tests/reasoning/test_glm4_moe_reasoning_parser.py
index 6f7827e5b827..6cfc4d8a2d75 100644
--- a/tests/reasoning/test_glm4_moe_reasoning_parser.py
+++ b/tests/reasoning/test_glm4_moe_reasoning_parser.py
@@ -59,6 +59,20 @@ def glm45_tokenizer():
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
+EMPTY_THINK_BLOCK = {
+    "output": "<think></think>This is the rest",
+ "reasoning": None,
+ "content": "This is the rest",
+ "is_reasoning_end": True,
+}
+
+EMPTY_THINK_BLOCK_NO_CONTENT = {
+    "output": "<think></think>",
+ "reasoning": None,
+ "content": None,
+ "is_reasoning_end": True,
+}
+
ONLY_OPEN_TAG = {
    "output": "<think>This is a reasoning section",
"reasoning": None,
@@ -114,6 +128,26 @@ def glm45_tokenizer():
MULTILINE_REASONING,
id="multiline_reasoning_stream",
),
+ pytest.param(
+ False,
+ EMPTY_THINK_BLOCK,
+ id="empty_think_block",
+ ),
+ pytest.param(
+ True,
+ EMPTY_THINK_BLOCK,
+ id="empty_think_block_stream",
+ ),
+ pytest.param(
+ False,
+ EMPTY_THINK_BLOCK_NO_CONTENT,
+ id="empty_think_block_no_content",
+ ),
+ pytest.param(
+ True,
+ EMPTY_THINK_BLOCK_NO_CONTENT,
+ id="empty_think_block_no_content_stream",
+ ),
pytest.param(
False,
ONLY_OPEN_TAG,
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 8c78db6f1878..ccfe9fd176b9 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -33,8 +33,8 @@
"Ernie45ReasoningParser",
),
"glm45": (
- "deepseek_v3_reasoning_parser",
- "DeepSeekV3ReasoningWithThinkingParser",
+ "glm4_moe_reasoning_parser",
+ "Glm4MoeReasoningParser",
),
"openai_gptoss": (
"gptoss_reasoning_parser",
diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
new file mode 100644
index 000000000000..edf19b13bb07
--- /dev/null
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import TYPE_CHECKING
+
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+
+if TYPE_CHECKING:
+ from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+ from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+
+
+class Glm4MoeReasoningParser(BaseThinkingReasoningParser):
+ """
+ Reasoning parser for GLM-4 MoE models.
+
+    Unlike DeepSeek R1, GLM-4 injects <think> via the chat template rather
+    than generating it. When the model output lacks </think>, the entire
+ output is treated as *content* (not reasoning), because the absence of
+ the end tag means the model chose not to reason.
+ """
+
+ @property
+ def start_token(self) -> str:
+        return "<think>"
+
+ @property
+ def end_token(self) -> str:
+        return "</think>"
+
+ def extract_reasoning(
+ self, model_output: str, request: "ChatCompletionRequest | ResponsesRequest"
+ ) -> tuple[str | None, str | None]:
+ if self.end_token not in model_output:
+ # No closing tag — model didn't produce reasoning.
+ # Return the full original output as content.
+ return None, model_output
+
+    # Normal case: <think>reasoning</think>content
+ parts = model_output.partition(self.start_token)
+ after_start = parts[2] if parts[1] else parts[0]
+ reasoning, _, content = after_start.partition(self.end_token)
+
+    # Normalize empty strings to None -- <think></think> means
+ # the model chose not to reason, not that reasoning is "".
+ return reasoning or None, content or None
diff --git a/vllm/reasoning/seedoss_reasoning_parser.py b/vllm/reasoning/seedoss_reasoning_parser.py
index d3d4d8ec0749..e13e8994258d 100644
--- a/vllm/reasoning/seedoss_reasoning_parser.py
+++ b/vllm/reasoning/seedoss_reasoning_parser.py
@@ -1,7 +1,9 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Sequence
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
@@ -25,3 +27,43 @@ def start_token(self) -> str:
def end_token(self) -> str:
"""The token that ends reasoning content."""
        return "</seed:think>"
+
+ def extract_reasoning_streaming(
+ self,
+ previous_text: str,
+ current_text: str,
+ delta_text: str,
+ previous_token_ids: Sequence[int],
+ current_token_ids: Sequence[int],
+ delta_token_ids: Sequence[int],
+ ) -> DeltaMessage | None:
+ # Like R1, SeedOSS may not emit the start token (it's in the chat
+ # template). When neither previous nor delta contains the start
+ # token, treat text as reasoning unless the end token has been seen.
+ ret = super().extract_reasoning_streaming(
+ previous_text,
+ current_text,
+ delta_text,
+ previous_token_ids,
+ current_token_ids,
+ delta_token_ids,
+ )
+ if (
+ ret is not None
+ and self.start_token_id not in previous_token_ids
+ and self.start_token_id not in delta_token_ids
+ ):
+ if self.end_token_id in delta_token_ids:
+ end_index = delta_text.find(self.end_token)
+ reasoning = delta_text[:end_index]
+ content = delta_text[end_index + len(self.end_token) :]
+ return DeltaMessage(
+ reasoning=reasoning,
+ content=content if content else None,
+ )
+ elif self.end_token_id in previous_token_ids:
+ return DeltaMessage(content=delta_text)
+ else:
+ return DeltaMessage(reasoning=delta_text)
+
+ return ret