diff --git a/tests/reasoning/test_glm4_moe_reasoning_parser.py b/tests/reasoning/test_glm4_moe_reasoning_parser.py
index 6f7827e5b827..6cfc4d8a2d75 100644
--- a/tests/reasoning/test_glm4_moe_reasoning_parser.py
+++ b/tests/reasoning/test_glm4_moe_reasoning_parser.py
@@ -59,6 +59,20 @@ def glm45_tokenizer():
     "content": "This is the rest\nThat",
     "is_reasoning_end": True,
 }
+EMPTY_THINK_BLOCK = {
+    "output": "<think></think>This is the rest",
+    "reasoning": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+
+EMPTY_THINK_BLOCK_NO_CONTENT = {
+    "output": "<think></think>",
+    "reasoning": None,
+    "content": None,
+    "is_reasoning_end": True,
+}
+
 ONLY_OPEN_TAG = {
     "output": "<think>This is a reasoning section",
     "reasoning": None,
@@ -114,6 +128,26 @@ def glm45_tokenizer():
             MULTILINE_REASONING,
             id="multiline_reasoning_stream",
         ),
+        pytest.param(
+            False,
+            EMPTY_THINK_BLOCK,
+            id="empty_think_block",
+        ),
+        pytest.param(
+            True,
+            EMPTY_THINK_BLOCK,
+            id="empty_think_block_stream",
+        ),
+        pytest.param(
+            False,
+            EMPTY_THINK_BLOCK_NO_CONTENT,
+            id="empty_think_block_no_content",
+        ),
+        pytest.param(
+            True,
+            EMPTY_THINK_BLOCK_NO_CONTENT,
+            id="empty_think_block_no_content_stream",
+        ),
         pytest.param(
             False,
             ONLY_OPEN_TAG,
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index 8c78db6f1878..ccfe9fd176b9 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -33,8 +33,8 @@
         "Ernie45ReasoningParser",
     ),
     "glm45": (
-        "deepseek_v3_reasoning_parser",
-        "DeepSeekV3ReasoningWithThinkingParser",
+        "glm4_moe_reasoning_parser",
+        "Glm4MoeReasoningParser",
     ),
     "openai_gptoss": (
         "gptoss_reasoning_parser",
diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
new file mode 100644
index 000000000000..edf19b13bb07
--- /dev/null
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import TYPE_CHECKING
+
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+
+if TYPE_CHECKING:
+    from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+    from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
+
+
+class Glm4MoeReasoningParser(BaseThinkingReasoningParser):
+    """
+    Reasoning parser for GLM-4 MoE models.
+
+    Unlike DeepSeek R1, GLM-4 injects <think> via the chat template rather
+    than generating it. When the model output lacks </think>, the entire
+    output is treated as *content* (not reasoning), because the absence of
+    the end tag means the model chose not to reason.
+    """
+
+    @property
+    def start_token(self) -> str:
+        return "<think>"
+
+    @property
+    def end_token(self) -> str:
+        return "</think>"
+
+    def extract_reasoning(
+        self, model_output: str, request: "ChatCompletionRequest | ResponsesRequest"
+    ) -> tuple[str | None, str | None]:
+        if self.end_token not in model_output:
+            # No closing tag — model didn't produce reasoning.
+            # Return the full original output as content.
+            return None, model_output
+
+        # Normal case: <think>reasoning</think>content
+        parts = model_output.partition(self.start_token)
+        after_start = parts[2] if parts[1] else parts[0]
+        reasoning, _, content = after_start.partition(self.end_token)
+
+        # Normalize empty strings to None -- <think></think> means
+        # the model chose not to reason, not that reasoning is "".
+        return reasoning or None, content or None
diff --git a/vllm/reasoning/seedoss_reasoning_parser.py b/vllm/reasoning/seedoss_reasoning_parser.py
index d3d4d8ec0749..e13e8994258d 100644
--- a/vllm/reasoning/seedoss_reasoning_parser.py
+++ b/vllm/reasoning/seedoss_reasoning_parser.py
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from collections.abc import Sequence
+
+from vllm.entrypoints.openai.engine.protocol import DeltaMessage
 from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
@@ -25,3 +27,43 @@ def start_token(self) -> str:
     def end_token(self) -> str:
         """The token that ends reasoning content."""
         return "</seed:think>"
+
+    def extract_reasoning_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+    ) -> DeltaMessage | None:
+        # Like R1, SeedOSS may not emit the start token (it's in the chat
+        # template). When neither previous nor delta contains the start
+        # token, treat text as reasoning unless the end token has been seen.
+        ret = super().extract_reasoning_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+        )
+        if (
+            ret is not None
+            and self.start_token_id not in previous_token_ids
+            and self.start_token_id not in delta_token_ids
+        ):
+            if self.end_token_id in delta_token_ids:
+                end_index = delta_text.find(self.end_token)
+                reasoning = delta_text[:end_index]
+                content = delta_text[end_index + len(self.end_token) :]
+                return DeltaMessage(
+                    reasoning=reasoning,
+                    content=content if content else None,
+                )
+            elif self.end_token_id in previous_token_ids:
+                return DeltaMessage(content=delta_text)
+            else:
+                return DeltaMessage(reasoning=delta_text)
+
+        return ret