diff --git a/python/sglang/srt/constrained/base_grammar_backend.py b/python/sglang/srt/constrained/base_grammar_backend.py
index e0738db9c88b..3704e6d0d200 100644
--- a/python/sglang/srt/constrained/base_grammar_backend.py
+++ b/python/sglang/srt/constrained/base_grammar_backend.py
@@ -204,6 +204,7 @@ def create_grammar_backend(
     tokenizer,
     vocab_size: int,
     eos_token_ids: Optional[set] = None,
+    think_end_id: Optional[int] = None,
 ) -> Optional[BaseGrammarBackend]:
     name = server_args.grammar_backend
 
@@ -258,13 +259,11 @@ def create_grammar_backend(
     else:
         raise ValueError(f"Invalid grammar backend: {name}")
 
-    if server_args.reasoning_parser and hasattr(tokenizer, "think_end_id"):
+    if server_args.reasoning_parser and think_end_id is not None:
         from sglang.srt.constrained.reasoner_grammar_backend import (
             ReasonerGrammarBackend,
         )
 
-        grammar_backend = ReasonerGrammarBackend(
-            grammar_backend, tokenizer.think_end_id
-        )
+        grammar_backend = ReasonerGrammarBackend(grammar_backend, think_end_id)
 
     return grammar_backend
diff --git a/python/sglang/srt/constrained/grammar_manager.py b/python/sglang/srt/constrained/grammar_manager.py
index 829675ec5afd..8b1e796587f6 100644
--- a/python/sglang/srt/constrained/grammar_manager.py
+++ b/python/sglang/srt/constrained/grammar_manager.py
@@ -32,6 +32,7 @@ def __init__(self, scheduler: Scheduler):
                 scheduler.tokenizer,
                 scheduler.model_config.vocab_size,
                 scheduler.model_config.hf_eos_token_id,
+                think_end_id=scheduler.model_config.think_end_id,
             )
         else:
             self.grammar_backend = None
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index ceb79f4a1adb..9b9811662bcd 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -549,13 +549,9 @@ def init_tokenizer(self):
             reasoning_parser = ReasoningParser(
                 model_type=self.server_args.reasoning_parser, stream_reasoning=False
             )
-            self.tokenizer.think_end_id = self.tokenizer.encode(
+            self.model_config.think_end_id = self.tokenizer.encode(
                 reasoning_parser.detector.think_end_token, add_special_tokens=False
             )[0]
-            self._think_end_id = self.tokenizer.think_end_id
-            self.model_config.think_end_id = self._think_end_id
-        else:
-            self._think_end_id = None
 
     def init_mamba_backend(self) -> None:
         initialize_mamba_selective_state_update_backend(self.server_args)
diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
index 8e7639df1335..e135dfb92680 100644
--- a/python/sglang/srt/managers/scheduler_output_processor_mixin.py
+++ b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
@@ -558,8 +558,9 @@ def _handle_finished_req(
     def _maybe_update_reasoning_tokens(
         self: Scheduler, req: Req, next_token_id: Union[int, List[int]]
     ):
-        if req.require_reasoning and self._think_end_id is not None:
-            req.update_reasoning_tokens(next_token_id, self._think_end_id)
+        think_end_id = self.model_config.think_end_id
+        if req.require_reasoning and think_end_id is not None:
+            req.update_reasoning_tokens(next_token_id, think_end_id)
 
     def _mamba_prefix_cache_update(
         self: Scheduler,