7 changes: 3 additions & 4 deletions python/sglang/srt/constrained/base_grammar_backend.py
@@ -204,6 +204,7 @@ def create_grammar_backend(
    tokenizer,
    vocab_size: int,
    eos_token_ids: Optional[set] = None,
+    think_end_id: Optional[int] = None,
) -> Optional[BaseGrammarBackend]:
    name = server_args.grammar_backend

@@ -258,13 +259,11 @@ def create_grammar_backend(
    else:
        raise ValueError(f"Invalid grammar backend: {name}")

-    if server_args.reasoning_parser and hasattr(tokenizer, "think_end_id"):
+    if server_args.reasoning_parser and think_end_id is not None:
        from sglang.srt.constrained.reasoner_grammar_backend import (
            ReasonerGrammarBackend,
        )

-        grammar_backend = ReasonerGrammarBackend(
-            grammar_backend, tokenizer.think_end_id
-        )
+        grammar_backend = ReasonerGrammarBackend(grammar_backend, think_end_id)

    return grammar_backend
1 change: 1 addition & 0 deletions python/sglang/srt/constrained/grammar_manager.py
@@ -32,6 +32,7 @@ def __init__(self, scheduler: Scheduler):
                scheduler.tokenizer,
                scheduler.model_config.vocab_size,
                scheduler.model_config.hf_eos_token_id,
+                think_end_id=scheduler.model_config.think_end_id,
            )
        else:
            self.grammar_backend = None
6 changes: 1 addition & 5 deletions python/sglang/srt/managers/scheduler.py
@@ -549,13 +549,9 @@ def init_tokenizer(self):
            reasoning_parser = ReasoningParser(
                model_type=self.server_args.reasoning_parser, stream_reasoning=False
            )
-            self.tokenizer.think_end_id = self.tokenizer.encode(
+            self.model_config.think_end_id = self.tokenizer.encode(
                reasoning_parser.detector.think_end_token, add_special_tokens=False
            )[0]
Comment on lines +552 to 554
Contributor

medium

The current implementation assumes that self.tokenizer.encode will always return at least one token ID. If the tokenizer fails to encode the think_end_token (e.g., due to an empty string or a tokenizer-specific quirk), this will raise an IndexError. Additionally, if the token is split into multiple IDs, only the first one is captured, which might lead to incorrect reasoning detection later. Consider adding a safety check.

Suggested change
-            self.model_config.think_end_id = self.tokenizer.encode(
-                reasoning_parser.detector.think_end_token, add_special_tokens=False
-            )[0]
+            ids = self.tokenizer.encode(
+                reasoning_parser.detector.think_end_token, add_special_tokens=False
+            )
+            self.model_config.think_end_id = ids[0] if ids else None

-            self._think_end_id = self.tokenizer.think_end_id
-            self.model_config.think_end_id = self._think_end_id
-        else:
-            self._think_end_id = None

    def init_mamba_backend(self) -> None:
        initialize_mamba_selective_state_update_backend(self.server_args)
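
Following up on the review comment above, a minimal sketch of a more defensive lookup. The helper name resolve_think_end_id is hypothetical and not part of this PR; it only assumes a Hugging Face-style tokenizer.encode that returns a list of token IDs.

from typing import List, Optional


def resolve_think_end_id(tokenizer, think_end_token: str) -> Optional[int]:
    """Hypothetical helper: map the reasoning end token to a single ID, or None."""
    if not think_end_token:
        return None
    ids: List[int] = tokenizer.encode(think_end_token, add_special_tokens=False)
    if not ids:
        # Tokenizer could not encode the end-of-thinking token.
        return None
    # If the token splits into multiple IDs, only the first is usable as a
    # single end-of-thinking marker (the caveat raised in the review comment).
    return ids[0]

With such a helper, init_tokenizer could set self.model_config.think_end_id = resolve_think_end_id(self.tokenizer, reasoning_parser.detector.think_end_token), and the downstream "is not None" checks would behave as before.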
@@ -558,8 +558,9 @@ def _handle_finished_req(
def _maybe_update_reasoning_tokens(
    self: Scheduler, req: Req, next_token_id: Union[int, List[int]]
):
-    if req.require_reasoning and self._think_end_id is not None:
-        req.update_reasoning_tokens(next_token_id, self._think_end_id)
+    think_end_id = self.model_config.think_end_id
+    if req.require_reasoning and think_end_id is not None:
+        req.update_reasoning_tokens(next_token_id, think_end_id)

def _mamba_prefix_cache_update(
    self: Scheduler,
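
For context on the last hunk, a rough sketch of the per-request bookkeeping that a call like req.update_reasoning_tokens performs; the class and field names below are illustrative assumptions, not sglang's actual Req implementation.

from typing import List, Union


class ReqSketch:
    """Illustrative stand-in for the scheduler's Req; field names are assumptions."""

    def __init__(self, require_reasoning: bool = True):
        self.require_reasoning = require_reasoning
        self.in_reasoning = True       # still inside the thinking section
        self.num_reasoning_tokens = 0  # tokens emitted before the think-end token

    def update_reasoning_tokens(
        self, next_token_id: Union[int, List[int]], think_end_id: int
    ) -> None:
        token_ids = (
            next_token_id if isinstance(next_token_id, list) else [next_token_id]
        )
        for tid in token_ids:
            if not self.in_reasoning:
                break
            if tid == think_end_id:
                self.in_reasoning = False  # reasoning section ended here
            else:
                self.num_reasoning_tokens += 1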