-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Correctly abort the failed grammar requests & Improve the handling of abort #6803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| ) | ||
|
|
||
| from sglang.srt.constrained.base_grammar_backend import ( | ||
| INVALID_GRAMMAR_OBJ, | ||
| BaseGrammarBackend, | ||
| BaseGrammarObject, | ||
| ) | ||
|
|
@@ -152,10 +153,11 @@ def __init__( | |
| ): | ||
| super().__init__() | ||
|
|
||
| tokenizer_info = TokenizerInfo.from_huggingface( | ||
| tokenizer, vocab_size=vocab_size | ||
| ) | ||
| override_stop_tokens = None | ||
| if True: | ||
| tokenizer_info = TokenizerInfo.from_huggingface( | ||
| tokenizer, vocab_size=vocab_size | ||
| ) | ||
| override_stop_tokens = None | ||
|
|
||
| self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info) | ||
| self.vocab_size = vocab_size | ||
|
|
@@ -178,25 +180,26 @@ def dispatch_json(self, key_string: str) -> Optional[XGrammarGrammar]: | |
| ctx = self.grammar_compiler.compile_builtin_json_grammar() | ||
| else: | ||
| ctx = self.grammar_compiler.compile_json_schema(schema=key_string) | ||
| except RuntimeError as e: | ||
| logging.warning(f"Skip invalid json_schema: json_schema={key_string}, {e=}") | ||
| return None | ||
|
|
||
| except (RuntimeError, json.decoder.JSONDecodeError) as e: | ||
| logging.error(f"Hit invalid json_schema: {key_string=}, {e=}") | ||
| return INVALID_GRAMMAR_OBJ | ||
| return self._from_context(ctx, key_string) | ||
|
|
||
| def dispatch_ebnf(self, key_string: str) -> Optional[XGrammarGrammar]: | ||
| try: | ||
| ctx = self.grammar_compiler.compile_grammar(key_string) | ||
| except RuntimeError as e: | ||
| logging.warning(f"Skip invalid ebnf: ebnf={key_string}, {e=}") | ||
| return None | ||
| logging.error(f"Hit invalid ebnf: {key_string=}, {e=}") | ||
| return INVALID_GRAMMAR_OBJ | ||
| return self._from_context(ctx, key_string) | ||
|
|
||
| def dispatch_regex(self, key_string: str) -> Optional[XGrammarGrammar]: | ||
| try: | ||
| ctx = self.grammar_compiler.compile_regex(key_string) | ||
| except RuntimeError as e: | ||
| logging.warning(f"Skip invalid regex: regex={key_string}, {e=}") | ||
| return None | ||
| logging.error(f"Hit invalid regex: {key_string=}, {e=}") | ||
| return INVALID_GRAMMAR_OBJ | ||
| return self._from_context(ctx, key_string) | ||
|
|
||
| def dispatch_structural_tag(self, key_string: str) -> Optional[XGrammarGrammar]: | ||
|
|
@@ -213,13 +216,10 @@ def dispatch_structural_tag(self, key_string: str) -> Optional[XGrammarGrammar]: | |
| ctx = self.grammar_compiler.compile_structural_tag( | ||
| tags, structural_tag["triggers"] | ||
| ) | ||
| except RuntimeError as e: | ||
| logging.warning( | ||
| f"Skip invalid structural_tag: structural_tag={key_string}, {e=}" | ||
| ) | ||
| return None | ||
| except (RuntimeError, json.decoder.JSONDecodeError) as e: | ||
| logging.error(f"Hit invalid structural_tag: {key_string=}, {e=}") | ||
| return INVALID_GRAMMAR_OBJ | ||
| return self._from_context(ctx, key_string) | ||
|
|
||
| def reset(self): | ||
| if self.grammar_compiler: | ||
| self.grammar_compiler.clear_cache() | ||
| self.grammar_compiler.clear_cache() | ||
|
Comment on lines
224
to
+225
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| import logging | ||
| import threading | ||
| from enum import Enum, auto | ||
| from http import HTTPStatus | ||
| from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union | ||
|
|
||
| import numpy as np | ||
|
|
@@ -51,6 +52,7 @@ | |
| from sglang.srt.disaggregation.decode_schedule_batch_mixin import ( | ||
| ScheduleBatchDisaggregationDecodeMixin, | ||
| ) | ||
| from sglang.srt.distributed.parallel_state import get_tensor_model_parallel_rank | ||
| from sglang.srt.layers.multimodal import gpu_tensor_hash | ||
| from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache | ||
| from sglang.srt.mem_cache.chunk_cache import ChunkCache | ||
|
|
@@ -60,7 +62,7 @@ | |
| from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo | ||
| from sglang.srt.sampling.sampling_params import SamplingParams | ||
| from sglang.srt.server_args import ServerArgs | ||
| from sglang.srt.utils import flatten_nested_list, get_compiler_backend, support_triton | ||
| from sglang.srt.utils import flatten_nested_list, support_triton | ||
|
|
||
| if TYPE_CHECKING: | ||
| from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput | ||
|
|
@@ -771,6 +773,16 @@ def log_time_stats(self): | |
| logger.info(f"{prefix}: {self.time_stats}") | ||
| self.has_log_time_stats = True | ||
|
|
||
| def set_finish_with_abort(self, error_msg: str): | ||
| if get_tensor_model_parallel_rank() == 0: | ||
| logger.error(f"{error_msg}, {self.rid=}") | ||
| self.multimodal_inputs = None | ||
| self.grammar = None | ||
| self.origin_input_ids = [0] # set it to one token to skip the long prefill | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The line |
||
| self.finished_reason = FINISH_ABORT( | ||
| error_msg, HTTPStatus.BAD_REQUEST, "BadRequestError" | ||
| ) | ||
|
|
||
| def __repr__(self): | ||
| return ( | ||
| f"Req(rid={self.rid}, " | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This `if True:` block appears to be redundant. Could it be removed to simplify the code?