diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 2be1c045a0bf..7c898a0144b0 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -2042,6 +2042,13 @@ def _handle_mamba_radix_cache( == 0 ), f"For SSM models with extra buffer, either FLA_CHUNK_SIZE or page_size must be divisible by the other, got {FLA_CHUNK_SIZE=}, {self.page_size=}" elif not self.disable_radix_cache: # no_buffer + if self.page_size is not None and self.page_size != 1: + logger.warning( + f"{model_arch} with radix cache requires page_size=1 in the current " + f"Mamba scheduling mode (no_buffer), but got {self.page_size}. " + "Automatically setting page_size=1." + ) + self.page_size = 1 if self.speculative_algorithm is None: logger.warning( "Disabling overlap schedule since mamba no_buffer is not compatible with "