diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index a0272029aa2d..324d5f314790 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -538,6 +538,7 @@ def initialize(self, min_per_gpu_memory: float): draft_model_config = ModelConfig.from_server_args( server_args, model_path=(server_args.speculative_draft_model_path), + model_revision=server_args.speculative_draft_model_revision, is_draft_model=True, ) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 792201428cc3..5720ae3fa0be 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1610,6 +1610,12 @@ def _handle_hicache(self): ) def _handle_speculative_decoding(self): + if ( + self.speculative_draft_model_path is not None + and self.speculative_draft_model_revision is None + ): + self.speculative_draft_model_revision = "main" + if self.speculative_algorithm == "NEXTN": self.speculative_algorithm = "EAGLE" @@ -1658,6 +1664,7 @@ def _handle_speculative_decoding(self): ]: if self.speculative_draft_model_path is None: self.speculative_draft_model_path = self.model_path + self.speculative_draft_model_revision = self.revision else: logger.warning( "DeepSeek MTP does not require setting speculative_draft_model_path."