diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 2b1c3c04d6cf..94cdae8983ed 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -2704,6 +2704,12 @@ def __init__( self.config = config rope_theta = getattr(config, "rope_theta", 10000) rope_scaling = getattr(config, "rope_scaling", None) + if rope_scaling is not None: + # In transformers 5.0.0rc0+, rope_theta and rope_type are also included in rope_scaling. + # Therefore, if rope_scaling contains only these two keys, + # it effectively means there are no special rope_scaling parameters. + if set(rope_scaling.keys()) <= {"rope_theta", "rope_type"}: + rope_scaling = None max_position_embeddings = getattr(config, "max_position_embeddings", 8192) self.speculative_algorithm = SpeculativeAlgorithm.from_string( get_global_server_args().speculative_algorithm