diff --git a/python/pyproject.toml b/python/pyproject.toml index f3c9b6f18734..9bd53c0b0ae5 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "torchvision", "torchao==0.9.0", "tqdm", - "transformers==5.0.0rc0", + "transformers==4.57.1", "uvicorn", "uvloop", "xgrammar==0.1.27", diff --git a/python/pyproject_cpu.toml b/python/pyproject_cpu.toml index 701e08a3ad0b..282d2ff576dd 100644 --- a/python/pyproject_cpu.toml +++ b/python/pyproject_cpu.toml @@ -59,7 +59,7 @@ dependencies = [ "timm==1.0.16", "torchao==0.9.0", "tqdm", - "transformers==5.0.0rc0", + "transformers==4.57.1", "uvicorn", "uvloop", "xgrammar==0.1.27", diff --git a/python/pyproject_other.toml b/python/pyproject_other.toml index 5ea1194c3824..c3160a6bc2a4 100755 --- a/python/pyproject_other.toml +++ b/python/pyproject_other.toml @@ -59,7 +59,7 @@ runtime_common = [ "timm==1.0.16", "torchao==0.9.0", "tqdm", - "transformers==5.0.0rc0", + "transformers==4.57.1", "uvicorn", "uvloop", "xgrammar==0.1.27", diff --git a/python/pyproject_xpu.toml b/python/pyproject_xpu.toml index 31170d8349fb..b094bd22afd1 100644 --- a/python/pyproject_xpu.toml +++ b/python/pyproject_xpu.toml @@ -63,7 +63,7 @@ dependencies = [ "timm==1.0.16", "torchao==0.9.0", "tqdm", - "transformers==5.0.0rc0", + "transformers==4.57.1", "uvicorn", "uvloop", # "xgrammar==0.1.24", , xgrammar depends on CUDA PyTorch and Triton only diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py index bb08df8efba2..471d0b3eea88 100644 --- a/python/sglang/srt/configs/model_config.py +++ b/python/sglang/srt/configs/model_config.py @@ -80,16 +80,6 @@ def get_nsa_index_n_heads(config: PretrainedConfig) -> int: return config.index_n_heads -def handle_rope_parameters(config: PretrainedConfig): - if hasattr(config, "rope_scaling"): - rope_scaling = config.rope_scaling - if isinstance(rope_scaling, dict): - for k, v in rope_scaling.items(): - if not hasattr(config, k): - setattr(config, k, v) - return - - class ModelConfig: def __init__( self, @@ -137,8 +127,6 @@ def __init__( **kwargs, ) self.hf_text_config = get_hf_text_config(self.hf_config) - handle_rope_parameters(self.hf_text_config) - handle_rope_parameters(self.hf_config) self.hf_generation_config = get_generation_config( self.model_path, trust_remote_code=trust_remote_code, @@ -370,10 +358,9 @@ def _derive_model_shapes(self): mscale_all_dim = self.hf_config.rope_scaling.get( "mscale_all_dim", False ) - scaling_factor = self.hf_config.rope_scaling.get("factor") - if scaling_factor is not None: - mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim)) - self.scaling = self.scaling * mscale * mscale + scaling_factor = self.hf_config.rope_scaling["factor"] + mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim)) + self.scaling = self.scaling * mscale * mscale elif "MiniCPM3ForCausalLM" in self.hf_config.architectures: self.head_dim = 128 diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 94cdae8983ed..2b1c3c04d6cf 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -2704,12 +2704,6 @@ def __init__( self.config = config rope_theta = getattr(config, "rope_theta", 10000) rope_scaling = getattr(config, "rope_scaling", None) - if rope_scaling is not None: - # In transformers 5.0.0rc0+, rope_theta and rope_type are also included in rope_scaling. - # Therefore, if rope_scaling contains only these two keys, - # it effectively means there are no special rope_scaling parameters. - if set(rope_scaling.keys()) <= {"rope_theta", "rope_type"}: - rope_scaling = None max_position_embeddings = getattr(config, "max_position_embeddings", 8192) self.speculative_algorithm = SpeculativeAlgorithm.from_string( get_global_server_args().speculative_algorithm diff --git a/test/srt/test_pp_single_node.py b/test/srt/test_pp_single_node.py index 037cdb2e0884..eabe17daf54b 100644 --- a/test/srt/test_pp_single_node.py +++ b/test/srt/test_pp_single_node.py @@ -319,6 +319,9 @@ def test_chunked_prefill_with_small_bs(self): ) +@unittest.skipIf( + is_in_ci(), "Skipping GLM41V PP accuracy test before it gets more stable" +) class TestGLM41VPPAccuracy(unittest.TestCase): @classmethod def setUpClass(cls):