Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ dependencies = [
"torchvision",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_cpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_other.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ runtime_common = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_xpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
# "xgrammar==0.1.24",  # xgrammar depends on CUDA PyTorch and Triton only
Expand Down
19 changes: 3 additions & 16 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,6 @@ def get_nsa_index_n_heads(config: PretrainedConfig) -> int:
return config.index_n_heads


def handle_rope_parameters(config: PretrainedConfig):
if hasattr(config, "rope_scaling"):
rope_scaling = config.rope_scaling
if isinstance(rope_scaling, dict):
for k, v in rope_scaling.items():
if not hasattr(config, k):
setattr(config, k, v)
return


class ModelConfig:
def __init__(
self,
Expand Down Expand Up @@ -137,8 +127,6 @@ def __init__(
**kwargs,
)
self.hf_text_config = get_hf_text_config(self.hf_config)
handle_rope_parameters(self.hf_text_config)
handle_rope_parameters(self.hf_config)
self.hf_generation_config = get_generation_config(
self.model_path,
trust_remote_code=trust_remote_code,
Expand Down Expand Up @@ -370,10 +358,9 @@ def _derive_model_shapes(self):
mscale_all_dim = self.hf_config.rope_scaling.get(
"mscale_all_dim", False
)
scaling_factor = self.hf_config.rope_scaling.get("factor")
if scaling_factor is not None:
mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
self.scaling = self.scaling * mscale * mscale
scaling_factor = self.hf_config.rope_scaling["factor"]
mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
self.scaling = self.scaling * mscale * mscale

elif "MiniCPM3ForCausalLM" in self.hf_config.architectures:
self.head_dim = 128
Expand Down
6 changes: 0 additions & 6 deletions python/sglang/srt/models/deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2704,12 +2704,6 @@ def __init__(
self.config = config
rope_theta = getattr(config, "rope_theta", 10000)
rope_scaling = getattr(config, "rope_scaling", None)
if rope_scaling is not None:
# In transformers 5.0.0rc0+, rope_theta and rope_type are also included in rope_scaling.
# Therefore, if rope_scaling contains only these two keys,
# it effectively means there are no special rope_scaling parameters.
if set(rope_scaling.keys()) <= {"rope_theta", "rope_type"}:
rope_scaling = None
max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
self.speculative_algorithm = SpeculativeAlgorithm.from_string(
get_global_server_args().speculative_algorithm
Expand Down
3 changes: 3 additions & 0 deletions test/srt/test_pp_single_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ def test_chunked_prefill_with_small_bs(self):
)


@unittest.skipIf(
is_in_ci(), "Skipping GLM41V PP accuracy test until it becomes more stable"
)
class TestGLM41VPPAccuracy(unittest.TestCase):
@classmethod
def setUpClass(cls):
Expand Down
Loading