Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ dependencies = [
"torchvision",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_cpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_other.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ runtime_common = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_xpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==5.0.0rc0",
"transformers==4.57.1",
"uvicorn",
"uvloop",
# "xgrammar==0.1.24",  # xgrammar depends on CUDA PyTorch and Triton only
Expand Down
19 changes: 3 additions & 16 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,6 @@ def get_nsa_index_n_heads(config: PretrainedConfig) -> int:
return config.index_n_heads


def handle_rope_parameters(config: PretrainedConfig):
if hasattr(config, "rope_scaling"):
rope_scaling = config.rope_scaling
if isinstance(rope_scaling, dict):
for k, v in rope_scaling.items():
if not hasattr(config, k):
setattr(config, k, v)
return


class ModelConfig:
def __init__(
self,
Expand Down Expand Up @@ -137,8 +127,6 @@ def __init__(
**kwargs,
)
self.hf_text_config = get_hf_text_config(self.hf_config)
handle_rope_parameters(self.hf_text_config)
handle_rope_parameters(self.hf_config)
self.hf_generation_config = get_generation_config(
self.model_path,
trust_remote_code=trust_remote_code,
Expand Down Expand Up @@ -370,10 +358,9 @@ def _derive_model_shapes(self):
mscale_all_dim = self.hf_config.rope_scaling.get(
"mscale_all_dim", False
)
scaling_factor = self.hf_config.rope_scaling.get("factor")
if scaling_factor is not None:
mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
self.scaling = self.scaling * mscale * mscale
scaling_factor = self.hf_config.rope_scaling["factor"]
mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
self.scaling = self.scaling * mscale * mscale

elif "MiniCPM3ForCausalLM" in self.hf_config.architectures:
self.head_dim = 128
Expand Down
6 changes: 0 additions & 6 deletions python/sglang/srt/models/deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2704,12 +2704,6 @@ def __init__(
self.config = config
rope_theta = getattr(config, "rope_theta", 10000)
rope_scaling = getattr(config, "rope_scaling", None)
if rope_scaling is not None:
# In transformers 5.0.0rc0+, rope_theta and rope_type are also included in rope_scaling.
# Therefore, if rope_scaling contains only these two keys,
# it effectively means there are no special rope_scaling parameters.
if set(rope_scaling.keys()) <= {"rope_theta", "rope_type"}:
rope_scaling = None
max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
self.speculative_algorithm = SpeculativeAlgorithm.from_string(
get_global_server_args().speculative_algorithm
Expand Down
3 changes: 3 additions & 0 deletions test/srt/test_pp_single_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ def test_chunked_prefill_with_small_bs(self):
)


@unittest.skipIf(
is_in_ci(), "Skipping GLM41V PP accuracy test until it becomes more stable"
)
class TestGLM41VPPAccuracy(unittest.TestCase):
@classmethod
def setUpClass(cls):
Expand Down
Loading