diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py index c88aa4d2faa7..74558fd9b3ec 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py @@ -200,8 +200,7 @@ def __init__( self.quant_config = quant_config self.quant_method.create_weights( layer=self, - num_experts=self.num_experts, - num_local_experts=self.num_local_experts, + num_experts=self.num_local_experts, hidden_size=hidden_size, # FIXME: figure out which intermediate_size to use intermediate_size=self.intermediate_size_per_partition, diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 64df434ae149..fca0ee38b9c0 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -752,7 +752,6 @@ def create_weights( self, layer: torch.nn.Module, num_experts: int, - num_local_experts: int, hidden_size: int, intermediate_size_per_partition: int, params_dtype: torch.dtype, @@ -766,7 +765,7 @@ def create_weights( # TODO(ch-wan): check if this is needed layer.num_experts = num_experts - layer.num_local_experts = num_local_experts + layer.num_local_experts = num_experts layer.intermediate_size_per_partition = intermediate_size_per_partition layer.params_dtype = params_dtype layer.quant_config = self.quant_config