diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 097d0bc01891..77aedcc471fc 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -937,7 +937,7 @@ def process_weights_after_loading(self, layer: FusedMoE) -> None: w13, w13_scale, shard_size=layer.intermediate_size_per_partition, - num_experts=layer.num_local_experts, + num_experts=layer.local_num_experts, is_act_and_mul=self.moe.is_act_and_mul, )