Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 25 additions & 24 deletions python/sglang/srt/model_executor/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ def __init__(

# Model-specific adjustment
self.model_specific_adjustment()
self.check_quantized_moe_compatibility()

# Set the global server_args in the scheduler process
set_global_server_args_for_scheduler(server_args)
Expand Down Expand Up @@ -553,34 +554,34 @@ def model_specific_adjustment(self):
if not server_args.disable_chunked_prefix_cache:
log_info_on_rank0(logger, "Chunked prefix cache is turned on.")

def check_quantized_moe_compatibility(self):
    """Validate that block-quantized MoE weights can be evenly sharded.

    For models whose HF ``quantization_config`` declares a
    ``weight_block_size`` (block-wise quantization, e.g. FP8), each expert's
    intermediate dimension must split evenly across the MoE tensor-parallel
    ranks, and each resulting shard must be a whole number of quantization
    blocks. Otherwise weight loading would slice through a quantization
    block, so we fail fast with an actionable error.

    Reads (no mutation): ``self.model_config``, ``self.tp_size``,
    ``self.moe_ep_size``.

    Raises:
        ValueError: if ``tp_size`` is not divisible by ``moe_ep_size``, if
            ``moe_intermediate_size`` is not divisible by the derived
            ``moe_tp_size``, or if the per-rank shard is not a multiple of
            the quantization block size.
    """
    quantization_config = getattr(
        self.model_config.hf_config, "quantization_config", None
    )
    # Only block-quantized checkpoints carry "weight_block_size"; anything
    # else has no per-block shard-alignment constraint to enforce.
    if quantization_config is None or "weight_block_size" not in quantization_config:
        return

    # weight_block_size is [block_n, block_k]; the MoE intermediate (N)
    # dimension is the one sharded across moe_tp ranks.
    weight_block_size_n = quantization_config["weight_block_size"][0]

    if self.tp_size % self.moe_ep_size != 0:
        raise ValueError(
            f"tp_size {self.tp_size} must be divisible by ep_size {self.moe_ep_size}"
        )
    # Tensor parallelism left over after expert parallelism is applied.
    moe_tp_size = self.tp_size // self.moe_ep_size

    # NOTE(review): assumes hf_text_config defines moe_intermediate_size
    # whenever weight_block_size is present — confirm for dense quantized
    # models that reach this path.
    moe_intermediate_size = self.model_config.hf_text_config.moe_intermediate_size
    if moe_intermediate_size % moe_tp_size != 0:
        raise ValueError(
            f"moe_intermediate_size {moe_intermediate_size} must be divisible by moe_tp_size ({moe_tp_size}) which is tp_size ({self.tp_size}) divided by moe_ep_size ({self.moe_ep_size})."
        )

    # Each rank's shard of the intermediate dim must hold whole blocks.
    if (moe_intermediate_size // moe_tp_size) % weight_block_size_n != 0:
        raise ValueError(
            f"For quantized MoE models, please make sure ({moe_intermediate_size=} / {moe_tp_size=}) % {weight_block_size_n=} == 0 "
            f"where moe_tp_size is equal to tp_size ({self.tp_size}) divided by ep_size ({self.moe_ep_size}). "
            f"You can fix this by setting arguments `--tp` and `--ep` correctly."
        )

def init_torch_distributed(self):
logger.info("Init torch distributed begin.")
Expand Down
Loading