Merged
Changes from 3 commits
python/sglang/srt/model_loader/loader.py (10 additions, 1 deletion)
@@ -145,11 +145,20 @@ def _initialize_model(
     load_config: LoadConfig,
 ) -> nn.Module:
     """Initialize a model with the given configurations."""
-    model_class, _ = get_model_architecture(model_config)
+    model_class, model_arch = get_model_architecture(model_config)
     packed_modules_mapping = getattr(model_class, "packed_modules_mapping", {})
     quant_config = _get_quantization_config(
         model_config, load_config, packed_modules_mapping
     )
+    if (
+        quant_config is not None
+        and quant_config.get_name() == "modelopt_fp4"
+        and model_arch == "DeepseekV3ForCausalLMNextN"
+    ):
+        logger.warning(
+            "Overriding DeepseekV3ForCausalLMNextN quant config for modelopt_fp4 Deepseek model."
+        )
+        quant_config = None
     return model_class(
         config=model_config.hf_config,
         quant_config=quant_config,
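For reference, the override above only fires for modelopt_fp4 quantization combined with the DeepseekV3ForCausalLMNextN architecture; every other combination keeps its quant config. Below is a minimal stand-alone sketch of that decision, where should_drop_quant_config is a hypothetical helper written for illustration and not part of this PR:

# Hypothetical helper mirroring the condition added in _initialize_model.
def should_drop_quant_config(quant_name, model_arch):
    # The NextN (MTP) draft module of a modelopt_fp4 Deepseek checkpoint is
    # built without a quant config (the loader sets quant_config to None).
    return quant_name == "modelopt_fp4" and model_arch == "DeepseekV3ForCausalLMNextN"

assert should_drop_quant_config("modelopt_fp4", "DeepseekV3ForCausalLMNextN")
assert not should_drop_quant_config("modelopt_fp4", "DeepseekV3ForCausalLM")
assert not should_drop_quant_config("awq", "DeepseekV3ForCausalLMNextN")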
python/sglang/srt/models/deepseek_v2.py (8 additions, 1 deletion)
@@ -2147,7 +2147,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=False):
                     q_a_proj_weight = cached_a_proj[q_a_proj_name]
                     kv_a_proj_weight = cached_a_proj[kv_a_proj_name]
                     cat_dim = 0
-                    if (
+                    if self.quant_config is not None and (
                         self.quant_config.get_name() == "awq"
                         or self.quant_config.get_name() == "moe_wna16"
                     ):
Comment on lines +2204 to 2207 (Contributor, severity: medium):
Good addition of the self.quant_config is not None check. This prevents a potential AttributeError when self.quant_config is None, which is now possible due to the changes in python/sglang/srt/model_loader/loader.py for the Deepseek R1 FP4 model.

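To make the reviewer's point concrete, here is a minimal, runnable sketch of the short-circuit guard; _FakeQuantConfig is a hypothetical stand-in for a real quantization config object:

class _FakeQuantConfig:
    """Hypothetical stand-in for a quantization config object."""

    def get_name(self):
        return "awq"


for quant_config in (None, _FakeQuantConfig()):
    cat_dim = 0
    # Without the `is not None` check, the None case would raise
    # AttributeError: 'NoneType' object has no attribute 'get_name'.
    if quant_config is not None and (
        quant_config.get_name() == "awq"
        or quant_config.get_name() == "moe_wna16"
    ):
        cat_dim = 1
    print(type(quant_config).__name__, cat_dim)  # NoneType 0, then _FakeQuantConfig 1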
@@ -2178,6 +2178,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=False):
                 for scale in ["k_scale", "v_scale"]:
                     if scale in name:
                         name = name.replace(f"{scale[0]}_proj", "attn_mqa")
+                        break
Contributor comment (severity: medium):
Adding break here is a good optimization. It ensures the loop terminates as soon as the scale is found and name is updated, avoiding unnecessary iterations.

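As a quick illustration of the rename-and-break flow, here is a sketch using a hypothetical checkpoint weight name (the real keys depend on the checkpoint):

# Hypothetical example key; real modelopt checkpoint keys may differ.
name = "model.layers.0.self_attn.k_proj.k_scale"

for scale in ["k_scale", "v_scale"]:
    if scale in name:
        # "k_scale"[0] == "k", so "k_proj" is rewritten to "attn_mqa".
        name = name.replace(f"{scale[0]}_proj", "attn_mqa")
        break  # stop as soon as the matching scale has been handled

print(name)  # model.layers.0.self_attn.attn_mqa.k_scale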
+                if name not in params_dict:
+                    # modelopt ckpt contains not needed weights for MTP module:
+                    # model.decoder.self_attn.attn_mqa.v_scale and
+                    # model.decoder.self_attn.attn_mqa.k_scale
+                    logger.warning(f"{name} not found in params_dict.")
+                    continue
Comment on lines +2236 to +2241 (Contributor, severity: medium):
Checking that name is in params_dict before accessing it is a robust addition. The warning and the accompanying comment make it clear why certain weights may be skipped, which helps with debugging and with understanding model-loading behavior, especially for modelopt checkpoints that can contain extra weights not needed by specific modules such as MTP.

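A small sketch of the skip-with-warning pattern discussed above; params_dict and "some.expected.weight" are hypothetical stand-ins, while the k_scale/v_scale keys are the ones named in the diff comment:

import logging

logger = logging.getLogger(__name__)

# Hypothetical parameter dict; a real one would typically come from
# dict(self.named_parameters()) inside load_weights.
params_dict = {"some.expected.weight": object()}
checkpoint_names = [
    "some.expected.weight",
    "model.decoder.self_attn.attn_mqa.k_scale",  # present in modelopt ckpt, not needed by MTP
    "model.decoder.self_attn.attn_mqa.v_scale",
]

for name in checkpoint_names:
    if name not in params_dict:
        logger.warning(f"{name} not found in params_dict.")
        continue  # skip extra checkpoint weights instead of raising KeyError
    param = params_dict[name]  # safe: only reached when name exists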
                 param = params_dict[name]
                 weight_loader = getattr(
                     param, "weight_loader", default_weight_loader