vllm-project · vllm-bot · Aug 3, 2025 · Aug 1, 2025 · Aug 2, 2025
diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
@@ -906,11 +906,13 @@ def _consolidate_qkv_weights(
     def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
         """Rename weights from ModelOpt llama4 fp8 checkpoints to vLLM
         format."""
-        if name.startswith("model."):
+        if name.startswith("model.") or name.startswith(
+                "language_model.model."):
+            renamed = name.replace("model.", "language_model.model.",
+                                   1) if name.startswith("model.") else name
             # Handle expert scale parameters with flat naming
             if "feed_forward.experts." in name and ("_input_scale" in name or
                                                     "_weight_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 # Map checkpoint naming to vLLM's expected naming
                 if "down_proj_input_scale" in renamed:
                     return renamed.replace("down_proj_input_scale",
@@ -929,15 +931,14 @@ def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
             # Handle attention scale parameters
             elif "self_attn." in name and (".k_scale" in name
                                            or ".v_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 if ".k_proj.k_scale" in renamed:
                     return renamed.replace(".k_proj.k_scale", ".attn.k_scale")
                 elif ".v_proj.v_scale" in renamed:
                     return renamed.replace(".v_proj.v_scale", ".attn.v_scale")
                 return renamed
 
             # Standard model.* to language_model.model.* renaming
-            return name.replace("model.", "language_model.model.", 1)
+            return renamed
 
         elif name.startswith("lm_head.weight"):
             return name.replace("lm_head.weight",