vllm-project · Isotr0py · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025
diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
@@ -325,7 +325,7 @@ class SupportsLoRA(Protocol):
     # are empty by default.
     embedding_modules: ClassVar[dict[str, str]] = {}
     embedding_padding_modules: ClassVar[list[str]] = []
-    packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}
+    packed_modules_mapping: dict[str, list[str]] = {}
 
 
 # We can't use runtime_checkable with ClassVar for issubclass checks

@@ -534,11 +534,7 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -547,6 +543,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Create a copy of the mapping to avoid modifying the class attribute.
+        self.packed_modules_mapping = self.packed_modules_mapping.copy()
+        # Only perform the following mapping when Qwen2MoeMLP exists
+        if (
+            getattr(config, "mlp_only_layers", [])
+            or config.shared_expert_intermediate_size > 0
+        ):
+            self.packed_modules_mapping["gate_up_proj"] = [
+                "gate_proj",
+                "up_proj",
+            ]
+
         self.model = Qwen2MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )

@@ -634,11 +634,7 @@ class Qwen3MoeForCausalLM(
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }
 
     fall_back_to_pt_during_load = False
@@ -649,6 +645,19 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Create a copy of the mapping to avoid modifying the class attribute.
+        self.packed_modules_mapping = self.packed_modules_mapping.copy()
+        # Conditionally add gate_up_proj if dense MLP layers exist. A model has
+        # dense MLP layers if not all layers are sparse MoE layers.
+        if (
+            getattr(config, "mlp_only_layers", [])
+            or getattr(config, "num_experts", 0) == 0
+            or getattr(config, "decoder_sparse_step", 1) != 1
+        ):
+            self.packed_modules_mapping["gate_up_proj"] = [
+                "gate_proj",
+                "up_proj",
+            ]
         self.model = Qwen3MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )