Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm/model_executor/models/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ class SupportsLoRA(Protocol):
# are empty by default.
embedding_modules: ClassVar[dict[str, str]] = {}
embedding_padding_modules: ClassVar[list[str]] = []
packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}
packed_modules_mapping: dict[str, list[str]] = {}


# We can't use runtime_checkable with ClassVar for issubclass checks
Expand Down
18 changes: 13 additions & 5 deletions vllm/model_executor/models/qwen2_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,11 +534,7 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
]
}

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
Expand All @@ -547,6 +543,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
quant_config = vllm_config.quant_config
self.config = config
self.quant_config = quant_config
# Only perform the following mapping when Qwen2MoeMLP exists
if (
getattr(config, "mlp_only_layers", [])
or config.shared_expert_intermediate_size > 0
):
self.packed_modules_mapping["gate_up_proj"] = (
[
"gate_proj",
"up_proj",
],
Comment on lines +551 to +555
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Assign gate_up_proj mapping as list, not tuple

The new conditional adds gate_up_proj using self.packed_modules_mapping["gate_up_proj"] = (["gate_proj", "up_proj"],). Because of the parentheses and trailing comma this stores a tuple whose only element is a list, while the rest of the quantization helpers expect dict[str, list[str]]. When the mapping is consumed (e.g., get_layer_partition_names or LoRA utilities), the tuple is iterated and the list itself is passed to string operations such as removesuffix/replace, raising a TypeError. Any model with mlp_only_layers set will fail during packed-module handling. Assign the list directly without wrapping it in a tuple.

Useful? React with 👍 / 👎.

)
Comment on lines +546 to +556
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This change introduces two critical issues:

  1. Modification of a class attribute: self.packed_modules_mapping is modified in-place. Since packed_modules_mapping is a class attribute, this modification will affect all other instances of Qwen2MoeForCausalLM, which can lead to unexpected behavior if multiple models with different configurations are used in the same process. An instance-specific copy should be created before modification.

  2. Incorrect condition for MLP existence: The condition if getattr(config, "mlp_only_layers", []) is not sufficient to determine if Qwen2MoeMLP layers (and thus gate_up_proj) exist. For example, a model with decoder_sparse_step > 1 and an empty mlp_only_layers list will have dense MLP layers, but this condition will be false, incorrectly omitting gate_up_proj from the mapping.

A more robust approach is to check if not all layers are sparse MoE layers. This is the case if mlp_only_layers is non-empty, or if there are no experts, or if decoder_sparse_step is not 1. The suggested change below addresses both issues.

        # Create a copy of the mapping to avoid modifying the class attribute.
        self.packed_modules_mapping = self.packed_modules_mapping.copy()
        # Conditionally add gate_up_proj if dense MLP layers exist. A model has
        # dense MLP layers if not all layers are sparse MoE layers.
        if (bool(getattr(config, "mlp_only_layers", [])) or
                getattr(config, "num_experts", 0) == 0 or
                getattr(config, "decoder_sparse_step", 1) != 1):
            self.packed_modules_mapping["gate_up_proj"] = [
                "gate_proj",
                "up_proj",
            ]


self.model = Qwen2MoeModel(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
)
Expand Down
14 changes: 9 additions & 5 deletions vllm/model_executor/models/qwen3_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,11 +634,7 @@ class Qwen3MoeForCausalLM(
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
]
}

fall_back_to_pt_during_load = False
Expand All @@ -649,6 +645,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
quant_config = vllm_config.quant_config
self.config = config
self.quant_config = quant_config
# Only perform the following mapping when Qwen3MoeMLP exists
if getattr(config, "mlp_only_layers", []):
self.packed_modules_mapping["gate_up_proj"] = (
[
"gate_proj",
"up_proj",
Comment on lines +649 to +653
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid tuple-wrapping gate_up_proj mapping

Same issue as above: self.packed_modules_mapping["gate_up_proj"] is assigned (["gate_proj", "up_proj"],), producing a tuple instead of the list that downstream quantization and LoRA helpers expect. Iterating this mapping yields the list itself and causes type errors when string concatenation is attempted, so models with mlp_only_layers enabled will crash when retrieving partition names or applying packed transforms. Assign a plain list here.

Useful? React with 👍 / 👎.

],
)
Comment on lines +648 to +655
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This change introduces two critical issues:

  1. Modification of a class attribute: self.packed_modules_mapping is modified in-place. Since packed_modules_mapping is a class attribute, this modification will affect all other instances of Qwen3MoeForCausalLM, which can lead to unexpected behavior if multiple models with different configurations are used in the same process. An instance-specific copy should be created before modification.

  2. Incorrect condition for MLP existence: The condition if getattr(config, "mlp_only_layers", []) is not sufficient to determine if Qwen3MoeMLP layers (and thus gate_up_proj) exist. For example, a model with decoder_sparse_step > 1 and an empty mlp_only_layers list will have dense MLP layers, but this condition will be false, incorrectly omitting gate_up_proj from the mapping.

A more robust approach is to check if not all layers are sparse MoE layers. This is the case if mlp_only_layers is non-empty, or if there are no experts, or if decoder_sparse_step is not 1. The suggested change below addresses both issues.

Suggested change
# Only perform the following mapping when Qwen3MoeMLP exists
if getattr(config, "mlp_only_layers", []):
self.packed_modules_mapping["gate_up_proj"] = (
[
"gate_proj",
"up_proj",
],
)
# Create a copy of the mapping to avoid modifying the class attribute.
self.packed_modules_mapping = self.packed_modules_mapping.copy()
# Conditionally add gate_up_proj if dense MLP layers exist. A model has
# dense MLP layers if not all layers are sparse MoE layers.
if (bool(getattr(config, "mlp_only_layers", [])) or
getattr(config, "num_experts", 0) == 0 or
getattr(config, "decoder_sparse_step", 1) != 1):
self.packed_modules_mapping["gate_up_proj"] = [
"gate_proj",
"up_proj",
]

self.model = Qwen3MoeModel(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
)
Expand Down