diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py index 437c4fa3e09f..3fcf0cfa0cbb 100644 --- a/python/sglang/srt/models/qwen3_moe.py +++ b/python/sglang/srt/models/qwen3_moe.py @@ -890,6 +890,14 @@ def __init__( class Qwen3MoeForCausalLM(nn.Module): fall_back_to_pt_during_load = False + # Mapping from fused module names to their component weight names. + # Required for quantization configs (e.g., ModelOpt FP4) to correctly identify + # which layers should be skipped based on the exclude_modules/ignore list. + packed_modules_mapping = { + "qkv_proj": ["q_proj", "k_proj", "v_proj"], + "gate_up_proj": ["gate_proj", "up_proj"], + } + def __init__( self, config: Qwen3MoeConfig,