sgl-project · b8zhong · Feb 8, 2026 · Feb 3, 2026 · ssshinigami · Feb 4, 2026
diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py
@@ -890,6 +890,14 @@ def __init__(
 class Qwen3MoeForCausalLM(nn.Module):
     fall_back_to_pt_during_load = False
 
+    # Mapping from fused module names to their component weight names.
+    # Required for quantization configs (e.g., ModelOpt FP4) to correctly identify
+    # which layers should be skipped based on the exclude_modules/ignore list.
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"],
+    }
+
     def __init__(
         self,
         config: Qwen3MoeConfig,