NVIDIA · hyukn · Apr 22, 2025 · Apr 22, 2025
@@ -27,6 +27,8 @@
 import torch.nn as nn
 from tqdm import tqdm
 from transformers import AutoConfig, AutoTokenizer
+from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer
+from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLDecoderLayer
 from transformers.pytorch_utils import Conv1D
 
 from ..._utils import pad_vocab_size, str_dtype_to_torch
@@ -101,9 +103,9 @@ def smooth_qwen_model(model, scales, alpha, qwen_qkv_para, qwen_smoother):
 @torch.no_grad()
 def smooth_qwen2_model(model, scales, alpha, qwen_qkv_para, qwen_smoother):
     # Smooth the activation and weights with smoother = $\diag{s}$
-    from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer
     for name, module in model.named_modules():
-        if not isinstance(module, Qwen2DecoderLayer):
+        if not isinstance(module, Qwen2DecoderLayer) and not isinstance(
+                module, Qwen2VLDecoderLayer):
             continue
         # qkv_proj
         layer_name_q = name + ".self_attn.q_proj"

@@ -346,7 +346,6 @@ examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_vl_7b_instruct-ena
 examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2_vl_7b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc] SKIP (https://nvbugs/5141290)
 examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2_vl_7b_instruct-nb:4] SKIP (https://nvbugs/5141290)
 examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16] SKIP (https://nvbugs/5141290)
-examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2_vl_7b_instruct-enable_ptpc-nb:4] SKIP (https://nvbugs/5141291)
 examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoder] SKIP (https://nvbugs/5141400)
 examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoderplus] SKIP (https://nvbugs/5141400)
 unittest/_torch/auto_deploy/integration/test_lm_eval.py SKIP (https://nvbugs/5144854)