Skip to content
This repository was archived by the owner on May 11, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions awq/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from .internlm2 import InternLM2AWQForCausalLM
from .minicpm3 import MiniCPM3AWQForCausalLM
from .qwen2vl import Qwen2VLAWQForCausalLM
from .qwen2_5_vl import Qwen2_5_VLAWQForCausalLM
1 change: 1 addition & 0 deletions awq/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"internlm2": InternLM2AWQForCausalLM,
"minicpm3": MiniCPM3AWQForCausalLM,
"qwen2_vl": Qwen2VLAWQForCausalLM,
"qwen2_5_vl": Qwen2_5_VLAWQForCausalLM,
}


Expand Down
3 changes: 2 additions & 1 deletion awq/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@
"deepseek_v2": "AutoModelForCausalLM",
"deepseek_v3": "AutoModelForCausalLM",
"minicpm": "AutoModelForCausalLM",
"minicpm3":"AutoModelForCausalLM",
"minicpm3": "AutoModelForCausalLM",
"internlm2": "AutoModelForCausalLM",
"qwen2_vl": "AutoModelForVision2Seq",
"qwen2_5_vl": "AutoModelForVision2Seq",
}


Expand Down
81 changes: 81 additions & 0 deletions awq/models/qwen2_5_vl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from .base import BaseAWQForCausalLM
from typing_extensions import TYPE_CHECKING

if TYPE_CHECKING:
from transformers import Qwen2_5_VLForConditionalGeneration
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
Qwen2_5_VLDecoderLayer,
)


class Qwen2_5_VLAWQForCausalLM(BaseAWQForCausalLM):
    """AWQ adapter for Qwen2.5-VL: quantizes the text decoder stack while
    leaving the vision tower untouched."""

    # Class name of one transformer decoder layer in the text backbone.
    layer_type = "Qwen2_5_VLDecoderLayer"
    # Config key that holds the model's maximum sequence length.
    max_seq_len_key = "max_position_embeddings"
    # Submodules excluded from quantization (the vision encoder).
    modules_to_not_convert = ["visual"]

    @staticmethod
    def get_model_layers(model: "Qwen2_5_VLForConditionalGeneration"):
        """Return the decoder layers that AWQ will quantize."""
        return model.model.layers

    @staticmethod
    def get_act_for_scaling(module: "Qwen2_5_VLForConditionalGeneration"):
        """No standalone activation module is scaled for this architecture."""
        return dict(is_scalable=False)

    @staticmethod
    def move_embed(model: "Qwen2_5_VLForConditionalGeneration", device: str):
        """Move token embeddings, the vision tower, and the rotary-embedding
        module to *device* ahead of calibration."""
        model.model.embed_tokens = model.model.embed_tokens.to(device)
        model.visual = model.visual.to(device)
        model.model.rotary_emb = model.model.rotary_emb.to(device)

    @staticmethod
    def get_layers_for_scaling(
        module: "Qwen2_5_VLDecoderLayer", input_feat, module_kwargs
    ):
        """Describe the prev_op -> linear-layer groups that AWQ scales jointly
        for one decoder layer, in forward-pass order."""
        attn = module.self_attn
        mlp = module.mlp

        # Q/K/V projections all consume the pre-attention layernorm output.
        groups = [
            dict(
                prev_op=module.input_layernorm,
                layers=[attn.q_proj, attn.k_proj, attn.v_proj],
                inp=input_feat["self_attn.q_proj"],
                module2inspect=attn,
                kwargs=module_kwargs,
            )
        ]

        # The output projection can only be scaled against v_proj when their
        # weight shapes match (they differ under grouped-query attention);
        # see https://github.com/mit-han-lab/llm-awq/pull/67#issue-1850622696
        if attn.v_proj.weight.shape == attn.o_proj.weight.shape:
            groups.append(
                dict(
                    prev_op=attn.v_proj,
                    layers=[attn.o_proj],
                    inp=input_feat["self_attn.o_proj"],
                )
            )

        # Gate and up projections share the post-attention layernorm output.
        groups.append(
            dict(
                prev_op=module.post_attention_layernorm,
                layers=[mlp.gate_proj, mlp.up_proj],
                inp=input_feat["mlp.gate_proj"],
                module2inspect=mlp,
            )
        )

        # The down projection is scaled against up_proj's output.
        groups.append(
            dict(
                prev_op=mlp.up_proj,
                layers=[mlp.down_proj],
                inp=input_feat["mlp.down_proj"],
            )
        )

        return groups
10 changes: 10 additions & 0 deletions awq/quantize/quantizer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import transformers
import torch
import inspect
import logging
Expand Down Expand Up @@ -153,6 +154,15 @@ def quantize(self):
# https://github.com/huggingface/transformers/pull/32617
self.awq_model.move_embed(self.model, common_device)

# Transformers >= 4.48.0 requires positional embeddings should be computed before forward pass
if (
transformers.__version__ >= "4.48.0"
and self.module_kwargs.get("position_embeddings") is None
):
self.module_kwargs["position_embeddings"] = self.model.model.rotary_emb(
self.inps, self.module_kwargs["position_ids"]
)

for k, v in self.module_kwargs.items():
# position embeddings found in tuple
if isinstance(v, tuple):
Expand Down
Loading