Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5a631aa
Update modelslim_config.py
tanhaoan333 Feb 26, 2026
302056c
Update modelslim_config.py
tanhaoan333 Feb 26, 2026
d67c6d5
Update __init__.py
tanhaoan333 Feb 26, 2026
836fe46
Create patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 26, 2026
963a34e
Update w8a8_dynamic.py
tanhaoan333 Feb 26, 2026
0dcf97a
Update moe_mlp.py
tanhaoan333 Feb 26, 2026
44f2ec3
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 26, 2026
a11d614
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 26, 2026
48c5bd8
Update modelslim_config.py
tanhaoan333 Feb 26, 2026
854d35a
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 27, 2026
344047e
Update modelslim_config.py
tanhaoan333 Feb 27, 2026
cc8a936
Update moe_mlp.py
tanhaoan333 Feb 27, 2026
4ee521c
Update moe_mlp.py
tanhaoan333 Feb 27, 2026
b340bf2
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 27, 2026
5013c56
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 27, 2026
c1a6388
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 27, 2026
af07664
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 27, 2026
5c62d07
Update moe_mlp.py
tanhaoan333 Feb 27, 2026
dccd361
Update moe_mlp.py
tanhaoan333 Feb 27, 2026
4421343
Update modelslim_config.py
tanhaoan333 Feb 27, 2026
a6473b4
Update modelslim_config.py
tanhaoan333 Feb 27, 2026
913656e
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 28, 2026
05c1e24
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Feb 28, 2026
78fdd49
Update __init__.py
tanhaoan333 Feb 28, 2026
ee274b2
Create patch_qwen2_5_omni.py
tanhaoan333 Feb 28, 2026
776ed24
Update patch_qwen2_5_omni.py
tanhaoan333 Feb 28, 2026
cdba7a1
Update patch_qwen2_5_omni.py
tanhaoan333 Feb 28, 2026
4d4bf70
Update patch_qwen2_5_omni.py
tanhaoan333 Feb 28, 2026
29f2633
Update patch_qwen2_5_omni.py
tanhaoan333 Feb 28, 2026
f679f3b
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
22de2f9
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
a2eccab
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
8e6a591
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Mar 2, 2026
deefaa5
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
52e1332
Update patch_qwen3_omni_moe_thinker.py
tanhaoan333 Mar 2, 2026
212bd77
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
9e46580
Update modelslim_config.py
tanhaoan333 Mar 2, 2026
af76a0f
Delete vllm_ascend/patch/worker/patch_qwen3_omni_moe_thinker.py
tanhaoan333 Mar 2, 2026
5b30f2f
Delete vllm_ascend/patch/worker/patch_qwen2_5_omni.py
tanhaoan333 Mar 2, 2026
4581951
Update __init__.py
tanhaoan333 Mar 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions vllm_ascend/quantization/methods/w8a8_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ def apply(
tp_rank: int | None = 0,
) -> torch.Tensor:
quantized_x, pertoken_scale = torch_npu.npu_dynamic_quant(x)
need_unsqz = False
if pertoken_scale.dim() == 2:
need_unsqz = True
quantized_x = quantized_x.squeeze(dim=1)
pertoken_scale = pertoken_scale.squeeze(dim=1)
output = torch_npu.npu_quant_matmul(
quantized_x,
layer.weight,
Expand All @@ -87,6 +92,8 @@ def apply(
bias=bias,
output_dtype=x.dtype,
)
if need_unsqz:
output = output.unsqueeze(dim=1)
return output

def process_weights_after_loading(self, layer):
Expand Down
24 changes: 23 additions & 1 deletion vllm_ascend/quantization/modelslim_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@
"mm_projector.linear_1": "mm_projector.proj.0",
"mm_projector.linear_2": "mm_projector.proj.2",
},
"qwen3_omni_moe_thinker": {
"thinker.lm_head.": "language_model.lm_head.",
"thinker.model.": "language_model.model.",
"thinker.": "",
"lm_head.": "language_model.lm_head.",
"model.": "language_model.model.",
},
}

# key: model_type
Expand Down Expand Up @@ -186,6 +193,18 @@
],
"experts": ["experts.0.w1", "experts.0.w2", "experts.0.w3"],
},
"qwen3_omni_moe_text": {
"qkv_proj": [
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
"experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
},
}


Expand Down Expand Up @@ -462,7 +481,10 @@ def is_layer_skipped_ascend(self, prefix: str, fused_mapping: Mapping[str, list[
"to have the same precision."
)
else:
is_skipped = self.quant_description[prefix + ".weight"] == "FLOAT"
is_skipped = any(
key.startswith(prefix) and key.endswith(".weight") and value == "FLOAT"
for key, value in self.quant_description.items()
)

assert is_skipped is not None
return is_skipped
Expand Down
Loading