Commit 0dcdd48

still return None if not 4-bit
Signed-off-by: Lu Fang <[email protected]>
1 parent bdc02ca commit 0dcdd48

1 file changed: 3 additions, 9 deletions

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

@@ -1566,14 +1566,9 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
     def get_fused_moe_quant_config(
         self, layer: torch.nn.Module
     ) -> FusedMoEQuantConfig | None:
-        assert self.num_bits == 4 or self.num_bits == 8
-        config_builder = (
-            int4_w4a16_moe_quant_config
-            if self.num_bits == 4
-            else int8_w8a16_moe_quant_config
-        )
-
-        return config_builder(
+        if self.num_bits != 4:
+            return None
+        return int4_w4a16_moe_quant_config(
             w1_scale=layer.w13_weight_scale,
             w2_scale=layer.w2_weight_scale,
             w1_zp=None,
@@ -1586,7 +1581,6 @@ def select_gemm_impl(
         prepare_finalize: mk.FusedMoEPrepareAndFinalize,
         layer: torch.nn.Module,
     ) -> mk.FusedMoEPermuteExpertsUnpermute:
-
         layer.w13_weight = layer.w13_weight_packed
         layer.w2_weight = layer.w2_weight_packed
         assert all([w is not None for w in [layer.w13_weight, layer.w2_weight]])
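
For context, a minimal self-contained sketch of what this commit changes in get_fused_moe_quant_config: before, the method asserted 4- or 8-bit and picked a config builder; after, anything other than 4-bit returns None. The functions below are hypothetical stand-ins for illustration, not vLLM's API; the real method lives on a quantization class in compressed_tensors_moe.py and builds a FusedMoEQuantConfig rather than returning a string.

# Hypothetical stand-ins illustrating the behavior change; the real
# builders are int4_w4a16_moe_quant_config / int8_w8a16_moe_quant_config.

def get_fused_moe_quant_config_old(num_bits: int) -> str:
    # Before: 4- and 8-bit were both accepted; other widths raised.
    assert num_bits in (4, 8)
    return "int4_w4a16" if num_bits == 4 else "int8_w8a16"

def get_fused_moe_quant_config_new(num_bits: int) -> str | None:
    # After: only 4-bit produces a config; everything else (including
    # the previously handled 8-bit path) returns None.
    if num_bits != 4:
        return None
    return "int4_w4a16"

if __name__ == "__main__":
    for bits in (4, 8):
        print(bits,
              get_fused_moe_quant_config_old(bits),
              get_fused_moe_quant_config_new(bits))
    # 4 -> both paths build the int4 config; 8 -> old built int8, new is None.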
