Diff summary: 1 file changed (+0, −12 lines) in vllm/model_executor/layers/quantization.

Hunk header: @@ -1542,23 +1542,11 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
15421542 del layer .w2_input_scale_quant
15431543 else :
15441544 # Non-TRT-LLM processing (Cutlass or non-flashinfer)
1545- assert layer .w13_weight_scale .shape [2 ] % 16 == 0 , (
1546- "Expected weight_scale.dim(1) to be divisible by 16"
1547- )
1548- assert layer .w13_weight_scale .dtype == torch .float8_e4m3fn , (
1549- "Weight Blockscale must be represented as FP8-E4M3"
1550- )
15511545 w13_blockscale_swizzled = swizzle_blockscale (layer .w13_weight_scale )
15521546 layer .w13_weight_scale = Parameter (
15531547 w13_blockscale_swizzled , requires_grad = False
15541548 )
15551549
1556- assert layer .w2_weight_scale .shape [2 ] % 16 == 0 , (
1557- "Expected weight_scale.dim(1) to be divisible by 16"
1558- )
1559- assert layer .w2_weight_scale .dtype == torch .float8_e4m3fn , (
1560- "Weight Blockscale must be represented as FP8-E4M3"
1561- )
15621550 w2_blockscale_swizzled = swizzle_blockscale (layer .w2_weight_scale )
15631551 layer .w2_weight_scale = Parameter (
15641552 w2_blockscale_swizzled , requires_grad = False
0 commit comments.