vllm-project · ttanzhiqiang · May 20, 2025 · Jun 23, 2025 · linfeng-yuan · May 23, 2025
@@ -816,8 +816,13 @@
                 1, 2).contiguous()
             layer.w2_weight.data = layer.w2_weight.data.transpose(
                 1, 2).contiguous()
+
+        torch_npu.npu_format_cast_(layer.w13_weight, 29)
+        torch_npu.npu_format_cast_(layer.w2_weight, 29)
+
         if envs.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP:
             torch_npu.npu_format_cast_(layer.w2_weight, ACL_FORMAT_FRACTAL_NZ)
+
         layer.w13_weight_scale.data = layer.w13_weight_scale.data.view(
             layer.w13_weight_scale.data.shape[0], -1)
         layer.w13_weight_offset.data = layer.w13_weight_offset.data.view(