From 4116ab1e3247bf15440e349e215180756e04b9c4 Mon Sep 17 00:00:00 2001
From: icerain-alt <450125138@qq.com>
Date: Fri, 12 Dec 2025 11:17:17 +0800
Subject: [PATCH] [bugfix]: fix matmul allreduce to use original weight

Co-authored-by: Shangwei-Li
Signed-off-by: icerain-alt <450125138@qq.com>
---
 vllm_ascend/ops/linear_op.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py
index 53130e67e73..8589f7f5741 100644
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -422,7 +422,7 @@ def apply_impl(
         bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias
         if self.reduce_results and self.tp_size > 1:
             output = torch_npu.npu_mm_all_reduce_base(input_parallel,
-                                                      self.weight_t,
+                                                      self.layer.weight.t(),
                                                       self.hcomm_info,
                                                       bias=bias_)
         else:
@@ -449,10 +449,6 @@ def get_hcomm_info(cls, group: ProcessGroup) -> str:
             cls._HCOMM_INFO = group.get_hccl_comm_name(rank)
         return cls._HCOMM_INFO
 
-    def update_attrs(self):
-        super().update_attrs()
-        self.weight_t = self.layer.weight.t()
-
 
 class SequenceColumnParallelOp(CustomColumnParallelOp):
 