From 4116ab1e3247bf15440e349e215180756e04b9c4 Mon Sep 17 00:00:00 2001
From: icerain-alt <450125138@qq.com>
Date: Fri, 12 Dec 2025 11:17:17 +0800
Subject: [PATCH] [bugfix]: fix matmul allreduce to use layer.weight.t() instead of cached weight_t

Co-authored-by: Shangwei-Li <lishangwei@mail.ustc.edu.cn>
Signed-off-by: icerain-alt <450125138@qq.com>
---
 vllm_ascend/ops/linear_op.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py
index 53130e67e73..8589f7f5741 100644
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -422,7 +422,7 @@ def apply_impl(
         bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias
         if self.reduce_results and self.tp_size > 1:
             output = torch_npu.npu_mm_all_reduce_base(input_parallel,
-                                                      self.weight_t,
+                                                      self.layer.weight.t(),
                                                       self.hcomm_info,
                                                       bias=bias_)
         else:
@@ -449,10 +449,6 @@ def get_hcomm_info(cls, group: ProcessGroup) -> str:
             cls._HCOMM_INFO = group.get_hccl_comm_name(rank)
         return cls._HCOMM_INFO
 
-    def update_attrs(self):
-        super().update_attrs()
-        self.weight_t = self.layer.weight.t()
-
 
 class SequenceColumnParallelOp(CustomColumnParallelOp):