diff --git a/csrc/deepep/ops2/op_kernel/notify_dispatch_a2.h b/csrc/deepep/ops2/op_kernel/notify_dispatch_a2.h index 7d6d84029..6c1dab615 100644 --- a/csrc/deepep/ops2/op_kernel/notify_dispatch_a2.h +++ b/csrc/deepep/ops2/op_kernel/notify_dispatch_a2.h @@ -220,13 +220,12 @@ class NotifyDispatchA2 pipe.InitBuffer(this->tBuf, TEMP_BUF_LEN); LocalTensor tempLocal = tBuf.Get(); - tempLocal(0) = 1; + PipeBarrier(); + tempLocal(0) = magicTensor_.GetValue(blockIdx * EXP_TOKEN_COUNT_FLAG_CNT) + 1; // 使用atomic方式实现+1 - AscendC::SetAtomicAdd(); AscendC::SetFlag(EVENT_ID0); AscendC::WaitFlag(EVENT_ID0); // 等待SetValue完成 DataCopy(magicTensor_[blockIdx * EXP_TOKEN_COUNT_FLAG_CNT], tempLocal, EXP_TOKEN_COUNT_FLAG_CNT); - AscendC::SetAtomicNone(); AscendC::SetFlag(EVENT_ID0); AscendC::WaitFlag(EVENT_ID0); // 等待DataCopy完成 magic = magicTensor_.GetValue(blockIdx * EXP_TOKEN_COUNT_FLAG_CNT);