diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 10138f59ab1b4..8207bd46ea9f7 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -662,8 +662,6 @@ class WaitcntBrackets { void simplifyWaitcnt(const AMDGPU::Waitcnt &CheckWait, AMDGPU::Waitcnt &UpdateWait) const; void simplifyWaitcnt(InstCounterType T, unsigned &Count) const; - bool hasRedundantXCntWithKmCnt(const AMDGPU::Waitcnt &Wait) const; - bool canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait) const; void simplifyXcnt(const AMDGPU::Waitcnt &CheckWait, AMDGPU::Waitcnt &UpdateWait) const; @@ -1354,23 +1352,6 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) { } } -bool WaitcntBrackets::hasRedundantXCntWithKmCnt( - const AMDGPU::Waitcnt &Wait) const { - // Wait on XCNT is redundant if we are already waiting for a load to complete. - // SMEM can return out of order, so only omit XCNT wait if we are waiting till - // zero. - return Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP); -} - -bool WaitcntBrackets::canOptimizeXCntWithLoadCnt( - const AMDGPU::Waitcnt &Wait) const { - // If we have pending store we cannot optimize XCnt because we do not wait for - // stores. VMEM loads retun in order, so if we only have loads XCnt is - // decremented to the same number as LOADCnt. - return Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) && - !hasPendingEvent(STORE_CNT); -} - void WaitcntBrackets::simplifyXcnt(const AMDGPU::Waitcnt &CheckWait, AMDGPU::Waitcnt &UpdateWait) const { // Try to simplify xcnt further by checking for joint kmcnt and loadcnt @@ -1378,10 +1359,16 @@ void WaitcntBrackets::simplifyXcnt(const AMDGPU::Waitcnt &CheckWait, // be pending SMEM and VMEM events active at the same time. // In such cases, only clear one active event at a time. // TODO: Revisit xcnt optimizations for gfx1250. - if (hasRedundantXCntWithKmCnt(CheckWait)) + // Wait on XCNT is redundant if we are already waiting for a load to complete. + // SMEM can return out of order, so only omit XCNT wait if we are waiting till + // zero. + if (CheckWait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) UpdateWait.XCnt = ~0u; - if (canOptimizeXCntWithLoadCnt(CheckWait) && - CheckWait.XCnt >= CheckWait.LoadCnt) + // If we have pending store we cannot optimize XCnt because we do not wait for + // stores. VMEM loads retun in order, so if we only have loads XCnt is + // decremented to the same number as LOADCnt. + if (CheckWait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) && + !hasPendingEvent(STORE_CNT) && CheckWait.XCnt >= CheckWait.LoadCnt) UpdateWait.XCnt = ~0u; simplifyWaitcnt(X_CNT, UpdateWait.XCnt); }