Skip to content

Conversation

@gandhi56
Copy link
Contributor

@gandhi56 gandhi56 commented Jan 9, 2026

This patch depends on #175257.

@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2026

@llvm/pr-subscribers-backend-amdgpu

Author: Anshil Gandhi (gandhi56)

Changes

This patch depends on #175257.


Patch is 63.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175258.diff

28 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+41)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll (+235)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll (+5-5)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll (+2-2)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir (+461)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir (+2-2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 500aa06899ee6..fb8d9e94d8160 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -74,8 +74,12 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return isAnyPtr(MRI.getType(Reg), 64);
   case Ptr128:
     return isAnyPtr(MRI.getType(Reg), 128);
+  case V2S16:
+    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
   case V2S32:
     return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
+  case V3S32:
+    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
   case V4S32:
     return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
   case B32:
@@ -664,6 +668,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
       .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
 
+  // Atomic read-modify-write operations: result, pointer and value are always
+  // VGPR; only the pointer type varies by address space.
+  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
+    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
+      .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
+      .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
+      .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
+      .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
+      .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
+      .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});
+
   bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
   bool hasSMRDSmall = ST->hasScalarSubwordLoads();
   bool usesTrue16 = ST->useRealTrue16Insts();
@@ -885,6 +900,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
       .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
       .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
+
+  // Atomics always operate per-lane; keep both the pointer and the value/result
+  // in VGPRs regardless of uniformity. Use Ptr32/Ptr64 to cover all addrspaces
+  // (e.g. local/region/private for Ptr32, global/flat for Ptr64).
+  addRulesForGOpcs({G_ATOMICRMW_FADD})
+      .Any({{B32, Ptr32, B32}, {{VgprB32}, {VgprPtr32, VgprB32}}})
+      .Any({{B32, Ptr64, B32}, {{VgprB32}, {VgprPtr64, VgprB32}}})
+      .Any({{B64, Ptr32, B64}, {{VgprB64}, {VgprPtr32, VgprB64}}})
+      .Any({{B64, Ptr64, B64}, {{VgprB64}, {VgprPtr64, VgprB64}}})
+      .Any({{V2S16, Ptr32, V2S16}, {{VgprV2S16}, {VgprPtr32, VgprV2S16}}})
+      .Any({{V2S16, Ptr64, V2S16}, {{VgprV2S16}, {VgprPtr64, VgprV2S16}}});
   // clang-format on
 
   addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
@@ -908,6 +934,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
       .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
 
+  // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
+  // address components are VGPR.
+  //
+  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
+  //   dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
+  //        idxen_imm
+  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
+      .Any({{S32, S32, V4S32, S32, S32, S32},
+            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
+      .Any({{S64, S64, V4S32, S32, S32, S32},
+            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
+      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
+            {{VgprV2S16},
+             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
+
   addRulesForGOpcs({G_PTR_ADD})
       .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
       .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..3301ee207f469
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw add and sub operations for different address spaces
+
+; =============================================================================
+; atomicrmw add - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_global(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_add_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(1) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_load_b32 v3, v[0:1], off
+; GFX12-NEXT:    s_mov_b32 s0, 0
+; GFX12-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_mov_b32_e32 v4, v3
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_nc_u32_e32 v3, v4, v2
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_cmpswap_b32 v3, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v4
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    v_mov_b32_e32 v0, v3
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_sub_i64_global when G_PHI is supported
+
+; =============================================================================
+; atomicrmw add - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_add_rtn_u32 v0, v0, v1
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(3) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_add_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(3) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_sub_rtn_u32 v0, v0, v1
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(3) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_sub_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_sub_i64_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_sub_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw add - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_flat:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    flat_atomic_add_u32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_add_i64_flat when G_PHI is supported
+
+; =============================================================================
+; atomicrmw sub - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_flat:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    flat_load_b32 v3, v[0:1]
+; GFX12-NEXT:    s_mov_b32 s0, 0
+; GFX12-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    v_mov_b32_e32 v4, v3
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_nc_u32_e32 v3, v4, v2
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v4
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    v_mov_b32_e32 v0, v3
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_sub_i64_flat when G_ICMP is supported
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
index b1314dd34f4e2..f65e609019ef0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
index 8567df0d35126..03b28ec3361ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
index 59d60c18e1d31..af5ec0152a4be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index fbbb0deb7d547..1daf97897f500 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
index 76e2fca5b00ad..af8463ca69f99 100644
--- a/llvm/test/CodeG...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2026

@llvm/pr-subscribers-llvm-globalisel

Author: Anshil Gandhi (gandhi56)

Changes

This patch depends on #175257.


Patch is 63.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175258.diff

28 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+41)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll (+235)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll (+5-5)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll (+2-2)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir (+461)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir (+2-2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 500aa06899ee6..fb8d9e94d8160 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -74,8 +74,12 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
     return isAnyPtr(MRI.getType(Reg), 64);
   case Ptr128:
     return isAnyPtr(MRI.getType(Reg), 128);
+  case V2S16:
+    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
   case V2S32:
     return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
+  case V3S32:
+    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
   case V4S32:
     return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
   case B32:
@@ -664,6 +668,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
       .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
 
+  // Atomic read-modify-write operations: result, pointer and value are always
+  // VGPR; only the pointer type varies by address space.
+  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
+    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
+      .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
+      .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
+      .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
+      .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
+      .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
+      .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});
+
   bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
   bool hasSMRDSmall = ST->hasScalarSubwordLoads();
   bool usesTrue16 = ST->useRealTrue16Insts();
@@ -885,6 +900,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
       .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
       .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
+
+  // Atomics always operate per-lane; keep both the pointer and the value/result
+  // in VGPRs regardless of uniformity. Use Ptr32/Ptr64 to cover all addrspaces
+  // (e.g. local/region/private for Ptr32, global/flat for Ptr64).
+  addRulesForGOpcs({G_ATOMICRMW_FADD})
+      .Any({{B32, Ptr32, B32}, {{VgprB32}, {VgprPtr32, VgprB32}}})
+      .Any({{B32, Ptr64, B32}, {{VgprB32}, {VgprPtr64, VgprB32}}})
+      .Any({{B64, Ptr32, B64}, {{VgprB64}, {VgprPtr32, VgprB64}}})
+      .Any({{B64, Ptr64, B64}, {{VgprB64}, {VgprPtr64, VgprB64}}})
+      .Any({{V2S16, Ptr32, V2S16}, {{VgprV2S16}, {VgprPtr32, VgprV2S16}}})
+      .Any({{V2S16, Ptr64, V2S16}, {{VgprV2S16}, {VgprPtr64, VgprV2S16}}});
   // clang-format on
 
   addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
@@ -908,6 +934,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
       .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
 
+  // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
+  // address components are VGPR.
+  //
+  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
+  //   dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
+  //        idxen_imm
+  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
+      .Any({{S32, S32, V4S32, S32, S32, S32},
+            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
+      .Any({{S64, S64, V4S32, S32, S32, S32},
+            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
+      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
+            {{VgprV2S16},
+             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
+
   addRulesForGOpcs({G_PTR_ADD})
       .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
       .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..3301ee207f469
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw add and sub operations for different address spaces
+
+; =============================================================================
+; atomicrmw add - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_global(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_add_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(1) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_global:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_load_b32 v3, v[0:1], off
+; GFX12-NEXT:    s_mov_b32 s0, 0
+; GFX12-NEXT:  .LBB2_1: ; %atomicrmw.start
+; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    v_mov_b32_e32 v4, v3
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_nc_u32_e32 v3, v4, v2
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    global_atomic_cmpswap_b32 v3, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v4
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    s_cbranch_execnz .LBB2_1
+; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    v_mov_b32_e32 v0, v3
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_sub_i64_global when G_PHI is supported
+
+; =============================================================================
+; atomicrmw add - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_add_rtn_u32 v0, v0, v1
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(3) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_add_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr addrspace(3) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_sub_rtn_u32 v0, v0, v1
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(3) %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+define i64 @atomicrmw_sub_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_sub_i64_local:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    ds_sub_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT:    s_wait_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SE
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 %val seq_cst
+  ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw add - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_flat:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    flat_atomic_add_u32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw add ptr %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_add_i64_flat when G_PHI is supported
+
+; =============================================================================
+; atomicrmw sub - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_flat:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    flat_load_b32 v3, v[0:1]
+; GFX12-NEXT:    s_mov_b32 s0, 0
+; GFX12-NEXT:  .LBB8_1: ; %atomicrmw.start
+; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    v_mov_b32_e32 v4, v3
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_sub_nc_u32_e32 v3, v4, v2
+; GFX12-NEXT:    global_wb scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    global_inv scope:SCOPE_SYS
+; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v3, v4
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX12-NEXT:    s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
+; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT:    v_mov_b32_e32 v0, v3
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = atomicrmw sub ptr %ptr, i32 %val seq_cst
+  ret i32 %result
+}
+
+; TODO: Add test for atomicrmw_sub_i64_flat when G_ICMP is supported
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
index b1314dd34f4e2..f65e609019ef0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
index 8567df0d35126..03b28ec3361ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s
 
 define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
index 59d60c18e1d31..af5ec0152a4be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index fbbb0deb7d547..1daf97897f500 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
 
 define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
   ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
index 76e2fca5b00ad..af8463ca69f99 100644
--- a/llvm/test/CodeG...
[truncated]

@github-actions
Copy link

github-actions bot commented Jan 9, 2026

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp --diff_from_common_commit

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index fb8d9e94d..c7852d4c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -671,7 +671,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   // Atomic read-modify-write operations: result and value are always VGPR,
   // pointer varies by address space.
   addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
-    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
+                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
       .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
       .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
       .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})

@github-actions
Copy link

github-actions bot commented Jan 9, 2026

🐧 Linux x64 Test Results

  • 3076 tests passed
  • 7 tests skipped

All executed tests passed, but another part of the build failed. Click on a failure below to see the details.

lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPURegBankLegalizeRules.cpp.o
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPURegBankLegalizeRules.cpp.o
sccache /opt/llvm/bin/clang++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_USE_CXX11_ABI=1 -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/gha/actions-runner/_work/llvm-project/llvm-project/build/lib/Target/AMDGPU -I/home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Target/AMDGPU -I/home/gha/actions-runner/_work/llvm-project/llvm-project/build/include -I/home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/include -gmlt -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wno-pass-failed -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -std=c++17 -fvisibility=hidden -UNDEBUG -fno-exceptions -funwind-tables -fno-rtti -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPURegBankLegalizeRules.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPURegBankLegalizeRules.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPURegBankLegalizeRules.cpp.o -c /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
/home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp:185:3: error: default label in switch which covers all enumeration values [-Werror,-Wcovered-switch-default]
185 |   default:
|   ^
1 error generated.

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

@github-actions
Copy link

github-actions bot commented Jan 9, 2026

🪟 Windows x64 Test Results

  • 129310 tests passed
  • 2854 tests skipped
  • 1 test failed

Failed Tests

(click on a test name to see its output)

LLVM

LLVM.CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
c:\_work\llvm-project\llvm-project\build\bin\llc.exe -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe -check-prefixes=GFX12 C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\llc.exe' -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200
# note: command had no output on stdout or stderr
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe' -check-prefixes=GFX12 'C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll'
# .---command stderr------------
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:23:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:18:28: note: scanning from here
# |  global_inv scope:SCOPE_SYS
# |                            ^
# | <stdin>:29:35: note: possible intended match here
# |  .set atomicrmw_add_i32_global.uses_flat_scratch, 0
# |                                   ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:42:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:55:28: note: scanning from here
# |  global_inv scope:SCOPE_SYS
# |                            ^
# | <stdin>:66:35: note: possible intended match here
# |  .set atomicrmw_add_i64_global.uses_flat_scratch, 0
# |                                   ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:82:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:109:22: note: scanning from here
# |  v_mov_b32_e32 v0, v3
# |                      ^
# | <stdin>:120:35: note: possible intended match here
# |  .set atomicrmw_sub_i32_global.uses_flat_scratch, 0
# |                                   ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:106:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:145:27: note: scanning from here
# |  global_inv scope:SCOPE_SE
# |                           ^
# | <stdin>:156:34: note: possible intended match here
# |  .set atomicrmw_add_i32_local.uses_flat_scratch, 0
# |                                  ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:124:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:181:27: note: scanning from here
# |  global_inv scope:SCOPE_SE
# |                           ^
# | <stdin>:192:34: note: possible intended match here
# |  .set atomicrmw_add_i64_local.uses_flat_scratch, 0
# |                                  ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:146:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:217:27: note: scanning from here
# |  global_inv scope:SCOPE_SE
# |                           ^
# | <stdin>:228:34: note: possible intended match here
# |  .set atomicrmw_sub_i32_local.uses_flat_scratch, 0
# |                                  ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:164:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:253:27: note: scanning from here
# |  global_inv scope:SCOPE_SE
# |                           ^
# | <stdin>:264:34: note: possible intended match here
# |  .set atomicrmw_sub_i64_local.uses_flat_scratch, 0
# |                                  ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:187:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:290:28: note: scanning from here
# |  global_inv scope:SCOPE_SYS
# |                            ^
# | <stdin>:301:33: note: possible intended match here
# |  .set atomicrmw_add_i32_flat.uses_flat_scratch, 0
# |                                 ^
# | C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll:229:15: error: GFX12-NEXT: expected string not found in input
# | ; GFX12-NEXT: s_wait_loadcnt 0x0
# |               ^
# | <stdin>:344:22: note: scanning from here
# |  v_mov_b32_e32 v0, v3
# |                      ^
# | <stdin>:355:33: note: possible intended match here
# |  .set atomicrmw_sub_i32_flat.uses_flat_scratch, 0
# |                                 ^
# | 
# | Input file: <stdin>
# | Check file: C:\_work\llvm-project\llvm-project\llvm\test\CodeGen\AMDGPU\GlobalISel\atomicrmw-add-sub.ll
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             .
# |             .
# |             .
# |            13:  s_wait_kmcnt 0x0 
# |            14:  global_wb scope:SCOPE_SYS 
# |            15:  s_wait_storecnt 0x0 
# |            16:  global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS 
# |            17:  s_wait_loadcnt 0x0 
# |            18:  global_inv scope:SCOPE_SYS 
# | next:23'0                                 X error: no match found
# |            19:  s_setpc_b64 s[30:31] 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~
# |            20: .Lfunc_end0: 
# | next:23'0      ~~~~~~~~~~~~~
# |            21:  .size atomicrmw_add_i32_global, .Lfunc_end0-atomicrmw_add_i32_global 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            22:  ; -- End function 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~
# |            23:  .set atomicrmw_add_i32_global.num_vgpr, 3 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            24:  .set atomicrmw_add_i32_global.num_agpr, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            25:  .set atomicrmw_add_i32_global.numbered_sgpr, 32 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            26:  .set atomicrmw_add_i32_global.num_named_barrier, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            27:  .set atomicrmw_add_i32_global.private_seg_size, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            28:  .set atomicrmw_add_i32_global.uses_vcc, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            29:  .set atomicrmw_add_i32_global.uses_flat_scratch, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:23'1                                        ?                  possible intended match
# |            30:  .set atomicrmw_add_i32_global.has_dyn_sized_stack, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            31:  .set atomicrmw_add_i32_global.has_recursion, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            32:  .set atomicrmw_add_i32_global.has_indirect_call, 0 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            33:  .section .AMDGPU.csdata,"",@progbits 
# | next:23'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            34: ; Function info: 
# | next:23'0      ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |            50:  s_wait_kmcnt 0x0 
# |            51:  global_wb scope:SCOPE_SYS 
# |            52:  s_wait_storecnt 0x0 
# |            53:  global_atomic_add_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS 
# |            54:  s_wait_loadcnt 0x0 
# |            55:  global_inv scope:SCOPE_SYS 
# | next:42'0                                 X error: no match found
# |            56:  s_setpc_b64 s[30:31] 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~
# |            57: .Lfunc_end1: 
# | next:42'0      ~~~~~~~~~~~~~
# |            58:  .size atomicrmw_add_i64_global, .Lfunc_end1-atomicrmw_add_i64_global 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            59:  ; -- End function 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~
# |            60:  .set atomicrmw_add_i64_global.num_vgpr, 4 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            61:  .set atomicrmw_add_i64_global.num_agpr, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            62:  .set atomicrmw_add_i64_global.numbered_sgpr, 32 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            63:  .set atomicrmw_add_i64_global.num_named_barrier, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            64:  .set atomicrmw_add_i64_global.private_seg_size, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            65:  .set atomicrmw_add_i64_global.uses_vcc, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            66:  .set atomicrmw_add_i64_global.uses_flat_scratch, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:42'1                                        ?                  possible intended match
# |            67:  .set atomicrmw_add_i64_global.has_dyn_sized_stack, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            68:  .set atomicrmw_add_i64_global.has_recursion, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            69:  .set atomicrmw_add_i64_global.has_indirect_call, 0 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            70:  .section .AMDGPU.csdata,"",@progbits 
# | next:42'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |            71: ; Function info: 
# | next:42'0      ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           104:  s_wait_alu depctr_sa_sdst(0) 
# |           105:  s_and_not1_b32 exec_lo, exec_lo, s0 
# |           106:  s_cbranch_execnz .LBB2_1 
# |           107: ; %bb.2: ; %atomicrmw.end 
# |           108:  s_or_b32 exec_lo, exec_lo, s0 
# |           109:  v_mov_b32_e32 v0, v3 
# | next:82'0                           X error: no match found
# |           110:  s_setpc_b64 s[30:31] 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~
# |           111: .Lfunc_end2: 
# | next:82'0      ~~~~~~~~~~~~~
# |           112:  .size atomicrmw_sub_i32_global, .Lfunc_end2-atomicrmw_sub_i32_global 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           113:  ; -- End function 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~
# |           114:  .set atomicrmw_sub_i32_global.num_vgpr, 5 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           115:  .set atomicrmw_sub_i32_global.num_agpr, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           116:  .set atomicrmw_sub_i32_global.numbered_sgpr, 32 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           117:  .set atomicrmw_sub_i32_global.num_named_barrier, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           118:  .set atomicrmw_sub_i32_global.private_seg_size, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           119:  .set atomicrmw_sub_i32_global.uses_vcc, 1 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           120:  .set atomicrmw_sub_i32_global.uses_flat_scratch, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:82'1                                        ?                  possible intended match
# |           121:  .set atomicrmw_sub_i32_global.has_dyn_sized_stack, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           122:  .set atomicrmw_sub_i32_global.has_recursion, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           123:  .set atomicrmw_sub_i32_global.has_indirect_call, 0 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           124:  .section .AMDGPU.csdata,"",@progbits 
# | next:82'0      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           125: ; Function info: 
# | next:82'0      ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           140:  s_wait_bvhcnt 0x0 
# |           141:  s_wait_kmcnt 0x0 
# |           142:  s_wait_storecnt 0x0 
# |           143:  ds_add_rtn_u32 v0, v0, v1 
# |           144:  s_wait_dscnt 0x0 
# |           145:  global_inv scope:SCOPE_SE 
# | next:106'0                               X error: no match found
# |           146:  s_setpc_b64 s[30:31] 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           147: .Lfunc_end3: 
# | next:106'0     ~~~~~~~~~~~~~
# |           148:  .size atomicrmw_add_i32_local, .Lfunc_end3-atomicrmw_add_i32_local 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           149:  ; -- End function 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~
# |           150:  .set atomicrmw_add_i32_local.num_vgpr, 2 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           151:  .set atomicrmw_add_i32_local.num_agpr, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           152:  .set atomicrmw_add_i32_local.numbered_sgpr, 32 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           153:  .set atomicrmw_add_i32_local.num_named_barrier, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           154:  .set atomicrmw_add_i32_local.private_seg_size, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           155:  .set atomicrmw_add_i32_local.uses_vcc, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           156:  .set atomicrmw_add_i32_local.uses_flat_scratch, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:106'1                                      ?                  possible intended match
# |           157:  .set atomicrmw_add_i32_local.has_dyn_sized_stack, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           158:  .set atomicrmw_add_i32_local.has_recursion, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           159:  .set atomicrmw_add_i32_local.has_indirect_call, 0 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           160:  .section .AMDGPU.csdata,"",@progbits 
# | next:106'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           161: ; Function info: 
# | next:106'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           176:  s_wait_bvhcnt 0x0 
# |           177:  s_wait_kmcnt 0x0 
# |           178:  s_wait_storecnt 0x0 
# |           179:  ds_add_rtn_u64 v[0:1], v0, v[1:2] 
# |           180:  s_wait_dscnt 0x0 
# |           181:  global_inv scope:SCOPE_SE 
# | next:124'0                               X error: no match found
# |           182:  s_setpc_b64 s[30:31] 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           183: .Lfunc_end4: 
# | next:124'0     ~~~~~~~~~~~~~
# |           184:  .size atomicrmw_add_i64_local, .Lfunc_end4-atomicrmw_add_i64_local 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           185:  ; -- End function 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~
# |           186:  .set atomicrmw_add_i64_local.num_vgpr, 3 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           187:  .set atomicrmw_add_i64_local.num_agpr, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           188:  .set atomicrmw_add_i64_local.numbered_sgpr, 32 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           189:  .set atomicrmw_add_i64_local.num_named_barrier, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           190:  .set atomicrmw_add_i64_local.private_seg_size, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           191:  .set atomicrmw_add_i64_local.uses_vcc, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           192:  .set atomicrmw_add_i64_local.uses_flat_scratch, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:124'1                                      ?                  possible intended match
# |           193:  .set atomicrmw_add_i64_local.has_dyn_sized_stack, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           194:  .set atomicrmw_add_i64_local.has_recursion, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           195:  .set atomicrmw_add_i64_local.has_indirect_call, 0 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           196:  .section .AMDGPU.csdata,"",@progbits 
# | next:124'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           197: ; Function info: 
# | next:124'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           212:  s_wait_bvhcnt 0x0 
# |           213:  s_wait_kmcnt 0x0 
# |           214:  s_wait_storecnt 0x0 
# |           215:  ds_sub_rtn_u32 v0, v0, v1 
# |           216:  s_wait_dscnt 0x0 
# |           217:  global_inv scope:SCOPE_SE 
# | next:146'0                               X error: no match found
# |           218:  s_setpc_b64 s[30:31] 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           219: .Lfunc_end5: 
# | next:146'0     ~~~~~~~~~~~~~
# |           220:  .size atomicrmw_sub_i32_local, .Lfunc_end5-atomicrmw_sub_i32_local 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           221:  ; -- End function 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~
# |           222:  .set atomicrmw_sub_i32_local.num_vgpr, 2 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           223:  .set atomicrmw_sub_i32_local.num_agpr, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           224:  .set atomicrmw_sub_i32_local.numbered_sgpr, 32 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           225:  .set atomicrmw_sub_i32_local.num_named_barrier, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           226:  .set atomicrmw_sub_i32_local.private_seg_size, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           227:  .set atomicrmw_sub_i32_local.uses_vcc, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           228:  .set atomicrmw_sub_i32_local.uses_flat_scratch, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:146'1                                      ?                  possible intended match
# |           229:  .set atomicrmw_sub_i32_local.has_dyn_sized_stack, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           230:  .set atomicrmw_sub_i32_local.has_recursion, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           231:  .set atomicrmw_sub_i32_local.has_indirect_call, 0 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           232:  .section .AMDGPU.csdata,"",@progbits 
# | next:146'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           233: ; Function info: 
# | next:146'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           248:  s_wait_bvhcnt 0x0 
# |           249:  s_wait_kmcnt 0x0 
# |           250:  s_wait_storecnt 0x0 
# |           251:  ds_sub_rtn_u64 v[0:1], v0, v[1:2] 
# |           252:  s_wait_dscnt 0x0 
# |           253:  global_inv scope:SCOPE_SE 
# | next:164'0                               X error: no match found
# |           254:  s_setpc_b64 s[30:31] 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           255: .Lfunc_end6: 
# | next:164'0     ~~~~~~~~~~~~~
# |           256:  .size atomicrmw_sub_i64_local, .Lfunc_end6-atomicrmw_sub_i64_local 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           257:  ; -- End function 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~
# |           258:  .set atomicrmw_sub_i64_local.num_vgpr, 3 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           259:  .set atomicrmw_sub_i64_local.num_agpr, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           260:  .set atomicrmw_sub_i64_local.numbered_sgpr, 32 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           261:  .set atomicrmw_sub_i64_local.num_named_barrier, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           262:  .set atomicrmw_sub_i64_local.private_seg_size, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           263:  .set atomicrmw_sub_i64_local.uses_vcc, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           264:  .set atomicrmw_sub_i64_local.uses_flat_scratch, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:164'1                                      ?                  possible intended match
# |           265:  .set atomicrmw_sub_i64_local.has_dyn_sized_stack, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           266:  .set atomicrmw_sub_i64_local.has_recursion, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           267:  .set atomicrmw_sub_i64_local.has_indirect_call, 0 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           268:  .section .AMDGPU.csdata,"",@progbits 
# | next:164'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           269: ; Function info: 
# | next:164'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           285:  s_wait_kmcnt 0x0 
# |           286:  global_wb scope:SCOPE_SYS 
# |           287:  s_wait_storecnt 0x0 
# |           288:  flat_atomic_add_u32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS 
# |           289:  s_wait_loadcnt_dscnt 0x0 
# |           290:  global_inv scope:SCOPE_SYS 
# | next:187'0                                X error: no match found
# |           291:  s_setpc_b64 s[30:31] 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           292: .Lfunc_end7: 
# | next:187'0     ~~~~~~~~~~~~~
# |           293:  .size atomicrmw_add_i32_flat, .Lfunc_end7-atomicrmw_add_i32_flat 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           294:  ; -- End function 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~
# |           295:  .set atomicrmw_add_i32_flat.num_vgpr, 3 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           296:  .set atomicrmw_add_i32_flat.num_agpr, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           297:  .set atomicrmw_add_i32_flat.numbered_sgpr, 32 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           298:  .set atomicrmw_add_i32_flat.num_named_barrier, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           299:  .set atomicrmw_add_i32_flat.private_seg_size, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           300:  .set atomicrmw_add_i32_flat.uses_vcc, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           301:  .set atomicrmw_add_i32_flat.uses_flat_scratch, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:187'1                                     ?                  possible intended match
# |           302:  .set atomicrmw_add_i32_flat.has_dyn_sized_stack, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           303:  .set atomicrmw_add_i32_flat.has_recursion, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           304:  .set atomicrmw_add_i32_flat.has_indirect_call, 0 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           305:  .section .AMDGPU.csdata,"",@progbits 
# | next:187'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           306: ; Function info: 
# | next:187'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# |           339:  s_wait_alu depctr_sa_sdst(0) 
# |           340:  s_and_not1_b32 exec_lo, exec_lo, s0 
# |           341:  s_cbranch_execnz .LBB8_1 
# |           342: ; %bb.2: ; %atomicrmw.end 
# |           343:  s_or_b32 exec_lo, exec_lo, s0 
# |           344:  v_mov_b32_e32 v0, v3 
# | next:229'0                          X error: no match found
# |           345:  s_setpc_b64 s[30:31] 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~
# |           346: .Lfunc_end8: 
# | next:229'0     ~~~~~~~~~~~~~
# |           347:  .size atomicrmw_sub_i32_flat, .Lfunc_end8-atomicrmw_sub_i32_flat 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           348:  ; -- End function 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~
# |           349:  .set atomicrmw_sub_i32_flat.num_vgpr, 5 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           350:  .set atomicrmw_sub_i32_flat.num_agpr, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           351:  .set atomicrmw_sub_i32_flat.numbered_sgpr, 32 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           352:  .set atomicrmw_sub_i32_flat.num_named_barrier, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           353:  .set atomicrmw_sub_i32_flat.private_seg_size, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           354:  .set atomicrmw_sub_i32_flat.uses_vcc, 1 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           355:  .set atomicrmw_sub_i32_flat.uses_flat_scratch, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# | next:229'1                                     ?                  possible intended match
# |           356:  .set atomicrmw_sub_i32_flat.has_dyn_sized_stack, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           357:  .set atomicrmw_sub_i32_flat.has_recursion, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           358:  .set atomicrmw_sub_i32_flat.has_indirect_call, 0 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           359:  .section .AMDGPU.csdata,"",@progbits 
# | next:229'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# |           360: ; Function info: 
# | next:229'0     ~~~~~~~~~~~~~~~~~
# |             .
# |             .
# |             .
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

If these failures are unrelated to your changes (for example, because tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the `infrastructure` label.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants