diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 01ccdf84f5303..4f4ad000166d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -664,6 +664,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}}) .Div(S64, {{Vgpr64}, {Vgpr64, Imm}}); + // Atomic read-modify-write operations: result and value are always VGPR, + // pointer varies by address space. + addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG, + G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR}) + .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}}) + .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}}) + .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}}) + .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}}) + .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}}) + .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}}); + bool hasSMRDx3 = ST->hasScalarDwordx3Loads(); bool hasSMRDSmall = ST->hasScalarSubwordLoads(); bool usesTrue16 = ST->useRealTrue16Insts(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll new file mode 100644 index 0000000000000..69e71e37011a1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +; Test atomicrmw add and sub operations for different address spaces + +; ============================================================================= +; atomicrmw add - global address space (addrspace 1) +; ============================================================================= + +define i32 @atomicrmw_add_i32_global(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_add_i32_global: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_add_i64_global(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_add_i64_global: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_add_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr addrspace(1) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw add - local address space (addrspace 3) +; ============================================================================= + +define i32 @atomicrmw_add_i32_local(ptr addrspace(3) %ptr, i32 
%val) { +; GFX12-LABEL: atomicrmw_add_i32_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_add_rtn_u32 v0, v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr addrspace(3) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_add_i64_local(ptr addrspace(3) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_add_i64_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_add_rtn_u64 v[0:1], v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr addrspace(3) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw sub - local address space (addrspace 3) +; ============================================================================= + +define i32 @atomicrmw_sub_i32_local(ptr addrspace(3) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_sub_i32_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_sub_rtn_u32 v0, v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(3) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_sub_i64_local(ptr addrspace(3) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_sub_i64_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_sub_rtn_u64 v[0:1], v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(3) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw add - flat address space (addrspace 0) +; ============================================================================= + +define i32 @atomicrmw_add_i32_flat(ptr %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_add_i32_flat: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_add_u32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_add_i64_flat(ptr %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_add_i64_flat: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; 
GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1 + ret i64 %result +} + +; ============================================================================= +; atomicrmw add - VGPR inputs (loaded from memory) +; ============================================================================= + +define i32 @atomicrmw_add_i32_global_vgpr(ptr addrspace(1) %ptr, ptr addrspace(1) %val_ptr) { +; GFX12-LABEL: atomicrmw_add_i32_global_vgpr: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v[2:3], off +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %val = load i32, ptr addrspace(1) %val_ptr + %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst + ret i32 %result +} + +; ============================================================================= +; atomicrmw sub with metadata - global address space (no expansion) +; ============================================================================= + +define i32 @atomicrmw_sub_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_sub_i32_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_sub_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_sub_i64_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i64 %result +} + +define i32 @atomicrmw_sub_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_sub_i32_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; 
GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_sub_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_sub_i64_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw sub ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %result +} + +!0 = !{} +!1 = !{i32 5, i32 6} ; Exclude private address space (5) to prevent expansion diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll new file mode 100644 index 0000000000000..8f0f56c42ca15 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +; ============================================================================= +; atomicrmw and - generic address space (addrspace 0) +; ============================================================================= + +define i32 @atomicrmw_and_i32_generic(ptr addrspace(0) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_and_i32_generic: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_and_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_and_i64_generic(ptr addrspace(0) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_and_i64_generic: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: flat_atomic_and_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !0 + ret i64 %result +} + +; ============================================================================= +; atomicrmw and - local address space (addrspace 3) +; 
============================================================================= + +define i32 @atomicrmw_and_i32_local(ptr addrspace(3) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_and_i32_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_and_rtn_b32 v0, v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(3) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_and_i64_local(ptr addrspace(3) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_and_i64_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_and_rtn_b64 v[0:1], v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(3) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw and with metadata - global address space (no expansion) +; ============================================================================= + +define i32 @atomicrmw_and_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_and_i32_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_and_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_and_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_and_i64_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_and_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i64 %result +} + +define i32 @atomicrmw_and_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_and_i32_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_and_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv 
scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_and_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_and_i64_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_and_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw and ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %result +} + +!0 = !{i32 5, i32 6} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll new file mode 100644 index 0000000000000..dfec6bfdce5c1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +; ============================================================================= +; atomicrmw or - generic address space (addrspace 0) +; ============================================================================= + +define i32 @atomicrmw_or_i32_generic(ptr addrspace(0) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_or_i32_generic: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_or_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_or_i64_generic(ptr addrspace(0) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_or_i64_generic: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: flat_atomic_or_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !0 + ret i64 %result +} + +; ============================================================================= +; atomicrmw or - local address space (addrspace 3) +; ============================================================================= + +define i32 @atomicrmw_or_i32_local(ptr addrspace(3) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_or_i32_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: 
ds_or_rtn_b32 v0, v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(3) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_or_i64_local(ptr addrspace(3) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_or_i64_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_or_rtn_b64 v[0:1], v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(3) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw or with metadata - global address space (no expansion) +; ============================================================================= + +define i32 @atomicrmw_or_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_or_i32_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_or_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_or_i64_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i64 %result +} + +define i32 @atomicrmw_or_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_or_i32_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_or_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_or_i64_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: 
s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %result +} + +!0 = !{i32 5, i32 6} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll new file mode 100644 index 0000000000000..b2806fe2396d4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +; Test atomicrmw xchg operations for different address spaces + +; ============================================================================= +; atomicrmw xchg - global address space (addrspace 1) +; ============================================================================= + +define i32 @atomicrmw_xchg_i32_global(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xchg_i32_global: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_xchg_i64_global(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xchg_i64_global: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw xchg - local address space (addrspace 3) +; ============================================================================= + +define i32 @atomicrmw_xchg_i32_local(ptr addrspace(3) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xchg_i32_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(3) %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_xchg_i64_local(ptr addrspace(3) %ptr, i64 %val) { +; GFX12-LABEL: 
atomicrmw_xchg_i64_local: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: ds_storexchg_rtn_b64 v[0:1], v0, v[1:2] +; GFX12-NEXT: s_wait_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SE +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 %val seq_cst + ret i64 %result +} + +; ============================================================================= +; atomicrmw xchg - flat address space (addrspace 0) +; ============================================================================= + +define i32 @atomicrmw_xchg_i32_flat(ptr %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xchg_i32_flat: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_xchg_i64_flat(ptr %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xchg_i64_flat: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: flat_atomic_swap_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1 + ret i64 %result +} + +; ============================================================================= +; atomicrmw xchg - VGPR inputs (loaded from memory) +; ============================================================================= + +define i32 @atomicrmw_xchg_i32_global_vgpr(ptr addrspace(1) %ptr, ptr addrspace(1) %val_ptr) { +; GFX12-LABEL: atomicrmw_xchg_i32_global_vgpr: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v[2:3], off +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %val = load i32, ptr addrspace(1) %val_ptr + %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst + ret i32 %result +} + +; ============================================================================= +; atomicrmw xchg with metadata - global address space +; Note: xchg is natively supported and doesn't expand to CAS for i32/i64, +; but we still test metadata for consistency and documentation purposes +; ============================================================================= + +define i32 @atomicrmw_xchg_i32_global_no_remote_memory(ptr addrspace(1) 
%ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xchg_i32_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_xchg_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xchg_i64_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i64 %result +} + +define i32 @atomicrmw_xchg_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xchg_i32_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_xchg_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xchg_i64_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %result +} + +!0 = !{} +!1 = !{i32 5, i32 6} ; Exclude private address space (5) to prevent expansion diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll new file mode 100644 index 0000000000000..e353bb53813dd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s + +; 
=============================================================================
+; atomicrmw xor - generic address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_xor_i32_generic(ptr addrspace(0) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_generic:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: flat_atomic_xor_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(0) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xor_i64_generic(ptr addrspace(0) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xor_i64_generic:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: flat_atomic_xor_b64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SE
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(0) %ptr, i64 %val syncscope("workgroup") monotonic, !noalias.addrspace !0
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xor - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_xor_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_xor_rtn_b32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xor_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xor_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xor with metadata - global address space (no expansion)
+; =============================================================================
+
+define i32 @atomicrmw_xor_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: 
s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_xor_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xor ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_xor_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xor_i64_global_no_remote_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xor ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0 + ret i64 %result +} + +define i32 @atomicrmw_xor_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) { +; GFX12-LABEL: atomicrmw_xor_i32_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_xor_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xor ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i32 %result +} + +define i64 @atomicrmw_xor_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) { +; GFX12-LABEL: atomicrmw_xor_i64_global_no_fine_grained_memory: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_wb scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_inv scope:SCOPE_SYS +; GFX12-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw xor ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0 + ret i64 %result +} + +!0 = !{i32 5, i32 6} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir new file mode 100644 index 0000000000000..b36e1268bc10c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir @@ -0,0 +1,461 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s + +# Test G_ATOMICRMW_ADD and G_ATOMICRMW_SUB register bank selection and legalization +# for different address spaces (flat P0, global P1, local P3) and data sizes (S32, S64) + +--- +name: atomicrmw_add_flat_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_add_flat_s32_ss + ; CHECK: liveins: $sgpr0_sgpr1, 
$sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_flat_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_add_flat_s32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_flat_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_flat_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_global_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_add_global_s32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... 
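+
+# A hypothetical mixed-bank variant (SGPR pointer, VGPR value), sketched here
+# to illustrate the rule: both operands must end up in VGPRs, so only the
+# uniform pointer needs a COPY to a VGPR while the already-divergent value is
+# used as-is. The name and case are illustrative, not autogenerated.
+---
+name: atomicrmw_add_global_s32_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+    %3:_(s32) = G_AND %2, %2
+...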
+ +--- +name: atomicrmw_add_global_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_add_global_s32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_global_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_global_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_global_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_add_global_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... 
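+
+# Note: the trailing G_AND %2, %2 in these cases gives the atomic's result a
+# user, so the legalizer also has to assign and verify the bank of the result,
+# not just of the pointer and value operands.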
+ +--- +name: atomicrmw_add_local_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: atomicrmw_add_local_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_local_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_add_local_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_local_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_local_s64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... 
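+
+# Hypothetical all-VGPR local s64 case, sketched to round out the add matrix:
+# with fully divergent inputs no cross-bank copies are inserted and the
+# instruction stays on VGPRs unchanged. Illustrative only, not autogenerated.
+---
+name: atomicrmw_add_local_s64_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s64) = COPY $vgpr1_vgpr2
+    %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 3)
+    %3:_(s64) = G_AND %2, %2
+...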
+ +--- +name: atomicrmw_sub_flat_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_flat_s32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_flat_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_flat_s32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_flat_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_flat_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_sub_global_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_global_s32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_global_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_global_s32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_global_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_global_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_sub_global_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_global_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_local_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: atomicrmw_sub_local_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_local_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_sub_local_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... 
+
---
name: atomicrmw_sub_local_s64_ss
legalized: true

body: |
  bb.0:
    liveins: $sgpr0, $sgpr2_sgpr3
    ; CHECK-LABEL: name: atomicrmw_sub_local_s64_ss
    ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
    ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
    %0:_(p3) = COPY $sgpr0
    %1:_(s64) = COPY $sgpr2_sgpr3
    %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3)
    %3:_(s64) = G_AND %2, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
index bcd676f31c90a..005c0e8b109eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s

 ---
 name: atomicrmw_add_global_i32_ss
@@ -17,9 +16,11 @@ body: |
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1)
+  ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
   %0:_(p1) = COPY $sgpr0_sgpr1
   %1:_(s32) = COPY $sgpr2
   %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+  %3:_(s32) = G_AND %2, %2
 ...

 ---
@@ -37,9 +38,11 @@ body: |
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32))
+  ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
   %0:_(p0) = COPY $sgpr0_sgpr1
   %1:_(s32) = COPY $sgpr2
   %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
+  %3:_(s32) = G_AND %2, %2
 ...
--- @@ -57,7 +60,87 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_global_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_global_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_add_flat_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_flat_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... 
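+
+# Illustrative sketch, not autogenerated: a vv variant of the global i32 case,
+# mirroring the vv tests in the other atomicrmw files; the CHECK lines are the
+# expected pattern and would be regenerated with utils/update_mir_test_checks.py.
+---
+name: atomicrmw_add_global_i32_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: atomicrmw_add_global_i32_vv
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+    %3:_(s32) = G_AND %2, %2
+...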
+ +--- +name: atomicrmw_add_local_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_add_local_i64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir index e4fe9c5c7d9f3..78a2da758a7db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: atomicrmw_and_global_i32_ss @@ -17,9 +16,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_global_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_and_global_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_and_global_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_and_global_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_global_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_and_global_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 ... --- @@ -37,9 +108,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_and_flat_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_and_flat_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_flat_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_and_flat_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_flat_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_and_flat_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 ... 
--- @@ -57,7 +200,79 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_local_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_and_local_i32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_AND]], [[ATOMICRMW_AND]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_and_local_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_and_local_i64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_and_local_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_and_local_i64_vv + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_AND]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir index 3a16d72cb8ebd..cdc06c96291e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: atomicrmw_or_global_i32_ss @@ -17,9 +16,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_global_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_or_global_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_or_global_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_or_global_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_global_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_or_global_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 ... --- @@ -37,9 +108,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_or_flat_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_or_flat_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_flat_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_or_flat_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_flat_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_or_flat_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 ... 
--- @@ -57,7 +200,79 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_local_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_or_local_i32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_OR]], [[ATOMICRMW_OR]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_local_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_or_local_i64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_or_local_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_or_local_i64_vv + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_OR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir index e7b9c8efff6ce..87fe3f4da6fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: atomicrmw_sub_global_i32_ss @@ -22,6 +21,74 @@ body: | %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) ... +--- +name: atomicrmw_sub_global_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_global_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) +... + +--- +name: atomicrmw_sub_global_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_global_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_sub_global_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_global_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + --- name: atomicrmw_sub_flat_i32_ss legalized: true @@ -37,9 +104,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_flat_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_sub_flat_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_sub_flat_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_flat_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_flat_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_flat_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 ... --- @@ -57,7 +196,79 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_sub_local_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_sub_local_i32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_local_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_local_i64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_sub_local_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_sub_local_i64_vv + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir index db01a21d061d3..0012a65a33b03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: atomicrmw_xchg_global_i32_ss @@ -17,9 +16,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XCHG]], [[ATOMICRMW_XCHG]] %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 ... --- @@ -37,9 +38,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XCHG]], [[ATOMICRMW_XCHG]] %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 ... --- @@ -57,7 +60,9 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XCHG]], [[ATOMICRMW_XCHG]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 ... 
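+
+# Illustrative sketch, not autogenerated: an s64 xchg case mirroring the i64
+# tests in the other atomicrmw files (assumes the same unmerge/merge split of
+# the 64-bit VGPR use applies on this subtarget); the CHECK lines are the
+# expected pattern and would be regenerated with utils/update_mir_test_checks.py.
+---
+name: atomicrmw_xchg_global_i64_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: atomicrmw_xchg_global_i64_ss
+    ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+    ; CHECK-NEXT: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XCHG]](s64)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XCHG]](s64)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+    %0:_(p1) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s64), addrspace 1)
+    %3:_(s64) = G_AND %2, %2
+...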
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir index 903d6f54ba46d..f741a60bde763 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- name: atomicrmw_xor_global_i32_ss @@ -17,9 +16,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_global_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_xor_global_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_global_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_global_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_xor_global_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_global_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 ... --- @@ -37,9 +108,81 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_flat_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_xor_flat_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_xor_flat_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_flat_i64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_flat_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_flat_i64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 ... --- @@ -57,7 +200,79 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: atomicrmw_xor_local_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_xor_local_i32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_XOR]], [[ATOMICRMW_XOR]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_local_i64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_local_i64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_xor_local_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_xor_local_i64_vv + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_XOR]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 ...