diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index da0acf83b6955..6d327b85f3a32 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -932,6 +932,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}}) .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}}) .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}}); + + // Atomics always operate per-lane; keep both the pointer and the value/result + // in VGPRs regardless of uniformity. Use Ptr32/Ptr64 to cover all addrspaces + // (e.g. local/region/private for Ptr32, global/flat for Ptr64). + addRulesForGOpcs({G_ATOMICRMW_FADD}) + .Any({{B32, Ptr32, B32}, {{VgprB32}, {VgprPtr32, VgprB32}}}) + .Any({{B32, Ptr64, B32}, {{VgprB32}, {VgprPtr64, VgprB32}}}) + .Any({{B64, Ptr32, B64}, {{VgprB64}, {VgprPtr32, VgprB64}}}) + .Any({{B64, Ptr64, B64}, {{VgprB64}, {VgprPtr64, VgprB64}}}) + .Any({{V2S16, Ptr32, V2S16}, {{VgprV2S16}, {VgprPtr32, VgprV2S16}}}) + .Any({{V2S16, Ptr64, V2S16}, {{VgprV2S16}, {VgprPtr64, VgprV2S16}}}); // clang-format on addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index da25ac06be26d..5e87afa4461d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) { ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index bf3697924c22c..93968937a9942 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) { ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index c349051bcc954..b92e139d13fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) { ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll index 7766b3ad45962..f9caf56e9180e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942 define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { ; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 9c0db4cd162fc..60dcd014a3c8c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index 62620a8875a3a..1c680ec154e2a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir index 11833cab3c07f..69e228616e1af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -1,15 +1,282 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s --- -name: atomicrmw_fadd_local_i32_ss +name: atomicrmw_fadd_global_f32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_global_f32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_global_f32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_global_f32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_global_f64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_global_f64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_global_f64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_global_f64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 1) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_global_v2f16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_global_v2f16_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (<2 x s16>), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(<2 x s16>) = COPY $sgpr2 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 1) + %3:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_global_v2f16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_global_v2f16_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (<2 x s16>), addrspace 1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 1) + %3:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_f32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_f32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_f32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_f32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 0) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_f64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_f64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_f64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_f64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64)) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 0) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_v2f16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_v2f16_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (<2 x s16>)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(<2 x s16>) = COPY $sgpr2 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 0) + %3:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_flat_v2f16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: atomicrmw_fadd_flat_v2f16_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (<2 x s16>)) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 0) + %3:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_f32_ss legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: atomicrmw_fadd_local_i32_ss + ; CHECK-LABEL: name: atomicrmw_fadd_local_f32_ss ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 @@ -17,7 +284,121 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_f32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_fadd_local_f32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) + %3:_(s32) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_f64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_local_f64_ss + ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $sgpr0 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_f64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-LABEL: name: atomicrmw_fadd_local_f64_vv + ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(p3) = COPY $vgpr0 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 3) + %3:_(s64) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_v2f16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: atomicrmw_fadd_local_v2f16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (<2 x s16>), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p3) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 3) + %3:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: atomicrmw_fadd_local_v2f16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: atomicrmw_fadd_local_v2f16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (<2 x s16>), addrspace 3) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]] + %0:_(p3) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 3) + %3:_(<2 x s16>) = G_AND %2, %2 ...