Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 64);
case Ptr128:
return isAnyPtr(MRI.getType(Reg), 128);
case V2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
case V2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
case V3S32:
Expand Down Expand Up @@ -769,6 +771,25 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
.Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});

// Subtarget capability flags passed as the extra argument to the packed
// (V2S16) G_ATOMICRMW_FADD rules registered below.
bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
// True when the subtarget reports either the NoRtn variant or the general
// buffer/global packed-F16 add feature.
bool HasAtomicBufferGlobalPkAddF16Insts =
ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
ST->hasAtomicBufferGlobalPkAddF16Insts();
bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
// Register the rules for G_ATOMICRMW_FADD.
// NOTE(review): presumably the first brace list of each entry holds the
// predicates for {result, pointer, data} and the second holds
// {{result mapping}, {pointer mapping, data mapping}} -- confirm against the
// rule-table declaration.
// NOTE(review): P0/P1/P3 appear to be the flat/global/local pointer types,
// judging by the Flat/BufferGlobal/Ds flag each V2S16 entry is paired with --
// confirm.
addRulesForGOpcs({G_ATOMICRMW_FADD})
// S32 and S64 entries for P0, P1 and P3 are added without a capability flag.
.Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
.Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
.Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
.Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
.Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
.Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
// V2S16 entries: one per pointer type, each passed its capability flag.
// NOTE(review): presumably an entry is only active when its flag is true --
// confirm against the Any() overload taking a bool.
.Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
HasAtomicFlatPkAdd16Insts)
.Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
HasAtomicBufferGlobalPkAddF16Insts)
.Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
HasAtomicDsPkAdd16Insts);

addRulesForGOpcs({G_ATOMIC_CMPXCHG})
.Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
.Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
Expand Down Expand Up @@ -1002,6 +1023,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
.Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
.Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

// clang-format on

addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s

define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) {
; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s

define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) {
; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn
Expand Down
178 changes: 147 additions & 31 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942

; =============================================================================
; Flat atomic fadd - f32
; =============================================================================

define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
Expand Down Expand Up @@ -50,30 +54,67 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
ret float %ret
}

define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset:
; =============================================================================
; Flat atomic fadd - v2f16
; =============================================================================

define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: buffer_wbl2 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret <2 x half> %result
}

define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset:
define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: buffer_wbl2 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret void
}

; =============================================================================
; Global atomic fadd - f32
; =============================================================================

; Non-returning case: f32 atomicrmw fadd through an addrspace(1) pointer with
; syncscope("wavefront"), monotonic ordering and the
; !amdgpu.no.fine.grained.memory tag. %ret is never used. The checks expect
; global_atomic_add_f32 with no destination register and no sc0 modifier.
define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) {
; GFX942-LABEL: global_atomic_fadd_f32_no_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
; GFX942-NEXT: s_endpgm
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret void
}

; Returning case: the same f32 atomicrmw fadd through an addrspace(1) pointer,
; but the old value is returned from the function. The checks expect
; global_atomic_add_f32 with a v0 destination and the sc0 modifier, followed
; by a wait on vmcnt(0) before the value is returned.
define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) {
; GFX942-LABEL: global_atomic_fadd_f32_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: ; return to shader part epilog
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret float %ret
}

; =============================================================================
; Global atomic fadd - v2f16
; =============================================================================

define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX942-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
; GFX942: ; %bb.0:
Expand Down Expand Up @@ -104,36 +145,111 @@ define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr,
ret void
}

define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
; =============================================================================
; Global atomic fadd - f64
; =============================================================================

define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
; GFX942-LABEL: global_atomic_fadd_f64_no_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_wbl2 sc1
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
; GFX942-NEXT: s_endpgm
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret void
}

define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
; GFX942-LABEL: global_atomic_fadd_f64_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: v_readfirstlane_b32 s1, v1
; GFX942-NEXT: ; return to shader part epilog
%ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret double %ret
}

; =============================================================================
; Local atomic fadd - f32
; =============================================================================

; Non-returning case: f32 atomicrmw fadd through an addrspace(3) pointer with
; syncscope("wavefront") and monotonic ordering. %ret is never used. The
; checks expect the ds_add_f32 form that takes only the address (v0) and the
; data (v1).
define amdgpu_ps void @local_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(3) %ptr, float %data) {
; GFX942-LABEL: local_atomic_fadd_f32_no_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: ds_add_f32 v0, v1
; GFX942-NEXT: s_endpgm
%ret = atomicrmw fadd ptr addrspace(3) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret void
}

; Returning case: f32 atomicrmw fadd through an addrspace(3) pointer whose old
; value is returned from the function. The checks expect ds_add_rtn_f32
; writing v0, followed by a wait on lgkmcnt(0) before the value is returned.
define amdgpu_ps float @local_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(3) %ptr, float %data) {
; GFX942-LABEL: local_atomic_fadd_f32_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: ds_add_rtn_f32 v0, v0, v1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: ; return to shader part epilog
%ret = atomicrmw fadd ptr addrspace(3) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret float %ret
}

; =============================================================================
; Local atomic fadd - v2f16
; =============================================================================

define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret <2 x half> %result
}

define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_wbl2 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
%gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
%unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret void
}

; =============================================================================
; Local atomic fadd - f64
; =============================================================================

; Non-returning case: f64 atomicrmw fadd through an addrspace(3) pointer with
; syncscope("wavefront") and monotonic ordering. %ret is never used. The
; checks expect v1 and v2 to be copied into v[4:5] and then used as the data
; operand of ds_add_f64.
; NOTE(review): presumably the copies exist because the incoming data sits in
; v[1:2], which is not an even-aligned register pair -- confirm.
define amdgpu_ps void @local_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(3) %ptr, double %data) {
; GFX942-LABEL: local_atomic_fadd_f64_no_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v4, v1
; GFX942-NEXT: v_mov_b32_e32 v5, v2
; GFX942-NEXT: ds_add_f64 v0, v[4:5]
; GFX942-NEXT: s_endpgm
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret void
}

; Returning case: f64 atomicrmw fadd through an addrspace(3) pointer whose old
; value is returned from the function. The checks expect v1 and v2 to be
; copied into v[4:5], then ds_add_rtn_f64 writing v[0:1], then a wait on
; lgkmcnt(0), and finally v_readfirstlane_b32 copies of v0 and v1 into s0 and
; s1.
; NOTE(review): presumably the readfirstlane copies are there because this
; amdgpu_ps function returns its double in SGPRs -- confirm.
define amdgpu_ps double @local_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(3) %ptr, double %data) {
; GFX942-LABEL: local_atomic_fadd_f64_rtn_atomicrmw:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_mov_b32_e32 v4, v1
; GFX942-NEXT: v_mov_b32_e32 v5, v2
; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: v_readfirstlane_b32 s0, v0
; GFX942-NEXT: v_readfirstlane_b32 s1, v1
; GFX942-NEXT: ; return to shader part epilog
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
ret double %ret
}

attributes #0 = { denormal_fpenv(float: ieee|ieee) }

!0 = !{}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn
Expand Down
Loading
Loading