Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
.Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
.Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

// Atomics always operate per-lane; keep both the pointer and the value/result
// in VGPRs regardless of uniformity. Use Ptr32/Ptr64 to cover all addrspaces
// (e.g. local/region/private for Ptr32, global/flat for Ptr64).
addRulesForGOpcs({G_ATOMICRMW_FADD})
.Any({{B32, Ptr32, B32}, {{VgprB32}, {VgprPtr32, VgprB32}}})
Copy link
Collaborator

@petar-avramovic petar-avramovic Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

keep result in VGPRs regardless of uniformity

For the context of RegBankLegalize this is not correct if it was actually uniform result, it would crash somewhere. Missed this in #175253 also.
Think that atomics are always divergent and inputs can be uniform. Can you check this?
This is most probably should use S types (S32 S64 V2S16) not B type, since B32 includes S32, V2S16, ptr32 etc
.Any({{DivS32, Ptr32}, {{Vgpr32}, {VgprPtr32, Vgpr32}}})

Should move the rule to similar opcodes, atomics or fp opcodes, shouldn't be in between loads.

Also if this is Ptr32, should add .ll tests for local/region/private etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Uniformity analysis marks the result of atomic ops as divergent.
  • I will replace the use of B types with S types.

.Any({{B32, Ptr64, B32}, {{VgprB32}, {VgprPtr64, VgprB32}}})
.Any({{B64, Ptr32, B64}, {{VgprB64}, {VgprPtr32, VgprB64}}})
.Any({{B64, Ptr64, B64}, {{VgprB64}, {VgprPtr64, VgprB64}}})
.Any({{V2S16, Ptr32, V2S16}, {{VgprV2S16}, {VgprPtr32, VgprV2S16}}})
.Any({{V2S16, Ptr64, V2S16}, {{VgprV2S16}, {VgprPtr64, VgprV2S16}}});
// clang-format on

addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s

define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) {
; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s

define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) {
; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942

define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s

define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn
Expand Down
Loading