[AMDGPU] Generate canonical additions in AMDGPUPromoteAlloca #157810
Conversation
This stack of pull requests is managed by Graphite.
@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

When we know that one operand of an addition is a constant, we might as well put it on the right-hand side and avoid the work to canonicalize it in a later pass.

Full diff: https://github.com/llvm/llvm-project/pull/157810.diff

4 Files Affected:
- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+1/-1)
- llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll (+4/-4)
- llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll (+2/-2)
- llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll (+3/-3)
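For illustration (a minimal sketch of my own, not taken from the PR or its tests): InstCombine canonicalizes commutative operations so that a constant operand ends up on the right-hand side. Emitting the add in that form directly leaves nothing for the later pass to commute. The two hypothetical functions below show the non-canonical form the pass used to emit and the canonical form it emits after this change:

define i64 @noncanonical(i64 %offset) {
  ; constant on the LHS; InstCombine would later commute this
  %idx = add i64 6, %offset
  ret i64 %idx
}

define i64 @canonical(i64 %offset) {
  ; constant on the RHS; already canonical, no later rewrite needed
  %idx = add i64 %offset, 6
  ret i64 %idx
}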
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bb77cdff778c0..7dbe1235a98b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -478,7 +478,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
ConstantInt *ConstIndex =
ConstantInt::get(OffsetType, IndexQuot.getSExtValue());
- Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
+ Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
NewInsts.push_back(NewInst);
return IndexAdd;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index d72f158763c61..63622e67e7d0b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -312,7 +312,7 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
@@ -464,7 +464,7 @@ define amdgpu_kernel void @i16_2d_load_store(ptr %out, i32 %sel) {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x i16> [[TMP3]], i16 3, i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x i16> [[TMP4]], i16 4, i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x i16> [[TMP5]], i16 5, i32 5
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> [[TMP6]], i32 [[TMP1]]
; CHECK-NEXT: store i16 [[TMP2]], ptr [[OUT]], align 2
; CHECK-NEXT: ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @float_2d_load_store(ptr %out, i32 %sel) {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x float> [[TMP3]], float 3.000000e+00, i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x float> [[TMP4]], float 4.000000e+00, i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x float> [[TMP5]], float 5.000000e+00, i32 5
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x float> [[TMP6]], i32 [[TMP1]]
; CHECK-NEXT: store float [[TMP2]], ptr [[OUT]], align 4
; CHECK-NEXT: ret void
@@ -538,7 +538,7 @@ define amdgpu_kernel void @ptr_2d_load_store(ptr %out, i32 %sel) {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SEL]], 3
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
; CHECK-NEXT: store ptr [[TMP8]], ptr [[OUT]], align 8
; CHECK-NEXT: ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
index 1b6ac0bd93c19..a865bf5058d6a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
@@ -39,7 +39,7 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
index a24f041a17857..f95a6a8ec9b45 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
@@ -10,7 +10,7 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(i32 %idx, ptr ad
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <20 x i32> [[TMP1]], i32 2, i32 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <20 x i32> [[TMP3]], i32 [[TMP5]]
; CHECK-NEXT: store i32 [[TMP6]], ptr addrspace(1) [[OUTPUT]], align 4
; CHECK-NEXT: ret void
@@ -31,12 +31,12 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep3(i32 %idx, ptr a
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <16 x i32> poison
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 8, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[ALLOCA]], i32 10, i32 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP2]], i32 20, i32 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = add i32 9, [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 9
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP4]], i32 [[TMP6]]
; CHECK-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[OUTPUT]], align 4
; CHECK-NEXT: ret void
Force-pushed from f6a8f01 to 43fc13b.
Manual rebase, since Graphite seems to have messed the automatic rebase up.
When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.