Skip to content

Conversation

ritter-x2a
Copy link
Member

When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.

Copy link
Member Author

ritter-x2a commented Sep 10, 2025

@llvmbot
Copy link
Member

llvmbot commented Sep 10, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.


Full diff: https://github.com/llvm/llvm-project/pull/157810.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll (+3-3)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bb77cdff778c0..7dbe1235a98b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -478,7 +478,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
 
   ConstantInt *ConstIndex =
       ConstantInt::get(OffsetType, IndexQuot.getSExtValue());
-  Value *IndexAdd = Builder.CreateAdd(ConstIndex, Offset);
+  Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
   if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
     NewInsts.push_back(NewInst);
   return IndexAdd;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index d72f158763c61..63622e67e7d0b 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -312,7 +312,7 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 6, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 6
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], 1
@@ -464,7 +464,7 @@ define amdgpu_kernel void @i16_2d_load_store(ptr %out, i32 %sel) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <6 x i16> [[TMP3]], i16 3, i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <6 x i16> [[TMP4]], i16 4, i32 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <6 x i16> [[TMP5]], i16 5, i32 5
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[SEL]], 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x i16> [[TMP6]], i32 [[TMP1]]
 ; CHECK-NEXT:    store i16 [[TMP2]], ptr [[OUT]], align 2
 ; CHECK-NEXT:    ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @float_2d_load_store(ptr %out, i32 %sel) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <6 x float> [[TMP3]], float 3.000000e+00, i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <6 x float> [[TMP4]], float 4.000000e+00, i32 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <6 x float> [[TMP5]], float 5.000000e+00, i32 5
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[SEL]], 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x float> [[TMP6]], i32 [[TMP1]]
 ; CHECK-NEXT:    store float [[TMP2]], ptr [[OUT]], align 4
 ; CHECK-NEXT:    ret void
@@ -538,7 +538,7 @@ define amdgpu_kernel void @ptr_2d_load_store(ptr %out, i32 %sel) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5
-; CHECK-NEXT:    [[TMP7:%.*]] = add i32 3, [[SEL]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SEL]], 3
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
 ; CHECK-NEXT:    store ptr [[TMP8]], ptr [[OUT]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
index 1b6ac0bd93c19..a865bf5058d6a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT:    store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
 ; CHECK-NEXT:    ret void
@@ -39,7 +39,7 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 -1, [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
 ; CHECK-NEXT:    store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
index a24f041a17857..f95a6a8ec9b45 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep-of-gep.ll
@@ -10,7 +10,7 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(i32 %idx, ptr ad
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <20 x i32> [[TMP1]], i32 2, i32 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 1, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <20 x i32> [[TMP3]], i32 [[TMP5]]
 ; CHECK-NEXT:    store i32 [[TMP6]], ptr addrspace(1) [[OUTPUT]], align 4
 ; CHECK-NEXT:    ret void
@@ -31,12 +31,12 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep3(i32 %idx, ptr a
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <16 x i32> poison
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 8, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[ALLOCA]], i32 10, i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP2]], i32 20, i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[IDX]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 9, [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 9
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x i32> [[TMP4]], i32 [[TMP6]]
 ; CHECK-NEXT:    store i32 [[TMP7]], ptr addrspace(1) [[OUTPUT]], align 4
 ; CHECK-NEXT:    ret void

@ritter-x2a ritter-x2a marked this pull request as ready for review September 10, 2025 08:33
Base automatically changed from users/ritter-x2a/09-09-_amdgpu_treat_gep_offsets_as_signed_in_amdgpupromotealloca to main September 10, 2025 09:32
When we know that one operand of an addition is a constant, we might as
well put it on the right-hand side and avoid the work to canonicalize it
in a later pass.
@ritter-x2a ritter-x2a force-pushed the users/ritter-x2a/09-10-_amdgpu_generate_canonical_additions_in_amdgpupromotealloca branch from f6a8f01 to 43fc13b Compare September 10, 2025 10:40
Copy link
Member Author

Manual rebase, since Graphite seems to have messed the automatic rebase up.

Copy link
Member Author

ritter-x2a commented Sep 10, 2025

Merge activity

  • Sep 10, 12:45 PM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Sep 10, 12:46 PM UTC: @ritter-x2a merged this pull request with Graphite.

@ritter-x2a ritter-x2a merged commit 5b81367 into main Sep 10, 2025
9 checks passed
@ritter-x2a ritter-x2a deleted the users/ritter-x2a/09-10-_amdgpu_generate_canonical_additions_in_amdgpupromotealloca branch September 10, 2025 12:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants