Skip to content

Conversation

@jplehr
Copy link
Contributor

@jplehr jplehr commented Dec 8, 2025

@jplehr jplehr enabled auto-merge (squash) December 8, 2025 07:56
@llvmbot
Copy link
Member

llvmbot commented Dec 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jan Patrick Lehr (jplehr)

Changes

Reverts llvm/llvm-project#166132

The reverted change broke the libc on GPU tests; see the failing buildbot run:
https://lab.llvm.org/buildbot/#/builders/10/builds/18635


Full diff: https://github.com/llvm/llvm-project/pull/171087.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+3-18)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll (-144)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bab76e87af40c..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   const auto &VarOffset = VarOffsets.front();
   APInt OffsetQuot;
   APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
-  Value *Offset = VarOffset.first;
-  if (Rem != 0) {
-    unsigned ElemSizeShift = Log2_64(VecElemSize);
-    SimplifyQuery SQ(DL);
-    SQ.CxtI = GEP;
-    KnownBits KB = computeKnownBits(VarOffset.first, SQ);
-    // Bail out if the index may point into the middle of an element.
-    if (KB.countMinTrailingZeros() < ElemSizeShift)
-      return nullptr;
-
-    Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
-    if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
-      NewInsts.push_back(NewInst);
-
-    Offset = Scaled;
-    OffsetQuot = APInt(BW, 1);
-    Rem = 0;
-  }
+  if (Rem != 0 || OffsetQuot.isZero())
+    return nullptr;
 
+  Value *Offset = VarOffset.first;
   if (!isa<IntegerType>(Offset->getType()))
     return nullptr;
 
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
index bcc61062640d2..76e1868b3c4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
@@ -250,150 +250,6 @@ bb2:
   store i32 0, ptr addrspace(5) %extractelement
   ret void
 }
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:    store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 0, i32 4
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:    store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 4, i32 8
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <8 x float> poison
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x float> [[VEC]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x float> [[ALLOCA]], float [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x float> [[VEC]], i64 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x float> [[TMP2]], float [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <3 x float> [[VEC]], i64 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x float> [[TMP4]], float [[TMP5]], i32 2
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x float> [[TMP6]], float [[DATA]], i32 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[TMP8]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <3 x float> poison, float [[TMP9]], i64 0
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x float> [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <3 x float> [[TMP10]], float [[TMP11]], i64 1
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x float> [[TMP8]], i32 2
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <3 x float> [[TMP12]], float [[TMP13]], i64 2
-; CHECK-NEXT:    store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
-  %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %row, align 16
-  %index = select i1 %idx_sel, i32 4, i32 8
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %row, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP3]], 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT:    [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
-; CHECK-NEXT:    [[TMP9:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
-; CHECK-NEXT:    [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]]
-; CHECK-NEXT:    [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1024 x <3 x float>], ptr addrspace(3) @scalar_alloca_vector_gep_i8_4_or_5_no_promote.alloca, i32 0, i32 [[TMP14]]
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    store <3 x float> [[VEC]], ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT:    [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[TMP15]], i32 [[INDEX]]
-; CHECK-NEXT:    store float [[DATA]], ptr addrspace(3) [[ELT]], align 4
-; CHECK-NEXT:    [[UPDATED:%.*]] = load <3 x float>, ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT:    store <3 x float> [[UPDATED]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 4, i32 5
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x <3 x float>], align 16, addrspace(5)
-; CHECK-NEXT:    [[ROW:%.*]] = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    store <3 x float> [[VEC]], ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT:    [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[ROW]], i32 [[INDEX]]
-; CHECK-NEXT:    store float [[DATA]], ptr addrspace(5) [[ELT]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = load <3 x float>, ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT:    store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
-  %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %row, align 16
-  %index = select i1 %idx_sel, i32 4, i32 5
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %row, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
 ;.
 ; CHECK: [[META0]] = !{}
 ; CHECK: [[RNG1]] = !{i32 0, i32 1025}

@jplehr jplehr merged commit ec78750 into main Dec 8, 2025
11 of 12 checks passed
@jplehr jplehr deleted the revert-166132-amdgpu/promote-vector branch December 8, 2025 08:25
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants