-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas" #171087
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit 6ec8c43.
Member
|
@llvm/pr-subscribers-backend-amdgpu Author: Jan Patrick Lehr (jplehr) Changes: Reverts llvm/llvm-project#166132. It broke the libc on GPU tests. Full diff: https://github.com/llvm/llvm-project/pull/171087.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bab76e87af40c..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
const auto &VarOffset = VarOffsets.front();
APInt OffsetQuot;
APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
- Value *Offset = VarOffset.first;
- if (Rem != 0) {
- unsigned ElemSizeShift = Log2_64(VecElemSize);
- SimplifyQuery SQ(DL);
- SQ.CxtI = GEP;
- KnownBits KB = computeKnownBits(VarOffset.first, SQ);
- // Bail out if the index may point into the middle of an element.
- if (KB.countMinTrailingZeros() < ElemSizeShift)
- return nullptr;
-
- Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
- if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
- NewInsts.push_back(NewInst);
-
- Offset = Scaled;
- OffsetQuot = APInt(BW, 1);
- Rem = 0;
- }
+ if (Rem != 0 || OffsetQuot.isZero())
+ return nullptr;
+ Value *Offset = VarOffset.first;
if (!isa<IntegerType>(Offset->getType()))
return nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
index bcc61062640d2..76e1868b3c4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
@@ -250,150 +250,6 @@ bb2:
store i32 0, ptr addrspace(5) %extractelement
ret void
}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 0, i32 4
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 4, i32 8
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[VEC]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[ALLOCA]], float [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[VEC]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP2]], float [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[VEC]], i64 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP4]], float [[TMP5]], i32 2
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP6]], float [[DATA]], i32 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x float> poison, float [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x float> [[TMP10]], float [[TMP11]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x float> [[TMP12]], float [[TMP13]], i64 2
-; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
- %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %row, align 16
- %index = select i1 %idx_sel, i32 4, i32 8
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %row, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16
-; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
-; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x <3 x float>], ptr addrspace(3) @scalar_alloca_vector_gep_i8_4_or_5_no_promote.alloca, i32 0, i32 [[TMP14]]
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[TMP15]], i32 [[INDEX]]
-; CHECK-NEXT: store float [[DATA]], ptr addrspace(3) [[ELT]], align 4
-; CHECK-NEXT: [[UPDATED:%.*]] = load <3 x float>, ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT: store <3 x float> [[UPDATED]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 4, i32 5
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x <3 x float>], align 16, addrspace(5)
-; CHECK-NEXT: [[ROW:%.*]] = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[ROW]], i32 [[INDEX]]
-; CHECK-NEXT: store float [[DATA]], ptr addrspace(5) [[ELT]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = load <3 x float>, ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
- %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %row, align 16
- %index = select i1 %idx_sel, i32 4, i32 5
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %row, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
;.
; CHECK: [[META0]] = !{}
; CHECK: [[RNG1]] = !{i32 0, i32 1025}
|
honeygoyal
pushed a commit
to honeygoyal/llvm-project
that referenced
this pull request
Dec 9, 2025
…1087) Reverts llvm#166132. Broke libc on GPU tests. See https://lab.llvm.org/buildbot/#/builders/10/builds/18635
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #166132
Broke libc on GPU tests.
https://lab.llvm.org/buildbot/#/builders/10/builds/18635