[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. #161375
PeddleSpam merged 12 commits into llvm:main
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Leon Clark (PeddleSpam). Changes: Emit `!noalias` and `alias.scope` metadata for `noalias` kernel arguments. Patch is 50.28 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/161375.diff — 1 file affected:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-noalias-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-noalias-kernargs.ll
new file mode 100644
index 0000000000000..313ae3b883e56
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-noalias-kernargs.ll
@@ -0,0 +1,620 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=amdgcn-- -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck %s
+
+define amdgpu_kernel void @aliasinfo_2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_2i32(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_2I32_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_2I32_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[IN_GEP]], align 4
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL]], i1 false) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: store i32 [[CTLZ]], ptr addrspace(1) [[OUT_LOAD]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_2i32_NA(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_2i32_NA(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_2I32_NA_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN]], i32 [[TID]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[IN_GEP]], align 4
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL]], i1 false) #[[ATTR5]]
+; CHECK-NEXT: store i32 [[CTLZ]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_2i32_AS(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_2i32_AS(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_2I32_AS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_2I32_AS_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_2I32_AS_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[IN_GEP]], align 4, !alias.scope [[META1:![0-9]+]], !noalias [[META4:![0-9]+]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL]], i1 false) #[[ATTR5]]
+; CHECK-NEXT: store i32 [[CTLZ]], ptr addrspace(1) [[OUT_LOAD]], align 4, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4, !alias.scope !4, !noalias !2
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, ptr addrspace(1) %out, align 4, !alias.scope !2, !noalias !4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_2i32_NA_AS(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_2i32_NA_AS(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_2I32_NA_AS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN]], i32 [[TID]]
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[IN_GEP]], align 4, !alias.scope [[META1]], !noalias [[META4]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[VAL]], i1 false) #[[ATTR5]]
+; CHECK-NEXT: store i32 [[CTLZ]], ptr addrspace(1) [[OUT]], align 4, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4, !alias.scope !4, !noalias !2
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, ptr addrspace(1) %out, align 4, !alias.scope !2, !noalias !4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_v4f32_3v4i8(ptr addrspace(1) %out, ptr addrspace(1) %out1, ptr addrspace(1) %in, ptr addrspace(1) %in1) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_v4f32_3v4i8(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT1:%.*]], ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_V4F32_3V4I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[OUT1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT: [[OUT1_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_KERNARG_SEGMENT]], i64 52
+; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[IN1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_KERNARG_SEGMENT]], i64 60
+; CHECK-NEXT: [[IN1_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN1_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP]], align 1
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 1
+; CHECK-NEXT: [[SHUFFLE0_0:%.*]] = shufflevector <4 x i8> [[LOAD]], <4 x i8> [[LOAD1]], <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+; CHECK-NEXT: [[CVT:%.*]] = uitofp <4 x i8> [[SHUFFLE0_0]] to <4 x float>
+; CHECK-NEXT: store <4 x float> [[CVT]], ptr addrspace(1) [[OUT_LOAD]], align 16
+; CHECK-NEXT: store <4 x i8> [[SHUFFLE0_0]], ptr addrspace(1) [[OUT1_LOAD]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid
+ %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
+ %load = load <4 x i8>, ptr addrspace(1) %gep, align 1
+ %load1 = load <4 x i8>, ptr addrspace(1) %gep1, align 1
+ %shuffle0_0 = shufflevector <4 x i8> %load, <4 x i8> %load1, <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+ %cvt = uitofp <4 x i8> %shuffle0_0 to <4 x float>
+ store <4 x float> %cvt, ptr addrspace(1) %out, align 16
+ store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1, align 4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_NA(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out1, ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %in1) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_NA(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[OUT1:%.*]], ptr addrspace(1) noalias [[IN:%.*]], ptr addrspace(1) noalias [[IN1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_V4F32_3V4I8_NA_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN]], i32 [[TID]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN1]], i32 [[TID]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP]], align 1
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 1
+; CHECK-NEXT: [[SHUFFLE0_0:%.*]] = shufflevector <4 x i8> [[LOAD]], <4 x i8> [[LOAD1]], <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+; CHECK-NEXT: [[CVT:%.*]] = uitofp <4 x i8> [[SHUFFLE0_0]] to <4 x float>
+; CHECK-NEXT: store <4 x float> [[CVT]], ptr addrspace(1) [[OUT]], align 16
+; CHECK-NEXT: store <4 x i8> [[SHUFFLE0_0]], ptr addrspace(1) [[OUT1]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid
+ %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
+ %load = load <4 x i8>, ptr addrspace(1) %gep, align 1
+ %load1 = load <4 x i8>, ptr addrspace(1) %gep1, align 1
+ %shuffle0_0 = shufflevector <4 x i8> %load, <4 x i8> %load1, <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+ %cvt = uitofp <4 x i8> %shuffle0_0 to <4 x float>
+ store <4 x float> %cvt, ptr addrspace(1) %out, align 16
+ store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1, align 4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_AS(ptr addrspace(1) %out, ptr addrspace(1) %out1, ptr addrspace(1) %in, ptr addrspace(1) %in1) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_AS(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT1:%.*]], ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_V4F32_3V4I8_AS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_AS_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[OUT1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_AS_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT: [[OUT1_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_AS_KERNARG_SEGMENT]], i64 52
+; CHECK-NEXT: [[IN_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[IN1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ALIASINFO_V4F32_3V4I8_AS_KERNARG_SEGMENT]], i64 60
+; CHECK-NEXT: [[IN1_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[IN1_KERNARG_OFFSET]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN1_LOAD]], i32 [[TID]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP]], align 1, !alias.scope [[META1]], !noalias [[META4]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 1, !alias.scope [[META1]], !noalias [[META4]]
+; CHECK-NEXT: [[SHUFFLE0_0:%.*]] = shufflevector <4 x i8> [[LOAD]], <4 x i8> [[LOAD1]], <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+; CHECK-NEXT: [[CVT:%.*]] = uitofp <4 x i8> [[SHUFFLE0_0]] to <4 x float>
+; CHECK-NEXT: store <4 x float> [[CVT]], ptr addrspace(1) [[OUT_LOAD]], align 16, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: store <4 x i8> [[SHUFFLE0_0]], ptr addrspace(1) [[OUT1_LOAD]], align 4, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid
+ %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
+ %load = load <4 x i8>, ptr addrspace(1) %gep, align 1, !alias.scope !4, !noalias !2
+ %load1 = load <4 x i8>, ptr addrspace(1) %gep1, align 1, !alias.scope !4, !noalias !2
+ %shuffle0_0 = shufflevector <4 x i8> %load, <4 x i8> %load1, <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+ %cvt = uitofp <4 x i8> %shuffle0_0 to <4 x float>
+ store <4 x float> %cvt, ptr addrspace(1) %out, align 16, !alias.scope !2, !noalias !4
+ store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1, align 4, !alias.scope !2, !noalias !4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_NA_AS(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out1, ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %in1) nounwind {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_v4f32_3v4i8_NA_AS(
+; CHECK-SAME: ptr addrspace(1) noalias [[OUT:%.*]], ptr addrspace(1) noalias [[OUT1:%.*]], ptr addrspace(1) noalias [[IN:%.*]], ptr addrspace(1) noalias [[IN1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_V4F32_3V4I8_NA_AS_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN]], i32 [[TID]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <4 x i8>, ptr addrspace(1) [[IN1]], i32 [[TID]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP]], align 1, !alias.scope [[META1]], !noalias [[META4]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP1]], align 1, !alias.scope [[META1]], !noalias [[META4]]
+; CHECK-NEXT: [[SHUFFLE0_0:%.*]] = shufflevector <4 x i8> [[LOAD]], <4 x i8> [[LOAD1]], <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+; CHECK-NEXT: [[CVT:%.*]] = uitofp <4 x i8> [[SHUFFLE0_0]] to <4 x float>
+; CHECK-NEXT: store <4 x float> [[CVT]], ptr addrspace(1) [[OUT]], align 16, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: store <4 x i8> [[SHUFFLE0_0]], ptr addrspace(1) [[OUT1]], align 4, !alias.scope [[META4]], !noalias [[META1]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid
+ %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
+ %load = load <4 x i8>, ptr addrspace(1) %gep, align 1, !alias.scope !4, !noalias !2
+ %load1 = load <4 x i8>, ptr addrspace(1) %gep1, align 1, !alias.scope !4, !noalias !2
+ %shuffle0_0 = shufflevector <4 x i8> %load, <4 x i8> %load1, <4 x i32> <i32 3, i32 2, i32 6, i32 2>
+ %cvt = uitofp <4 x i8> %shuffle0_0 to <4 x float>
+ store <4 x float> %cvt, ptr addrspace(1) %out, align 16, !alias.scope !2, !noalias !4
+ store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1, align 4, !alias.scope !2, !noalias !4
+ ret void
+}
+
+define amdgpu_kernel void @aliasinfo_10v16f16(ptr addrspace(3) %in, ptr addrspace(3) %out) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @aliasinfo_10v16f16(
+; CHECK-SAME: ptr addrspace(3) [[IN:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ALIASINFO_10V16F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[IDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[LOAD_0_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[IN]], i32 [[IDX]]
+; CHECK-NEXT: [[LOAD_0:%.*]] = load <16 x half>, ptr addrspace(3) [[LOAD_0_ADDR]], align 32
+; CHECK-NEXT: [[LOAD_1_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[LOAD_0_ADDR]], i32 64
+; CHECK-NEXT: [[LOAD_1:%.*]] = load <16 x half>, ptr addrspace(3) [[LOAD_1_ADDR]], align 32
+; CHECK-NEXT: [[LOAD_2_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[LOAD_1_ADDR]], i32 128
+; CHECK-NEXT: [[LOAD_2:%.*]] = load <16 x half>, ptr addrspace(3) [[LOAD_2_ADDR]], align 32
+; CHECK-NEXT: [[LOAD_3_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[LOAD_2_ADDR]], i32 192
+; CHECK-NEXT: [[LOAD_3:%.*]] = load <16 x half>, ptr addrspace(3) [[LOAD_3_ADDR]], align 32
+; CHECK-NEXT: [[LOAD_4_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[LOAD_3_ADDR]], i32 256
+; CHECK-NEXT: [[LOAD_4:%.*]] = load <16 x half>, ptr addrspace(3) [[LOAD_4_ADDR]], align 32
+; CHECK-NEXT: [[MAI_0:%.*]] = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[LOAD_0]], <16 x half> [[LOAD_0]], <16 x half> [[LOAD_0]], i1 false)
+; CHECK-NEXT: [[MAI_1:%.*]] = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[LOAD_1]], <16 x half> [[LOAD_1]], <16 x half> [[LOAD_1]], i1 false)
+; CHECK-NEXT: [[MAI_2:%.*]] = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[LOAD_2]], <16 x half> [[LOAD_2]], <16 x half> [[LOAD_2]], i1 false)
+; CHECK-NEXT: [[MAI_3:%.*]] = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[LOAD_3]], <16 x half> [[LOAD_3]], <16 x half> [[LOAD_3]], i1 false)
+; CHECK-NEXT: [[MAI_4:%.*]] = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half> [[LOAD_4]], <16 x half> [[LOAD_4]], <16 x half> [[LOAD_4]], i1 false)
+; CHECK-NEXT: [[STORE_0_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[OUT]], i32 [[IDX]]
+; CHECK-NEXT: store <16 x half> [[MAI_0]], ptr addrspace(3) [[STORE_0_ADDR]], align 32
+; CHECK-NEXT: [[STORE_1_ADDR:%.*]] = getelementptr <16 x half>, ptr addrspace(3) [[OUT]], i32 64
+; CHECK-NEXT: store <16 x half> [[MAI_1]], ptr addrspace(3) [[STORE_1_ADDR]], align ...
[truncated]
|
arsenm
left a comment
There was a problem hiding this comment.
Is this supposed to just be a baseline test? The title sounds like it should be the actual change, but that's missing?
Fixes a bug in `AMDGPUISelLowering` where alias analysis info is not propagated to split loads and stores. This is required for #161375. Co-authored-by: Leon Clark <leoclark@amd.com>
7740f76 to
7396ea1
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Fixes a bug in `AMDGPUISelLowering` where alias analysis info is not propagated to split loads and stores. This is required for llvm#161375. Co-authored-by: Leon Clark <leoclark@amd.com>
Fixes a bug in `AMDGPUISelLowering` where alias analysis info is not propagated to split loads and stores. This is required for llvm#161375. Co-authored-by: Leon Clark <leoclark@amd.com>
Fixes a bug in `AMDGPUISelLowering` where alias analysis info is not propagated to split loads and stores. This is required for llvm#161375. Co-authored-by: Leon Clark <leoclark@amd.com>
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/85/builds/16928 — Here is the relevant piece of the build log for reference: |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/25/builds/14230 — Here is the relevant piece of the build log for reference: |
|
Reverted in 1d61ced, as it breaks the sanitizer buildbots, see above. We've also seen test hangs downstream, presumably for the same reason. |
Thanks, I was literally just about to write the same thing about https://green.lab.llvm.org |
[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. (llvm#161375) Emit `!noalias` and `alias.scope` metadata for `noalias` kernel arguments. Co-authored-by: Leon Clark <leoclark@amd.com>
Revert "[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. (llvm#161375)" This reverts commit 9f4f13a. Broke sanitizer buildbots, and causes test hangs in release builds.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/31888 — Here is the relevant piece of the build log for reference: |
[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. (llvm#161375) Emit `!noalias` and `alias.scope` metadata for `noalias` kernel arguments. Co-authored-by: Leon Clark <leoclark@amd.com>
Revert "[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. (llvm#161375)" This reverts commit 9f4f13a. Broke sanitizer buildbots, and causes test hangs in release builds.
…uments." (llvm#174977) Emit `!noalias` and `!alias.scope` metadata for `noalias` kernel arguments. Fixes sanitizer issues in llvm#161375. Co-authored-by: Leon Clark <leoclark@amd.com>
Emit `!noalias` and `alias.scope` metadata for `noalias` kernel arguments.