From 164679730b572cff94f4ce346d595212eaefc43e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Mar 2025 13:16:36 +0700 Subject: [PATCH 1/2] AMDGPU: Add baseline test for attributor with calling intrinsic --- ...attributor-intrinsic-missing-nocallback.ll | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll new file mode 100644 index 0000000000000..3023b36c2e349 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s + +; Make sure we do not infer anything about implicit inputs through an +; intrinsic call which is not nocallback. + +declare zeroext i32 @return_i32() + +define i32 @test_i32_return() gc "statepoint-example" { +; CHECK-LABEL: define i32 @test_i32_return( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] gc "statepoint-example" { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]]) +; CHECK-NEXT: ret i32 [[CALL1]] +; +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) + ret i32 %call1 +} + +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) +declare i32 @llvm.experimental.gc.result.i32(token) #0 + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +;. From 85f53834d89ec447c5d95d3e3c4a62a5ee3377cc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Mar 2025 13:20:05 +0700 Subject: [PATCH 2/2] AMDGPU: Do not infer implicit inputs for !nocallback intrinsics This isn't really the right check; we want to know that the intrinsic does not perform a true function call to any code (in the module or not). nocallback appears to be the closest thing to this property we have now, though. Also do the same for inferring no-agpr usage.
--- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 27 ++++++- ...attributor-intrinsic-missing-nocallback.ll | 9 +-- ...amdgpu-attributor-nocallback-intrinsics.ll | 74 +++++++++++++++++++ 3 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 8669978637f40..56ab040706a13 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -38,9 +38,10 @@ enum ImplicitArgumentPositions { #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, enum ImplicitArgumentMask { - NOT_IMPLICIT_INPUT = 0, + UNKNOWN_INTRINSIC = 0, #include "AMDGPUAttributes.def" - ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 + ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1, + NOT_IMPLICIT_INPUT }; #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, @@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5); return QUEUE_PTR; default: - return NOT_IMPLICIT_INPUT; + return UNKNOWN_INTRINSIC; } } @@ -534,6 +535,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { ImplicitArgumentMask AttrMask = intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, HasApertureRegs, SupportsGetDoorbellID, COV); + + if (AttrMask == UNKNOWN_INTRINSIC) { + // Assume not-nocallback intrinsics may invoke a function which accesses + // implicit arguments. + // + // FIXME: This isn't really the correct check. We want to ensure it + // isn't calling any function that may use implicit arguments regardless + // of whether it's internal to the module or not. + // + // TODO: Ignoring callsite attributes. 
+ if (!Callee->hasFnAttribute(Attribute::NoCallback)) + return indicatePessimisticFixpoint(); + continue; + } + if (AttrMask != NOT_IMPLICIT_INPUT) { if ((IsNonEntryFunc || !NonKernelOnly)) removeAssumedBits(AttrMask); @@ -1357,7 +1373,10 @@ struct AAAMDGPUMinAGPRAlloc default: // Some intrinsics may use AGPRs, but if we have a choice, we are not // required to use AGPRs. - return true; + + // Assume !nocallback intrinsics may call a function which requires + // AGPRs. + return CB.hasFnAttr(Attribute::NoCallback); } // TODO: Handle callsite attributes diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll index 3023b36c2e349..d7d623ac89146 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll @@ -8,7 +8,7 @@ declare zeroext i32 @return_i32() define i32 @test_i32_return() gc "statepoint-example" { ; CHECK-LABEL: define i32 @test_i32_return( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] gc "statepoint-example" { +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) ; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]]) @@ -25,8 +25,7 @@ declare i32 @llvm.experimental.gc.result.i32(token) #0 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } ;. 
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll new file mode 100644 index 0000000000000..71c509afa8e64 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s + +; Make sure we infer no inputs are used through some intrinsics + +define void @use_fake_use(i32 %arg) { +; CHECK-LABEL: define void @use_fake_use( +; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]]) +; CHECK-NEXT: ret void +; + call void (...) @llvm.fake.use(i32 %arg) + ret void +} + +define void @use_donothing() { +; CHECK-LABEL: define void @use_donothing( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: ret void +; + call void @llvm.donothing() + ret void +} + +define void @use_assume(i1 %arg) { +; CHECK-LABEL: define void @use_assume( +; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]]) +; CHECK-NEXT: ret void +; + call void @llvm.assume(i1 %arg) + ret void +} + +define void @use_trap() { +; CHECK-LABEL: define void @use_trap( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; + call void @llvm.trap() + ret void +} + +define void @use_debugtrap() { +; CHECK-LABEL: define void @use_debugtrap( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.debugtrap() +; CHECK-NEXT: ret void +; + call void @llvm.debugtrap() + ret void +} + +define void @use_ubsantrap() { +; CHECK-LABEL: define void @use_ubsantrap( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.ubsantrap(i8 0) +; CHECK-NEXT: ret void +; + call void 
@llvm.ubsantrap(i8 0) + ret void +} + +;. +; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" } +;.