Conversation
Contributor
Author
This stack of pull requests is managed by Graphite. Learn more about stacking. |
Member
|
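@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: This isn't really the right check; we want to know that the intrinsic does not perform a true function call to any code (in the module or not). nocallback appears to be the closest thing to this property we have now though. Also do the same for inferring no-agpr usage. Full diff: https://github.com/llvm/llvm-project/pull/131759.diff 3 Files Affected: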
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0cee3c3cb5e92..9d410ae5a55e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -48,9 +48,10 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
- NOT_IMPLICIT_INPUT = 0,
+ UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
+ NOT_IMPLICIT_INPUT
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -118,7 +119,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
- return NOT_IMPLICIT_INPUT;
+ return UNKNOWN_INTRINSIC;
}
}
@@ -522,6 +523,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
+
+ if (AttrMask == UNKNOWN_INTRINSIC) {
+ // Assume not-nocallback intrinsics may invoke a function which accesses
+ // implicit arguments.
+ //
+ // FIXME: This isn't really the correct check. We want to ensure it
+ // isn't calling any function that may use implicit arguments regardless
+ // of whether it's internal to the module or not.
+ //
+ // TODO: Ignoring callsite attributes.
+ if (!Callee->hasFnAttribute(Attribute::NoCallback))
+ return indicatePessimisticFixpoint();
+ continue;
+ }
+
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -1282,8 +1298,11 @@ struct AAAMDGPUNoAGPR
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
- if (Callee->isIntrinsic())
- return true;
+ if (Callee->isIntrinsic()) {
+ // Assume !nocallback intrinsics may call a function which requires
+ // AGPRs.
+ return CB.hasFnAttr(Attribute::NoCallback);
+ }
// TODO: Handle callsite attributes
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
new file mode 100644
index 0000000000000..892bfa12140d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Make sure we do not infer anything about implicit inputs through an
+; intrinsic call which is not nocallback.
+
+declare zeroext i32 @return_i32()
+
+define i32 @test_i32_return() gc "statepoint-example" {
+; CHECK-LABEL: define i32 @test_i32_return(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
+; CHECK-NEXT: ret i32 [[CALL1]]
+;
+entry:
+ %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
+ ret i32 %call1
+}
+
+declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
+declare i32 @llvm.experimental.gc.result.i32(token) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
+;.
+; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
new file mode 100644
index 0000000000000..b607c6cd8e720
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
+
+; Make sure we infer no inputs are used through some intrinsics
+
+define void @use_fake_use(i32 %arg) {
+; CHECK-LABEL: define void @use_fake_use(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
+; CHECK-NEXT: ret void
+;
+ call void (...) @llvm.fake.use(i32 %arg)
+ ret void
+}
+
+define void @use_donothing() {
+; CHECK-LABEL: define void @use_donothing(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.donothing()
+ ret void
+}
+
+define void @use_assume(i1 %arg) {
+; CHECK-LABEL: define void @use_assume(
+; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.assume(i1 %arg)
+ ret void
+}
+
+define void @use_trap() {
+; CHECK-LABEL: define void @use_trap(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap()
+ ret void
+}
+
+define void @use_debugtrap() {
+; CHECK-LABEL: define void @use_debugtrap(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void @llvm.debugtrap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.debugtrap()
+ ret void
+}
+
+define void @use_ubsantrap() {
+; CHECK-LABEL: define void @use_ubsantrap(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.ubsantrap(i8 0)
+ ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { cold noreturn nounwind "target-cpu"="gfx90a" }
+;.
|
a33a619 to
75e6bf7
Compare
4c171fb to
4e4d1a1
Compare
Base automatically changed from
users/arsenm/amdgpu/attributor-fix-skipping-debugtrap-ubsantrap
to
main
March 19, 2025 03:17
4e4d1a1 to
6aef6b0
Compare
shiltian
reviewed
Mar 19, 2025
| // Assume not-nocallback intrinsics may invoke a function which accesses | ||
| // implicit arguments. | ||
| // | ||
| // FIXME: This isn't really the correct check. We want to ensure it |
Contributor
There was a problem hiding this comment.
This needs an iterative approach, similar to other AAs, and then propagate the "attribute".
shiltian
approved these changes
Mar 19, 2025
This isn't really the right check; we want to know that the intrinsic does not perform a true function call to any code (in the module or not). nocallback appears to be the closest thing to this property we have now though. Also do the same for inferring no-agpr usage.
6aef6b0 to
85f5383
Compare
llvm-sync bot
pushed a commit
to arm/arm-toolchain
that referenced
this pull request
Jan 6, 2026
…ck intrinsics" (#174224) Reverts llvm/llvm-project#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
ronlieb
added a commit
to ROCm/llvm-project
that referenced
this pull request
Jan 6, 2026
…cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
ronlieb
added a commit
to ROCm/llvm-project
that referenced
this pull request
Jan 6, 2026
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
navaneethshan
pushed a commit
to qualcomm/cpullvm-toolchain
that referenced
this pull request
Jan 8, 2026
…cs" (#174224) Reverts llvm/llvm-project#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
navaneethshan
pushed a commit
to qualcomm/cpullvm-toolchain
that referenced
this pull request
Jan 9, 2026
…cs" (#174224) Reverts llvm/llvm-project#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite (cherry picked from commit dff081c)
akadutta
added a commit
that referenced
this pull request
Jan 15, 2026
Priyanshu3820
pushed a commit
to Priyanshu3820/llvm-project
that referenced
this pull request
Jan 18, 2026
…ics" (llvm#176081) This reverts llvm#174224 and re-applies llvm#131759 . Note: If llvm#117544 is reverted, this should also be reverted.
BStott6
pushed a commit
to BStott6/llvm-project
that referenced
this pull request
Jan 22, 2026
…ics" (llvm#176081) This reverts llvm#174224 and re-applies llvm#131759 . Note: If llvm#117544 is reverted, this should also be reverted.
lamb-j
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Jan 23, 2026
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
ronlieb
added a commit
to ROCm/llvm-project
that referenced
this pull request
Jan 28, 2026
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
akadutta
added a commit
that referenced
this pull request
Jan 30, 2026
…allback (#175230) This adds support to whitelist trap intrinsics while handling of intrinsics with !nocallback. This fixes the reasons behind the previous revert of #131759. The attributor was exiting early whenever it saw intrinsics without the nocallback bit, so trap-only kernels lost all the inferred “no implicit arg” metadata and their amdgpu-agpr-alloc=0 guarantees. That conservative fallback broke certain workloads by forcing unnecessary implicit arguments and AGPR reservations. This patch allows the pass to recognize leaf-like trap intrinsics, so they no longer poison the analysis. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
sshrestha-aa
pushed a commit
to sshrestha-aa/llvm-project
that referenced
this pull request
Feb 4, 2026
…allback (llvm#175230) This adds support to whitelist trap intrinsics while handling of intrinsics with !nocallback. This fixes the reasons behind the previous revert of llvm#131759. The attributor was exiting early whenever it saw intrinsics without the nocallback bit, so trap-only kernels lost all the inferred “no implicit arg” metadata and their amdgpu-agpr-alloc=0 guarantees. That conservative fallback broke certain workloads by forcing unnecessary implicit arguments and AGPR reservations. This patch allows the pass to recognize leaf-like trap intrinsics, so they no longer poison the analysis. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.

This isn't really the right check, we want to know that the intrinsic
does not perform a true function call to any code (in the module or not). nocallback
appears to be the closest thing to this property we have now though. Fixes theoretical
miscompiles with intrinsics like statepoint, which hide a call to a real function.
Also do the same for inferring no-agpr usage.