-
Notifications
You must be signed in to change notification settings - Fork 16.1k
Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224
Conversation
This reverts commit 849038c.
|
@llvm/pr-subscribers-backend-amdgpu Author: theRonShark (ronlieb) Changes: Reverts llvm/llvm-project#131759. Seeing regressions in PyTorch UT: 8 test cases failed in the "test_ops" test suite. Full diff: https://github.com/llvm/llvm-project/pull/174224.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0b2ee6371da06..821a7198e38c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,10 +38,9 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
- UNKNOWN_INTRINSIC = 0,
+ NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
- NOT_IMPLICIT_INPUT
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -116,7 +115,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
- return UNKNOWN_INTRINSIC;
+ return NOT_IMPLICIT_INPUT;
}
}
@@ -526,21 +525,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
-
- if (AttrMask == UNKNOWN_INTRINSIC) {
- // Assume not-nocallback intrinsics may invoke a function which accesses
- // implicit arguments.
- //
- // FIXME: This isn't really the correct check. We want to ensure it
- // isn't calling any function that may use implicit arguments regardless
- // of whether it's internal to the module or not.
- //
- // TODO: Ignoring callsite attributes.
- if (!Callee->hasFnAttribute(Attribute::NoCallback))
- return indicatePessimisticFixpoint();
- continue;
- }
-
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -1364,10 +1348,7 @@ struct AAAMDGPUMinAGPRAlloc
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
-
- // Assume !nocallback intrinsics may call a function which requires
- // AGPRs.
- return CB.hasFnAttr(Attribute::NoCallback);
+ return true;
}
// TODO: Handle callsite attributes
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
deleted file mode 100644
index d7d623ac89146..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
-
-; Make sure we do not infer anything about implicit inputs through an
-; intrinsic call which is not nocallback.
-
-declare zeroext i32 @return_i32()
-
-define i32 @test_i32_return() gc "statepoint-example" {
-; CHECK-LABEL: define i32 @test_i32_return(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
-; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
-; CHECK-NEXT: ret i32 [[CALL1]]
-;
-entry:
- %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
- ret i32 %call1
-}
-
-declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
-declare i32 @llvm.experimental.gc.result.i32(token) #0
-
-attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
-;.
-; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
deleted file mode 100644
index 71c509afa8e64..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
-
-; Make sure we infer no inputs are used through some intrinsics
-
-define void @use_fake_use(i32 %arg) {
-; CHECK-LABEL: define void @use_fake_use(
-; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
-; CHECK-NEXT: ret void
-;
- call void (...) @llvm.fake.use(i32 %arg)
- ret void
-}
-
-define void @use_donothing() {
-; CHECK-LABEL: define void @use_donothing(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: ret void
-;
- call void @llvm.donothing()
- ret void
-}
-
-define void @use_assume(i1 %arg) {
-; CHECK-LABEL: define void @use_assume(
-; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.assume(i1 %arg)
- ret void
-}
-
-define void @use_trap() {
-; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: call void @llvm.trap()
-; CHECK-NEXT: ret void
-;
- call void @llvm.trap()
- ret void
-}
-
-define void @use_debugtrap() {
-; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @llvm.debugtrap()
-; CHECK-NEXT: ret void
-;
- call void @llvm.debugtrap()
- ret void
-}
-
-define void @use_ubsantrap() {
-; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
-; CHECK-NEXT: ret void
-;
- call void @llvm.ubsantrap(i8 0)
- ret void
-}
-
-;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-;.
|
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's highly implausible that this broke anything, and it is really old.
…cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
…ics" (llvm#174224) This reverts commit dff081c.
…ics" (llvm#176081) This reverts llvm#174224 and re-applies llvm#131759 . Note: If llvm#117544 is reverted, this should also be reverted.
…ics" (llvm#176081) This reverts llvm#174224 and re-applies llvm#131759 . Note: If llvm#117544 is reverted, this should also be reverted.
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite
Reverts #131759
Seeing regressions in PyTorch UT: 8 test cases failed in the "test_ops" test suite.