Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224

ronlieb · 2026-01-02T17:18:34Z

Reverts #131759

Seeing regressions in the PyTorch unit tests: 8 test cases failed in the "test_ops" test suite.

This reverts commit 849038c.

llvmbot · 2026-01-02T17:19:08Z

@llvm/pr-subscribers-backend-amdgpu

Author: theRonShark (ronlieb)

Changes

Reverts llvm/llvm-project#131759

Seeing regressions in the PyTorch unit tests: 8 test cases failed in the "test_ops" test suite.

Full diff: https://github.com/llvm/llvm-project/pull/174224.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+4-23)
(removed) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll (-31)
(removed) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll (-74)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0b2ee6371da06..821a7198e38c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,10 +38,9 @@ enum ImplicitArgumentPositions {
 #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
 
 enum ImplicitArgumentMask {
-  UNKNOWN_INTRINSIC = 0,
+  NOT_IMPLICIT_INPUT = 0,
 #include "AMDGPUAttributes.def"
-  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
-  NOT_IMPLICIT_INPUT
+  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
 };
 
 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -116,7 +115,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
     NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
     return QUEUE_PTR;
   default:
-    return UNKNOWN_INTRINSIC;
+    return NOT_IMPLICIT_INPUT;
   }
 }
 
@@ -526,21 +525,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
       ImplicitArgumentMask AttrMask =
           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                               HasApertureRegs, SupportsGetDoorbellID, COV);
-
-      if (AttrMask == UNKNOWN_INTRINSIC) {
-        // Assume not-nocallback intrinsics may invoke a function which accesses
-        // implicit arguments.
-        //
-        // FIXME: This isn't really the correct check. We want to ensure it
-        // isn't calling any function that may use implicit arguments regardless
-        // of whether it's internal to the module or not.
-        //
-        // TODO: Ignoring callsite attributes.
-        if (!Callee->hasFnAttribute(Attribute::NoCallback))
-          return indicatePessimisticFixpoint();
-        continue;
-      }
-
       if (AttrMask != NOT_IMPLICIT_INPUT) {
         if ((IsNonEntryFunc || !NonKernelOnly))
           removeAssumedBits(AttrMask);
@@ -1364,10 +1348,7 @@ struct AAAMDGPUMinAGPRAlloc
       default:
         // Some intrinsics may use AGPRs, but if we have a choice, we are not
         // required to use AGPRs.
-
-        // Assume !nocallback intrinsics may call a function which requires
-        // AGPRs.
-        return CB.hasFnAttr(Attribute::NoCallback);
+        return true;
       }
 
       // TODO: Handle callsite attributes
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
deleted file mode 100644
index d7d623ac89146..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
-
-; Make sure we do not infer anything about implicit inputs through an
-; intrinsic call which is not nocallback.
-
-declare zeroext i32 @return_i32()
-
-define i32 @test_i32_return() gc "statepoint-example" {
-; CHECK-LABEL: define i32 @test_i32_return(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
-; CHECK-NEXT:    [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
-; CHECK-NEXT:    ret i32 [[CALL1]]
-;
-entry:
-  %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
-  %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
-  ret i32 %call1
-}
-
-declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
-declare i32 @llvm.experimental.gc.result.i32(token) #0
-
-attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
-;.
-; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
deleted file mode 100644
index 71c509afa8e64..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
-
-; Make sure we infer no inputs are used through some intrinsics
-
-define void @use_fake_use(i32 %arg) {
-; CHECK-LABEL: define void @use_fake_use(
-; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    call void (...) @llvm.fake.use(i32 [[ARG]])
-; CHECK-NEXT:    ret void
-;
-  call void (...) @llvm.fake.use(i32 %arg)
-  ret void
-}
-
-define void @use_donothing() {
-; CHECK-LABEL: define void @use_donothing(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.donothing()
-  ret void
-}
-
-define void @use_assume(i1 %arg) {
-; CHECK-LABEL: define void @use_assume(
-; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @llvm.assume(i1 [[ARG]])
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.assume(i1 %arg)
-  ret void
-}
-
-define void @use_trap() {
-; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.trap()
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.trap()
-  ret void
-}
-
-define void @use_debugtrap() {
-; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    call void @llvm.debugtrap()
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.debugtrap()
-  ret void
-}
-
-define void @use_ubsantrap() {
-; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    call void @llvm.ubsantrap(i8 0)
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.ubsantrap(i8 0)
-  ret void
-}
-
-;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-;.

arsenm

It's highly implausible that this broke anything, and the change is really old.

ronlieb · 2026-01-06T13:30:06Z

It's highly implausible that this broke anything, and the change is really old.

@arsenm or @bcahoon the CQE test team verified that this revert fixes the issues, please approve it to land.

arsenm · 2026-01-06T14:14:39Z

@arsenm or @bcahoon the CQE test team verified that this revert fixes the issues, please approve it to land.

"Fixes" is greatly overstating this. Please find someone to investigate this, commit an actual regression test, and reland.

ronlieb · 2026-01-06T14:18:58Z

@arsenm or @bcahoon the CQE test team verified that this revert fixes the issues, please approve it to land.

"Fixes" is greatly overstating this. Please find someone to investigate this, commit an actual regression test, and reland.

Akash is looking into getting a fix to reland

…cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite

#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite

…ics" (llvm#174224) This reverts commit dff081c.

…ics" (#176081) This reverts #174224 and re-applies #131759 . Note: If #117544 is reverted, this should also be reverted.

…ics" (llvm#176081) This reverts llvm#174224 and re-applies llvm#131759 . Note: If llvm#117544 is reverted, this should also be reverted.

#1016) …cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics"

6746206

This reverts commit 849038c.

llvmbot added the backend:AMDGPU label Jan 2, 2026

ronlieb requested review from akadutta, arsenm and bcahoon January 2, 2026 17:19

arsenm requested changes Jan 2, 2026

View reviewed changes

ronlieb requested review from arsenm and macurtis-amd January 6, 2026 13:41

bcahoon approved these changes Jan 6, 2026

View reviewed changes

ronlieb merged commit dff081c into main Jan 6, 2026
12 checks passed

ronlieb deleted the revert-131759-users/arsenm/amdgpu/attributor-no-infer-inputs-intrinsic-missing-nocallback branch January 6, 2026 14:13

ronlieb added a commit to ROCm/llvm-project that referenced this pull request Jan 6, 2026

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsi…

34bf9c4

…cs" (llvm#174224) Reverts llvm#131759 seeing regressions in : Pytorch UT- 8 test cases failed in "test_ops" test suite

This was referenced Jan 6, 2026

Amd/dev/rlieberm/rev py torch ut ROCm/llvm-project#1015

Closed

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsi… ROCm/llvm-project#1016

Merged

akadutta added a commit to akadutta/llvm-project that referenced this pull request Jan 15, 2026

Reapply "AMDGPU: Do not infer implicit inputs for !nocallback intrins…

7c4d79e

…ics" (llvm#174224) This reverts commit dff081c.

akadutta mentioned this pull request Jan 15, 2026

Reapply "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #176081

Merged

akadutta added a commit that referenced this pull request Jan 15, 2026

Reapply "AMDGPU: Do not infer implicit inputs for !nocallback intrins…

fc10fbb

…ics" (#176081) This reverts #174224 and re-applies #131759 . Note: If #117544 is reverted, this should also be reverted.

ronlieb mentioned this pull request Jan 28, 2026

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsi… ROCm/llvm-project#1253

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224

Uh oh!

ronlieb commented Jan 2, 2026

Uh oh!

llvmbot commented Jan 2, 2026

Uh oh!

arsenm left a comment

Uh oh!

ronlieb commented Jan 6, 2026

Uh oh!

Uh oh!

arsenm commented Jan 6, 2026

Uh oh!

ronlieb commented Jan 6, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224

Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" #174224

Uh oh!

Conversation

ronlieb commented Jan 2, 2026

Uh oh!

llvmbot commented Jan 2, 2026

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

ronlieb commented Jan 6, 2026

Uh oh!

Uh oh!

arsenm commented Jan 6, 2026

Uh oh!

ronlieb commented Jan 6, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants