diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 0b2ee6371da06..4bcaabfd3263a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1343,7 +1343,6 @@ struct AAAMDGPUMinAGPRAlloc Maximum.takeAssumedMaximum(NumRegs); return true; } - switch (CB.getIntrinsicID()) { case Intrinsic::not_intrinsic: break; @@ -1361,10 +1360,21 @@ struct AAAMDGPUMinAGPRAlloc return true; } + // Trap-like intrinsics (llvm.trap, llvm.debugtrap, llvm.ubsantrap) are + // not marked nocallback, so the default case below would conservatively + // assume they may call a function which requires AGPRs, and the zero + // AGPR allocation would not be inferred. A trap may still invoke a + // handler named by the "trap-func-name" attribute, and that handler may + // require AGPRs, so only treat the call as not requiring AGPRs when it + // is nocallback or does not carry the "trap-func-name" attribute. + case Intrinsic::trap: + case Intrinsic::debugtrap: + case Intrinsic::ubsantrap: + return CB.hasFnAttr(Attribute::NoCallback) || + !CB.hasFnAttr("trap-func-name"); default: // Some intrinsics may use AGPRs, but if we have a choice, we are not // required to use AGPRs. - // Assume !nocallback intrinsics may call a function which requires // AGPRs. 
return CB.hasFnAttr(Attribute::NoCallback); diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll index 71c509afa8e64..163f1aa72e11f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll @@ -35,7 +35,7 @@ define void @use_assume(i1 %arg) { define void @use_trap() { ; CHECK-LABEL: define void @use_trap( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.trap() ; CHECK-NEXT: ret void ; @@ -43,9 +43,19 @@ define void @use_trap() { ret void } +define void @use_trap_with_handler() { +; CHECK-LABEL: define void @use_trap_with_handler( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: ret void +; + call void @llvm.trap() #0 + ret void +} + define void @use_debugtrap() { ; CHECK-LABEL: define void @use_debugtrap( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.debugtrap() ; CHECK-NEXT: ret void ; @@ -55,7 +65,7 @@ define void @use_debugtrap() { define void @use_ubsantrap() { ; CHECK-LABEL: define void @use_ubsantrap( -; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.ubsantrap(i8 0) ; CHECK-NEXT: ret void ; @@ -63,6 +73,8 @@ define void @use_ubsantrap() { ret void } + +attributes #0 = { "trap-func-name"="handler" } ;. 
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } @@ -71,4 +83,5 @@ define void @use_ubsantrap() { ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR7]] = { "trap-func-name"="handler" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll new file mode 100644 index 0000000000000..e5e7328890146 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s + +; Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have the +; nocallback attribute, so the AMDGPU attributor used to conservatively drop +; all implicitly-known inputs and AGPR allocation information. Make sure we +; still infer that no implicit inputs are required and that the AGPR allocation +; stays at zero. + +declare void @llvm.trap() + +declare void @llvm.debugtrap() + +define amdgpu_kernel void @trap_kernel() { +; CHECK-LABEL: define amdgpu_kernel void @trap_kernel( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: ret void +; + call void @llvm.trap() + ret void +} + +define amdgpu_kernel void @trap_kernel_with_handler() { +; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.trap() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: ret void +; + call void @llvm.trap() #0 + ret void +} + +define amdgpu_kernel void @debugtrap_kernel() { +; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void @llvm.debugtrap() +; CHECK-NEXT: ret void +; + call void @llvm.debugtrap() + ret void +} + +; Test that a trap with both trap-func-name and nocallback is still safe +define amdgpu_kernel void @trap_kernel_with_handler_and_nocallback() { +; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler_and_nocallback( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void @llvm.trap() 
#[[ATTR5:[0-9]+]] +; CHECK-NEXT: ret void +; + call void @llvm.trap() #1 + ret void +} + +attributes #0 = { "trap-func-name"="handler" } +attributes #1 = { nocallback "trap-func-name"="handler" } + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "trap-func-name"="handler" } +; CHECK: attributes #[[ATTR5]] = { nocallback "trap-func-name"="handler" } +;.