Skip to content

Commit fb44b9d

Browse files
committed
[OpenCL][CUDA][HIP][SYCL] Add norecurse
norecurse function attr indicates the function is not called recursively directly or indirectly. Add norecurse to OpenCL functions, SYCL functions in device compilation and CUDA/HIP kernels. Although there is LLVM pass adding norecurse to functions, it only works for whole-program compilation. Also FE adding norecurse can make that pass run faster since functions with norecurse do not need to be checked again. Differential Revision: https://reviews.llvm.org/D73651
1 parent 20c5968 commit fb44b9d

File tree

6 files changed

+95
-38
lines changed

6 files changed

+95
-38
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -918,10 +918,20 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
918918
// If we're in C++ mode and the function name is "main", it is guaranteed
919919
// to be norecurse by the standard (3.6.1.3 "The function main shall not be
920920
// used within a program").
921-
if (getLangOpts().CPlusPlus)
922-
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
923-
if (FD->isMain())
924-
Fn->addFnAttr(llvm::Attribute::NoRecurse);
921+
//
922+
// OpenCL C 2.0 v2.2-11 s6.9.i:
923+
// Recursion is not supported.
924+
//
925+
// SYCL v1.2.1 s3.10:
926+
// kernels cannot include RTTI information, exception classes,
927+
// recursive code, virtual functions or make use of C++ libraries that
928+
// are not compiled for the device.
929+
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
930+
if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL ||
931+
getLangOpts().SYCLIsDevice ||
932+
(getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>()))
933+
Fn->addFnAttr(llvm::Attribute::NoRecurse);
934+
}
925935

926936
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
927937
if (FD->usesFPIntrin())
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// REQUIRES: amdgpu-registered-target
2+
// REQUIRES: nvptx-registered-target
3+
4+
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \
5+
// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
6+
7+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
8+
// RUN: -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s
9+
10+
#include "Inputs/cuda.h"
11+
12+
__global__ void kernel1(int a) {}
13+
// CHECK: define{{.*}}@_Z7kernel1i{{.*}}#[[ATTR:[0-9]*]]
14+
15+
// CHECK: attributes #[[ATTR]] = {{.*}}norecurse

clang/test/CodeGenCUDA/propagate-metadata.cu

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,33 @@ __global__ void kernel() { lib_fn(); }
4848
}
4949

5050
// The kernel and lib function should have the same attributes.
51-
// CHECK: define void @kernel() [[attr:#[0-9]+]]
52-
// CHECK: define internal void @lib_fn() [[attr]]
51+
// CHECK: define void @kernel() [[kattr:#[0-9]+]]
52+
// CHECK: define internal void @lib_fn() [[fattr:#[0-9]+]]
5353

5454
// FIXME: These -NOT checks do not work as intended and do not check on the same
5555
// line.
5656

57-
// Check the attribute list.
58-
// CHECK: attributes [[attr]] = {
57+
// Check the attribute list for kernel.
58+
// CHECK: attributes [[kattr]] = {
5959

6060
// CHECK-SAME: convergent
61+
// CHECK-SAME: norecurse
62+
63+
// FTZ-NOT: "denormal-fp-math"
64+
65+
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
66+
// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
67+
68+
// CHECK-SAME: "no-trapping-math"="true"
69+
70+
// FAST-SAME: "unsafe-fp-math"="true"
71+
// NOFAST-NOT: "unsafe-fp-math"="true"
72+
73+
// Check the attribute list for lib_fn.
74+
// CHECK: attributes [[fattr]] = {
75+
76+
// CHECK-SAME: convergent
77+
// CHECK-NOT: norecurse
6178

6279
// FTZ-NOT: "denormal-fp-math"
6380

clang/test/CodeGenOpenCL/amdgpu-attrs.cl

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,33 +162,33 @@ kernel void default_kernel() {
162162
// CHECK-NOT: "amdgpu-num-sgpr"="0"
163163
// CHECK-NOT: "amdgpu-num-vgpr"="0"
164164

165-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56"
166-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56"
167-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56"
168-
169-
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
170-
171-
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
172-
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
173-
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
174-
175-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
176-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
177-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
178-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
179-
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
180-
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
181-
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
182-
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
183-
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
184-
185-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
186-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
187-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
188-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
189-
190-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
191-
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
192-
193-
// CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
194-
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
165+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56"
166+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = {{.*}} "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="56"
167+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = {{.*}} "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="56"
168+
169+
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
170+
171+
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
172+
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
173+
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
174+
175+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2"
176+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-waves-per-eu"="2,4"
177+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32"
178+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64"
179+
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
180+
// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
181+
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
182+
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
183+
// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
184+
185+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
186+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
187+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
188+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
189+
190+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
191+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
192+
193+
// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "correctly-rounded-divide-sqrt-fp-math"="false"
194+
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// RUN: %clang_cc1 -O0 -emit-llvm -o - %s | FileCheck %s
2+
3+
kernel void kernel1(int a) {}
4+
// CHECK: define{{.*}}@kernel1{{.*}}#[[ATTR:[0-9]*]]
5+
6+
// CHECK: attributes #[[ATTR]] = {{.*}}norecurse
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
2+
// RUN: -verify -fsyntax-only -verify-ignore-unexpected=note
3+
4+
#include "Inputs/cuda.h"
5+
6+
__global__ void kernel1();
7+
__global__ void kernel2() {
8+
kernel1<<<1,1>>>(); // expected-error {{reference to __global__ function 'kernel1' in __global__ function}}
9+
}

0 commit comments

Comments
 (0)