From d172f6406687f7735bd7250d52d01380bce89569 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 2 Mar 2023 13:59:34 -0800 Subject: [PATCH 01/29] [SYCL] Support multiple call operators in kernel --- clang/include/clang/Sema/Sema.h | 3 +- clang/lib/Sema/SemaSYCL.cpp | 51 ++++++++++---- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 77 ++++++++++++++++++++++ 3 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGenSYCL/kernel-op-calls.cpp diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 15903a1e46385..19f8b02dbc50b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -14274,7 +14274,8 @@ class Sema final { bool isDeclAllowedInSYCLDeviceCode(const Decl *D); void checkSYCLDeviceVarDecl(VarDecl *Var); - void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj); + void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, + FunctionDecl *KernelCallerFunc); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void SetSYCLKernelNames(); void MarkDevices(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index e6ee2273fb0fa..05088cadafaf7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2784,16 +2784,43 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -static CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec) { +static CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec, + FunctionDecl *KernelCallerFunc, + Sema &SemaRef) { + + CallGraph SYCLCG; + SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl()); + // assert(SYCLCG.getNode(KernelCallerFunc) && "No call graph entry for a + // kernel?"); + + CallGraphNode *KernelCallerFuncNode = SYCLCG.getNode(KernelCallerFunc); + CXXMethodDecl *OperatorCall = nullptr; + + for (const CallGraphNode *CI : *KernelCallerFuncNode) { + if (auto *Callee = dyn_cast(CI->getDecl())) { + Callee = Callee->getMostRecentDecl(); + if (Callee->getParent() == Rec && Callee->isCXXClassMember() && + Callee->getOverloadedOperator() == OO_Call) + OperatorCall = Callee; + return OperatorCall; + } + } + + return nullptr; + + /* for (auto *MD : Rec->methods()) { if (MD->getOverloadedOperator() == OO_Call) return MD; } return nullptr; + */ } -static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType) { - const CXXMethodDecl *OpParens = getOperatorParens(KernelObjType); +static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType, + FunctionDecl *KernelCallerFunc, Sema &SemaRef) { + const CXXMethodDecl *OpParens = + getOperatorParens(KernelObjType, KernelCallerFunc, SemaRef); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2886,7 +2913,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { if (KernelObj->isLambda()) WGLambdaFn = KernelObj->getLambdaCallOperator(); else - WGLambdaFn = getOperatorParens(KernelObj); + WGLambdaFn = getOperatorParens(KernelObj, KernelCallerFunc, SemaRef); assert(WGLambdaFn && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is @@ -3199,7 +3226,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } const llvm::StringLiteral getInitMethodName() const { - bool IsSIMDKernel = isESIMDKernelType(KernelObj); + bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, SemaRef); return IsSIMDKernel ? InitESIMDMethodName : InitMethodName; } @@ -3585,7 +3612,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj, QualType NameType, FunctionDecl *KernelFunc) : SyclKernelFieldHandler(S), Header(H) { - bool IsSIMDKernel = isESIMDKernelType(KernelObj); + bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, S); // The header needs to access the kernel object size. int64_t ObjSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelObj->getTypeForDecl()) @@ -3999,7 +4026,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, if (KernelObj->isInvalidDecl()) return; - bool IsSIMDKernel = isESIMDKernelType(KernelObj); + bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, *this); SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); @@ -4033,9 +4060,11 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // For a wrapped parallel_for, copy attributes from original // kernel to wrapped kernel. -void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) { +void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, + FunctionDecl *KernelCallerFunc) { // Get the operator() function of the wrapper. - CXXMethodDecl *OpParens = getOperatorParens(KernelObj); + CXXMethodDecl *OpParens = + getOperatorParens(KernelObj, KernelCallerFunc, *this); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; @@ -4148,10 +4177,10 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Attributes of a user-written SYCL kernel must be copied to the internally // generated alternative kernel, identified by a known string in its name. if (StableName.find("__pf_kernel_wrapper") != std::string::npos) - copySYCLKernelAttrs(KernelObj); + copySYCLKernelAttrs(KernelObj, KernelCallerFunc); } - bool IsSIMDKernel = isESIMDKernelType(KernelObj); + bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, *this); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp new file mode 100644 index 0000000000000..f98f62120c0d9 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsycl-is-device %s -o /dev/null +// RUN: FileCheck %s --input-file=%t.h --check-prefixes=UL,CHECK + +// Checks that functors are supported as SYCL kernels. + +#include "Inputs/sycl.hpp" + +constexpr auto sycl_read_write = sycl::access::mode::read_write; +constexpr auto sycl_global_buffer = sycl::access::target::global_buffer; + +// Case 2: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type sycl::id<1> (to be used in 'parallel_for'). +template class TmplConstFunctor { +public: + TmplConstFunctor(T X_, sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { + Acc.use(id, X); + } + + void operator()() const { + Acc.use(); + } + + + +private: + T X; + sycl::accessor Acc; +}; + + + +#define ARR_LEN(x) sizeof(x)/sizeof(x[0]) + +// Exercise templated functors in 'parallel_for'. +template T bar(T X) { + T A[] = { (T)10, (T)10 }; + { + sycl::queue Q; + sycl::buffer Buf(A, ARR_LEN(A)); + + Q.submit([&](sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplConstFunctor F(X, Acc); + + cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); + }); + } + T res = (T)0; + + for (int i = 0; i < ARR_LEN(A); i++) { + res += A[i]; + } + return res; +} + +int main() { + const int Res2 = bar(10); + const int Gold1 = 40; + const int Gold2 = 80; + +#ifndef __SYCL_DEVICE_ONLY__ + + sycl::detail::KernelInfo>::getName(); + // NUL: KernelInfo<::TmplConstFunctor> + // UL: KernelInfoData<'_', 'Z', 'T', 'S', '1', '6', 'T', 'm', 'p', 'l', 'C', 'o', 'n', 's', 't', 'F', 'u', 'n', 'c', 't', 'o', 'r', 'I', 'i', 'E'> + // CHECK: getName() { return "_ZTS16TmplConstFunctorIiE"; } +#endif // __SYCL_DEVICE_ONLY__ + + return 0; +} + From e17002d19b4b57d97087e0b2748ff3ec0baff7f9 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 6 Mar 2023 21:42:56 -0800 Subject: [PATCH 02/29] Add code to handle multiple call ops in kernel functor. --- clang/lib/Sema/SemaSYCL.cpp | 84 ++++++++++++++-------- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 16 +++++ clang/test/CodeGenSYCL/kernel-op-calls.cpp | 82 ++++++++++++++++----- 3 files changed, 136 insertions(+), 46 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 05088cadafaf7..d9c541fc04224 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2784,43 +2784,70 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -static CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec, - FunctionDecl *KernelCallerFunc, - Sema &SemaRef) { +// This function traverses the static call graph from the root of the kernel +// (e.g. “kernel_parallel_for”) and returns the version of “operator()()” that +// is called by kernelFunc()”. There will only be one call to kernelFunc()” in +// that call graph because the DPC++ headers are structured such that the user’s +// kernel function is only called once. This ensures that the correct +// “operator()()” function call is returned, when a named function object used +// to define a kernel has more than one “operator()()” calls defined in it. For +// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on +// the 'parallel_for' invocation. +// class MyKernel { +// public: +// void operator()() const { +// // code +// } +// +// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const +// { +// // code +// } +// }; +// +// int main() { +// +// Q.submit([&](sycl::handler& cgh) { +// MyKernel kernelFunctorObject; +// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); +// }); +// return 0; +// } + +static CXXMethodDecl * +getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, + FunctionDecl *KernelCallerFunc, + Sema &SemaRef) { CallGraph SYCLCG; SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl()); - // assert(SYCLCG.getNode(KernelCallerFunc) && "No call graph entry for a - // kernel?"); - - CallGraphNode *KernelCallerFuncNode = SYCLCG.getNode(KernelCallerFunc); - CXXMethodDecl *OperatorCall = nullptr; - - for (const CallGraphNode *CI : *KernelCallerFuncNode) { - if (auto *Callee = dyn_cast(CI->getDecl())) { - Callee = Callee->getMostRecentDecl(); - if (Callee->getParent() == Rec && Callee->isCXXClassMember() && - Callee->getOverloadedOperator() == OO_Call) - OperatorCall = Callee; - return OperatorCall; - } - } - return nullptr; + if (KernelCallerFunc && KernelCallerFunc->hasBody() && + KernelCallerFunc->hasAttr()) { + + CallGraphNode *KernelCallerFuncNode = SYCLCG.getNode(KernelCallerFunc); + CXXMethodDecl *OperatorCall = nullptr; - /* - for (auto *MD : Rec->methods()) { - if (MD->getOverloadedOperator() == OO_Call) - return MD; + // Iterate through each funtion invoked from the kernel root, find the + // function call operator and make sure it is a member of the kernel fuctor. + for (const CallGraphNode *CI : *KernelCallerFuncNode) { + if (auto *Callee = dyn_cast(CI->getDecl())) { + Callee = Callee->getMostRecentDecl(); + if (Callee->getParent() == KernelFuncObjType && + Callee->isCXXClassMember() && + Callee->getOverloadedOperator() == OO_Call) + OperatorCall = Callee; + return OperatorCall; + } + } } return nullptr; - */ } static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType, FunctionDecl *KernelCallerFunc, Sema &SemaRef) { - const CXXMethodDecl *OpParens = - getOperatorParens(KernelObjType, KernelCallerFunc, SemaRef); + const CXXMethodDecl *OpParens = getCallOperatorInvokedFromKernel( + KernelObjType, KernelCallerFunc, SemaRef); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2913,7 +2940,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { if (KernelObj->isLambda()) WGLambdaFn = KernelObj->getLambdaCallOperator(); else - WGLambdaFn = getOperatorParens(KernelObj, KernelCallerFunc, SemaRef); + WGLambdaFn = getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc, + SemaRef); assert(WGLambdaFn && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is @@ -4064,7 +4092,7 @@ void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, FunctionDecl *KernelCallerFunc) { // Get the operator() function of the wrapper. CXXMethodDecl *OpParens = - getOperatorParens(KernelObj, KernelCallerFunc, *this); + getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc, *this); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 0467659cd5492..8d3559601ed91 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -441,6 +441,15 @@ kernel_parallel_for(const KernelType &KernelFunc) { KernelFunc(id()); } +template +ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) { +#ifdef __SYCL_DEVICE_ONLY__ + kernelFunc(); +#else + (void)kernelFunc; +#endif +} + // Dummy parallel_for_work_item function to mimic calls from // parallel_for_work_group. void parallel_for_work_item() { @@ -458,6 +467,13 @@ kernel_parallel_for_work_group(const KernelType &KernelFunc) { class handler { public: + + template + void parallel_for(const KernelType &kernelObj) { + using NameT = typename get_kernel_name_t::name; + kernel_parallel_for(kernelObj); + } + template void parallel_for(range numWorkItems, const KernelType &kernelFunc) { using NameT = typename get_kernel_name_t::name; diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index f98f62120c0d9..2d5d7b482f687 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsycl-is-device %s -o /dev/null -// RUN: FileCheck %s --input-file=%t.h --check-prefixes=UL,CHECK +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s // Checks that functors are supported as SYCL kernels. @@ -8,10 +7,17 @@ constexpr auto sycl_read_write = sycl::access::mode::read_write; constexpr auto sycl_global_buffer = sycl::access::target::global_buffer; -// Case 2: +template +struct enable_if { }; +template +struct enable_if { + using type = V; +}; +template +using enable_if_t = typename enable_if::type; + // - functor class is templated and defined in the translation unit scope -// - the '()' operator: -// * has a parameter of type sycl::id<1> (to be used in 'parallel_for'). +// - the '()' operator has a parameter of type sycl::id<1> (to be used in 'parallel_for'). template class TmplConstFunctor { public: TmplConstFunctor(T X_, sycl::accessor &Acc_) : @@ -26,14 +32,49 @@ template class TmplConstFunctor { Acc.use(); } - +private: + T X; + sycl::accessor Acc; +}; + + +template class Functor2 { +public: + Functor2(T X_, sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { + Acc.use(id, X); + } + + [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const { + Acc.use(id, X); + } private: T X; sycl::accessor Acc; }; +template class Functor3 { +public: + Functor3(T X_, sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { + Acc.use(id, X); + } + [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const { + Acc.use(id, X); + } + +private: + T X; + sycl::accessor Acc; +}; #define ARR_LEN(x) sizeof(x)/sizeof(x[0]) @@ -47,9 +88,25 @@ template T bar(T X) { Q.submit([&](sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); TmplConstFunctor F(X, Acc); - + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS16TmplConstFunctorIiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 { cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); + //cgh.parallel_for(F); }); + + Q.submit([&](sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + Functor2 FuncOp2(X, Acc); + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor2IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !22 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !work_group_size_hint !23 { + cgh.parallel_for(sycl::range<2>(ARR_LEN(A)), FuncOp2); + }); + + Q.submit([&](sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + Functor3 FuncOp1(X, Acc); + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor3IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !25 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 + cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), FuncOp1); + }); + } T res = (T)0; @@ -61,17 +118,6 @@ template T bar(T X) { int main() { const int Res2 = bar(10); - const int Gold1 = 40; - const int Gold2 = 80; - -#ifndef __SYCL_DEVICE_ONLY__ - - sycl::detail::KernelInfo>::getName(); - // NUL: KernelInfo<::TmplConstFunctor> - // UL: KernelInfoData<'_', 'Z', 'T', 'S', '1', '6', 'T', 'm', 'p', 'l', 'C', 'o', 'n', 's', 't', 'F', 'u', 'n', 'c', 't', 'o', 'r', 'I', 'i', 'E'> - // CHECK: getName() { return "_ZTS16TmplConstFunctorIiE"; } -#endif // __SYCL_DEVICE_ONLY__ - return 0; } From 4f2dc5b1a7db410c8f633a3d0568e6588852541a Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 7 Mar 2023 10:51:16 -0800 Subject: [PATCH 03/29] Modify test file --- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 39 +++------------------- 1 file changed, 5 insertions(+), 34 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index 2d5d7b482f687..0fcf9f35aced8 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -1,6 +1,5 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s - -// Checks that functors are supported as SYCL kernels. + #include "Inputs/sycl.hpp" @@ -16,11 +15,9 @@ struct enable_if { template using enable_if_t = typename enable_if::type; -// - functor class is templated and defined in the translation unit scope -// - the '()' operator has a parameter of type sycl::id<1> (to be used in 'parallel_for'). -template class TmplConstFunctor { +template class Functor1 { public: - TmplConstFunctor(T X_, sycl::accessor &Acc_) : + Functor1(T X_, sycl::accessor &Acc_) : X(X_), Acc(Acc_) {} @@ -57,28 +54,9 @@ template class Functor2 { sycl::accessor Acc; }; -template class Functor3 { -public: - Functor3(T X_, sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { - Acc.use(id, X); - } - - [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const { - Acc.use(id, X); - } - -private: - T X; - sycl::accessor Acc; -}; - #define ARR_LEN(x) sizeof(x)/sizeof(x[0]) -// Exercise templated functors in 'parallel_for'. + template T bar(T X) { T A[] = { (T)10, (T)10 }; { @@ -87,7 +65,7 @@ template T bar(T X) { Q.submit([&](sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); - TmplConstFunctor F(X, Acc); + Functor1 F(X, Acc); // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS16TmplConstFunctorIiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 { cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); //cgh.parallel_for(F); @@ -100,13 +78,6 @@ template T bar(T X) { cgh.parallel_for(sycl::range<2>(ARR_LEN(A)), FuncOp2); }); - Q.submit([&](sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - Functor3 FuncOp1(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor3IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !25 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 - cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), FuncOp1); - }); - } T res = (T)0; From 75f1919db0c1e692a6bc9a54955c5c950b452d02 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 8 Mar 2023 11:06:02 -0800 Subject: [PATCH 04/29] Fix failing test. --- clang/lib/Sema/SemaSYCL.cpp | 18 ++++++++++++++---- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d9c541fc04224..1f8b9db57c1cc 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2786,13 +2786,13 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { // This function traverses the static call graph from the root of the kernel // (e.g. “kernel_parallel_for”) and returns the version of “operator()()” that -// is called by kernelFunc()”. There will only be one call to kernelFunc()” in +// is called by kernelFunc(). There will only be one call to kernelFunc() in // that call graph because the DPC++ headers are structured such that the user’s // kernel function is only called once. This ensures that the correct // “operator()()” function call is returned, when a named function object used // to define a kernel has more than one “operator()()” calls defined in it. For // example, in the code below, 'operator()(sycl::id<1> id)' is returned based on -// the 'parallel_for' invocation. +// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. // class MyKernel { // public: // void operator()() const { @@ -2822,6 +2822,15 @@ getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, CallGraph SYCLCG; SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl()); + // This code returns the 'lambda' call operator. + if (KernelFuncObjType->isLambda()) { + for (auto *MD : KernelFuncObjType->methods()) { + if (MD->getOverloadedOperator() == OO_Call) + return MD; + } + } + + // This code returns the functor's call operator. if (KernelCallerFunc && KernelCallerFunc->hasBody() && KernelCallerFunc->hasAttr()) { @@ -2830,8 +2839,8 @@ getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, // Iterate through each funtion invoked from the kernel root, find the // function call operator and make sure it is a member of the kernel fuctor. - for (const CallGraphNode *CI : *KernelCallerFuncNode) { - if (auto *Callee = dyn_cast(CI->getDecl())) { + for (const CallGraphNode *ChildNode : *KernelCallerFuncNode) { + if (auto *Callee = dyn_cast(ChildNode->getDecl())) { Callee = Callee->getMostRecentDecl(); if (Callee->getParent() == KernelFuncObjType && Callee->isCXXClassMember() && @@ -2841,6 +2850,7 @@ getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, } } } + return nullptr; } diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index 0fcf9f35aced8..446945be148b0 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -66,7 +66,7 @@ template T bar(T X) { Q.submit([&](sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); Functor1 F(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS16TmplConstFunctorIiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 { + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor1IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 { cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); //cgh.parallel_for(F); }); From a8bd38c469c122700e85857ae3fcdc0b397e8541 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 9 Mar 2023 20:50:33 -0800 Subject: [PATCH 05/29] Address review comments. --- clang/lib/Sema/SemaSYCL.cpp | 16 ++++++++-------- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 9 ++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1f8b9db57c1cc..03f37ebb8f55f 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2784,10 +2784,10 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -// This function traverses the static call graph from the root of the kernel -// (e.g. “kernel_parallel_for”) and returns the version of “operator()()” that -// is called by kernelFunc(). There will only be one call to kernelFunc() in -// that call graph because the DPC++ headers are structured such that the user’s +// This function traverses the static call graph from the function with +// `sycl_kernel` attribute and returns the version of “operator()()” that is +// called by kernelFunc(). There will only be one call to kernelFunc() in that +// call graph because the DPC++ headers are structured such that the user’s // kernel function is only called once. This ensures that the correct // “operator()()” function call is returned, when a named function object used // to define a kernel has more than one “operator()()” calls defined in it. For @@ -2820,7 +2820,7 @@ getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, Sema &SemaRef) { CallGraph SYCLCG; - SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl()); + SYCLCG.addToCallGraph(KernelCallerFunc); // This code returns the 'lambda' call operator. if (KernelFuncObjType->isLambda()) { @@ -2843,10 +2843,10 @@ getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, if (auto *Callee = dyn_cast(ChildNode->getDecl())) { Callee = Callee->getMostRecentDecl(); if (Callee->getParent() == KernelFuncObjType && - Callee->isCXXClassMember() && - Callee->getOverloadedOperator() == OO_Call) + Callee->getOverloadedOperator() == OO_Call) { OperatorCall = Callee; - return OperatorCall; + return OperatorCall; + } } } } diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index 446945be148b0..b44556387a7df 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s -#include "Inputs/sycl.hpp" +#include "sycl.hpp" constexpr auto sycl_read_write = sycl::access::mode::read_write; constexpr auto sycl_global_buffer = sycl::access::target::global_buffer; @@ -66,15 +66,14 @@ template T bar(T X) { Q.submit([&](sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); Functor1 F(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor1IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 { + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor1IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 !sycl_fixed_targets !15 { cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); - //cgh.parallel_for(F); }); Q.submit([&](sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); Functor2 FuncOp2(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor2IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !22 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !work_group_size_hint !23 { + // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor2IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !22 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !work_group_size_hint !23 !sycl_fixed_targets !15 { cgh.parallel_for(sycl::range<2>(ARR_LEN(A)), FuncOp2); }); From e73efaad5310a27d3481b98662b231c1d65acbea Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 15 Mar 2023 13:46:02 -0700 Subject: [PATCH 06/29] Re-write code using RecursiveASTVisitor. --- clang/lib/Sema/SemaSYCL.cpp | 114 +++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 48 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 03f37ebb8f55f..447607ecb1d11 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -10,10 +10,13 @@ #include "TreeTransform.h" #include "clang/AST/AST.h" +#include "clang/AST/AttrVisitor.h" +#include "clang/AST/DeclVisitor.h" #include "clang/AST/Mangle.h" #include "clang/AST/QualTypeNames.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" #include "clang/AST/TemplateArgumentVisitor.h" #include "clang/AST/TypeVisitor.h" #include "clang/Analysis/CallGraph.h" @@ -2784,10 +2787,10 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -// This function traverses the static call graph from the function with +// This Visitor traverses the AST of the function with // `sycl_kernel` attribute and returns the version of “operator()()” that is // called by kernelFunc(). There will only be one call to kernelFunc() in that -// call graph because the DPC++ headers are structured such that the user’s +// AST because the DPC++ headers are structured such that the user’s // kernel function is only called once. This ensures that the correct // “operator()()” function call is returned, when a named function object used // to define a kernel has more than one “operator()()” calls defined in it. For @@ -2814,50 +2817,47 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { // return 0; // } -static CXXMethodDecl * -getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType, - FunctionDecl *KernelCallerFunc, - Sema &SemaRef) { +class KernelCallOperatorVisitor + : public RecursiveASTVisitor { - CallGraph SYCLCG; - SYCLCG.addToCallGraph(KernelCallerFunc); + FunctionDecl *KernelCallerFunc; - // This code returns the 'lambda' call operator. - if (KernelFuncObjType->isLambda()) { - for (auto *MD : KernelFuncObjType->methods()) { - if (MD->getOverloadedOperator() == OO_Call) - return MD; +public: + CXXMethodDecl *CallOperator = nullptr; + const CXXRecordDecl *KernelObj; + + KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, + const CXXRecordDecl *KernelObj) + : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} + + bool VisitCallExpr(CallExpr *CE) { + Decl *CalleeDecl = CE->getCalleeDecl(); + if (isa_and_nonnull(CalleeDecl)) { + CXXMethodDecl *MD = cast(CalleeDecl); + if (MD->getOverloadedOperator() == OO_Call && + MD->getParent() == KernelObj) { + CallOperator = MD; + } } + return true; } +}; - // This code returns the functor's call operator. - if (KernelCallerFunc && KernelCallerFunc->hasBody() && - KernelCallerFunc->hasAttr()) { - - CallGraphNode *KernelCallerFuncNode = SYCLCG.getNode(KernelCallerFunc); - CXXMethodDecl *OperatorCall = nullptr; +static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, + const CXXRecordDecl *KernelObjType, + FunctionDecl *KernelCallerFunc, Sema &SemaRef) { + const CXXMethodDecl *OpParens = nullptr; - // Iterate through each funtion invoked from the kernel root, find the - // function call operator and make sure it is a member of the kernel fuctor. - for (const CallGraphNode *ChildNode : *KernelCallerFuncNode) { - if (auto *Callee = dyn_cast(ChildNode->getDecl())) { - Callee = Callee->getMostRecentDecl(); - if (Callee->getParent() == KernelFuncObjType && - Callee->getOverloadedOperator() == OO_Call) { - OperatorCall = Callee; - return OperatorCall; - } - } + if (KernelObjType->isLambda()) { + for (auto *MD : KernelObjType->methods()) { + if (MD->getOverloadedOperator() == OO_Call) + OpParens = MD; } + } else { + KernelCallOperator.TraverseDecl(KernelCallerFunc); + OpParens = KernelCallOperator.CallOperator; } - return nullptr; -} - -static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType, - FunctionDecl *KernelCallerFunc, Sema &SemaRef) { - const CXXMethodDecl *OpParens = getCallOperatorInvokedFromKernel( - KernelObjType, KernelCallerFunc, SemaRef); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2933,6 +2933,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } void annotateHierarchicalParallelismAPICalls() { + // Is this a hierarchical parallelism kernel invocation? if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup) return; @@ -2946,12 +2947,14 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // (of either the lambda or the function object). CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); + + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + KernelCallOperator.TraverseDecl(KernelCallerFunc); CXXMethodDecl *WGLambdaFn = nullptr; if (KernelObj->isLambda()) WGLambdaFn = KernelObj->getLambdaCallOperator(); else - WGLambdaFn = getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc, - SemaRef); + WGLambdaFn = KernelCallOperator.CallOperator; assert(WGLambdaFn && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is @@ -3264,7 +3267,10 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } const llvm::StringLiteral getInitMethodName() const { - bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, SemaRef); + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator, KernelObj, + KernelCallerFunc, SemaRef); return IsSIMDKernel ? InitESIMDMethodName : InitMethodName; } @@ -3615,6 +3621,7 @@ static bool IsSYCLUnnamedKernel(Sema &SemaRef, const FunctionDecl *FD) { class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { SYCLIntegrationHeader &Header; + KernelCallOperatorVisitor KernelCallOperator; int64_t CurOffset = 0; llvm::SmallVector ArrayBaseOffsets; int StructDepth = 0; @@ -3646,11 +3653,14 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { public: static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelIntHeaderCreator(Sema &S, SYCLIntegrationHeader &H, + SyclKernelIntHeaderCreator(KernelCallOperatorVisitor KernelCallOperator, + Sema &S, SYCLIntegrationHeader &H, const CXXRecordDecl *KernelObj, QualType NameType, FunctionDecl *KernelFunc) - : SyclKernelFieldHandler(S), Header(H) { - bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, S); + : SyclKernelFieldHandler(S), Header(H), + KernelCallOperator(KernelCallOperator) { + bool IsSIMDKernel = + isESIMDKernelType(KernelCallOperator, KernelObj, KernelFunc, S); // The header needs to access the kernel object size. int64_t ObjSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelObj->getTypeForDecl()) @@ -4034,6 +4044,9 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, const CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); + KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); + KernelCallOperator.TraverseDecl(KernelFunc); + if (!KernelObj) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); KernelFunc->setInvalidDecl(); @@ -4064,7 +4077,8 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, if (KernelObj->isInvalidDecl()) return; - bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, *this); + bool IsSIMDKernel = + isESIMDKernelType(KernelCallOperator, KernelObj, KernelFunc, *this); SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); @@ -4101,8 +4115,9 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, FunctionDecl *KernelCallerFunc) { // Get the operator() function of the wrapper. - CXXMethodDecl *OpParens = - getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc, *this); + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + KernelCallOperator.TraverseDecl(KernelCallerFunc); + CXXMethodDecl *OpParens = KernelCallOperator.CallOperator; assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; @@ -4218,7 +4233,10 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, copySYCLKernelAttrs(KernelObj, KernelCallerFunc); } - bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, *this); + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + + bool IsSIMDKernel = + isESIMDKernelType(KernelCallOperator, KernelObj, KernelCallerFunc, *this); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, @@ -4226,7 +4244,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, KernelCallerFunc); SyclKernelIntHeaderCreator int_header( - *this, getSyclIntegrationHeader(), KernelObj, + KernelCallOperator, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); SyclKernelIntFooterCreator int_footer(*this, getSyclIntegrationFooter()); From caddf21f5bd78c96030ee1cbfc02eb4d96e20810 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 15 Mar 2023 22:13:26 -0700 Subject: [PATCH 07/29] Update test. --- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 9 -- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 95 ++++++---------------- 2 files changed, 24 insertions(+), 80 deletions(-) diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 8d3559601ed91..49e24ee1094c5 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -441,15 +441,6 @@ kernel_parallel_for(const KernelType &KernelFunc) { KernelFunc(id()); } -template -ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) { -#ifdef __SYCL_DEVICE_ONLY__ - kernelFunc(); -#else - (void)kernelFunc; -#endif -} - // Dummy parallel_for_work_item function to mimic calls from // parallel_for_work_group. void parallel_for_work_item() { diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index b44556387a7df..9114c73b3b9ef 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -1,93 +1,46 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s - +// This test checks that the correct kernel operator call is invoked when there are multiple definitions of the +// 'operator()()' call. #include "sycl.hpp" -constexpr auto sycl_read_write = sycl::access::mode::read_write; -constexpr auto sycl_global_buffer = sycl::access::target::global_buffer; - -template -struct enable_if { }; -template -struct enable_if { - using type = V; -}; -template -using enable_if_t = typename enable_if::type; +sycl::queue Q; -template class Functor1 { +// Check if functor with multiple call operator works. +class Functor1 { public: - Functor1(T X_, sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} + Functor1(){} - [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { - Acc.use(id, X); - } + [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {} - void operator()() const { - Acc.use(); - } + [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const {} -private: - T X; - sycl::accessor Acc; }; +// Check templated 'operator()()' call works. +class kernels { +public: + kernels(){} -template class Functor2 { -public: - Functor2(T X_, sycl::accessor &Acc_) : - X(X_), Acc(Acc_) - {} - - [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const { - Acc.use(id, X); - } - - [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const { - Acc.use(id, X); - } - -private: - T X; - sycl::accessor Acc; + template + [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id item) const {} + }; -#define ARR_LEN(x) sizeof(x)/sizeof(x[0]) - +int main() { -template T bar(T X) { - T A[] = { (T)10, (T)10 }; - { - sycl::queue Q; - sycl::buffer Buf(A, ARR_LEN(A)); - - Q.submit([&](sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - Functor1 F(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor1IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 !sycl_fixed_targets !15 { - cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F); + Q.submit([&](sycl::handler& cgh) { + Functor1 F; + // CHECK: define dso_local spir_kernel void @_ZTS8Functor1() #0 !srcloc !11 !kernel_arg_buffer_location !12 !intel_reqd_sub_group_size !13 !sycl_fixed_targets !12 { + cgh.parallel_for(sycl::range<1>(10), F); }); - Q.submit([&](sycl::handler& cgh) { - auto Acc = Buf.template get_access(cgh); - Functor2 FuncOp2(X, Acc); - // CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor2IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !22 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !work_group_size_hint !23 !sycl_fixed_targets !15 { - cgh.parallel_for(sycl::range<2>(ARR_LEN(A)), FuncOp2); + Q.submit([&](sycl::handler& cgh) { + kernels K; + // CHECK: define dso_local spir_kernel void @_ZTS7kernels() #0 !srcloc !15 !kernel_arg_buffer_location !12 !work_group_size_hint !16 !sycl_fixed_targets !12 { + cgh.parallel_for(sycl::range<1>(10), K); }); - } - T res = (T)0; - - for (int i = 0; i < ARR_LEN(A); i++) { - res += A[i]; - } - return res; -} - -int main() { - const int Res2 = bar(10); return 0; } From 53d8de01932f1fd30c8324706c579b0e6fd75f17 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Fri, 17 Mar 2023 15:11:14 -0700 Subject: [PATCH 08/29] Fix review comments. --- clang/lib/Sema/SemaSYCL.cpp | 38 ++++++++++++-------------- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 6 ---- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 447607ecb1d11..841388cd0cee9 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -10,13 +10,10 @@ #include "TreeTransform.h" #include "clang/AST/AST.h" -#include "clang/AST/AttrVisitor.h" -#include "clang/AST/DeclVisitor.h" #include "clang/AST/Mangle.h" #include "clang/AST/QualTypeNames.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/StmtVisitor.h" #include "clang/AST/TemplateArgumentVisitor.h" #include "clang/AST/TypeVisitor.h" #include "clang/Analysis/CallGraph.h" @@ -2841,23 +2838,22 @@ class KernelCallOperatorVisitor } return true; } + + CXXMethodDecl *GetCallOperator() { + if (KernelObj->isLambda()) { + CallOperator = KernelObj->getLambdaCallOperator(); + return CallOperator; + } + TraverseDecl(KernelCallerFunc); + return CallOperator; + } }; static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, const CXXRecordDecl *KernelObjType, FunctionDecl *KernelCallerFunc, Sema &SemaRef) { const CXXMethodDecl *OpParens = nullptr; - - if (KernelObjType->isLambda()) { - for (auto *MD : KernelObjType->methods()) { - if (MD->getOverloadedOperator() == OO_Call) - OpParens = MD; - } - } else { - KernelCallOperator.TraverseDecl(KernelCallerFunc); - OpParens = KernelCallOperator.CallOperator; - } - + OpParens = KernelCallOperator.GetCallOperator(); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2949,12 +2945,15 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - KernelCallOperator.TraverseDecl(KernelCallerFunc); + CXXMethodDecl *WGLambdaFn = nullptr; - if (KernelObj->isLambda()) + if (KernelObj->isLambda()) { WGLambdaFn = KernelObj->getLambdaCallOperator(); - else - WGLambdaFn = KernelCallOperator.CallOperator; + } else { + KernelCallOperator.TraverseDecl(KernelCallerFunc); + WGLambdaFn = KernelCallOperator.GetCallOperator(); + } + assert(WGLambdaFn && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is @@ -4045,7 +4044,6 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); - KernelCallOperator.TraverseDecl(KernelFunc); if (!KernelObj) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); @@ -4117,7 +4115,7 @@ void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, // Get the operator() function of the wrapper. KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); KernelCallOperator.TraverseDecl(KernelCallerFunc); - CXXMethodDecl *OpParens = KernelCallOperator.CallOperator; + CXXMethodDecl *OpParens = KernelCallOperator.GetCallOperator(); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 49e24ee1094c5..e7b6acafe06e5 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -459,12 +459,6 @@ kernel_parallel_for_work_group(const KernelType &KernelFunc) { class handler { public: - template - void parallel_for(const KernelType &kernelObj) { - using NameT = typename get_kernel_name_t::name; - kernel_parallel_for(kernelObj); - } - template void parallel_for(range numWorkItems, const KernelType &kernelFunc) { using NameT = typename get_kernel_name_t::name; From ef3ccd3258ff87a01fc09884d9ce9dae0ec6723a Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Fri, 17 Mar 2023 20:13:43 -0700 Subject: [PATCH 09/29] Address review comments. --- clang/lib/Sema/SemaSYCL.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 841388cd0cee9..c35b04d4ed873 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2839,7 +2839,10 @@ class KernelCallOperatorVisitor return true; } - CXXMethodDecl *GetCallOperator() { + CXXMethodDecl *getCallOperator() { + if (CallOperator) + return CallOperator; + if (KernelObj->isLambda()) { CallOperator = KernelObj->getLambdaCallOperator(); return CallOperator; @@ -2853,7 +2856,7 @@ static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, const CXXRecordDecl *KernelObjType, FunctionDecl *KernelCallerFunc, Sema &SemaRef) { const CXXMethodDecl *OpParens = nullptr; - OpParens = KernelCallOperator.GetCallOperator(); + OpParens = KernelCallOperator.getCallOperator(); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2947,12 +2950,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); CXXMethodDecl *WGLambdaFn = nullptr; - if (KernelObj->isLambda()) { - WGLambdaFn = KernelObj->getLambdaCallOperator(); - } else { - KernelCallOperator.TraverseDecl(KernelCallerFunc); - WGLambdaFn = KernelCallOperator.GetCallOperator(); - } + WGLambdaFn = KernelCallOperator.getCallOperator(); assert(WGLambdaFn && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: @@ -4115,7 +4113,7 @@ void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, // Get the operator() function of the wrapper. KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); KernelCallOperator.TraverseDecl(KernelCallerFunc); - CXXMethodDecl *OpParens = KernelCallOperator.GetCallOperator(); + CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; From 8cc706f3d24e779ae45f73c767d13f7b997168ca Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 23 Mar 2023 12:22:10 -0700 Subject: [PATCH 10/29] Address code review comments. --- clang/lib/Sema/SemaSYCL.cpp | 41 +++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c35b04d4ed873..5acc92e289333 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2827,7 +2827,7 @@ class KernelCallOperatorVisitor const CXXRecordDecl *KernelObj) : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} - bool VisitCallExpr(CallExpr *CE) { + bool visitCallExpr(CallExpr *CE) { Decl *CalleeDecl = CE->getCalleeDecl(); if (isa_and_nonnull(CalleeDecl)) { CXXMethodDecl *MD = cast(CalleeDecl); @@ -2852,11 +2852,8 @@ class KernelCallOperatorVisitor } }; -static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, - const CXXRecordDecl *KernelObjType, - FunctionDecl *KernelCallerFunc, Sema &SemaRef) { - const CXXMethodDecl *OpParens = nullptr; - OpParens = KernelCallOperator.getCallOperator(); +static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator) { + const CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2932,7 +2929,6 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } void annotateHierarchicalParallelismAPICalls() { - // Is this a hierarchical parallelism kernel invocation? if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup) return; @@ -2948,11 +2944,14 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + CXXMethodDecl *WGCallOperator = nullptr; + + if (KernelObj->isLambda()) + WGCallOperator = KernelObj->getLambdaCallOperator(); - CXXMethodDecl *WGLambdaFn = nullptr; - WGLambdaFn = KernelCallOperator.getCallOperator(); + WGCallOperator = KernelCallOperator.getCallOperator(); - assert(WGLambdaFn && "non callable object is passed as kernel obj"); + assert(WGCallOperator && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is // marked with work item scope attribute, here the '()' operator of the @@ -2962,15 +2961,15 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // all of them in the private address space rather then sharing via // the local AS. See parallel_for_work_group implementation in the // SYCL headers. - if (!WGLambdaFn->hasAttr()) { - WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit( + if (!WGCallOperator->hasAttr()) { + WGCallOperator->addAttr(SYCLScopeAttr::CreateImplicit( SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); // Search and mark parallel_for_work_item calls: MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext()); - MarkWIScope.TraverseDecl(WGLambdaFn); + MarkWIScope.TraverseDecl(WGCallOperator); // Now mark local variables declared in the PFWG lambda with work group // scope attribute - addScopeAttrToLocalVars(*WGLambdaFn); + addScopeAttrToLocalVars(*WGCallOperator); } } @@ -3266,8 +3265,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const llvm::StringLiteral getInitMethodName() const { KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator, KernelObj, - KernelCallerFunc, SemaRef); + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); return IsSIMDKernel ? InitESIMDMethodName : InitMethodName; } @@ -3657,7 +3655,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { : SyclKernelFieldHandler(S), Header(H), KernelCallOperator(KernelCallOperator) { bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator, KernelObj, KernelFunc, S); + isESIMDKernelType(KernelCallOperator); // The header needs to access the kernel object size. int64_t ObjSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelObj->getTypeForDecl()) @@ -4074,7 +4072,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, return; bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator, KernelObj, KernelFunc, *this); + isESIMDKernelType(KernelCallOperator); SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); @@ -4112,7 +4110,6 @@ void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, FunctionDecl *KernelCallerFunc) { // Get the operator() function of the wrapper. KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - KernelCallOperator.TraverseDecl(KernelCallerFunc); CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); assert(OpParens && "invalid kernel object"); @@ -4215,6 +4212,8 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, if (KernelObj->isInvalidDecl()) return; + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + { // Do enough to calculate the StableName for the purposes of the hackery // below for __pf_kernel_wrapper. Placed in a scope so that we don't @@ -4229,10 +4228,8 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, copySYCLKernelAttrs(KernelObj, KernelCallerFunc); } - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator, KernelObj, KernelCallerFunc, *this); + isESIMDKernelType(KernelCallOperator); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, From 78c5bdcb36c0baeb3b3e7ba1f17156e6e3508b60 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 23 Mar 2023 22:12:04 -0700 Subject: [PATCH 11/29] Fix ESIMD code. --- clang/lib/Sema/SemaSYCL.cpp | 86 +++++++++++----------- clang/test/CodeGenSYCL/kernel-op-calls.cpp | 16 ++++ 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5acc92e289333..06d3a10471273 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2827,7 +2827,7 @@ class KernelCallOperatorVisitor const CXXRecordDecl *KernelObj) : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} - bool visitCallExpr(CallExpr *CE) { + bool VisitCallExpr(CallExpr *CE) { Decl *CalleeDecl = CE->getCalleeDecl(); if (isa_and_nonnull(CalleeDecl)) { CXXMethodDecl *MD = cast(CalleeDecl); @@ -3654,8 +3654,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { FunctionDecl *KernelFunc) : SyclKernelFieldHandler(S), Header(H), KernelCallOperator(KernelCallOperator) { - bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator); + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); // The header needs to access the kernel object size. int64_t ObjSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelObj->getTypeForDecl()) @@ -4024,12 +4023,6 @@ class SYCLKernelNameTypeVisitor void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, ArrayRef Args) { - QualType KernelNameType = - calculateKernelNameType(getASTContext(), KernelFunc); - SYCLKernelNameTypeVisitor KernelNameTypeVisitor( - *this, Args[0]->getExprLoc(), KernelNameType, - IsSYCLUnnamedKernel(*this, KernelFunc)); - KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); // FIXME: In place until the library works around its 'host' invocation // issues. @@ -4070,38 +4063,6 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // Do not visit invalid kernel object. if (KernelObj->isInvalidDecl()) return; - - bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator); - - SyclKernelDecompMarker DecompMarker(*this); - SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); - SyclKernelUnionChecker UnionChecker(*this); - - SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc(), - IsSIMDKernel); - - KernelObjVisitor Visitor{*this}; - - DiagnosingSYCLKernel = true; - - // Emit diagnostics for SYCL device kernels only - Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); - Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, - DecompMarker); - // ArgSizeChecker needs to happen after DecompMarker has completed, since it - // cares about the decomp attributes. DecompMarker cannot run before the - // others, since it counts on the FieldChecker to make sure it is visiting - // valid arrays/etc. Thus, ArgSizeChecker has its own visitation. - if (FieldChecker.isValid() && UnionChecker.isValid()) { - Visitor.VisitRecordBases(KernelObj, ArgsSizeChecker); - Visitor.VisitRecordFields(KernelObj, ArgsSizeChecker); - } - DiagnosingSYCLKernel = false; - // Set the kernel function as invalid, if any of the checkers fail validation. - if (!FieldChecker.isValid() || !UnionChecker.isValid() || - !KernelNameTypeVisitor.isValid()) - KernelFunc->setInvalidDecl(); } // For a wrapped parallel_for, copy attributes from original @@ -4228,14 +4189,51 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, copySYCLKernelAttrs(KernelObj, KernelCallerFunc); } - bool IsSIMDKernel = - isESIMDKernelType(KernelCallOperator); + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, KernelCallerFunc); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, KernelCallerFunc); + QualType KernelNameType = + calculateKernelNameType(getASTContext(), KernelCallerFunc); + + SYCLKernelNameTypeVisitor KernelNameTypeVisitor( + *this, KernelObj->getLocation(), KernelNameType, + IsSYCLUnnamedKernel(*this, KernelCallerFunc)); + KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); + + SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); + + SyclKernelArgsSizeChecker ArgsSizeChecker(*this, KernelObj->getLocation(), + IsSIMDKernel); + + SyclKernelDecompMarker DecompMarker(*this); + SyclKernelUnionChecker UnionChecker(*this); + + KernelObjVisitor Visitor{*this}; + + DiagnosingSYCLKernel = true; + + // Emit diagnostics for SYCL device kernels only + Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); + Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, + DecompMarker); + // ArgSizeChecker needs to happen after DecompMarker has completed, since it + // cares about the decomp attributes. DecompMarker cannot run before the + // others, since it counts on the FieldChecker to make sure it is visiting + // valid arrays/etc. Thus, ArgSizeChecker has its own visitation. + if (FieldChecker.isValid() && UnionChecker.isValid()) { + Visitor.VisitRecordBases(KernelObj, ArgsSizeChecker); + Visitor.VisitRecordFields(KernelObj, ArgsSizeChecker); + } + DiagnosingSYCLKernel = false; + // Set the kernel function as invalid, if any of the checkers fail validation. + if (!FieldChecker.isValid() || !UnionChecker.isValid() || + !KernelNameTypeVisitor.isValid()) + KernelCallerFunc->setInvalidDecl(); + SyclKernelIntHeaderCreator int_header( KernelCallOperator, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); @@ -4243,8 +4241,6 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, SyclKernelIntFooterCreator int_footer(*this, getSyclIntegrationFooter()); SyclOptReportCreator opt_report(*this, kernel_decl, KernelObj->getLocation()); - KernelObjVisitor Visitor{*this}; - // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index 9114c73b3b9ef..507e8bbc22d3a 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -17,6 +17,16 @@ class Functor1 { }; +class ESIMDFunctor { +public: + ESIMDFunctor(){} + + [[intel::sycl_explicit_simd]] void operator()(sycl::id<2> id) const {} + + [[sycl::work_group_size_hint(1, 2, 3)]][[intel::sycl_explicit_simd]] void operator()(sycl::id<1> id) const {} + +}; + // Check templated 'operator()()' call works. class kernels { public: @@ -41,6 +51,12 @@ int main() { cgh.parallel_for(sycl::range<1>(10), K); }); + Q.submit([&](sycl::handler& cgh) { + ESIMDFunctor EF; + // CHECK: define dso_local spir_kernel void @_ZTS12ESIMDFunctor() #0 !srcloc !17 !intel_reqd_sub_group_size !18 !work_group_size_hint !16 !kernel_arg_addr_space !12 !kernel_arg_access_qual !12 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !12 !kernel_arg_accessor_ptr !12 !sycl_explicit_simd !12 !sycl_fixed_targets !12 { + cgh.parallel_for(sycl::range<1>(10), EF); + }); + return 0; } From 8ef7f476af986f691b2e00a939c0b08031bbb577 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 27 Mar 2023 23:24:50 -0700 Subject: [PATCH 12/29] Move ESIMD check to SyclKernelDeclCreator. --- clang/lib/Sema/SemaSYCL.cpp | 106 +++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 06d3a10471273..398b24e1580ca 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1552,7 +1552,6 @@ void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, class SyclKernelFieldChecker : public SyclKernelFieldHandler { bool IsInvalid = false; DiagnosticsEngine &Diag; - bool IsSIMD = false; // Keeps track of whether we are currently handling fields inside a struct. // Fields of kernel functor or direct kernel captures will have a depth 0. int StructFieldDepth = 0; @@ -1656,10 +1655,7 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { << Ty << /*Struct*/ 1; const RecordDecl *RecD = Ty->getAsRecordDecl(); - if (IsSIMD && !isSyclAccessorType(Ty)) - return SemaRef.Diag(Loc.getBegin(), - diag::err_sycl_esimd_not_supported_for_type) - << RecD; + if (const ClassTemplateSpecializationDecl *CTSD = dyn_cast(RecD)) { const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); @@ -1678,9 +1674,8 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { } public: - SyclKernelFieldChecker(Sema &S, bool isSIMD) - : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()), - IsSIMD(isSIMD) {} + SyclKernelFieldChecker(Sema &S) + : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()) {} static constexpr const bool VisitNthArrayElement = false; bool isValid() { return !IsInvalid; } @@ -2150,6 +2145,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { FunctionDecl *KernelDecl; llvm::SmallVector Params; Sema::ContextRAII FuncContext; + SourceLocation Loc; // Holds the last handled field's first parameter. This doesn't store an // iterator as push_back invalidates iterators. size_t LastParamIndex = 0; @@ -2272,6 +2268,11 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { bool handleSpecialType(FieldDecl *FD, QualType FieldTy) { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); + + if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) + return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) + << RecordDecl; + llvm::StringLiteral MethodName = KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) ? InitESIMDMethodName @@ -2365,7 +2366,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { : SyclKernelFieldHandler(S), KernelDecl( createKernelDecl(S.getASTContext(), Loc, IsInline, IsSIMDKernel)), - FuncContext(SemaRef, KernelDecl) { + FuncContext(SemaRef, KernelDecl), Loc(Loc) { S.addSyclOpenCLKernel(SYCLKernel, KernelDecl); if (const auto *AddIRAttrFunc = @@ -2422,6 +2423,10 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { QualType FieldTy) final { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); + + if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) + return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) + << RecordDecl; llvm::StringLiteral MethodName = KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) ? InitESIMDMethodName @@ -4023,6 +4028,12 @@ class SYCLKernelNameTypeVisitor void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, ArrayRef Args) { + QualType KernelNameType = + calculateKernelNameType(getASTContext(), KernelFunc); + SYCLKernelNameTypeVisitor KernelNameTypeVisitor( + *this, Args[0]->getExprLoc(), KernelNameType, + IsSYCLUnnamedKernel(*this, KernelFunc)); + KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); // FIXME: In place until the library works around its 'host' invocation // issues. @@ -4032,8 +4043,6 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, const CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); - KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); - if (!KernelObj) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); KernelFunc->setInvalidDecl(); @@ -4063,6 +4072,37 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // Do not visit invalid kernel object. if (KernelObj->isInvalidDecl()) return; + KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); + + SyclKernelDecompMarker DecompMarker(*this); + SyclKernelFieldChecker FieldChecker(*this); + SyclKernelUnionChecker UnionChecker(*this); + + SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc(), + IsSIMDKernel); + + KernelObjVisitor Visitor{*this}; + + DiagnosingSYCLKernel = true; + + // Emit diagnostics for SYCL device kernels only + Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); + Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, + DecompMarker); + // ArgSizeChecker needs to happen after DecompMarker has completed, since it + // cares about the decomp attributes. DecompMarker cannot run before the + // others, since it counts on the FieldChecker to make sure it is visiting + // valid arrays/etc. Thus, ArgSizeChecker has its own visitation. + if (FieldChecker.isValid() && UnionChecker.isValid()) { + Visitor.VisitRecordBases(KernelObj, ArgsSizeChecker); + Visitor.VisitRecordFields(KernelObj, ArgsSizeChecker); + } + DiagnosingSYCLKernel = false; + // Set the kernel function as invalid, if any of the checkers fail validation. + if (!FieldChecker.isValid() || !UnionChecker.isValid() || + !KernelNameTypeVisitor.isValid()) + KernelFunc->setInvalidDecl(); } // For a wrapped parallel_for, copy attributes from original @@ -4169,12 +4209,12 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); assert(KernelObj && "invalid kernel caller"); + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); + // Do not visit invalid kernel object. if (KernelObj->isInvalidDecl()) return; - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - { // Do enough to calculate the StableName for the purposes of the hackery // below for __pf_kernel_wrapper. Placed in a scope so that we don't @@ -4196,44 +4236,6 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, KernelCallerFunc); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, KernelCallerFunc); - QualType KernelNameType = - calculateKernelNameType(getASTContext(), KernelCallerFunc); - - SYCLKernelNameTypeVisitor KernelNameTypeVisitor( - *this, KernelObj->getLocation(), KernelNameType, - IsSYCLUnnamedKernel(*this, KernelCallerFunc)); - KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); - - SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel); - - SyclKernelArgsSizeChecker ArgsSizeChecker(*this, KernelObj->getLocation(), - IsSIMDKernel); - - SyclKernelDecompMarker DecompMarker(*this); - SyclKernelUnionChecker UnionChecker(*this); - - KernelObjVisitor Visitor{*this}; - - DiagnosingSYCLKernel = true; - - // Emit diagnostics for SYCL device kernels only - Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); - Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, - DecompMarker); - // ArgSizeChecker needs to happen after DecompMarker has completed, since it - // cares about the decomp attributes. DecompMarker cannot run before the - // others, since it counts on the FieldChecker to make sure it is visiting - // valid arrays/etc. Thus, ArgSizeChecker has its own visitation. - if (FieldChecker.isValid() && UnionChecker.isValid()) { - Visitor.VisitRecordBases(KernelObj, ArgsSizeChecker); - Visitor.VisitRecordFields(KernelObj, ArgsSizeChecker); - } - DiagnosingSYCLKernel = false; - // Set the kernel function as invalid, if any of the checkers fail validation. - if (!FieldChecker.isValid() || !UnionChecker.isValid() || - !KernelNameTypeVisitor.isValid()) - KernelCallerFunc->setInvalidDecl(); - SyclKernelIntHeaderCreator int_header( KernelCallOperator, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); @@ -4241,6 +4243,8 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, SyclKernelIntFooterCreator int_footer(*this, getSyclIntegrationFooter()); SyclOptReportCreator opt_report(*this, kernel_decl, KernelObj->getLocation()); + KernelObjVisitor Visitor{*this}; + // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { From 5548aae19bc64af33344c31bb602775f99617e84 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 28 Mar 2023 07:11:38 -0700 Subject: [PATCH 13/29] Remove ESIMD init method generation from KernelArgsSizeChecker. --- clang/lib/Sema/SemaSYCL.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 398b24e1580ca..d8da950567350 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2561,7 +2561,6 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { SourceLocation KernelLoc; unsigned SizeOfParams = 0; - bool IsSIMD = false; void addParam(QualType ArgTy) { SizeOfParams += @@ -2571,9 +2570,7 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { bool handleSpecialType(QualType FieldTy) { const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - llvm::StringLiteral MethodName = (IsSIMD && isSyclAccessorType(FieldTy)) - ? InitESIMDMethodName - : InitMethodName; + llvm::StringLiteral MethodName = InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); for (const ParmVarDecl *Param : InitMethod->parameters()) @@ -2583,8 +2580,8 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { public: static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc, bool IsSIMD) - : SyclKernelFieldHandler(S), KernelLoc(Loc), IsSIMD(IsSIMD) {} + SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc) + : SyclKernelFieldHandler(S), KernelLoc(Loc) {} ~SyclKernelArgsSizeChecker() { if (SizeOfParams > MaxKernelArgsSize) @@ -4073,14 +4070,12 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, if (KernelObj->isInvalidDecl()) return; KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this); SyclKernelUnionChecker UnionChecker(*this); - SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc(), - IsSIMDKernel); + SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc()); KernelObjVisitor Visitor{*this}; From ab95f494969c1fc73e29394eb9fb56192d15f896 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 28 Mar 2023 13:02:12 -0700 Subject: [PATCH 14/29] Fix review comments. --- clang/lib/Sema/SemaSYCL.cpp | 19 ++++++++----------- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 1 - 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d8da950567350..fabf86f7d4f64 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2788,7 +2788,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { // This Visitor traverses the AST of the function with // `sycl_kernel` attribute and returns the version of “operator()()” that is -// called by kernelFunc(). There will only be one call to kernelFunc() in that +// called by KernelFunc. There will only be one call to KernelFunc in that // AST because the DPC++ headers are structured such that the user’s // kernel function is only called once. This ensures that the correct // “operator()()” function call is returned, when a named function object used @@ -3618,10 +3618,10 @@ static bool IsSYCLUnnamedKernel(Sema &SemaRef, const FunctionDecl *FD) { class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { SYCLIntegrationHeader &Header; - KernelCallOperatorVisitor KernelCallOperator; int64_t CurOffset = 0; llvm::SmallVector ArrayBaseOffsets; int StructDepth = 0; + bool IsESIMD = false; // A series of functions to calculate the change in offset based on the type. int64_t offsetOf(const FieldDecl *FD, QualType ArgTy) const { @@ -3650,20 +3650,17 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { public: static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelIntHeaderCreator(KernelCallOperatorVisitor KernelCallOperator, - Sema &S, SYCLIntegrationHeader &H, + SyclKernelIntHeaderCreator(bool IsESIMD, Sema &S, SYCLIntegrationHeader &H, const CXXRecordDecl *KernelObj, QualType NameType, FunctionDecl *KernelFunc) - : SyclKernelFieldHandler(S), Header(H), - KernelCallOperator(KernelCallOperator) { - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); + : SyclKernelFieldHandler(S), Header(H), IsESIMD(IsESIMD) { + // The header needs to access the kernel object size. int64_t ObjSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelObj->getTypeForDecl()) .getQuantity(); - Header.startKernel(KernelFunc, NameType, KernelObj->getLocation(), - IsSIMDKernel, IsSYCLUnnamedKernel(S, KernelFunc), - ObjSize); + Header.startKernel(KernelFunc, NameType, KernelObj->getLocation(), IsESIMD, + IsSYCLUnnamedKernel(S, KernelFunc), ObjSize); } bool handleSyclSpecialType(const CXXRecordDecl *RD, @@ -4232,7 +4229,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, KernelCallerFunc); SyclKernelIntHeaderCreator int_header( - KernelCallOperator, *this, getSyclIntegrationHeader(), KernelObj, + IsSIMDKernel, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); SyclKernelIntFooterCreator int_footer(*this, getSyclIntegrationFooter()); diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index e7b6acafe06e5..0467659cd5492 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -458,7 +458,6 @@ kernel_parallel_for_work_group(const KernelType &KernelFunc) { class handler { public: - template void parallel_for(range numWorkItems, const KernelType &kernelFunc) { using NameT = typename get_kernel_name_t::name; From 0680ce810dd1d6d2c730f5064acf7368cd190758 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 29 Mar 2023 10:22:31 -0700 Subject: [PATCH 15/29] Add tests. --- clang/lib/Sema/SemaSYCL.cpp | 6 ++- .../kernel-functor-without-definition.cpp | 42 +++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaSYCL/kernel-functor-without-definition.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index fabf86f7d4f64..ef616ac4e069f 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2842,6 +2842,9 @@ class KernelCallOperatorVisitor } CXXMethodDecl *getCallOperator() { + if (!CallOperator) + return CallOperator; + if (CallOperator) return CallOperator; @@ -4037,7 +4040,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, const CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); - if (!KernelObj) { + if (!KernelObj->hasDefinition() || !KernelObj) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); KernelFunc->setInvalidDecl(); return; @@ -4066,7 +4069,6 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // Do not visit invalid kernel object. if (KernelObj->isInvalidDecl()) return; - KernelCallOperatorVisitor KernelCallOperator(KernelFunc, KernelObj); SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this); diff --git a/clang/test/SemaSYCL/kernel-functor-without-definition.cpp b/clang/test/SemaSYCL/kernel-functor-without-definition.cpp new file mode 100644 index 0000000000000..ccf6b70c35c60 --- /dev/null +++ b/clang/test/SemaSYCL/kernel-functor-without-definition.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -verify -fsyntax-only %s +// This test checks that an error is thrown when a functor without a call operator defined or when a fucntor wothout a definition +// is used as a kernel. + +#include "sycl.hpp" + +using namespace sycl; +queue q; + +struct FunctorWithoutCallOperator; // expected-note {{forward declaration of 'FunctorWithoutCallOperator'}} + +struct StructDefined { + int x; +}; + +class FunctorWithCallOpDefined { + int x; + public: + void operator()() const {} +}; + +int main() { + + q.submit([&](sycl::handler &cgh) { + // expected-error@#KernelSingleTask {{kernel parameter must be a lambda or function object}} + // expected-error@+2 {{invalid use of incomplete type 'FunctorWithoutCallOperator'}} + // expected-note@+1 {{in instantiation of function template specialization}} + cgh.single_task(FunctorWithoutCallOperator{}); + }); + + q.submit([&](sycl::handler &cgh) { + // expected-error@#KernelSingleTaskKernelFuncCall {{type 'const StructDefined' does not provide a call operator}} + // expected-note@#KernelSingleTask {{in instantiation of function template specialization 'sycl::kernel_single_task' requested here}} + // expected-note@+1 {{in instantiation of function template specialization 'sycl::handler::single_task' requested here}} + cgh.single_task(StructDefined{}); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.single_task(FunctorWithCallOpDefined{}); + }); + +} From c97193057dbc651861a340072286b36b5723d579 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 29 Mar 2023 10:25:04 -0700 Subject: [PATCH 16/29] Remove extra line. --- clang/lib/Sema/SemaSYCL.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ef616ac4e069f..39eede051dff9 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1655,7 +1655,6 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { << Ty << /*Struct*/ 1; const RecordDecl *RecD = Ty->getAsRecordDecl(); - if (const ClassTemplateSpecializationDecl *CTSD = dyn_cast(RecD)) { const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); From 75a5c3f2b3c1ea9fbf98ea28afbce54b0cdd4627 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 29 Mar 2023 10:32:02 -0700 Subject: [PATCH 17/29] Fix typos. --- clang/test/SemaSYCL/kernel-functor-without-definition.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/SemaSYCL/kernel-functor-without-definition.cpp b/clang/test/SemaSYCL/kernel-functor-without-definition.cpp index ccf6b70c35c60..82df8f1c0342c 100644 --- a/clang/test/SemaSYCL/kernel-functor-without-definition.cpp +++ b/clang/test/SemaSYCL/kernel-functor-without-definition.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -verify -fsyntax-only %s -// This test checks that an error is thrown when a functor without a call operator defined or when a fucntor wothout a definition -// is used as a kernel. +// This test checks that an error is thrown when a functor without a call operator defined or +// when a functor without a definition is used as a kernel. #include "sycl.hpp" From 42bd23992fdb5b37dd471fdceae97724f13b2c27 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 29 Mar 2023 14:29:40 -0700 Subject: [PATCH 18/29] Fix failing tests. --- clang/lib/Sema/SemaSYCL.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 39eede051dff9..bb2e52cc04de4 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2841,9 +2841,6 @@ class KernelCallOperatorVisitor } CXXMethodDecl *getCallOperator() { - if (!CallOperator) - return CallOperator; - if (CallOperator) return CallOperator; From 95c181c5ca9a01f210bcc3270671b545563bc716 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 29 Mar 2023 20:11:02 -0700 Subject: [PATCH 19/29] Fix test failure. --- clang/lib/Sema/SemaSYCL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index bb2e52cc04de4..1395fcc97be45 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -4036,7 +4036,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, const CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); - if (!KernelObj->hasDefinition() || !KernelObj) { + if (!KernelObj || (KernelObj && !KernelObj->hasDefinition())) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); KernelFunc->setInvalidDecl(); return; From e18a1ce7ed85675895ed8a2ac621f0c0b4c88a87 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Sat, 1 Apr 2023 22:46:50 -0700 Subject: [PATCH 20/29] Add IsSIMD flag to SyclKernelBodyCreator. --- clang/lib/Sema/SemaSYCL.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1395fcc97be45..c89068c4c1ef7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2881,6 +2881,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // top-level pointers. uint64_t StructDepth = 0; VarDecl *KernelHandlerClone = nullptr; + bool IsSIMD = false; Stmt *replaceWithLocalClone(ParmVarDecl *OriginalParam, VarDecl *LocalClone, Stmt *FunctionBody) { @@ -3264,10 +3265,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } const llvm::StringLiteral getInitMethodName() const { - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); - return IsSIMDKernel ? InitESIMDMethodName : InitMethodName; + return IsSIMD ? InitESIMDMethodName : InitMethodName; } // Default inits the type, then calls the init-method in the body. @@ -3411,13 +3409,14 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { static constexpr const bool VisitInsideSimpleContainers = false; SyclKernelBodyCreator(Sema &S, SyclKernelDeclCreator &DC, const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc) + FunctionDecl *KernelCallerFunc, bool IsSIMDKernel) : SyclKernelFieldHandler(S), DeclCreator(DC), KernelObjClone(createKernelObjClone(S.getASTContext(), DC.getKernelDecl(), KernelObj)), VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), - KernelCallerSrcLoc(KernelCallerFunc->getLocation()) { + KernelCallerSrcLoc(KernelCallerFunc->getLocation()), + IsSIMD(IsSIMDKernel) { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); annotateHierarchicalParallelismAPICalls(); @@ -4225,7 +4224,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, KernelCallerFunc->isInlined(), IsSIMDKernel, KernelCallerFunc); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, - KernelCallerFunc); + KernelCallerFunc, IsSIMDKernel); SyclKernelIntHeaderCreator int_header( IsSIMDKernel, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); From 4eead5914ce85d2ddcb38af031cda69cb89158a5 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Sat, 1 Apr 2023 23:01:47 -0700 Subject: [PATCH 21/29] Add comments. --- clang/lib/Sema/SemaSYCL.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c89068c4c1ef7..b4e7babbf0843 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2268,6 +2268,9 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); + // Currently samplers/stream are not supported in ESIMD. + // Ensure that the use of sycl_explicit_simd attribute emits a diagnostic + // when used with sampler or stream. if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) << RecordDecl; @@ -2423,6 +2426,9 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); + // Currently samplers/stream are not supported in ESIMD. + // Ensure that the use of sycl_explicit_simd attribute emits a diagnostic + // when used with sampler or stream. if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) << RecordDecl; From 2cf8b6dd899f549ef09fbbcc3f2cac4ba6b8f98d Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Sun, 2 Apr 2023 21:00:33 -0700 Subject: [PATCH 22/29] Modify copySYCLKernelAttrs signature to take reference to KernelCallOperator --- clang/include/clang/Sema/Sema.h | 72 +++++++++++++++++++++++++++++++- clang/lib/Sema/SemaSYCL.cpp | 74 +-------------------------------- 2 files changed, 72 insertions(+), 74 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 19f8b02dbc50b..d1c8cb2b05e8d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -31,6 +31,7 @@ #include "clang/AST/MangleNumberingContext.h" #include "clang/AST/NSAPI.h" #include "clang/AST/PrettyPrinter.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/TypeLoc.h" @@ -469,6 +470,74 @@ class SYCLIntegrationFooter { void emitSpecIDName(raw_ostream &O, const VarDecl *VD); }; +// This Visitor traverses the AST of the function with +// `sycl_kernel` attribute and returns the version of “operator()()” that is +// called by KernelFunc. There will only be one call to KernelFunc in that +// AST because the DPC++ headers are structured such that the user’s +// kernel function is only called once. This ensures that the correct +// “operator()()” function call is returned, when a named function object used +// to define a kernel has more than one “operator()()” calls defined in it. For +// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on +// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. +// class MyKernel { +// public: +// void operator()() const { +// // code +// } +// +// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const +// { +// // code +// } +// }; +// +// int main() { +// +// Q.submit([&](sycl::handler& cgh) { +// MyKernel kernelFunctorObject; +// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); +// }); +// return 0; +// } + +class KernelCallOperatorVisitor + : public RecursiveASTVisitor { + + FunctionDecl *KernelCallerFunc; + +public: + CXXMethodDecl *CallOperator = nullptr; + const CXXRecordDecl *KernelObj; + + KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, + const CXXRecordDecl *KernelObj) + : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} + + bool VisitCallExpr(CallExpr *CE) { + Decl *CalleeDecl = CE->getCalleeDecl(); + if (isa_and_nonnull(CalleeDecl)) { + CXXMethodDecl *MD = cast(CalleeDecl); + if (MD->getOverloadedOperator() == OO_Call && + MD->getParent() == KernelObj) { + CallOperator = MD; + } + } + return true; + } + + CXXMethodDecl *getCallOperator() { + if (CallOperator) + return CallOperator; + + if (KernelObj->isLambda()) { + CallOperator = KernelObj->getLambdaCallOperator(); + return CallOperator; + } + TraverseDecl(KernelCallerFunc); + return CallOperator; + } +}; + /// Tracks expected type during expression parsing, for use in code completion. /// The type is tied to a particular token, all functions that update or consume /// the type take a start location of the token they are looking at as a @@ -14274,8 +14343,7 @@ class Sema final { bool isDeclAllowedInSYCLDeviceCode(const Decl *D); void checkSYCLDeviceVarDecl(VarDecl *Var); - void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc); + void copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void SetSYCLKernelNames(); void MarkDevices(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index b4e7babbf0843..dccdf11a03ffb 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2791,74 +2791,6 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -// This Visitor traverses the AST of the function with -// `sycl_kernel` attribute and returns the version of “operator()()” that is -// called by KernelFunc. There will only be one call to KernelFunc in that -// AST because the DPC++ headers are structured such that the user’s -// kernel function is only called once. This ensures that the correct -// “operator()()” function call is returned, when a named function object used -// to define a kernel has more than one “operator()()” calls defined in it. For -// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on -// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. -// class MyKernel { -// public: -// void operator()() const { -// // code -// } -// -// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const -// { -// // code -// } -// }; -// -// int main() { -// -// Q.submit([&](sycl::handler& cgh) { -// MyKernel kernelFunctorObject; -// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); -// }); -// return 0; -// } - -class KernelCallOperatorVisitor - : public RecursiveASTVisitor { - - FunctionDecl *KernelCallerFunc; - -public: - CXXMethodDecl *CallOperator = nullptr; - const CXXRecordDecl *KernelObj; - - KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, - const CXXRecordDecl *KernelObj) - : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} - - bool VisitCallExpr(CallExpr *CE) { - Decl *CalleeDecl = CE->getCalleeDecl(); - if (isa_and_nonnull(CalleeDecl)) { - CXXMethodDecl *MD = cast(CalleeDecl); - if (MD->getOverloadedOperator() == OO_Call && - MD->getParent() == KernelObj) { - CallOperator = MD; - } - } - return true; - } - - CXXMethodDecl *getCallOperator() { - if (CallOperator) - return CallOperator; - - if (KernelObj->isLambda()) { - CallOperator = KernelObj->getLambdaCallOperator(); - return CallOperator; - } - TraverseDecl(KernelCallerFunc); - return CallOperator; - } -}; - static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator) { const CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); return (OpParens != nullptr) && OpParens->hasAttr(); @@ -4102,10 +4034,8 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // For a wrapped parallel_for, copy attributes from original // kernel to wrapped kernel. -void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc) { +void Sema::copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator) { // Get the operator() function of the wrapper. - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); assert(OpParens && "invalid kernel object"); @@ -4221,7 +4151,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Attributes of a user-written SYCL kernel must be copied to the internally // generated alternative kernel, identified by a known string in its name. if (StableName.find("__pf_kernel_wrapper") != std::string::npos) - copySYCLKernelAttrs(KernelObj, KernelCallerFunc); + copySYCLKernelAttrs(KernelCallOperator); } bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); From 98c98394891f3b8e48a38774192da3243a502f3e Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Sun, 2 Apr 2023 22:34:32 -0700 Subject: [PATCH 23/29] Remove lambda check from Visitor class. --- clang/include/clang/Sema/Sema.h | 7 ++----- clang/lib/Sema/SemaSYCL.cpp | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index d1c8cb2b05e8d..5d3815df1e5fb 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -529,10 +529,6 @@ class KernelCallOperatorVisitor if (CallOperator) return CallOperator; - if (KernelObj->isLambda()) { - CallOperator = KernelObj->getLambdaCallOperator(); - return CallOperator; - } TraverseDecl(KernelCallerFunc); return CallOperator; } @@ -14343,7 +14339,8 @@ class Sema final { bool isDeclAllowedInSYCLDeviceCode(const Decl *D); void checkSYCLDeviceVarDecl(VarDecl *Var); - void copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator); + void copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator, + const CXXRecordDecl *KernelFuncObj); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void SetSYCLKernelNames(); void MarkDevices(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index dccdf11a03ffb..502274c3f6d30 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2791,8 +2791,15 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator) { - const CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); +static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, + const CXXRecordDecl *KernelFuncObj) { + + const CXXMethodDecl *OpParens = nullptr; + + if (KernelFuncObj->isLambda()) + OpParens = KernelFuncObj->getLambdaCallOperator(); + + OpParens = KernelCallOperator.getCallOperator(); return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -4034,9 +4041,15 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // For a wrapped parallel_for, copy attributes from original // kernel to wrapped kernel. -void Sema::copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator) { +void Sema::copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator, + const CXXRecordDecl *KernelFuncObj) { // Get the operator() function of the wrapper. - CXXMethodDecl *OpParens = KernelCallOperator.getCallOperator(); + CXXMethodDecl *OpParens = nullptr; + + if (KernelFuncObj->isLambda()) + OpParens = KernelFuncObj->getLambdaCallOperator(); + + OpParens = KernelCallOperator.getCallOperator(); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; @@ -4151,10 +4164,10 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Attributes of a user-written SYCL kernel must be copied to the internally // generated alternative kernel, identified by a known string in its name. if (StableName.find("__pf_kernel_wrapper") != std::string::npos) - copySYCLKernelAttrs(KernelCallOperator); + copySYCLKernelAttrs(KernelCallOperator, KernelObj); } - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator); + bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator, KernelObj); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, From 5361bd8e5bd5122d602284f5f70322ad2666d3f3 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 3 Apr 2023 10:11:58 -0700 Subject: [PATCH 24/29] Pass Call operator to Visitor and functions --- clang/include/clang/Sema/Sema.h | 3 +-- clang/lib/Sema/SemaSYCL.cpp | 43 ++++++++++++++------------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5d3815df1e5fb..c4b7e3a5a4657 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -14339,8 +14339,7 @@ class Sema final { bool isDeclAllowedInSYCLDeviceCode(const Decl *D); void checkSYCLDeviceVarDecl(VarDecl *Var); - void copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator, - const CXXRecordDecl *KernelFuncObj); + void copySYCLKernelAttrs(CXXMethodDecl *CallOperator); void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); void SetSYCLKernelNames(); void MarkDevices(); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 502274c3f6d30..3a062d49130e9 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2791,15 +2791,8 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { } }; -static bool isESIMDKernelType(KernelCallOperatorVisitor KernelCallOperator, - const CXXRecordDecl *KernelFuncObj) { - - const CXXMethodDecl *OpParens = nullptr; - - if (KernelFuncObj->isLambda()) - OpParens = KernelFuncObj->getLambdaCallOperator(); - - OpParens = KernelCallOperator.getCallOperator(); +static bool isESIMDKernelType(CXXMethodDecl *CallOperator) { + const CXXMethodDecl *OpParens = CallOperator; return (OpParens != nullptr) && OpParens->hasAttr(); } @@ -2827,6 +2820,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { uint64_t StructDepth = 0; VarDecl *KernelHandlerClone = nullptr; bool IsSIMD = false; + CXXMethodDecl *CallOperator; Stmt *replaceWithLocalClone(ParmVarDecl *OriginalParam, VarDecl *LocalClone, Stmt *FunctionBody) { @@ -2893,10 +2887,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); CXXMethodDecl *WGCallOperator = nullptr; - if (KernelObj->isLambda()) - WGCallOperator = KernelObj->getLambdaCallOperator(); - - WGCallOperator = KernelCallOperator.getCallOperator(); + WGCallOperator = CallOperator; assert(WGCallOperator && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: @@ -3354,14 +3345,14 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { static constexpr const bool VisitInsideSimpleContainers = false; SyclKernelBodyCreator(Sema &S, SyclKernelDeclCreator &DC, const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc, bool IsSIMDKernel) + FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, CXXMethodDecl *CallOperator) : SyclKernelFieldHandler(S), DeclCreator(DC), KernelObjClone(createKernelObjClone(S.getASTContext(), DC.getKernelDecl(), KernelObj)), VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()), - IsSIMD(IsSIMDKernel) { + IsSIMD(IsSIMDKernel), CallOperator(CallOperator) { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); annotateHierarchicalParallelismAPICalls(); @@ -4041,15 +4032,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // For a wrapped parallel_for, copy attributes from original // kernel to wrapped kernel. -void Sema::copySYCLKernelAttrs(KernelCallOperatorVisitor &KernelCallOperator, - const CXXRecordDecl *KernelFuncObj) { +void Sema::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { // Get the operator() function of the wrapper. - CXXMethodDecl *OpParens = nullptr; + CXXMethodDecl *OpParens = CallOperator; - if (KernelFuncObj->isLambda()) - OpParens = KernelFuncObj->getLambdaCallOperator(); - - OpParens = KernelCallOperator.getCallOperator(); assert(OpParens && "invalid kernel object"); typedef std::pair ChildParentPair; @@ -4153,6 +4139,13 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, if (KernelObj->isInvalidDecl()) return; + CXXMethodDecl *CallOperator = nullptr; + + if (KernelObj->isLambda()) + CallOperator = KernelObj->getLambdaCallOperator(); + else + CallOperator = KernelCallOperator.getCallOperator(); + { // Do enough to calculate the StableName for the purposes of the hackery // below for __pf_kernel_wrapper. Placed in a scope so that we don't @@ -4164,16 +4157,16 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Attributes of a user-written SYCL kernel must be copied to the internally // generated alternative kernel, identified by a known string in its name. if (StableName.find("__pf_kernel_wrapper") != std::string::npos) - copySYCLKernelAttrs(KernelCallOperator, KernelObj); + copySYCLKernelAttrs(CallOperator); } - bool IsSIMDKernel = isESIMDKernelType(KernelCallOperator, KernelObj); + bool IsSIMDKernel = isESIMDKernelType(CallOperator); SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, KernelCallerFunc); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, - KernelCallerFunc, IsSIMDKernel); + KernelCallerFunc, IsSIMDKernel, CallOperator); SyclKernelIntHeaderCreator int_header( IsSIMDKernel, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); From 26ea0bdd8341ead3ea90d6765a2aab211132d2c7 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 3 Apr 2023 11:07:14 -0700 Subject: [PATCH 25/29] Remove unnecessary code. --- clang/lib/Sema/SemaSYCL.cpp | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 3a062d49130e9..5699055e4f5e3 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2792,8 +2792,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { }; static bool isESIMDKernelType(CXXMethodDecl *CallOperator) { - const CXXMethodDecl *OpParens = CallOperator; - return (OpParens != nullptr) && OpParens->hasAttr(); + return (CallOperator != nullptr) && CallOperator->hasAttr(); } class SyclKernelBodyCreator : public SyclKernelFieldHandler { @@ -2879,17 +2878,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { KernelObjClone->addAttr(SYCLScopeAttr::CreateImplicit( SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); - // Fetch the kernel object and the associated call operator - // (of either the lambda or the function object). - CXXRecordDecl *KernelObj = - GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); - - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - CXXMethodDecl *WGCallOperator = nullptr; - - WGCallOperator = CallOperator; - - assert(WGCallOperator && "non callable object is passed as kernel obj"); + assert(CallOperator && "non callable object is passed as kernel obj"); // Mark the function that it "works" in a work group scope: // NOTE: In case of parallel_for_work_item the marker call itself is // marked with work item scope attribute, here the '()' operator of the @@ -2899,15 +2888,15 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // all of them in the private address space rather then sharing via // the local AS. See parallel_for_work_group implementation in the // SYCL headers. - if (!WGCallOperator->hasAttr()) { - WGCallOperator->addAttr(SYCLScopeAttr::CreateImplicit( + if (!CallOperator->hasAttr()) { + CallOperator->addAttr(SYCLScopeAttr::CreateImplicit( SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup)); // Search and mark parallel_for_work_item calls: MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext()); - MarkWIScope.TraverseDecl(WGCallOperator); + MarkWIScope.TraverseDecl(CallOperator); // Now mark local variables declared in the PFWG lambda with work group // scope attribute - addScopeAttrToLocalVars(*WGCallOperator); + addScopeAttrToLocalVars(*CallOperator); } } @@ -4034,14 +4023,12 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, // kernel to wrapped kernel. void Sema::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { // Get the operator() function of the wrapper. - CXXMethodDecl *OpParens = CallOperator; - - assert(OpParens && "invalid kernel object"); + assert(CallOperator && "invalid kernel object"); typedef std::pair ChildParentPair; llvm::SmallPtrSet Visited; llvm::SmallVector WorkList; - WorkList.push_back({OpParens, nullptr}); + WorkList.push_back({CallOperator, nullptr}); FunctionDecl *KernelBody = nullptr; CallGraph SYCLCG; @@ -4050,7 +4037,7 @@ void Sema::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { FunctionDecl *FD = WorkList.back().first; FunctionDecl *ParentFD = WorkList.back().second; - if ((ParentFD == OpParens) && isSYCLKernelBodyFunction(FD)) { + if ((ParentFD == CallOperator) && isSYCLKernelBodyFunction(FD)) { KernelBody = FD; break; } @@ -4077,7 +4064,7 @@ void Sema::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { llvm::SmallVector Attrs; collectSYCLAttributes(*this, KernelBody, Attrs, /*DirectlyCalled*/ true); if (!Attrs.empty()) - llvm::for_each(Attrs, [OpParens](Attr *A) { OpParens->addAttr(A); }); + llvm::for_each(Attrs, [CallOperator](Attr *A) { CallOperator->addAttr(A); }); } } From ae1dc6cbdefe2fd11355e40b9f2d1c65240a8c14 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Wed, 5 Apr 2023 22:47:22 -0700 Subject: [PATCH 26/29] Add new ESIMD Visitor. --- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/include/clang/Sema/Sema.h | 65 ------ clang/lib/Sema/SemaSYCL.cpp | 200 +++++++++++++++--- clang/test/SemaSYCL/Inputs/sycl.hpp | 1 + 4 files changed, 171 insertions(+), 98 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5a91a329d5440..f4e130cb3a23f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11823,6 +11823,9 @@ def err_sycl_external_no_rdc : Error< def warn_sycl_kernel_too_big_args : Warning< "size of kernel arguments (%0 bytes) may exceed the supported maximum " "of %1 bytes on some devices">, InGroup, ShowInSystemHeader; +def warn_esimd_kernel_too_big_args : Warning< + "size of esimd kernel arguments (%0 bytes) may exceed the supported maximum " + "of %1 bytes on some devices">, InGroup, ShowInSystemHeader; def err_sycl_virtual_types : Error< "no class with a vtable can be used in a SYCL kernel or any code included in the kernel">; def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index c4b7e3a5a4657..842c0da4739d8 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -31,7 +31,6 @@ #include "clang/AST/MangleNumberingContext.h" #include "clang/AST/NSAPI.h" #include "clang/AST/PrettyPrinter.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/TypeLoc.h" @@ -470,70 +469,6 @@ class SYCLIntegrationFooter { void emitSpecIDName(raw_ostream &O, const VarDecl *VD); }; -// This Visitor traverses the AST of the function with -// `sycl_kernel` attribute and returns the version of “operator()()” that is -// called by KernelFunc. There will only be one call to KernelFunc in that -// AST because the DPC++ headers are structured such that the user’s -// kernel function is only called once. This ensures that the correct -// “operator()()” function call is returned, when a named function object used -// to define a kernel has more than one “operator()()” calls defined in it. For -// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on -// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. -// class MyKernel { -// public: -// void operator()() const { -// // code -// } -// -// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const -// { -// // code -// } -// }; -// -// int main() { -// -// Q.submit([&](sycl::handler& cgh) { -// MyKernel kernelFunctorObject; -// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); -// }); -// return 0; -// } - -class KernelCallOperatorVisitor - : public RecursiveASTVisitor { - - FunctionDecl *KernelCallerFunc; - -public: - CXXMethodDecl *CallOperator = nullptr; - const CXXRecordDecl *KernelObj; - - KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, - const CXXRecordDecl *KernelObj) - : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} - - bool VisitCallExpr(CallExpr *CE) { - Decl *CalleeDecl = CE->getCalleeDecl(); - if (isa_and_nonnull(CalleeDecl)) { - CXXMethodDecl *MD = cast(CalleeDecl); - if (MD->getOverloadedOperator() == OO_Call && - MD->getParent() == KernelObj) { - CallOperator = MD; - } - } - return true; - } - - CXXMethodDecl *getCallOperator() { - if (CallOperator) - return CallOperator; - - TraverseDecl(KernelCallerFunc); - return CallOperator; - } -}; - /// Tracks expected type during expression parsing, for use in code completion. /// The type is tied to a particular token, all functions that update or consume /// the type take a start location of the token they are looking at as a diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5699055e4f5e3..ee858ef56b9cb 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2144,7 +2144,6 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { FunctionDecl *KernelDecl; llvm::SmallVector Params; Sema::ContextRAII FuncContext; - SourceLocation Loc; // Holds the last handled field's first parameter. This doesn't store an // iterator as push_back invalidates iterators. size_t LastParamIndex = 0; @@ -2268,17 +2267,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - // Currently samplers/stream are not supported in ESIMD. - // Ensure that the use of sycl_explicit_simd attribute emits a diagnostic - // when used with sampler or stream. - if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) - return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) - << RecordDecl; - - llvm::StringLiteral MethodName = - KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) - ? InitESIMDMethodName - : InitMethodName; + llvm::StringLiteral MethodName = InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); @@ -2368,7 +2357,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { : SyclKernelFieldHandler(S), KernelDecl( createKernelDecl(S.getASTContext(), Loc, IsInline, IsSIMDKernel)), - FuncContext(SemaRef, KernelDecl), Loc(Loc) { + FuncContext(SemaRef, KernelDecl) { S.addSyclOpenCLKernel(SYCLKernel, KernelDecl); if (const auto *AddIRAttrFunc = @@ -2426,16 +2415,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - // Currently samplers/stream are not supported in ESIMD. - // Ensure that the use of sycl_explicit_simd attribute emits a diagnostic - // when used with sampler or stream. - if (KernelDecl->hasAttr() && !isSyclAccessorType(FieldTy)) - return SemaRef.Diag(Loc, diag::err_sycl_esimd_not_supported_for_type) - << RecordDecl; - llvm::StringLiteral MethodName = - KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) - ? InitESIMDMethodName - : InitMethodName; + llvm::StringLiteral MethodName = InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); @@ -2563,6 +2543,154 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { } }; +// This Visitor traverses the AST of the function with +// `sycl_kernel` attribute and returns the version of “operator()()” that is +// called by KernelFunc. There will only be one call to KernelFunc in that +// AST because the DPC++ headers are structured such that the user’s +// kernel function is only called once. This ensures that the correct +// “operator()()” function call is returned, when a named function object used +// to define a kernel has more than one “operator()()” calls defined in it. For +// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on +// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument. +// class MyKernel { +// public: +// void operator()() const { +// // code +// } +// +// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const +// { +// // code +// } +// }; +// +// int main() { +// +// Q.submit([&](sycl::handler& cgh) { +// MyKernel kernelFunctorObject; +// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject); +// }); +// return 0; +// } + +class KernelCallOperatorVisitor + : public RecursiveASTVisitor { + + FunctionDecl *KernelCallerFunc; + +public: + CXXMethodDecl *CallOperator = nullptr; + const CXXRecordDecl *KernelObj; + + KernelCallOperatorVisitor(FunctionDecl *KernelCallerFunc, + const CXXRecordDecl *KernelObj) + : KernelCallerFunc(KernelCallerFunc), KernelObj(KernelObj) {} + + bool VisitCallExpr(CallExpr *CE) { + Decl *CalleeDecl = CE->getCalleeDecl(); + if (isa_and_nonnull(CalleeDecl)) { + CXXMethodDecl *MD = cast(CalleeDecl); + if (MD->getOverloadedOperator() == OO_Call && + MD->getParent() == KernelObj) { + CallOperator = MD; + } + } + return true; + } + + CXXMethodDecl *getCallOperator() { + if (CallOperator) + return CallOperator; + + TraverseDecl(KernelCallerFunc); + return CallOperator; + } +}; + +class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { + + SourceLocation KernelLoc; + unsigned SizeOfParams = 0; + bool IsESIMD = false; + + void addParam(QualType ArgTy) { + SizeOfParams += + SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); + } + + bool handleSpecialType(QualType FieldTy) { + const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); + + if (IsESIMD && !isSyclAccessorType(FieldTy)) + return SemaRef.Diag(KernelLoc, + diag::err_sycl_esimd_not_supported_for_type) + << RecordDecl; + + assert(RecordDecl && "The type must be a RecordDecl"); + + StringRef MethodName; + + if (IsESIMD && isSyclAccessorType(FieldTy)) { + MethodName = InitESIMDMethodName; + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); + assert(InitMethod && "The type must have the __init method"); + for (const ParmVarDecl *Param : InitMethod->parameters()) + addParam(Param->getType()); + } + return true; + } + +public: + ESIMDKernelDiagnostics(Sema &S, SourceLocation Loc, bool IsESIMD) + : SyclKernelFieldHandler(S), KernelLoc(Loc), IsESIMD(IsESIMD) {} + + ~ESIMDKernelDiagnostics() { + if (IsESIMD && (SizeOfParams > MaxKernelArgsSize)) + SemaRef.Diag(KernelLoc, diag::warn_esimd_kernel_too_big_args) + << SizeOfParams << MaxKernelArgsSize; + } + + bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { + return handleSpecialType(FieldTy); + } + + bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, + QualType FieldTy) final { + return handleSpecialType(FieldTy); + } + + bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { + addParam(FieldTy); + return true; + } + + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { + addParam(FieldTy); + return true; + } + + bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { + addParam(FieldTy); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, + QualType Ty) final { + addParam(Ty); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *Base, + const CXXBaseSpecifier &BS, QualType Ty) final { + addParam(Ty); + return true; + } + + bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { + return handleScalarType(FD, FieldTy); + } +}; + class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { SourceLocation KernelLoc; unsigned SizeOfParams = 0; @@ -2589,6 +2717,7 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { : SyclKernelFieldHandler(S), KernelLoc(Loc) {} ~SyclKernelArgsSizeChecker() { + if (SizeOfParams > MaxKernelArgsSize) SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_big_args) << SizeOfParams << MaxKernelArgsSize; @@ -2819,7 +2948,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { uint64_t StructDepth = 0; VarDecl *KernelHandlerClone = nullptr; bool IsSIMD = false; - CXXMethodDecl *CallOperator; + CXXMethodDecl *CallOperator = nullptr; Stmt *replaceWithLocalClone(ParmVarDecl *OriginalParam, VarDecl *LocalClone, Stmt *FunctionBody) { @@ -3334,7 +3463,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { static constexpr const bool VisitInsideSimpleContainers = false; SyclKernelBodyCreator(Sema &S, SyclKernelDeclCreator &DC, const CXXRecordDecl *KernelObj, - FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, CXXMethodDecl *CallOperator) + FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, + CXXMethodDecl *CallOperator) : SyclKernelFieldHandler(S), DeclCreator(DC), KernelObjClone(createKernelObjClone(S.getASTContext(), DC.getKernelDecl(), KernelObj)), @@ -4064,7 +4194,8 @@ void Sema::copySYCLKernelAttrs(CXXMethodDecl *CallOperator) { llvm::SmallVector Attrs; collectSYCLAttributes(*this, KernelBody, Attrs, /*DirectlyCalled*/ true); if (!Attrs.empty()) - llvm::for_each(Attrs, [CallOperator](Attr *A) { CallOperator->addAttr(A); }); + llvm::for_each(Attrs, + [CallOperator](Attr *A) { CallOperator->addAttr(A); }); } } @@ -4120,12 +4251,11 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); assert(KernelObj && "invalid kernel caller"); - KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); - // Do not visit invalid kernel object. if (KernelObj->isInvalidDecl()) return; + KernelCallOperatorVisitor KernelCallOperator(KernelCallerFunc, KernelObj); CXXMethodDecl *CallOperator = nullptr; if (KernelObj->isLambda()) @@ -4149,11 +4279,15 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, bool IsSIMDKernel = isESIMDKernelType(CallOperator); + ESIMDKernelDiagnostics esimdKernel(*this, KernelObj->getLocation(), + IsSIMDKernel); + SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(), KernelCallerFunc->isInlined(), IsSIMDKernel, KernelCallerFunc); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, - KernelCallerFunc, IsSIMDKernel, CallOperator); + KernelCallerFunc, IsSIMDKernel, + CallOperator); SyclKernelIntHeaderCreator int_header( IsSIMDKernel, *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelCallerFunc); @@ -4167,14 +4301,14 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, opt_report); + int_footer, opt_report, esimdKernel); Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, opt_report); + int_footer, opt_report, esimdKernel); } else { Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body, int_header, - int_footer); + int_footer, esimdKernel); Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body, int_header, - int_footer); + int_footer, esimdKernel); } if (ParmVarDecl *KernelHandlerArg = diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index a1ef7bf504641..74af5f66701e9 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -143,6 +143,7 @@ class __attribute__((sycl_special_class)) __SYCL_TYPE(accessor) accessor { using PtrType = typename DeviceValueType::type *; void __init(PtrType Ptr, range AccessRange, range MemRange, id Offset) {} + void __init_esimd(PtrType Ptr) {} friend class stream; }; From df14a30f83fe8e2577338f1c75051cf6d4df112a Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 6 Apr 2023 11:50:58 -0700 Subject: [PATCH 27/29] Move ArgsSizeChecker to ConstructOpenCLKernel. --- .../clang/Basic/DiagnosticSemaKinds.td | 3 - clang/lib/Sema/SemaSYCL.cpp | 114 +++++------------- clang/test/SemaSYCL/Inputs/sycl.hpp | 1 - 3 files changed, 30 insertions(+), 88 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f4e130cb3a23f..5a91a329d5440 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11823,9 +11823,6 @@ def err_sycl_external_no_rdc : Error< def warn_sycl_kernel_too_big_args : Warning< "size of kernel arguments (%0 bytes) may exceed the supported maximum " "of %1 bytes on some devices">, InGroup, ShowInSystemHeader; -def warn_esimd_kernel_too_big_args : Warning< - "size of esimd kernel arguments (%0 bytes) may exceed the supported maximum " - "of %1 bytes on some devices">, InGroup, ShowInSystemHeader; def err_sycl_virtual_types : Error< "no class with a vtable can be used in a SYCL kernel or any code included in the kernel">; def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ee858ef56b9cb..669f752aa81e3 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2266,8 +2266,10 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { bool handleSpecialType(FieldDecl *FD, QualType FieldTy) { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - - llvm::StringLiteral MethodName = InitMethodName; + llvm::StringLiteral MethodName = + KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) + ? InitESIMDMethodName + : InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); @@ -2414,8 +2416,10 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { QualType FieldTy) final { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - - llvm::StringLiteral MethodName = InitMethodName; + llvm::StringLiteral MethodName = + KernelDecl->hasAttr() && isSyclAccessorType(FieldTy) + ? InitESIMDMethodName + : InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); @@ -2610,14 +2614,8 @@ class KernelCallOperatorVisitor class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { SourceLocation KernelLoc; - unsigned SizeOfParams = 0; bool IsESIMD = false; - void addParam(QualType ArgTy) { - SizeOfParams += - SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); - } - bool handleSpecialType(QualType FieldTy) { const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); @@ -2625,18 +2623,6 @@ class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { return SemaRef.Diag(KernelLoc, diag::err_sycl_esimd_not_supported_for_type) << RecordDecl; - - assert(RecordDecl && "The type must be a RecordDecl"); - - StringRef MethodName; - - if (IsESIMD && isSyclAccessorType(FieldTy)) { - MethodName = InitESIMDMethodName; - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); - assert(InitMethod && "The type must have the __init method"); - for (const ParmVarDecl *Param : InitMethod->parameters()) - addParam(Param->getType()); - } return true; } @@ -2644,12 +2630,6 @@ class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { ESIMDKernelDiagnostics(Sema &S, SourceLocation Loc, bool IsESIMD) : SyclKernelFieldHandler(S), KernelLoc(Loc), IsESIMD(IsESIMD) {} - ~ESIMDKernelDiagnostics() { - if (IsESIMD && (SizeOfParams > MaxKernelArgsSize)) - SemaRef.Diag(KernelLoc, diag::warn_esimd_kernel_too_big_args) - << SizeOfParams << MaxKernelArgsSize; - } - bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { return handleSpecialType(FieldTy); } @@ -2658,42 +2638,12 @@ class ESIMDKernelDiagnostics : public SyclKernelFieldHandler { QualType FieldTy) final { return handleSpecialType(FieldTy); } - - bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { - addParam(FieldTy); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, - QualType Ty) final { - addParam(Ty); - return true; - } - - bool handleNonDecompStruct(const CXXRecordDecl *Base, - const CXXBaseSpecifier &BS, QualType Ty) final { - addParam(Ty); - return true; - } - - bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { - return handleScalarType(FD, FieldTy); - } }; class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { SourceLocation KernelLoc; unsigned SizeOfParams = 0; + bool IsESIMD = false; void addParam(QualType ArgTy) { SizeOfParams += @@ -2703,7 +2653,9 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { bool handleSpecialType(QualType FieldTy) { const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The type must be a RecordDecl"); - llvm::StringLiteral MethodName = InitMethodName; + llvm::StringLiteral MethodName = (IsESIMD && isSyclAccessorType(FieldTy)) + ? InitESIMDMethodName + : InitMethodName; CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The type must have the __init method"); for (const ParmVarDecl *Param : InitMethod->parameters()) @@ -2713,11 +2665,10 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { public: static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc) - : SyclKernelFieldHandler(S), KernelLoc(Loc) {} + SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc, bool IsESIMD) + : SyclKernelFieldHandler(S), KernelLoc(Loc), IsESIMD(IsESIMD) {} ~SyclKernelArgsSizeChecker() { - if (SizeOfParams > MaxKernelArgsSize) SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_big_args) << SizeOfParams << MaxKernelArgsSize; @@ -2947,7 +2898,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // top-level pointers. uint64_t StructDepth = 0; VarDecl *KernelHandlerClone = nullptr; - bool IsSIMD = false; + bool IsESIMD = false; CXXMethodDecl *CallOperator = nullptr; Stmt *replaceWithLocalClone(ParmVarDecl *OriginalParam, VarDecl *LocalClone, @@ -3319,7 +3270,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { } const llvm::StringLiteral getInitMethodName() const { - return IsSIMD ? InitESIMDMethodName : InitMethodName; + return IsESIMD ? InitESIMDMethodName : InitMethodName; } // Default inits the type, then calls the init-method in the body. @@ -3471,7 +3422,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()), - IsSIMD(IsSIMDKernel), CallOperator(CallOperator) { + IsESIMD(IsSIMDKernel), CallOperator(CallOperator) { CollectionInitExprs.push_back(createInitListExpr(KernelObj)); annotateHierarchicalParallelismAPICalls(); @@ -4124,8 +4075,6 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SyclKernelFieldChecker FieldChecker(*this); SyclKernelUnionChecker UnionChecker(*this); - SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc()); - KernelObjVisitor Visitor{*this}; DiagnosingSYCLKernel = true; @@ -4134,14 +4083,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, DecompMarker); - // ArgSizeChecker needs to happen after DecompMarker has completed, since it - // cares about the decomp attributes. DecompMarker cannot run before the - // others, since it counts on the FieldChecker to make sure it is visiting - // valid arrays/etc. Thus, ArgSizeChecker has its own visitation. - if (FieldChecker.isValid() && UnionChecker.isValid()) { - Visitor.VisitRecordBases(KernelObj, ArgsSizeChecker); - Visitor.VisitRecordFields(KernelObj, ArgsSizeChecker); - } + DiagnosingSYCLKernel = false; // Set the kernel function as invalid, if any of the checkers fail validation. if (!FieldChecker.isValid() || !UnionChecker.isValid() || @@ -4279,6 +4221,8 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, bool IsSIMDKernel = isESIMDKernelType(CallOperator); + SyclKernelArgsSizeChecker argsSizeChecker(*this, KernelObj->getLocation(), + IsSIMDKernel); ESIMDKernelDiagnostics esimdKernel(*this, KernelObj->getLocation(), IsSIMDKernel); @@ -4300,15 +4244,17 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { - Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, opt_report, esimdKernel); - Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, opt_report, esimdKernel); + Visitor.VisitRecordBases(KernelObj, argsSizeChecker, kernel_decl, + kernel_body, int_header, int_footer, opt_report, + esimdKernel); + Visitor.VisitRecordFields(KernelObj, argsSizeChecker, kernel_decl, + kernel_body, int_header, int_footer, opt_report, + esimdKernel); } else { - Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, esimdKernel); - Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body, int_header, - int_footer, esimdKernel); + Visitor.VisitRecordBases(KernelObj, argsSizeChecker, kernel_decl, + kernel_body, int_header, int_footer, esimdKernel); + Visitor.VisitRecordFields(KernelObj, argsSizeChecker, kernel_decl, + kernel_body, int_header, int_footer, esimdKernel); } if (ParmVarDecl *KernelHandlerArg = diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 74af5f66701e9..a1ef7bf504641 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -143,7 +143,6 @@ class __attribute__((sycl_special_class)) __SYCL_TYPE(accessor) accessor { using PtrType = typename DeviceValueType::type *; void __init(PtrType Ptr, range AccessRange, range MemRange, id Offset) {} - void __init_esimd(PtrType Ptr) {} friend class stream; }; From 9f99f92babe1ae48beea2827eb4ad23670738224 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 6 Apr 2023 13:34:41 -0700 Subject: [PATCH 28/29] Fix test. --- clang/test/SemaSYCL/args-size-overflow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/SemaSYCL/args-size-overflow.cpp b/clang/test/SemaSYCL/args-size-overflow.cpp index 476dc9cd6ee71..b5b52ce918143 100644 --- a/clang/test/SemaSYCL/args-size-overflow.cpp +++ b/clang/test/SemaSYCL/args-size-overflow.cpp @@ -11,9 +11,9 @@ queue q; using Accessor = accessor; #ifdef SPIR64 -// expected-warning@#KernelSingleTask {{size of kernel arguments (7994 bytes) may exceed the supported maximum of 2048 bytes on some devices}} +// expected-warning@27 {{size of kernel arguments (7994 bytes) may exceed the supported maximum of 2048 bytes on some devices}} #elif SPIR32 -// expected-warning@#KernelSingleTask {{size of kernel arguments (7986 bytes) may exceed the supported maximum of 2048 bytes on some devices}} +// expected-warning@27 {{size of kernel arguments (7986 bytes) may exceed the supported maximum of 2048 bytes on some devices}} #endif void use() { From c41945f15f37b5f01774103ae69bef8cd66773c2 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Sat, 8 Apr 2023 13:21:52 -0700 Subject: [PATCH 29/29] Move ESIMD check after SYCL kernel check. --- clang/lib/Sema/SemaSYCL.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 669f752aa81e3..206cb12f18e8e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -4244,17 +4244,17 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, kernel_decl, - kernel_body, int_header, int_footer, opt_report, - esimdKernel); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, kernel_decl, - kernel_body, int_header, int_footer, opt_report, - esimdKernel); + Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, + kernel_decl, kernel_body, int_header, int_footer, + opt_report); + Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, + kernel_decl, kernel_body, int_header, int_footer, + opt_report); } else { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, kernel_decl, - kernel_body, int_header, int_footer, esimdKernel); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, kernel_decl, - kernel_body, int_header, int_footer, esimdKernel); + Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, + kernel_decl, kernel_body, int_header, int_footer); + Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, + kernel_decl, kernel_body, int_header, int_footer); } if (ParmVarDecl *KernelHandlerArg =