Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d172f64
[SYCL] Support multiple call operators in kernel
srividya-sundaram Mar 2, 2023
e17002d
Add code to handle multiple call ops in kernel functor.
srividya-sundaram Mar 7, 2023
4f2dc5b
Modify test file
srividya-sundaram Mar 7, 2023
75f1919
Fix failing test.
srividya-sundaram Mar 8, 2023
a8bd38c
Address review comments.
srividya-sundaram Mar 10, 2023
e73efaa
Re-write code using RecursiveASTVisitor.
srividya-sundaram Mar 15, 2023
caddf21
Update test.
srividya-sundaram Mar 16, 2023
53d8de0
Fix review comments.
srividya-sundaram Mar 17, 2023
ef3ccd3
Address review comments.
srividya-sundaram Mar 18, 2023
8cc706f
Address code review comments.
srividya-sundaram Mar 23, 2023
78c5bdc
Fix ESIMD code.
srividya-sundaram Mar 24, 2023
8ef7f47
Move ESIMD check to SyclKernelDeclCreator.
srividya-sundaram Mar 28, 2023
5548aae
Remove ESIMD init method generation from KernelArgsSizeChecker.
srividya-sundaram Mar 28, 2023
ab95f49
Fix review comments.
srividya-sundaram Mar 28, 2023
0680ce8
Add tests.
srividya-sundaram Mar 29, 2023
c971930
Remove extra line.
srividya-sundaram Mar 29, 2023
75a5c3f
Fix typos.
srividya-sundaram Mar 29, 2023
42bd239
Fix failing tests.
srividya-sundaram Mar 29, 2023
95c181c
Fix test failure.
srividya-sundaram Mar 30, 2023
e18a1ce
Add IsSIMD flag to SyclKernelBodyCreator.
srividya-sundaram Apr 2, 2023
4eead59
Add comments.
srividya-sundaram Apr 2, 2023
2cf8b6d
Modify copySYCLKernelAttrs signature to take reference to KernelCallO…
srividya-sundaram Apr 3, 2023
98c9839
Remove lambda check from Visitor class.
srividya-sundaram Apr 3, 2023
5361bd8
Pass Call operator to Visitor and functions
srividya-sundaram Apr 3, 2023
26ea0bd
Remove unnecessary code.
srividya-sundaram Apr 3, 2023
ae1dc6c
Add new ESIMD Visitor.
srividya-sundaram Apr 6, 2023
df14a30
Move ArgsSizeChecker to ConstructOpenCLKernel.
srividya-sundaram Apr 6, 2023
9f99f92
Fix test.
srividya-sundaram Apr 6, 2023
c41945f
Move ESIMD check after SYCL kernel check.
srividya-sundaram Apr 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -14274,7 +14274,8 @@ class Sema final {

bool isDeclAllowedInSYCLDeviceCode(const Decl *D);
void checkSYCLDeviceVarDecl(VarDecl *Var);
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj);
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj,
FunctionDecl *KernelCallerFunc);
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
void SetSYCLKernelNames();
void MarkDevices();
Expand Down
95 changes: 81 additions & 14 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2784,16 +2784,80 @@ class SyclOptReportCreator : public SyclKernelFieldHandler {
}
};

static CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec) {
for (auto *MD : Rec->methods()) {
if (MD->getOverloadedOperator() == OO_Call)
return MD;
// This function traverses the static call graph from the root of the kernel
// (e.g. “kernel_parallel_for”) and returns the version of “operator()()” that
// is called by kernelFunc(). There will only be one call to kernelFunc() in
// that call graph because the DPC++ headers are structured such that the user’s
// kernel function is only called once. This ensures that the correct
// “operator()()” function call is returned, when a named function object used
// to define a kernel has more than one “operator()()” calls defined in it. For
// example, in the code below, 'operator()(sycl::id<1> id)' is returned based on
// the 'parallel_for' invocation which takes a 'sycl::range<1>(16)' argument.
// class MyKernel {
// public:
// void operator()() const {
// // code
// }
//
// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const
// {
// // code
// }
// };
//
// int main() {
//
// Q.submit([&](sycl::handler& cgh) {
// MyKernel kernelFunctorObject;
// cgh.parallel_for(sycl::range<1>(16), kernelFunctorObject);
// });
// return 0;
// }

static CXXMethodDecl *
getCallOperatorInvokedFromKernel(const CXXRecordDecl *KernelFuncObjType,
FunctionDecl *KernelCallerFunc,
Sema &SemaRef) {

CallGraph SYCLCG;
SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl());

// This code returns the 'lambda' call operator.
if (KernelFuncObjType->isLambda()) {
for (auto *MD : KernelFuncObjType->methods()) {
if (MD->getOverloadedOperator() == OO_Call)
return MD;
}
}

// This code returns the functor's call operator.
if (KernelCallerFunc && KernelCallerFunc->hasBody() &&
KernelCallerFunc->hasAttr<SYCLKernelAttr>()) {

CallGraphNode *KernelCallerFuncNode = SYCLCG.getNode(KernelCallerFunc);
CXXMethodDecl *OperatorCall = nullptr;

// Iterate through each funtion invoked from the kernel root, find the
// function call operator and make sure it is a member of the kernel fuctor.
for (const CallGraphNode *ChildNode : *KernelCallerFuncNode) {
if (auto *Callee = dyn_cast<CXXMethodDecl>(ChildNode->getDecl())) {
Callee = Callee->getMostRecentDecl();
if (Callee->getParent() == KernelFuncObjType &&
Callee->isCXXClassMember() &&
Callee->getOverloadedOperator() == OO_Call)
OperatorCall = Callee;
return OperatorCall;
}
}
}

return nullptr;
}

static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType) {
const CXXMethodDecl *OpParens = getOperatorParens(KernelObjType);
static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType,
FunctionDecl *KernelCallerFunc, Sema &SemaRef) {
const CXXMethodDecl *OpParens = getCallOperatorInvokedFromKernel(
KernelObjType, KernelCallerFunc, SemaRef);
return (OpParens != nullptr) && OpParens->hasAttr<SYCLSimdAttr>();
}

Expand Down Expand Up @@ -2886,7 +2950,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
if (KernelObj->isLambda())
WGLambdaFn = KernelObj->getLambdaCallOperator();
else
WGLambdaFn = getOperatorParens(KernelObj);
WGLambdaFn = getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc,
SemaRef);
assert(WGLambdaFn && "non callable object is passed as kernel obj");
// Mark the function that it "works" in a work group scope:
// NOTE: In case of parallel_for_work_item the marker call itself is
Expand Down Expand Up @@ -3199,7 +3264,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
}

const llvm::StringLiteral getInitMethodName() const {
bool IsSIMDKernel = isESIMDKernelType(KernelObj);
bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, SemaRef);
return IsSIMDKernel ? InitESIMDMethodName : InitMethodName;
}

Expand Down Expand Up @@ -3585,7 +3650,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
const CXXRecordDecl *KernelObj, QualType NameType,
FunctionDecl *KernelFunc)
: SyclKernelFieldHandler(S), Header(H) {
bool IsSIMDKernel = isESIMDKernelType(KernelObj);
bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, S);
// The header needs to access the kernel object size.
int64_t ObjSize = SemaRef.getASTContext()
.getTypeSizeInChars(KernelObj->getTypeForDecl())
Expand Down Expand Up @@ -3999,7 +4064,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc,
if (KernelObj->isInvalidDecl())
return;

bool IsSIMDKernel = isESIMDKernelType(KernelObj);
bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelFunc, *this);

SyclKernelDecompMarker DecompMarker(*this);
SyclKernelFieldChecker FieldChecker(*this, IsSIMDKernel);
Expand Down Expand Up @@ -4033,9 +4098,11 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc,

// For a wrapped parallel_for, copy attributes from original
// kernel to wrapped kernel.
void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) {
void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj,
FunctionDecl *KernelCallerFunc) {
// Get the operator() function of the wrapper.
CXXMethodDecl *OpParens = getOperatorParens(KernelObj);
CXXMethodDecl *OpParens =
getCallOperatorInvokedFromKernel(KernelObj, KernelCallerFunc, *this);
assert(OpParens && "invalid kernel object");

typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
Expand Down Expand Up @@ -4148,10 +4215,10 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc,
// Attributes of a user-written SYCL kernel must be copied to the internally
// generated alternative kernel, identified by a known string in its name.
if (StableName.find("__pf_kernel_wrapper") != std::string::npos)
copySYCLKernelAttrs(KernelObj);
copySYCLKernelAttrs(KernelObj, KernelCallerFunc);
}

bool IsSIMDKernel = isESIMDKernelType(KernelObj);
bool IsSIMDKernel = isESIMDKernelType(KernelObj, KernelCallerFunc, *this);

SyclKernelDeclCreator kernel_decl(*this, KernelObj->getLocation(),
KernelCallerFunc->isInlined(), IsSIMDKernel,
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGenSYCL/Inputs/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,15 @@ kernel_parallel_for(const KernelType &KernelFunc) {
KernelFunc(id<Dims>());
}

template <typename KernelName, typename KernelType>
ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) {
#ifdef __SYCL_DEVICE_ONLY__
kernelFunc();
#else
(void)kernelFunc;
#endif
}

// Dummy parallel_for_work_item function to mimic calls from
// parallel_for_work_group.
void parallel_for_work_item() {
Expand All @@ -458,6 +467,13 @@ kernel_parallel_for_work_group(const KernelType &KernelFunc) {

class handler {
public:

template <typename KernelName = auto_name, typename KernelType>
void parallel_for(const KernelType &kernelObj) {
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
kernel_parallel_for<NameT>(kernelObj);
}

template <typename KernelName = auto_name, typename KernelType, int Dims>
void parallel_for(range<Dims> numWorkItems, const KernelType &kernelFunc) {
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
Expand Down
94 changes: 94 additions & 0 deletions clang/test/CodeGenSYCL/kernel-op-calls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s


#include "Inputs/sycl.hpp"

constexpr auto sycl_read_write = sycl::access::mode::read_write;
constexpr auto sycl_global_buffer = sycl::access::target::global_buffer;

template<bool B, typename V = void>
struct enable_if { };
template<typename V>
struct enable_if<true, V> {
using type = V;
};
template<bool B, typename V = void>
using enable_if_t = typename enable_if<B, V>::type;

template <typename T> class Functor1 {
public:
Functor1(T X_, sycl::accessor<T, 1, sycl_read_write, sycl_global_buffer> &Acc_) :
X(X_), Acc(Acc_)
{}

[[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {
Acc.use(id, X);
}

void operator()() const {
Acc.use();
}

private:
T X;
sycl::accessor<T, 1, sycl_read_write, sycl_global_buffer> Acc;
};


template <typename T> class Functor2 {
public:
Functor2(T X_, sycl::accessor<T, 1, sycl_read_write, sycl_global_buffer> &Acc_) :
X(X_), Acc(Acc_)
{}

[[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {
Acc.use(id, X);
}

[[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const {
Acc.use(id, X);
}

private:
T X;
sycl::accessor<T, 1, sycl_read_write, sycl_global_buffer> Acc;
};

#define ARR_LEN(x) sizeof(x)/sizeof(x[0])


template <typename T> T bar(T X) {
T A[] = { (T)10, (T)10 };
{
sycl::queue Q;
sycl::buffer<T, 1> Buf(A, ARR_LEN(A));

Q.submit([&](sycl::handler& cgh) {
auto Acc = Buf.template get_access<sycl_read_write, sycl_global_buffer>(cgh);
Functor1<T> F(X, Acc);
// CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor1IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !11 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !intel_reqd_sub_group_size !14 {
cgh.parallel_for(sycl::range<1>(ARR_LEN(A)), F);
//cgh.parallel_for<class name>(F);
});

Q.submit([&](sycl::handler& cgh) {
auto Acc = Buf.template get_access<sycl_read_write, sycl_global_buffer>(cgh);
Functor2<T> FuncOp2(X, Acc);
// CHECK: define {{.*}}spir_kernel void @{{.*}}_ZTS8Functor2IiE(i32 noundef %_arg_X, ptr addrspace(1) noundef align 4 %_arg_Acc, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc1, ptr noundef byval(%"struct.sycl::_V1::range") align 4 %_arg_Acc2, ptr noundef byval(%"struct.sycl::_V1::id") align 4 %_arg_Acc3) #0 !srcloc !22 !kernel_arg_buffer_location !12 !kernel_arg_runtime_aligned !13 !kernel_arg_exclusive_ptr !13 !work_group_size_hint !23 {
cgh.parallel_for(sycl::range<2>(ARR_LEN(A)), FuncOp2);
});

}
T res = (T)0;

for (int i = 0; i < ARR_LEN(A); i++) {
res += A[i];
}
return res;
}

int main() {
const int Res2 = bar(10);
return 0;
}