Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 57 additions & 10 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2200,27 +2200,54 @@ class __SYCL_EXPORT handler {
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<1> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
const auto &KernelObj = (Rest, ...);
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
decltype(KernelObj)>::value) {
parallel_for<KernelName>(
Range, KernelObj.get(ext::oneapi::experimental::properties_tag{}),
std::forward<RestT>(Rest)...);
} else {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
}
}

template <typename KernelName = detail::auto_name, typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<2> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
const auto &KernelObj = (Rest, ...);
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
decltype(KernelObj)>::value) {
parallel_for<KernelName>(
Range, KernelObj.get(ext::oneapi::experimental::properties_tag{}),
std::forward<RestT>(Rest)...);
} else {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
}
}

template <typename KernelName = detail::auto_name, typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)>
parallel_for(range<3> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
const auto &KernelObj = (Rest, ...);
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
decltype(KernelObj)>::value) {
parallel_for<KernelName>(
Range, KernelObj.get(ext::oneapi::experimental::properties_tag{}),
std::forward<RestT>(Rest)...);
} else {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
}
}

template <typename KernelName = detail::auto_name, int Dims,
Expand All @@ -2245,7 +2272,27 @@ class __SYCL_EXPORT handler {

template <typename KernelName = detail::auto_name, int Dims,
typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) > 1)> // variant with reductions
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
const auto &KernelObj = (Rest, ...);
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<
decltype(KernelObj)>::value) {
parallel_for<KernelName>(
Range, KernelObj.get(ext::oneapi::experimental::properties_tag{}),
std::forward<RestT>(Rest)...);
} else {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
std::forward<RestT>(Rest)...);
}
}

template <typename KernelName = detail::auto_name, int Dims,
typename... RestT>
std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value &&
(sizeof...(RestT) == 1)> // variant without reductions
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
parallel_for<KernelName>(Range,
ext::oneapi::experimental::empty_properties_t{},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
// https://github.com/intel/llvm/issues/16320. Remove the flag once the issue is
// resolved.
// RUN: %clangxx -fsycl-device-only -S -Wno-deprecated-declarations -Xclang -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Wno-deprecated-declarations -Xclang -verify %s
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics

#include <sycl/sycl.hpp>
Expand All @@ -27,106 +24,166 @@ static constexpr auto device_has_all = device_has<
aspect::usm_host_allocations, aspect::usm_shared_allocations,
aspect::ext_intel_free_memory, aspect::ext_intel_device_id>;

struct TestKernelHasDevice {
void operator()() const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_id1 {
void operator()(id<1>) const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_id1_1 {
template <typename T1> void operator()(id<1>, T1 &) const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_nd_item1 {
void operator()(nd_item<1>) const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_nd_item1_1 {
template <typename T1> void operator()(nd_item<1>, T1 &) const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_nd_item1_2 {
template <typename T1, typename T2>
void operator()(nd_item<1>, T1 &, T2 &) const {}
auto get(properties_tag) const { return properties{device_has_all}; }
};

struct TestKernelHasDevice_work_group {
void operator()(group<1> G) const {
G.parallel_for_work_item([&](h_item<1>) {});
}
auto get(properties_tag) const { return properties{device_has_all}; }
};

int main() {
queue Q;
event Ev;

range<1> R1{1};
nd_range<1> NDR1{R1, R1};

constexpr auto Props = properties{device_has_all};

auto Redu1 = reduction<int>(nullptr, plus<int>());
auto Redu2 = reduction<float>(nullptr, multiplies<float>());

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel0(){{.*}} #[[DHAttr1:[0-9]+]]
Q.single_task<class WGSizeKernel0>(Props, []() {});
Q.single_task<class WGSizeKernel0>(TestKernelHasDevice{});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel1(){{.*}} #[[DHAttr1]]
Q.single_task<class WGSizeKernel1>(Ev, Props, []() {});
Q.single_task<class WGSizeKernel1>(Ev, TestKernelHasDevice{});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel2(){{.*}} #[[DHAttr1]]
Q.single_task<class WGSizeKernel2>({Ev}, Props, []() {});
Q.single_task<class WGSizeKernel2>({Ev}, TestKernelHasDevice{});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel3(){{.*}} #[[DHAttr2:[0-9]+]]
Q.parallel_for<class WGSizeKernel3>(R1, Props, [](id<1>) {});
Q.parallel_for<class WGSizeKernel3>(R1, TestKernelHasDevice_id1{});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel4(){{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel4>(R1, Ev, Props, [](id<1>) {});
Q.parallel_for<class WGSizeKernel4>(R1, Ev, TestKernelHasDevice_id1{});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel5(){{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel5>(R1, {Ev}, Props, [](id<1>) {});
Q.parallel_for<class WGSizeKernel5>(R1, {Ev}, TestKernelHasDevice_id1{});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel6{{.*}}{{.*}} #[[DHAttr2:[0-9]+]]
Q.parallel_for<class WGSizeKernel6>(R1, Props, Redu1, [](id<1>, auto &) {});
parallel_for<class WGSizeKernel6>(Q, R1, TestKernelHasDevice_id1_1{}, Redu1);
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel7{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel7>(R1, Ev, Props, Redu1,
[](id<1>, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(Ev);
parallel_for<class WGSizeKernel7>(Q, R1, TestKernelHasDevice_id1_1{},
Redu1);
});
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel8{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel8>(R1, {Ev}, Props, Redu1,
[](id<1>, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on({Ev});
parallel_for<class WGSizeKernel8>(Q, R1, TestKernelHasDevice_id1_1{},
Redu1);
});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel9(){{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel9>(NDR1, Props, [](nd_item<1>) {});
nd_launch<class WGSizeKernel9>(Q, NDR1, TestKernelHasDevice_nd_item1{});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel10(){{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel10>(NDR1, Ev, Props, [](nd_item<1>) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(Ev);
nd_launch<class WGSizeKernel10>(CGH, NDR1, TestKernelHasDevice_nd_item1{});
});
// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel11(){{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel11>(NDR1, {Ev}, Props, [](nd_item<1>) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on({Ev});
nd_launch<class WGSizeKernel11>(CGH, NDR1, TestKernelHasDevice_nd_item1{});
});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel12{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel12>(NDR1, Props, Redu1,
[](nd_item<1>, auto &) {});
nd_launch<class WGSizeKernel12>(Q, NDR1, TestKernelHasDevice_nd_item1_1{},
Redu1);
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel13{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel13>(NDR1, Ev, Props, Redu1,
[](nd_item<1>, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(Ev);
nd_launch<class WGSizeKernel13>(CGH, NDR1, TestKernelHasDevice_nd_item1_1{},
Redu1);
});
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel14{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel14>(NDR1, {Ev}, Props, Redu1,
[](nd_item<1>, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on({Ev});
nd_launch<class WGSizeKernel14>(CGH, NDR1, TestKernelHasDevice_nd_item1_1{},
Redu1);
});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel15{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel15>(NDR1, Props, Redu1, Redu2,
[](nd_item<1>, auto &, auto &) {});
nd_launch<class WGSizeKernel15>(Q, NDR1, TestKernelHasDevice_nd_item1_2{},
Redu1, Redu2);
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel16{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel16>(NDR1, Ev, Props, Redu1, Redu2,
[](nd_item<1>, auto &, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on(Ev);
nd_launch<class WGSizeKernel16>(CGH, NDR1, TestKernelHasDevice_nd_item1_2{},
Redu1, Redu2);
});
// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel17{{.*}}{{.*}} #[[DHAttr2]]
Q.parallel_for<class WGSizeKernel17>(NDR1, {Ev}, Props, Redu1, Redu2,
[](nd_item<1>, auto &, auto &) {});
Q.submit([&](sycl::handler &CGH) {
CGH.depends_on({Ev});
nd_launch<class WGSizeKernel17>(CGH, NDR1, TestKernelHasDevice_nd_item1_2{},
Redu1, Redu2);
});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel18(){{.*}} #[[DHAttr1]]
Q.submit([&](handler &CGH) {
CGH.single_task<class WGSizeKernel18>(Props, []() {});
CGH.single_task<class WGSizeKernel18>(TestKernelHasDevice{});
});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel19(){{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for<class WGSizeKernel19>(R1, Props, [](id<1>) {});
CGH.parallel_for<class WGSizeKernel19>(R1, TestKernelHasDevice_id1{});
});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel20{{.*}}{{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for<class WGSizeKernel20>(R1, Props, Redu1,
[](id<1>, auto &) {});
CGH.parallel_for<class WGSizeKernel20>(R1, Redu1,
TestKernelHasDevice_id1_1{});
});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel21(){{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for<class WGSizeKernel21>(NDR1, Props, [](nd_item<1>) {});
CGH.parallel_for<class WGSizeKernel21>(NDR1,
TestKernelHasDevice_nd_item1{});
});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel22{{.*}}{{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for<class WGSizeKernel22>(NDR1, Props, Redu1,
[](nd_item<1>, auto &) {});
CGH.parallel_for<class WGSizeKernel22>(NDR1, Redu1,
TestKernelHasDevice_nd_item1_1{});
});

// CHECK-IR: spir_kernel void @{{.*}}MainKrn{{.*}}WGSizeKernel23{{.*}}{{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for<class WGSizeKernel23>(NDR1, Props, Redu1, Redu2,
[](nd_item<1>, auto &, auto &) {});
CGH.parallel_for<class WGSizeKernel23>(NDR1, Redu1, Redu2,
TestKernelHasDevice_nd_item1_2{});
});

// CHECK-IR: spir_kernel void @{{.*}}WGSizeKernel24(){{.*}} #[[DHAttr2]]
Q.submit([&](handler &CGH) {
CGH.parallel_for_work_group<class WGSizeKernel24>(
R1, Props,
[](group<1> G) { G.parallel_for_work_item([&](h_item<1>) {}); });
R1, TestKernelHasDevice_work_group{});
});

return 0;
Expand Down
Loading