From a691a3f7ec25949433099c1fa5bb32dbadb374d6 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Wed, 10 May 2023 09:45:29 -0700 Subject: [PATCH 1/3] [SYCL] Implement sycl_ext_oneapi_root_group Signed-off-by: Michael Aziz --- .../ext/oneapi/experimental/root_group.hpp | 134 ++++++++++++++++++ .../sycl/ext/oneapi/properties/property.hpp | 3 +- sycl/include/sycl/kernel.hpp | 4 + sycl/include/sycl/nd_item.hpp | 9 ++ sycl/test-e2e/GroupAlgorithm/root_group.cpp | 116 +++++++++++++++ 5 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/root_group.hpp create mode 100644 sycl/test-e2e/GroupAlgorithm/root_group.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp new file mode 100644 index 0000000000000..271a07cd2445d --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -0,0 +1,134 @@ +//==--- root_group.hpp --- SYCL extension for root groups ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#define SYCL_EXT_ONEAPI_ROOT_GROUP 1 + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace ext::oneapi::experimental { + +namespace info::kernel_queue_specific { +struct max_num_work_group_sync { + using return_type = size_t; +}; +} // namespace info::kernel_queue_specific + +struct use_root_sync_key { + using value_t = property_value; +}; + +inline constexpr use_root_sync_key::value_t use_root_sync; + +template <> struct is_property_key : std::true_type {}; + +template <> struct detail::PropertyToKind { + static constexpr PropKind Kind = PropKind::UseRootSync; +}; + +template <> +struct detail::IsCompileTimeProperty : std::true_type {}; + +template class root_group { +public: + using id_type = id; + using range_type = range; + using linear_id_type = size_t; + static constexpr int dimensions = Dimensions; + static constexpr memory_scope fence_scope = memory_scope::device; + + id get_group_id() const { return id{}; }; + + id get_local_id() const { return it.get_global_id(); } + + range get_group_range() const { + if constexpr (Dimensions == 3) { + return range<3>{1, 1, 1}; + } else if constexpr (Dimensions == 2) { + return range<2>{1, 1}; + } else { + return range<1>{1}; + } + } + + range get_local_range() const { return it.get_global_range(); }; + + range get_max_local_range() const { return get_local_range(); }; + + size_t get_group_linear_id() const { return 0; }; + + size_t get_local_linear_id() const { return it.get_global_linear_id(); } + + size_t get_group_linear_range() const { return get_group_range().size(); }; + + size_t get_local_linear_range() const { return get_local_range().size(); }; + + bool leader() const { return get_local_id() == 0; }; + + root_group(nd_item it) : it{it} {} + +private: + sycl::nd_item it; +}; + +template +group get_child_group(root_group g) { + (void)g; + return this_group(); +} + +template sub_group get_child_group(group g) { + (void)g; + return this_sub_group(); +} + +namespace this_kernel { +template +ext::oneapi::experimental::root_group get_root_group() { + return root_group{ext::oneapi::experimental::this_nd_item()}; +} +} // namespace this_kernel + +} // namespace ext::oneapi::experimental + +template <> +typename ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync::return_type + kernel::ext_oneapi_get_info< + ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync>(const queue &q) const { + // TODO: query the backend to return a value >= 1. + return 1; +} + +template +void group_barrier(ext::oneapi::experimental::root_group G, + memory_scope FenceScope = decltype(G)::fence_scope) { + (void)G; + (void)FenceScope; +#ifdef __SYCL_DEVICE_ONLY__ + // TODO: Change __spv::Scope::Workgroup to __spv::Scope::Device once backends + // support device scope. + __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup, + __spv::MemorySemanticsMask::SubgroupMemory | + __spv::MemorySemanticsMask::WorkgroupMemory | + __spv::MemorySemanticsMask::CrossWorkgroupMemory); +#else + throw sycl::runtime_error("Barriers are not supported on host device", + PI_ERROR_INVALID_DEVICE); +#endif +} + +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 1c63a3966f6fc..6ed12856e339f 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -193,8 +193,9 @@ enum PropKind : uint32_t { PipeProtocol = 27, ReadyLatency = 28, UsesValid = 29, + UseRootSync = 30, // PropKindSize must always be the last value. - PropKindSize = 30, + PropKindSize = 31, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 810b700044e0b..45455c78e30f2 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -25,6 +25,7 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { // Forward declaration class context; +class queue; template class backend_traits; template class kernel_bundle; template @@ -157,6 +158,9 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { typename detail::is_kernel_device_specific_info_desc::return_type get_info(const device &Device, const range<3> &WGSize) const; + template + typename Param::return_type ext_oneapi_get_info(const queue &q) const; + private: /// Constructs a SYCL kernel object from a valid kernel_impl instance. kernel(std::shared_ptr Impl); diff --git a/sycl/include/sycl/nd_item.hpp b/sycl/include/sycl/nd_item.hpp index 2c2a4d7992e39..62fb612910fd6 100644 --- a/sycl/include/sycl/nd_item.hpp +++ b/sycl/include/sycl/nd_item.hpp @@ -29,6 +29,10 @@ namespace detail { class Builder; } +namespace ext::oneapi::experimental { +template class root_group; +} + /// Identifies an instance of the function object executing at each point in an /// nd_range. /// @@ -163,6 +167,11 @@ template class nd_item { Group.wait_for(events...); } + sycl::ext::oneapi::experimental::root_group + ext_oneapi_get_root_group() const { + return sycl::ext::oneapi::experimental::root_group{*this}; + } + nd_item(const nd_item &rhs) = default; nd_item(nd_item &&rhs) = default; diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp new file mode 100644 index 0000000000000..1b8c2534873c4 --- /dev/null +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -0,0 +1,116 @@ +// RUN: %{build} -I . -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include + +static constexpr int WorkGroupSize = 32; + +void testFeatureMacro() { + static_assert(SYCL_EXT_ONEAPI_ROOT_GROUP == 1, + "SYCL_EXT_ONEAPI_ROOT_GROUP must have a value of 1"); +} + +void testQueriesAndProperties() { + sycl::queue q; + const auto bundle = + sycl::get_kernel_bundle(q.get_context()); + const auto kernel = bundle.get_kernel(); + const auto maxWGs = kernel.ext_oneapi_get_info< + sycl::ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync>(q); + const auto props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::use_root_sync}; + q.single_task(props, []() {}); + static_assert(std::is_same_v::type, size_t>, + "max_num_work_group_sync query must return size_t"); + assert(maxWGs >= 1 && "max_num_work_group_sync query failed"); +} + +void testRootGroup() { + sycl::queue q; + const auto bundle = + sycl::get_kernel_bundle(q.get_context()); + const auto kernel = bundle.get_kernel(); + const auto maxWGs = kernel.ext_oneapi_get_info< + sycl::ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync>(q); + const auto props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::use_root_sync}; + + int *data = sycl::malloc_shared(maxWGs * WorkGroupSize, q); + const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize}; + q.parallel_for(range, props, [=](sycl::nd_item<1> it) { + auto root = it.ext_oneapi_get_root_group(); + data[root.get_local_id()] = root.get_local_id(); + sycl::group_barrier(root); + + root = sycl::ext::oneapi::experimental::this_kernel::get_root_group<1>(); + int sum = data[root.get_local_id()] + + data[root.get_local_range() - root.get_local_id() - 1]; + sycl::group_barrier(root); + data[root.get_local_id()] = sum; + }); + q.wait(); + + const int workItemCount = static_cast(range.get_global_range().size()); + for (int i = 0; i < workItemCount; i++) { + assert(data[i] == (workItemCount - 1)); + } +} + +void testRootGroupFunctions() { + sycl::queue q; + const auto bundle = + sycl::get_kernel_bundle(q.get_context()); + const auto kernel = bundle.get_kernel(); + const auto maxWGs = kernel.ext_oneapi_get_info< + sycl::ext::oneapi::experimental::info::kernel_queue_specific:: + max_num_work_group_sync>(q); + const auto props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::use_root_sync}; + + constexpr int testCount = 10; + bool *testResults = sycl::malloc_shared(testCount, q); + const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize}; + q.parallel_for( + range, props, [=](sycl::nd_item<1> it) { + const auto root = it.ext_oneapi_get_root_group(); + if (root.leader()) { + testResults[0] = root.get_group_id() == sycl::id<1>(0); + testResults[1] = root.get_local_id() == sycl::id<1>(0); + testResults[2] = root.get_group_range() == sycl::range<1>(1); + testResults[3] = + root.get_local_range() == sycl::range<1>(WorkGroupSize); + testResults[4] = + root.get_max_local_range() == sycl::range<1>(WorkGroupSize); + testResults[5] = root.get_group_linear_id() == 0; + testResults[6] = root.get_local_linear_id() == 0; + testResults[7] = root.get_group_linear_range() == 1; + testResults[8] = root.get_local_linear_range() == WorkGroupSize; + + const auto child = + sycl::ext::oneapi::experimental::get_child_group(root); + const auto grandchild = + sycl::ext::oneapi::experimental::get_child_group(child); + testResults[9] = child == it.get_group(); + } + }); + q.wait(); + + for (int i = 0; i < testCount; i++) { + assert(testResults[i]); + } +} + +int main() { + testFeatureMacro(); + testQueriesAndProperties(); + testRootGroup(); + testRootGroupFunctions(); + return EXIT_SUCCESS; +} From c986b4dc2d288490a40cd83db171f53ed71b3119 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 15 May 2023 13:33:58 -0700 Subject: [PATCH 2/3] Address review comments Signed-off-by: Michael Aziz --- .../sycl/ext/oneapi/experimental/root_group.hpp | 6 +++++- sycl/include/sycl/kernel.hpp | 2 ++ sycl/test-e2e/GroupAlgorithm/root_group.cpp | 13 ++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp index 271a07cd2445d..1643a90e5f4b6 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -20,6 +20,8 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext::oneapi::experimental { namespace info::kernel_queue_specific { +// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once +// #7598 is merged. struct max_num_work_group_sync { using return_type = size_t; }; @@ -119,7 +121,9 @@ void group_barrier(ext::oneapi::experimental::root_group G, (void)FenceScope; #ifdef __SYCL_DEVICE_ONLY__ // TODO: Change __spv::Scope::Workgroup to __spv::Scope::Device once backends - // support device scope. + // support device scope. __spv::Scope::Workgroup is only valid when + // max_num_work_group_sync is 1, so that all work items in a root group will + // also be in the same work group. __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup, __spv::MemorySemanticsMask::SubgroupMemory | __spv::MemorySemanticsMask::WorkgroupMemory | diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 45455c78e30f2..275d07b8e60ac 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -158,6 +158,8 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { typename detail::is_kernel_device_specific_info_desc::return_type get_info(const device &Device, const range<3> &WGSize) const; +// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once +// #7598 is merged. template typename Param::return_type ext_oneapi_get_info(const queue &q) const; diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 1b8c2534873c4..95feeeb9034cf 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -80,16 +80,19 @@ void testRootGroupFunctions() { q.parallel_for( range, props, [=](sycl::nd_item<1> it) { const auto root = it.ext_oneapi_get_root_group(); - if (root.leader()) { + if (root.leader() || root.get_local_id() == 3) { testResults[0] = root.get_group_id() == sycl::id<1>(0); - testResults[1] = root.get_local_id() == sycl::id<1>(0); + testResults[1] = root.leader() + ? root.get_local_id() == sycl::id<1>(0) + : root.get_local_id() == sycl::id<1>(3); testResults[2] = root.get_group_range() == sycl::range<1>(1); testResults[3] = root.get_local_range() == sycl::range<1>(WorkGroupSize); testResults[4] = root.get_max_local_range() == sycl::range<1>(WorkGroupSize); testResults[5] = root.get_group_linear_id() == 0; - testResults[6] = root.get_local_linear_id() == 0; + testResults[6] = + root.get_local_linear_id() == root.get_local_id().get(0); testResults[7] = root.get_group_linear_range() == 1; testResults[8] = root.get_local_linear_range() == WorkGroupSize; @@ -98,6 +101,10 @@ void testRootGroupFunctions() { const auto grandchild = sycl::ext::oneapi::experimental::get_child_group(child); testResults[9] = child == it.get_group(); + static_assert( + std::is_same_v::type, + sycl::sub_group>, + "get_child_group(sycl::group) must return a sycl::subgroup"); } }); q.wait(); From 3c1ff0cf96286e5794146f62da152d3e12c9d81f Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 16 May 2023 06:42:53 -0700 Subject: [PATCH 3/3] Make non-standard constructor private Signed-off-by: Michael Aziz --- .../sycl/ext/oneapi/experimental/root_group.hpp | 10 ++++++---- sycl/include/sycl/ext/oneapi/properties/property.hpp | 2 +- sycl/include/sycl/kernel.hpp | 4 ++-- sycl/test-e2e/GroupAlgorithm/root_group.cpp | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp index 1643a90e5f4b6..c831eb0b12771 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -78,9 +78,12 @@ template class root_group { bool leader() const { return get_local_id() == 0; }; +private: + friend root_group + nd_item::ext_oneapi_get_root_group() const; + root_group(nd_item it) : it{it} {} -private: sycl::nd_item it; }; @@ -96,9 +99,8 @@ template sub_group get_child_group(group g) { } namespace this_kernel { -template -ext::oneapi::experimental::root_group get_root_group() { - return root_group{ext::oneapi::experimental::this_nd_item()}; +template root_group get_root_group() { + return this_nd_item().ext_oneapi_get_root_group(); } } // namespace this_kernel diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 6ed12856e339f..aeefaee9a994d 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -193,7 +193,7 @@ enum PropKind : uint32_t { PipeProtocol = 27, ReadyLatency = 28, UsesValid = 29, - UseRootSync = 30, + UseRootSync = 30, // PropKindSize must always be the last value. PropKindSize = 31, }; diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 275d07b8e60ac..cf58c028b7788 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -158,8 +158,8 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { typename detail::is_kernel_device_specific_info_desc::return_type get_info(const device &Device, const range<3> &WGSize) const; -// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once -// #7598 is merged. + // TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension + // once #7598 is merged. template typename Param::return_type ext_oneapi_get_info(const queue &q) const; diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp index 95feeeb9034cf..2d7edc34eec17 100644 --- a/sycl/test-e2e/GroupAlgorithm/root_group.cpp +++ b/sycl/test-e2e/GroupAlgorithm/root_group.cpp @@ -104,7 +104,7 @@ void testRootGroupFunctions() { static_assert( std::is_same_v::type, sycl::sub_group>, - "get_child_group(sycl::group) must return a sycl::subgroup"); + "get_child_group(sycl::group) must return a sycl::sub_group"); } }); q.wait();