From de526451e92171d9c9a6e9863892f3e68157032c Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 10 Aug 2022 15:31:26 -0700 Subject: [PATCH 1/4] [SYCL] Adjust for all Dims offset in accessor's device __init The optimization done for 1-dim accessor is suitable for all dimensions. --- llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp | 6 ++- sycl/include/sycl/accessor.hpp | 53 ++++++++++++-------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp index 73817cff5802f..d0437d2cfc1be 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp @@ -52,10 +52,12 @@ static const char *LegalSYCLFunctions[] = { "^cl::sycl::ext::oneapi::experimental::this_sub_group"}; static const char *LegalSYCLFunctionsInStatelessMode[] = { - "^cl::sycl::multi_ptr<.+>::get", "^cl::sycl::multi_ptr<.+>::multi_ptr", + "^cl::sycl::multi_ptr<.+>::get", + "^cl::sycl::multi_ptr<.+>::multi_ptr", "^cl::sycl::accessor<.+>::get_pointer.+", "^cl::sycl::accessor<.+>::getPointerAdjusted", - "^cl::sycl::accessor<.+>::getQualifiedPtr"}; + "^cl::sycl::accessor<.+>::getQualifiedPtr", + "^cl::sycl::accessor<.+>::getTotalOffset"}; namespace { diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 5deb98e186e91..befbe3973c9e0 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -832,17 +832,14 @@ class __SYCL_SPECIAL_CLASS accessor : template size_t getLinearIndex(id Id) const { -#ifdef __SYCL_DEVICE_ONLY__ - // Pointer is already adjusted for 1D case. - if (Dimensions == 1) - return Id[0]; -#endif // __SYCL_DEVICE_ONLY__ - size_t Result = 0; // Unroll the following loop for both host and device code __SYCL_UNROLL(3) for (int I = 0; I < Dims; ++I) { Result = Result * getMemoryRange()[I] + Id[I]; + // We've already adjusted for the accessor's offset in the __init, so + // don't include it here in case of device. +#ifndef __SYCL_DEVICE_ONLY__ #if __cplusplus >= 201703L if constexpr (!(PropertyListT::template has_property< sycl::ext::oneapi::property::no_offset>())) { @@ -851,6 +848,7 @@ class __SYCL_SPECIAL_CLASS accessor : #else Result += getOffset()[I]; #endif +#endif // __SYCL_DEVICE_ONLY__ } return Result; } @@ -919,17 +917,10 @@ class __SYCL_SPECIAL_CLASS accessor : getAccessRange()[I] = AccessRange[I]; getMemoryRange()[I] = MemRange[I]; } - // In case of 1D buffer, adjust pointer during initialization rather - // then each time in operator[]. Will have to re-adjust in get_pointer - if (1 == AdjustedDim) -#if __cplusplus >= 201703L - if constexpr (!(PropertyListT::template has_property< - sycl::ext::oneapi::property::no_offset>())) { - MData += Offset[0]; - } -#else - MData += Offset[0]; -#endif + + // Adjust for offsets as that part is invariant for all invocations of + // operator[]. Will have to re-adjust in get_pointer. + MData += getTotalOffset(); } // __init variant used by the device compiler for ESIMD kernels. @@ -1797,17 +1788,37 @@ class __SYCL_SPECIAL_CLASS accessor : bool operator!=(const accessor &Rhs) const { return !(*this == Rhs); } private: +#ifdef __SYCL_DEVICE_ONLY__ + size_t getTotalOffset() const { + size_t TotalOffset = 0; + __SYCL_UNROLL(3) + for (int I = 0; I < Dimensions; ++I) { + TotalOffset = TotalOffset * impl.MemRange[I]; +#if __cplusplus >= 201703L + if constexpr (!(PropertyListT::template has_property< + sycl::ext::oneapi::property::no_offset>())) { + TotalOffset += impl.Offset[I]; + } +#else + TotalOffset += impl.Offset[I]; +#endif + } + + return TotalOffset; + } +#endif + // supporting function for get_pointer() - // when dim==1, MData will have been preadjusted for faster access with [] + // MData has been preadjusted with offset for faster access with [] // but for get_pointer() we must return the original pointer. // On device, getQualifiedPtr() returns MData, so we need to backjust it. // On host, getQualifiedPtr() does not return MData, no need to adjust. PtrType getPointerAdjusted() const { #ifdef __SYCL_DEVICE_ONLY__ - if (1 == AdjustedDim) - return getQualifiedPtr() - impl.Offset[0]; -#endif + return getQualifiedPtr() - getTotalOffset(); +#else return getQualifiedPtr(); +#endif } void preScreenAccessor(const size_t elemInBuffer, From c19262a4889d243b12df581b9abd10cca45ded00 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 10 Aug 2022 17:49:10 -0700 Subject: [PATCH 2/4] Use template to guarantee loop unrolling --- sycl/include/sycl/accessor.hpp | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index befbe3973c9e0..87e0127c20fed 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -31,6 +31,8 @@ #include +#include + /// \file accessor.hpp /// The file contains implementations of accessor class. /// @@ -224,6 +226,20 @@ template +void dim_loop_impl(std::integer_sequence, F &&f) { +#if __cplusplus >= 201703L + (f(Inds), ...); +#else + (void)std::initializer_list{((void)(f(Inds)), 0)...}; +#endif +} + +template void dim_loop(F &&f) { + dim_loop_impl(std::make_index_sequence{}, std::forward(f)); +} + void __SYCL_EXPORT constructorNotification(void *BufferObj, void *AccessorObj, access::target Target, access::mode Mode, @@ -833,9 +849,7 @@ class __SYCL_SPECIAL_CLASS accessor : template size_t getLinearIndex(id Id) const { size_t Result = 0; - // Unroll the following loop for both host and device code - __SYCL_UNROLL(3) - for (int I = 0; I < Dims; ++I) { + detail::dim_loop([&, this](size_t I) { Result = Result * getMemoryRange()[I] + Id[I]; // We've already adjusted for the accessor's offset in the __init, so // don't include it here in case of device. @@ -849,7 +863,8 @@ class __SYCL_SPECIAL_CLASS accessor : Result += getOffset()[I]; #endif #endif // __SYCL_DEVICE_ONLY__ - } + }); + return Result; } @@ -1791,8 +1806,7 @@ class __SYCL_SPECIAL_CLASS accessor : #ifdef __SYCL_DEVICE_ONLY__ size_t getTotalOffset() const { size_t TotalOffset = 0; - __SYCL_UNROLL(3) - for (int I = 0; I < Dimensions; ++I) { + detail::dim_loop([&, this](size_t I) { TotalOffset = TotalOffset * impl.MemRange[I]; #if __cplusplus >= 201703L if constexpr (!(PropertyListT::template has_property< @@ -1802,7 +1816,7 @@ class __SYCL_SPECIAL_CLASS accessor : #else TotalOffset += impl.Offset[I]; #endif - } + }); return TotalOffset; } From 77ee765a006707283e57a371b5cf9ba5d58d6ff8 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 11 Aug 2022 10:48:38 -0700 Subject: [PATCH 3/4] Make sycl/test/gdb/accessors-device.cpp more reliable --- sycl/test/gdb/accessors-device.cpp | 52 ++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/sycl/test/gdb/accessors-device.cpp b/sycl/test/gdb/accessors-device.cpp index b0aaf7e9d43ff..6fcf8051d5610 100644 --- a/sycl/test/gdb/accessors-device.cpp +++ b/sycl/test/gdb/accessors-device.cpp @@ -1,19 +1,51 @@ -// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -ast-dump %s -I %sycl_include -Wno-sycl-strict | FileCheck %s +// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -I %sycl_include -Wno-sycl-strict | FileCheck %s // UNSUPPORTED: windows -#include +#include typedef sycl::accessor dummy; +using namespace sycl; + +int main() { + queue q; + buffer b(1); + q.submit([&](handler &cgh) { + accessor a{b, cgh}; + + cgh.single_task([=]() { a[0] = 42; }); + }); +} + // AccessorImplDevice must have MemRange and Offset fields -// CHECK: CXXRecordDecl {{.*}} class AccessorImplDevice definition -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced Offset -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced MemRange +// CHECK: 0 | class sycl::detail::AccessorImplDevice<1> +// CHECK-NEXT: 0 | class sycl::id<1> Offset +// CHECK-NEXT: 0 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 0 | size_t[1] common_array +// CHECK-NEXT: 8 | class sycl::range<1> AccessRange +// CHECK-NEXT: 8 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 8 | size_t[1] common_array +// CHECK-NEXT: 16 | class sycl::range<1> MemRange +// CHECK-NEXT: 16 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 16 | size_t[1] common_array +// CHECK-NEXT: | [sizeof=24, dsize=24, align=8, +// CHECK-NEXT: | nvsize=24, nvalign=8] // accessor.impl must be present and of AccessorImplDevice type -// CHECK: CXXRecordDecl {{.*}} class accessor definition -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced impl 'detail::AccessorImplDevice' +// CHECK: 0 | class sycl::accessor +// CHECK-NEXT: 0 | class sycl::detail::accessor_common (base) (empty) +// CHECK-NEXT: 0 | class sycl::detail::AccessorImplDevice<1> impl +// CHECK-NEXT: 0 | class sycl::id<1> Offset +// CHECK-NEXT: 0 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 0 | size_t[1] common_array +// CHECK-NEXT: 8 | class sycl::range<1> AccessRange +// CHECK-NEXT: 8 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 8 | size_t[1] common_array +// CHECK-NEXT: 16 | class sycl::range<1> MemRange +// CHECK-NEXT: 16 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 16 | size_t[1] common_array +// CHECK-NEXT: 24 | union sycl::accessor::(anonymous at +// CHECK-NEXT: 24 | ConcreteASPtrType MData +// CHECK-NEXT: | [sizeof=32, dsize=32, align=8, +// CHECK-NEXT: | nvsize=32, nvalign=8] From 60b593aaf3566e6b77cb6aca26b056bf477b6ef8 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 16 Aug 2022 14:46:39 -0700 Subject: [PATCH 4/4] Address review comment --- sycl/include/sycl/detail/defines_elementary.hpp | 14 -------------- sycl/test/gdb/accessors-device.cpp | 2 -- 2 files changed, 16 deletions(-) diff --git a/sycl/include/sycl/detail/defines_elementary.hpp b/sycl/include/sycl/detail/defines_elementary.hpp index aa609776521d6..b1c78b9f6f93a 100644 --- a/sycl/include/sycl/detail/defines_elementary.hpp +++ b/sycl/include/sycl/detail/defines_elementary.hpp @@ -105,20 +105,6 @@ #define __SYCL_WARNING(msg) __pragma(message(msg)) #endif // __GNUC__ -// Define __SYCL_UNROLL to add pragma/attribute unroll to a loop. -#ifndef __SYCL_UNROLL -#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x)) -#elif defined(__clang__) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x)) -#elif (defined(__GNUC__) && __GNUC__ >= 8) || \ - (defined(__GNUG__) && __GNUG__ >= 8) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(GCC unroll x)) -#else -#define __SYCL_UNROLL(x) -#endif // compiler switch -#endif // __SYCL_UNROLL - #if !defined(SYCL_DISABLE_CPP_VERSION_CHECK_WARNING) && __cplusplus < 201703L #if defined(_MSC_VER) && !defined(__clang__) diff --git a/sycl/test/gdb/accessors-device.cpp b/sycl/test/gdb/accessors-device.cpp index 6fcf8051d5610..2f2a079977f1d 100644 --- a/sycl/test/gdb/accessors-device.cpp +++ b/sycl/test/gdb/accessors-device.cpp @@ -2,8 +2,6 @@ // UNSUPPORTED: windows #include -typedef sycl::accessor dummy; - using namespace sycl; int main() {