diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp index 5d08ba67d5c5e..c14f119733697 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp @@ -52,10 +52,12 @@ static const char *LegalSYCLFunctions[] = { "^sycl::_V1::ext::oneapi::experimental::this_sub_group"}; static const char *LegalSYCLFunctionsInStatelessMode[] = { - "^sycl::_V1::multi_ptr<.+>::get", "^sycl::_V1::multi_ptr<.+>::multi_ptr", + "^sycl::_V1::multi_ptr<.+>::get", + "^sycl::_V1::multi_ptr<.+>::multi_ptr", "^sycl::_V1::accessor<.+>::get_pointer.+", "^sycl::_V1::accessor<.+>::getPointerAdjusted", - "^sycl::_V1::accessor<.+>::getQualifiedPtr"}; + "^sycl::_V1::accessor<.+>::getQualifiedPtr", + "^sycl::_V1::accessor<.+>::getTotalOffset"}; namespace { diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 0fd532bebcd57..834fc0cbcc246 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -31,6 +31,8 @@ #include +#include + /// \file accessor.hpp /// The file contains implementations of accessor class. /// @@ -224,6 +226,20 @@ template +void dim_loop_impl(std::integer_sequence, F &&f) { +#if __cplusplus >= 201703L + (f(Inds), ...); +#else + (void)std::initializer_list{((void)(f(Inds)), 0)...}; +#endif +} + +template void dim_loop(F &&f) { + dim_loop_impl(std::make_index_sequence{}, std::forward(f)); +} + void __SYCL_EXPORT constructorNotification(void *BufferObj, void *AccessorObj, access::target Target, access::mode Mode, @@ -832,17 +848,12 @@ class __SYCL_SPECIAL_CLASS accessor : template size_t getLinearIndex(id Id) const { -#ifdef __SYCL_DEVICE_ONLY__ - // Pointer is already adjusted for 1D case. - if (Dimensions == 1) - return Id[0]; -#endif // __SYCL_DEVICE_ONLY__ - size_t Result = 0; - // Unroll the following loop for both host and device code - __SYCL_UNROLL(3) - for (int I = 0; I < Dims; ++I) { + detail::dim_loop([&, this](size_t I) { Result = Result * getMemoryRange()[I] + Id[I]; + // We've already adjusted for the accessor's offset in the __init, so + // don't include it here in case of device. +#ifndef __SYCL_DEVICE_ONLY__ #if __cplusplus >= 201703L if constexpr (!(PropertyListT::template has_property< sycl::ext::oneapi::property::no_offset>())) { @@ -851,7 +862,9 @@ class __SYCL_SPECIAL_CLASS accessor : #else Result += getOffset()[I]; #endif - } +#endif // __SYCL_DEVICE_ONLY__ + }); + return Result; } @@ -919,17 +932,10 @@ class __SYCL_SPECIAL_CLASS accessor : getAccessRange()[I] = AccessRange[I]; getMemoryRange()[I] = MemRange[I]; } - // In case of 1D buffer, adjust pointer during initialization rather - // then each time in operator[]. Will have to re-adjust in get_pointer - if (1 == AdjustedDim) -#if __cplusplus >= 201703L - if constexpr (!(PropertyListT::template has_property< - sycl::ext::oneapi::property::no_offset>())) { - MData += Offset[0]; - } -#else - MData += Offset[0]; -#endif + + // Adjust for offsets as that part is invariant for all invocations of + // operator[]. Will have to re-adjust in get_pointer. + MData += getTotalOffset(); } // __init variant used by the device compiler for ESIMD kernels. @@ -1797,17 +1803,36 @@ class __SYCL_SPECIAL_CLASS accessor : bool operator!=(const accessor &Rhs) const { return !(*this == Rhs); } private: +#ifdef __SYCL_DEVICE_ONLY__ + size_t getTotalOffset() const { + size_t TotalOffset = 0; + detail::dim_loop([&, this](size_t I) { + TotalOffset = TotalOffset * impl.MemRange[I]; +#if __cplusplus >= 201703L + if constexpr (!(PropertyListT::template has_property< + sycl::ext::oneapi::property::no_offset>())) { + TotalOffset += impl.Offset[I]; + } +#else + TotalOffset += impl.Offset[I]; +#endif + }); + + return TotalOffset; + } +#endif + // supporting function for get_pointer() - // when dim==1, MData will have been preadjusted for faster access with [] + // MData has been preadjusted with offset for faster access with [] // but for get_pointer() we must return the original pointer. // On device, getQualifiedPtr() returns MData, so we need to backjust it. // On host, getQualifiedPtr() does not return MData, no need to adjust. PtrType getPointerAdjusted() const { #ifdef __SYCL_DEVICE_ONLY__ - if (1 == AdjustedDim) - return getQualifiedPtr() - impl.Offset[0]; -#endif + return getQualifiedPtr() - getTotalOffset(); +#else return getQualifiedPtr(); +#endif } void preScreenAccessor(const size_t elemInBuffer, diff --git a/sycl/include/sycl/detail/defines_elementary.hpp b/sycl/include/sycl/detail/defines_elementary.hpp index aa609776521d6..b1c78b9f6f93a 100644 --- a/sycl/include/sycl/detail/defines_elementary.hpp +++ b/sycl/include/sycl/detail/defines_elementary.hpp @@ -105,20 +105,6 @@ #define __SYCL_WARNING(msg) __pragma(message(msg)) #endif // __GNUC__ -// Define __SYCL_UNROLL to add pragma/attribute unroll to a loop. -#ifndef __SYCL_UNROLL -#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x)) -#elif defined(__clang__) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(unroll x)) -#elif (defined(__GNUC__) && __GNUC__ >= 8) || \ - (defined(__GNUG__) && __GNUG__ >= 8) -#define __SYCL_UNROLL(x) _Pragma(__SYCL_STRINGIFY(GCC unroll x)) -#else -#define __SYCL_UNROLL(x) -#endif // compiler switch -#endif // __SYCL_UNROLL - #if !defined(SYCL_DISABLE_CPP_VERSION_CHECK_WARNING) && __cplusplus < 201703L #if defined(_MSC_VER) && !defined(__clang__) diff --git a/sycl/test/gdb/accessors-device.cpp b/sycl/test/gdb/accessors-device.cpp index b0aaf7e9d43ff..2f2a079977f1d 100644 --- a/sycl/test/gdb/accessors-device.cpp +++ b/sycl/test/gdb/accessors-device.cpp @@ -1,19 +1,49 @@ -// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -ast-dump %s -I %sycl_include -Wno-sycl-strict | FileCheck %s +// RUN: %clangxx -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -I %sycl_include -Wno-sycl-strict | FileCheck %s // UNSUPPORTED: windows -#include +#include -typedef sycl::accessor dummy; +using namespace sycl; + +int main() { + queue q; + buffer b(1); + q.submit([&](handler &cgh) { + accessor a{b, cgh}; + + cgh.single_task([=]() { a[0] = 42; }); + }); +} // AccessorImplDevice must have MemRange and Offset fields -// CHECK: CXXRecordDecl {{.*}} class AccessorImplDevice definition -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced Offset -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced MemRange +// CHECK: 0 | class sycl::detail::AccessorImplDevice<1> +// CHECK-NEXT: 0 | class sycl::id<1> Offset +// CHECK-NEXT: 0 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 0 | size_t[1] common_array +// CHECK-NEXT: 8 | class sycl::range<1> AccessRange +// CHECK-NEXT: 8 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 8 | size_t[1] common_array +// CHECK-NEXT: 16 | class sycl::range<1> MemRange +// CHECK-NEXT: 16 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 16 | size_t[1] common_array +// CHECK-NEXT: | [sizeof=24, dsize=24, align=8, +// CHECK-NEXT: | nvsize=24, nvalign=8] // accessor.impl must be present and of AccessorImplDevice type -// CHECK: CXXRecordDecl {{.*}} class accessor definition -// CHECK-NOT: CXXRecordDecl {{.*}} definition -// CHECK: FieldDecl {{.*}} referenced impl 'detail::AccessorImplDevice' +// CHECK: 0 | class sycl::accessor +// CHECK-NEXT: 0 | class sycl::detail::accessor_common (base) (empty) +// CHECK-NEXT: 0 | class sycl::detail::AccessorImplDevice<1> impl +// CHECK-NEXT: 0 | class sycl::id<1> Offset +// CHECK-NEXT: 0 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 0 | size_t[1] common_array +// CHECK-NEXT: 8 | class sycl::range<1> AccessRange +// CHECK-NEXT: 8 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 8 | size_t[1] common_array +// CHECK-NEXT: 16 | class sycl::range<1> MemRange +// CHECK-NEXT: 16 | class sycl::detail::array<1> (base) +// CHECK-NEXT: 16 | size_t[1] common_array +// CHECK-NEXT: 24 | union sycl::accessor::(anonymous at +// CHECK-NEXT: 24 | ConcreteASPtrType MData +// CHECK-NEXT: | [sizeof=32, dsize=32, align=8, +// CHECK-NEXT: | nvsize=32, nvalign=8]