From 57eb53ad3052af22e150d13dfbe1a03ca3a18144 Mon Sep 17 00:00:00 2001 From: Pablo Reble Date: Fri, 11 Aug 2023 14:19:37 -0500 Subject: [PATCH 1/2] Consistently use unsigned counter in loop over elements --- sycl/include/sycl/reduction.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 9d398961181d5..0dfea23e9edd9 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1328,7 +1328,7 @@ struct NDRangeReduction< // If there are multiple values, reduce each separately // reduce_over_group is only defined for each T, not for span size_t LID = NDId.get_local_id(0); - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { auto &RedElem = *getReducerAccess(Reducer).getElement(E); RedElem = reduce_over_group(Group, RedElem, BOp); if (LID == 0) { @@ -1362,7 +1362,7 @@ struct NDRangeReduction< if (DoReducePartialSumsInLastWG[0]) { // Reduce each result separately // TODO: Opportunity to parallelize across elements. - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { auto LocalSum = getReducerAccess(Reducer).getIdentity(); for (size_t I = LID; I < NWorkGroups; I += WGSize) LocalSum = BOp(LocalSum, PartialSums[I * NElements + E]); @@ -1538,7 +1538,7 @@ template <> struct NDRangeReduction { // If there are multiple values, reduce each separately // This prevents local memory from scaling with elements size_t LID = NDId.get_local_linear_id(); - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { doTreeReduction( WGSize, NDId, LocalReds, ElementCombiner, @@ -1574,7 +1574,7 @@ template <> struct NDRangeReduction { if (DoReducePartialSumsInLastWG[0]) { // Reduce each result separately // TODO: Opportunity to parallelize across elements - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { doTreeReduction( NWorkGroups, NDId, LocalReds, ElementCombiner, [&](size_t I) { return PartialSums[I * NElements + E]; }); @@ -1614,7 +1614,7 @@ struct NDRangeReduction { KernelFunc(NDIt, Reducer); typename Reduction::binary_operation BOp; - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { auto &ReducerElem = getReducerAccess(Reducer).getElement(E); *ReducerElem = reduce_over_group(NDIt.get_group(), *ReducerElem, BOp); } @@ -1663,7 +1663,7 @@ struct NDRangeReduction< // If there are multiple values, reduce each separately // This prevents local memory from scaling with elements - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { doTreeReduction( WGSize, NDIt, LocalReds, ElementCombiner, @@ -1738,7 +1738,7 @@ struct NDRangeReduction< // Compute the partial sum/reduction for the work-group. size_t WGID = NDIt.get_group_linear_id(); typename Reduction::binary_operation BOp; - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { typename Reduction::result_type PSum; PSum = *getReducerAccess(Reducer).getElement(E); PSum = reduce_over_group(NDIt.get_group(), PSum, BOp); @@ -1800,7 +1800,7 @@ struct NDRangeReduction< size_t WGID = NDIt.get_group_linear_id(); size_t GID = NDIt.get_global_linear_id(); - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { typename Reduction::result_type PSum = (HasUniformWG || (GID < NWorkItems)) ? *In[GID * NElements + E] @@ -1897,7 +1897,7 @@ template <> struct NDRangeReduction { // If there are multiple values, reduce each separately // This prevents local memory from scaling with elements - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { doTreeReduction( WGSize, NDIt, LocalReds, ElementCombiner, @@ -2004,7 +2004,7 @@ template <> struct NDRangeReduction { return LHS.combine(BOp, RHS); }; - for (int E = 0; E < NElements; ++E) { + for (size_t E = 0; E < NElements; ++E) { doTreeReduction( RemainingWorkSize, NDIt, LocalReds, ElementCombiner, From 7d6fcbbd085eb19523ae287d5c0df4719778a55b Mon Sep 17 00:00:00 2001 From: Pablo Reble Date: Fri, 11 Aug 2023 14:23:05 -0500 Subject: [PATCH 2/2] add missing cases --- sycl/include/sycl/reduction.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 0dfea23e9edd9..77c60b9bb7829 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -990,7 +990,7 @@ class reduction_impl_algo { // instantiated for the code below. auto DoIt = [&](auto &Out) { auto RWReduVal = std::make_shared>(); - for (int i = 0; i < num_elements; ++i) { + for (size_t i = 0; i < num_elements; ++i) { (*RWReduVal)[i] = decltype(MIdentityContainer)::getIdentity(); } auto Buf = std::make_shared>(RWReduVal.get()->data(), @@ -1021,7 +1021,7 @@ class reduction_impl_algo { size_t NElements = num_elements; CopyHandler.single_task<__sycl_init_mem_for>([=] { - for (int i = 0; i < NElements; ++i) { + for (size_t i = 0; i < NElements; ++i) { if (IsUpdateOfUserVar) Out[i] = BOp(Out[i], Mem[i]); else @@ -1205,7 +1205,7 @@ void reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) { bool IsUpdateOfUserVar = !Redu.initializeToIdentity(); auto BOp = Redu.getBinaryOperation(); CGH.single_task([=] { - for (int i = 0; i < NElements; ++i) { + for (size_t i = 0; i < NElements; ++i) { auto Elem = InAcc[i]; if (IsUpdateOfUserVar) UserVarPtr[i] = BOp(UserVarPtr[i], *Elem);