55# it does double the number of operations compared to accumulate,
66# though for cheap operations like + this does not have much impact (20%)
77function _accumulate_pairwise! (op:: Op , c:: AbstractVector{T} , v:: AbstractVector , s, i1, n):: T where {T,Op}
8- @inbounds if n < 128
9- s_ = v[i1]
10- c[i1] = op (s, s_)
8+ if n < 128
9+ @inbounds s_ = v[i1]
10+ ci1 = op (s, s_)
11+ @inbounds c[i1] = ci1
1112 for i = i1+ 1 : i1+ n- 1
12- s_ = op (s_, v[i])
13- c[i] = op (s, s_)
13+ s_ = op (s_, @inbounds (v[i]))
14+ ci = op (s, s_)
15+ @inbounds c[i] = ci
1416 end
1517 else
1618 n2 = n >> 1
@@ -26,7 +28,8 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
2628 n = length (li)
2729 n == 0 && return result
2830 i1 = first (li)
29- @inbounds result[i1] = v1 = reduce_first (op,v[i1])
31+ v1 = reduce_first (op, @inbounds (v[i1]))
32+ @inbounds result[i1] = v1
3033 n == 1 && return result
3134 _accumulate_pairwise! (op, result, v, v1, i1+ 1 , n- 1 )
3235 return result
@@ -378,16 +381,16 @@ function _accumulate!(op, B, A, dims::Integer, init::Union{Nothing, Some})
378381 # We can accumulate to a temporary variable, which allows
379382 # register usage and will be slightly faster
380383 ind1 = inds_t[1 ]
381- @inbounds for I in CartesianIndices (tail (inds_t))
384+ for I in CartesianIndices (tail (inds_t))
382385 if init === nothing
383- tmp = reduce_first (op, A[first (ind1), I])
386+ tmp = reduce_first (op, @inbounds ( A[first (ind1), I]) )
384387 else
385- tmp = op (something (init), A[first (ind1), I])
388+ tmp = op (something (init), @inbounds ( A[first (ind1), I]) )
386389 end
387- B[first (ind1), I] = tmp
390+ @inbounds B[first (ind1), I] = tmp
388391 for i_1 = first (ind1)+ 1 : last (ind1)
389- tmp = op (tmp, A[i_1, I])
390- B[i_1, I] = tmp
392+ tmp = op (tmp, @inbounds ( A[i_1, I]) )
393+ @inbounds B[i_1, I] = tmp
391394 end
392395 end
393396 else
@@ -401,25 +404,31 @@ end
401404@noinline function _accumulaten! (op, B, A, R1, ind, R2, init:: Nothing )
402405 # Copy the initial element in each 1d vector along dimension `dim`
403406 ii = first (ind)
404- @inbounds for J in R2, I in R1
405- B[I, ii, J] = reduce_first (op, A[I, ii, J])
407+ for J in R2, I in R1
408+ tmp = reduce_first (op, @inbounds (A[I, ii, J]))
409+ @inbounds B[I, ii, J] = tmp
406410 end
407411 # Accumulate
408- @inbounds for J in R2, i in first (ind)+ 1 : last (ind), I in R1
409- B[I, i, J] = op (B[I, i- 1 , J], A[I, i, J])
412+ for J in R2, i in first (ind)+ 1 : last (ind), I in R1
413+ @inbounds Bv, Av = B[I, i- 1 , J], A[I, i, J]
414+ tmp = op (Bv, Av)
415+ @inbounds B[I, i, J] = tmp
410416 end
411417 B
412418end
413419
414420@noinline function _accumulaten! (op, B, A, R1, ind, R2, init:: Some )
415421 # Copy the initial element in each 1d vector along dimension `dim`
416422 ii = first (ind)
417- @inbounds for J in R2, I in R1
418- B[I, ii, J] = op (something (init), A[I, ii, J])
423+ for J in R2, I in R1
424+ tmp = op (something (init), @inbounds (A[I, ii, J]))
425+ @inbounds B[I, ii, J] = tmp
419426 end
420427 # Accumulate
421- @inbounds for J in R2, i in first (ind)+ 1 : last (ind), I in R1
422- B[I, i, J] = op (B[I, i- 1 , J], A[I, i, J])
428+ for J in R2, i in first (ind)+ 1 : last (ind), I in R1
429+ @inbounds Bv, Av = B[I, i- 1 , J], A[I, i, J]
430+ tmp = op (Bv, Av)
431+ @inbounds B[I, i, J] = tmp
423432 end
424433 B
425434end
@@ -433,10 +442,10 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
433442 cur_val = v1
434443 B[i1] = cur_val
435444 next = iterate (inds, state)
436- @inbounds while next != = nothing
445+ while next != = nothing
437446 (i, state) = next
438- cur_val = op (cur_val, A[i])
439- B[i] = cur_val
447+ cur_val = op (cur_val, @inbounds ( A[i]) )
448+ @inbounds B[i] = cur_val
440449 next = iterate (inds, state)
441450 end
442451 return B
0 commit comments