Add mapreduce_single function (#25051)

Since the demise of `r_promote` in #22825, there is now a type-instability in `mapreduce` if the operator does not give an element of the same type as the input. This arose during my implementation of Kahan summation using a reduction operator, see: JuliaMath/KahanSummation.jl#7 This adds a `mapreduce_single` function which defines what the result should be in these cases.
JuliaLang · Jan 4, 2018 · e422547 · e422547
1 parent e3b7b18
commit e422547
Show file tree

Hide file tree

Showing 4 changed files with 117 additions and 64 deletions.
diff --git a/base/reduce.jl b/base/reduce.jl
@@ -12,17 +12,25 @@ else
     const SmallUnsigned = Union{UInt8,UInt16,UInt32}
 end
 
-# Certain reductions like sum and prod may wish to promote the items being reduced over to
-# an appropriate size. Note we need x + zero(x) because some types like Bool have their sum
-# lie in a larger type.
-promote_sys_size(T::Type) = T
-promote_sys_size(::Type{<:SmallSigned}) = Int
-promote_sys_size(::Type{<:SmallUnsigned}) = UInt
-
-promote_sys_size_add(x) = convert(promote_sys_size(typeof(x + zero(x))), x)
-promote_sys_size_mul(x) = convert(promote_sys_size(typeof(x * one(x))), x)
-const _PromoteSysSizeFunction = Union{typeof(promote_sys_size_add),
-                                      typeof(promote_sys_size_mul)}
+"""
+    Base.add_sum(x,y)
+
+The reduction operator used in `sum`. The main difference from [`+`](@ref) is that small
+integers are promoted to `Int`/`UInt`.
+"""
+add_sum(x,y) = x + y
+add_sum(x::SmallSigned,y::SmallSigned) = Int(x) + Int(y)
+add_sum(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) + UInt(y)
+
+"""
+    Base.mul_prod(x,y)
+
+The reduction operator used in `prod`. The main difference from [`*`](@ref) is that small
+integers are promoted to `Int`/`UInt`.
+"""
+mul_prod(x,y) = x * y
+mul_prod(x::SmallSigned,y::SmallSigned) = Int(x) * Int(y)
+mul_prod(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) * UInt(y)
 
 ## foldl && mapfoldl
 
@@ -64,7 +72,7 @@ function mapfoldl(f, op, itr)
         return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
     end
     (x, i) = next(itr, i)
-    v0 = f(x)
+    v0 = mapreduce_first(f, op, x)
     mapfoldl_impl(f, op, v0, itr, i)
 end
 
@@ -133,7 +141,7 @@ function mapfoldr(f, op, itr)
     if isempty(itr)
         return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
     end
-    return mapfoldr_impl(f, op, f(itr[i]), itr, i-1)
+    return mapfoldr_impl(f, op, mapreduce_first(f, op, itr[i]), itr, i-1)
 end
 
 """
@@ -174,7 +182,7 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
 @noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int)
     if ifirst == ilast
         @inbounds a1 = A[ifirst]
-        return f(a1)
+        return mapreduce_first(f, op, a1)
     elseif ifirst + blksize > ilast
         # sequential portion
         @inbounds a1 = A[ifirst]
@@ -238,34 +246,88 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 # handling empty arrays
 _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
+
+"""
+    Base.reduce_empty(op, T)
+
+The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
+with reduction `op` over an empty array with element type of `T`.
+
+If not defined, this will throw an `ArgumentError`.
+"""
 reduce_empty(op, T) = _empty_reduce_error()
 reduce_empty(::typeof(+), T) = zero(T)
+reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
 reduce_empty(::typeof(*), T) = one(T)
+reduce_empty(::typeof(*), ::Type{Char}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
+reduce_empty(::typeof(add_sum), T) = reduce_empty(+, T)
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
+reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
+reduce_empty(::typeof(mul_prod), T) = reduce_empty(*, T)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
+reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
+
+"""
+    Base.mapreduce_empty(f, op, T)
+
+The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
+[`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
+of `T`.
+
+If not defined, this will throw an `ArgumentError`.
+"""
 mapreduce_empty(f, op, T) = _empty_reduce_error()
 mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
-mapreduce_empty(f::_PromoteSysSizeFunction, op, T) =
-    f(mapreduce_empty(identity, op, T))
-mapreduce_empty(::typeof(abs), ::typeof(+), T) = abs(zero(T))
-mapreduce_empty(::typeof(abs2), ::typeof(+), T) = abs2(zero(T))
-mapreduce_empty(::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) =
-    abs(zero(T))
-mapreduce_empty(::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) =
-    abs2(zero(T))
-
-# Allow mapreduce_empty to “see through” promote_sys_size
-let ComposedFunction = typename(typeof(identity ∘ identity)).wrapper
-    global mapreduce_empty(f::ComposedFunction{<:_PromoteSysSizeFunction}, op, T) =
-        f.f(mapreduce_empty(f.g, op, T))
-end
+mapreduce_empty(::typeof(abs), op, T)      = abs(reduce_empty(op, T))
+mapreduce_empty(::typeof(abs2), op, T)     = abs2(reduce_empty(op, T))
+
+mapreduce_empty(f::typeof(abs),  ::Union{typeof(scalarmax), typeof(max)}, T) = abs(zero(T))
+mapreduce_empty(f::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) = abs2(zero(T))
 
 mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
 mapreduce_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
 mapreduce_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
 mapreduce_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()
 
+# handling of single-element iterators
+"""
+    Base.reduce_first(op, x)
+
+The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or
+[`foldr`](@ref) with reduction `op` over an iterator which contains a single element
+`x`. This value may also used to initialise the recursion, so that `reduce(op, [x, y])`
+may call `op(reduce_first(op, x), y)`.
+
+The default is `x` for most types. The main purpose is to ensure type stability, so
+additional methods should only be defined for cases where `op` gives a result with
+different types than its inputs.
+"""
+reduce_first(op, x) = x
+reduce_first(::typeof(+), x::Bool) = Int(x)
+reduce_first(::typeof(*), x::Char) = string(x)
+
+reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
+reduce_first(::typeof(add_sum), x::SmallSigned)   = Int(x)
+reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
+reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
+reduce_first(::typeof(mul_prod), x::SmallSigned)   = Int(x)
+reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)
+
+"""
+    Base.mapreduce_first(f, op, x)
+
+The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
+[`mapfoldr`](@ref) with map `f` and reduction `op` over an iterator which contains a
+single element `x`. This value may also used to initialise the recursion, so that
+`mapreduce(f, op, [x, y])` may call `op(reduce_first(op, f, x), f(y))`.
+
+The default is `reduce_first(op, f(x))`.
+"""
+mapreduce_first(f, op, x) = reduce_first(op, f(x))
+
 _mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)
 
 function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
@@ -275,7 +337,7 @@ function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
         return mapreduce_empty(f, op, T)
     elseif n == 1
         @inbounds a1 = A[inds[1]]
-        return f(a1)
+        return mapreduce_first(f, op, a1)
     elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
         @inbounds i = inds[1]
         @inbounds a1 = A[i]
@@ -294,7 +356,7 @@ end
 _mapreduce(f, op, ::IndexCartesian, A::AbstractArray) = mapfoldl(f, op, A)
 
 mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)
-mapreduce(f, op, a::Number) = f(a)
+mapreduce(f, op, a::Number) = mapreduce_first(f, op, a)
 
 """
     reduce(op, v0, itr)
@@ -372,7 +434,7 @@ In the former case, the integers are widened to system word size and therefore
 the result is 128. In the latter case, no such widening happens and integer
 overflow results in -128.
 """
-sum(f::Callable, a) = mapreduce(promote_sys_size_add ∘ f, +, a)
+sum(f, a) = mapreduce(f, add_sum, a)
 
 """
     sum(itr)
@@ -388,7 +450,7 @@ julia> sum(1:20)
 210
 ```
 """
-sum(a) = mapreduce(promote_sys_size_add, +, a)
+sum(a) = sum(identity, a)
 sum(a::AbstractArray{Bool}) = count(a)
 
 ## prod
@@ -406,7 +468,7 @@ julia> prod(abs2, [2; 3; 4])
 576
 ```
 """
-prod(f::Callable, a) = mapreduce(promote_sys_size_mul ∘ f, *, a)
+prod(f, a) = mapreduce(f, mul_prod, a)
 
 """
     prod(itr)
@@ -422,7 +484,7 @@ julia> prod(1:20)
 2432902008176640000
 ```
 """
-prod(a) = mapreduce(promote_sys_size_mul, *, a)
+prod(a) = mapreduce(identity, mul_prod, a)
 
 ## maximum & minimum
 
@@ -433,7 +495,7 @@ function mapreduce_impl(f, op::Union{typeof(scalarmax),
                         A::AbstractArray, first::Int, last::Int)
     # locate the first non NaN number
     @inbounds a1 = A[first]
-    v = f(a1)
+    v = mapreduce_first(f, op, a1)
     i = first + 1
     while (v == v) && (i <= last)
         @inbounds ai = A[i]

diff --git a/base/reducedim.jl b/base/reducedim.jl
@@ -62,8 +62,8 @@ end
 ###### Generic reduction functions #####
 
 ## initialization
-
-for (Op, initfun) in ((:(typeof(+)), :zero), (:(typeof(*)), :one))
+# initarray! is only called by sum!, prod!, etc.
+for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
     @eval initarray!(a::AbstractArray{T}, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
 end
 
@@ -75,6 +75,7 @@ for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
     @eval initarray!(a::AbstractArray, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
 end
 
+# reducedim_initarray is called by
 reducedim_initarray(A::AbstractArray, region, v0, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), v0)
 reducedim_initarray(A::AbstractArray, region, v0::T) where {T} = reducedim_initarray(A, region, v0, T)
 
@@ -104,10 +105,10 @@ reducedim_initarray0_empty(A::AbstractArray, region,::typeof(identity), ops) = m
 promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
 promote_union(T) = T
 
-function reducedim_init(f, op::typeof(+), A::AbstractArray, region)
+function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
     _reducedim_init(f, op, zero, sum, A, region)
 end
-function reducedim_init(f, op::typeof(*), A::AbstractArray, region)
+function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
     _reducedim_init(f, op, one, prod, A, region)
 end
 function _reducedim_init(f, op, fv, fop, A, region)
@@ -143,19 +144,12 @@ let
         [AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
         [AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}
 
-    global reducedim_init(f::typeof(identity), op::typeof(+), A::T, region) =
-        reducedim_initarray(A, region, zero(eltype(A)))
-    global reducedim_init(f::typeof(identity), op::typeof(*), A::T, region) =
-        reducedim_initarray(A, region, one(eltype(A)))
-    global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(+), A::T, region) =
-        reducedim_initarray(A, region, real(zero(eltype(A))))
-    global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(*), A::T, region) =
-        reducedim_initarray(A, region, real(one(eltype(A))))
+    global reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region) =
+        reducedim_initarray(A, region, mapreduce_first(f, op, zero(eltype(A))))
+    global reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region) =
+        reducedim_initarray(A, region, mapreduce_first(f, op, one(eltype(A))))
 end
 
-reducedim_init(f::Union{typeof(identity),typeof(abs),typeof(abs2)}, op::typeof(+), A::AbstractArray{Bool}, region) =
-    reducedim_initarray(A, region, 0)
-
 ## generic (map)reduction
 
 has_fast_linear_indexing(a::AbstractArray) = false
@@ -610,26 +604,17 @@ julia> any!([1 1], A)
 """
 any!(r, A)
 
-for (fname, op) in [(:sum, :+), (:prod, :*),
+for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
                     (:maximum, :scalarmax), (:minimum, :scalarmin),
                     (:all, :&), (:any, :|)]
-    function compose_promote_sys_size(x)
-        if fname === :sum
-            :(promote_sys_size_add ∘ $x)
-        elseif fname === :prod
-            :(promote_sys_size_mul ∘ $x)
-        else
-            x
-        end
-    end
     fname! = Symbol(fname, '!')
     @eval begin
         $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
             mapreducedim!(f, $(op), initarray!(r, $(op), init, A), A)
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
         $(fname)(f::Function, A::AbstractArray, region) =
-            mapreducedim($(compose_promote_sys_size(:f)), $(op), A, region)
+            mapreducedim(f, $(op), A, region)
         $(fname)(A::AbstractArray, region) = $(fname)(identity, A, region)
     end
 end

diff --git a/test/reduce.jl b/test/reduce.jl
@@ -391,3 +391,8 @@ test18695(r) = sum( t^2 for t in r )
 # test neutral element not picked incorrectly for &, |
 @test @inferred(foldl(&, Int[1])) === 1
 @test_throws ArgumentError foldl(&, Int[])
+
+# prod on Chars
+@test prod(Char[]) == ""
+@test prod(Char['a']) == "a"
+@test prod(Char['a','b']) == "ab"
diff --git a/test/reducedim.jl b/test/reducedim.jl
@@ -339,9 +339,10 @@ for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
 end
 
 # check type of result
-under_test = [UInt8, Int8, Int32, Int64, BigInt]
-@testset "type of sum(::Array{$T}" for T in under_test
+@testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt]
     result = sum(T[1 2 3; 4 5 6; 7 8 9], 2)
     @test result == hcat([6, 15, 24])
-    @test eltype(result) === typeof(Base.promote_sys_size_add(zero(T)))
+    @test eltype(result) === (T <: Base.SmallSigned ? Int :
+                              T <: Base.SmallUnsigned ? UInt :
+                              T)
 end