Skip to content

Commit

Permalink
Add mapreduce_single function (#25051)
Browse files Browse the repository at this point in the history
Since the demise of `r_promote` in #22825, there is now a type-instability in `mapreduce` if the operator does not give an element of the same type as the input. This arose during my implementation of Kahan summation using a reduction operator, see: JuliaMath/KahanSummation.jl#7

This adds a `mapreduce_single` function which defines what the result should be in these cases.
  • Loading branch information
simonbyrne authored and JeffBezanson committed Jan 4, 2018
1 parent e3b7b18 commit e422547
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 64 deletions.
132 changes: 97 additions & 35 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,25 @@ else
const SmallUnsigned = Union{UInt8,UInt16,UInt32}
end

# Certain reductions like sum and prod may wish to promote the items being reduced over to
# an appropriate size. Note we need x + zero(x) because some types like Bool have their sum
# lie in a larger type.
promote_sys_size(T::Type) = T
promote_sys_size(::Type{<:SmallSigned}) = Int
promote_sys_size(::Type{<:SmallUnsigned}) = UInt

promote_sys_size_add(x) = convert(promote_sys_size(typeof(x + zero(x))), x)
promote_sys_size_mul(x) = convert(promote_sys_size(typeof(x * one(x))), x)
const _PromoteSysSizeFunction = Union{typeof(promote_sys_size_add),
typeof(promote_sys_size_mul)}
"""
Base.add_sum(x,y)
The reduction operator used in `sum`. The main difference from [`+`](@ref) is that small
integers are promoted to `Int`/`UInt`.
"""
add_sum(x,y) = x + y
add_sum(x::SmallSigned,y::SmallSigned) = Int(x) + Int(y)
add_sum(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) + UInt(y)

"""
Base.mul_prod(x,y)
The reduction operator used in `prod`. The main difference from [`*`](@ref) is that small
integers are promoted to `Int`/`UInt`.
"""
mul_prod(x,y) = x * y
mul_prod(x::SmallSigned,y::SmallSigned) = Int(x) * Int(y)
mul_prod(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) * UInt(y)

## foldl && mapfoldl

Expand Down Expand Up @@ -64,7 +72,7 @@ function mapfoldl(f, op, itr)
return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
end
(x, i) = next(itr, i)
v0 = f(x)
v0 = mapreduce_first(f, op, x)
mapfoldl_impl(f, op, v0, itr, i)
end

Expand Down Expand Up @@ -133,7 +141,7 @@ function mapfoldr(f, op, itr)
if isempty(itr)
return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
end
return mapfoldr_impl(f, op, f(itr[i]), itr, i-1)
return mapfoldr_impl(f, op, mapreduce_first(f, op, itr[i]), itr, i-1)
end

"""
Expand Down Expand Up @@ -174,7 +182,7 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
@noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int)
if ifirst == ilast
@inbounds a1 = A[ifirst]
return f(a1)
return mapreduce_first(f, op, a1)
elseif ifirst + blksize > ilast
# sequential portion
@inbounds a1 = A[ifirst]
Expand Down Expand Up @@ -238,34 +246,88 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096

# handling empty arrays
_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))

"""
Base.reduce_empty(op, T)
The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
with reduction `op` over an empty array with element type of `T`.
If not defined, this will throw an `ArgumentError`.
"""
reduce_empty(op, T) = _empty_reduce_error()
reduce_empty(::typeof(+), T) = zero(T)
reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
reduce_empty(::typeof(*), T) = one(T)
reduce_empty(::typeof(*), ::Type{Char}) = ""
reduce_empty(::typeof(&), ::Type{Bool}) = true
reduce_empty(::typeof(|), ::Type{Bool}) = false

reduce_empty(::typeof(add_sum), T) = reduce_empty(+, T)
reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned} = zero(Int)
reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
reduce_empty(::typeof(mul_prod), T) = reduce_empty(*, T)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned} = one(Int)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)

"""
Base.mapreduce_empty(f, op, T)
The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
[`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
of `T`.
If not defined, this will throw an `ArgumentError`.
"""
mapreduce_empty(f, op, T) = _empty_reduce_error()
mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
mapreduce_empty(f::_PromoteSysSizeFunction, op, T) =
f(mapreduce_empty(identity, op, T))
mapreduce_empty(::typeof(abs), ::typeof(+), T) = abs(zero(T))
mapreduce_empty(::typeof(abs2), ::typeof(+), T) = abs2(zero(T))
mapreduce_empty(::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) =
abs(zero(T))
mapreduce_empty(::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) =
abs2(zero(T))

# Allow mapreduce_empty to “see through” promote_sys_size
let ComposedFunction = typename(typeof(identity identity)).wrapper
global mapreduce_empty(f::ComposedFunction{<:_PromoteSysSizeFunction}, op, T) =
f.f(mapreduce_empty(f.g, op, T))
end
mapreduce_empty(::typeof(abs), op, T) = abs(reduce_empty(op, T))
mapreduce_empty(::typeof(abs2), op, T) = abs2(reduce_empty(op, T))

mapreduce_empty(f::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) = abs(zero(T))
mapreduce_empty(f::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) = abs2(zero(T))

mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
mapreduce_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
mapreduce_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
mapreduce_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()

# handling of single-element iterators
"""
Base.reduce_first(op, x)
The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or
[`foldr`](@ref) with reduction `op` over an iterator which contains a single element
`x`. This value may also used to initialise the recursion, so that `reduce(op, [x, y])`
may call `op(reduce_first(op, x), y)`.
The default is `x` for most types. The main purpose is to ensure type stability, so
additional methods should only be defined for cases where `op` gives a result with
different types than its inputs.
"""
reduce_first(op, x) = x
reduce_first(::typeof(+), x::Bool) = Int(x)
reduce_first(::typeof(*), x::Char) = string(x)

reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
reduce_first(::typeof(add_sum), x::SmallSigned) = Int(x)
reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
reduce_first(::typeof(mul_prod), x::SmallSigned) = Int(x)
reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)

"""
Base.mapreduce_first(f, op, x)
The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
[`mapfoldr`](@ref) with map `f` and reduction `op` over an iterator which contains a
single element `x`. This value may also used to initialise the recursion, so that
`mapreduce(f, op, [x, y])` may call `op(reduce_first(op, f, x), f(y))`.
The default is `reduce_first(op, f(x))`.
"""
mapreduce_first(f, op, x) = reduce_first(op, f(x))

_mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)

function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
Expand All @@ -275,7 +337,7 @@ function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
return mapreduce_empty(f, op, T)
elseif n == 1
@inbounds a1 = A[inds[1]]
return f(a1)
return mapreduce_first(f, op, a1)
elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
@inbounds i = inds[1]
@inbounds a1 = A[i]
Expand All @@ -294,7 +356,7 @@ end
_mapreduce(f, op, ::IndexCartesian, A::AbstractArray) = mapfoldl(f, op, A)

mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)
mapreduce(f, op, a::Number) = f(a)
mapreduce(f, op, a::Number) = mapreduce_first(f, op, a)

"""
reduce(op, v0, itr)
Expand Down Expand Up @@ -372,7 +434,7 @@ In the former case, the integers are widened to system word size and therefore
the result is 128. In the latter case, no such widening happens and integer
overflow results in -128.
"""
sum(f::Callable, a) = mapreduce(promote_sys_size_add f, +, a)
sum(f, a) = mapreduce(f, add_sum, a)

"""
sum(itr)
Expand All @@ -388,7 +450,7 @@ julia> sum(1:20)
210
```
"""
sum(a) = mapreduce(promote_sys_size_add, +, a)
sum(a) = sum(identity, a)
sum(a::AbstractArray{Bool}) = count(a)

## prod
Expand All @@ -406,7 +468,7 @@ julia> prod(abs2, [2; 3; 4])
576
```
"""
prod(f::Callable, a) = mapreduce(promote_sys_size_mul f, *, a)
prod(f, a) = mapreduce(f, mul_prod, a)

"""
prod(itr)
Expand All @@ -422,7 +484,7 @@ julia> prod(1:20)
2432902008176640000
```
"""
prod(a) = mapreduce(promote_sys_size_mul, *, a)
prod(a) = mapreduce(identity, mul_prod, a)

## maximum & minimum

Expand All @@ -433,7 +495,7 @@ function mapreduce_impl(f, op::Union{typeof(scalarmax),
A::AbstractArray, first::Int, last::Int)
# locate the first non NaN number
@inbounds a1 = A[first]
v = f(a1)
v = mapreduce_first(f, op, a1)
i = first + 1
while (v == v) && (i <= last)
@inbounds ai = A[i]
Expand Down
37 changes: 11 additions & 26 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ end
###### Generic reduction functions #####

## initialization

for (Op, initfun) in ((:(typeof(+)), :zero), (:(typeof(*)), :one))
# initarray! is only called by sum!, prod!, etc.
for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
@eval initarray!(a::AbstractArray{T}, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
end

Expand All @@ -75,6 +75,7 @@ for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
@eval initarray!(a::AbstractArray, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
end

# reducedim_initarray is called by
reducedim_initarray(A::AbstractArray, region, v0, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), v0)
reducedim_initarray(A::AbstractArray, region, v0::T) where {T} = reducedim_initarray(A, region, v0, T)

Expand Down Expand Up @@ -104,10 +105,10 @@ reducedim_initarray0_empty(A::AbstractArray, region,::typeof(identity), ops) = m
promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
promote_union(T) = T

function reducedim_init(f, op::typeof(+), A::AbstractArray, region)
function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
_reducedim_init(f, op, zero, sum, A, region)
end
function reducedim_init(f, op::typeof(*), A::AbstractArray, region)
function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
_reducedim_init(f, op, one, prod, A, region)
end
function _reducedim_init(f, op, fv, fop, A, region)
Expand Down Expand Up @@ -143,19 +144,12 @@ let
[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}

global reducedim_init(f::typeof(identity), op::typeof(+), A::T, region) =
reducedim_initarray(A, region, zero(eltype(A)))
global reducedim_init(f::typeof(identity), op::typeof(*), A::T, region) =
reducedim_initarray(A, region, one(eltype(A)))
global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(+), A::T, region) =
reducedim_initarray(A, region, real(zero(eltype(A))))
global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(*), A::T, region) =
reducedim_initarray(A, region, real(one(eltype(A))))
global reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region) =
reducedim_initarray(A, region, mapreduce_first(f, op, zero(eltype(A))))
global reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region) =
reducedim_initarray(A, region, mapreduce_first(f, op, one(eltype(A))))
end

reducedim_init(f::Union{typeof(identity),typeof(abs),typeof(abs2)}, op::typeof(+), A::AbstractArray{Bool}, region) =
reducedim_initarray(A, region, 0)

## generic (map)reduction

has_fast_linear_indexing(a::AbstractArray) = false
Expand Down Expand Up @@ -610,26 +604,17 @@ julia> any!([1 1], A)
"""
any!(r, A)

for (fname, op) in [(:sum, :+), (:prod, :*),
for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
(:maximum, :scalarmax), (:minimum, :scalarmin),
(:all, :&), (:any, :|)]
function compose_promote_sys_size(x)
if fname === :sum
:(promote_sys_size_add $x)
elseif fname === :prod
:(promote_sys_size_mul $x)
else
x
end
end
fname! = Symbol(fname, '!')
@eval begin
$(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
mapreducedim!(f, $(op), initarray!(r, $(op), init, A), A)
$(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)

$(fname)(f::Function, A::AbstractArray, region) =
mapreducedim($(compose_promote_sys_size(:f)), $(op), A, region)
mapreducedim(f, $(op), A, region)
$(fname)(A::AbstractArray, region) = $(fname)(identity, A, region)
end
end
Expand Down
5 changes: 5 additions & 0 deletions test/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,8 @@ test18695(r) = sum( t^2 for t in r )
# test neutral element not picked incorrectly for &, |
@test @inferred(foldl(&, Int[1])) === 1
@test_throws ArgumentError foldl(&, Int[])

# prod on Chars
@test prod(Char[]) == ""
@test prod(Char['a']) == "a"
@test prod(Char['a','b']) == "ab"
7 changes: 4 additions & 3 deletions test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,10 @@ for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
end

# check type of result
under_test = [UInt8, Int8, Int32, Int64, BigInt]
@testset "type of sum(::Array{$T}" for T in under_test
@testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt]
result = sum(T[1 2 3; 4 5 6; 7 8 9], 2)
@test result == hcat([6, 15, 24])
@test eltype(result) === typeof(Base.promote_sys_size_add(zero(T)))
@test eltype(result) === (T <: Base.SmallSigned ? Int :
T <: Base.SmallUnsigned ? UInt :
T)
end

0 comments on commit e422547

Please sign in to comment.