Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust initialization in maximum and minimum #27845

Merged
merged 5 commits into from
Jul 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1917,7 +1917,12 @@ function mapslices(f, A::AbstractArray; dims)
Rsize = copy(dimsA)
# TODO: maybe support removing dimensions
if !isa(r1, AbstractArray) || ndims(r1) == 0
r1 = [r1]
# If the result of f on a single slice is a scalar then we add singleton
# dimensions. When adding adding the dimensions, we have to respect the
# index type of the input array (e.g. in the case of OffsetArrays)
tmp = similar(Aslice, typeof(r1), reduced_indices(Aslice, 1:ndims(Aslice)))
tmp[first(CartesianIndices(tmp))] = r1
r1 = tmp
end
nextra = max(0, length(dims)-ndims(r1))
if eltype(Rsize) == Int
Expand Down
75 changes: 47 additions & 28 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,41 @@
## Functions to compute the reduced shape

# for reductions that expand 0 dims to 1
reduced_index(i::OneTo) = OneTo(1)
reduced_index(i::Slice) = Slice(first(i):first(i))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like simply making this a UnitRange instead of a Slice will do the trick — make -C test reduce reduced offsetarray passes for me locally with that change.

reduced_index(i::AbstractUnitRange) =
throw(ArgumentError(
"""
No method is implemented for reducing index range of type $typeof(i). Please implement
reduced_index for this index type or report this as an issue.
"""
))
reduced_indices(a::AbstractArray, region) = reduced_indices(axes(a), region)

# for reductions that keep 0 dims as 0
reduced_indices0(a::AbstractArray, region) = reduced_indices0(axes(a), region)

function reduced_indices(inds::Indices{N}, d::Int, rd::AbstractUnitRange) where N
function reduced_indices(inds::Indices{N}, d::Int) where N
d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d"))
if d == 1
return (oftype(inds[1], rd), tail(inds)...)
return (reduced_index(inds[1]), tail(inds)...)
elseif 1 < d <= N
return tuple(inds[1:d-1]..., oftype(inds[d], rd), inds[d+1:N]...)::typeof(inds)
return tuple(inds[1:d-1]..., oftype(inds[d], reduced_index(inds[d])), inds[d+1:N]...)::typeof(inds)
else
return inds
end
end
reduced_indices(inds::Indices, d::Int) = reduced_indices(inds, d, OneTo(1))

function reduced_indices0(inds::Indices{N}, d::Int) where N
d < 1 && throw(ArgumentError("dimension must be ≥ 1, got $d"))
if d <= N
ind = inds[d]
return reduced_indices(inds, d, (isempty(ind) ? ind : OneTo(1)))
rd = isempty(ind) ? ind : reduced_index(inds[d])
if d == 1
return (rd, tail(inds)...)
else
return tuple(inds[1:d-1]..., oftype(inds[d], rd), inds[d+1:N]...)::typeof(inds)
end
else
return inds
end
Expand All @@ -38,7 +51,7 @@ function reduced_indices(inds::Indices{N}, region) where N
if d < 1
throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
elseif d <= N
rinds[d] = oftype(rinds[d], OneTo(1))
rinds[d] = reduced_index(rinds[d])
end
end
tuple(rinds...)::typeof(inds)
Expand All @@ -53,7 +66,7 @@ function reduced_indices0(inds::Indices{N}, region) where N
throw(ArgumentError("region dimension(s) must be ≥ 1, got $d"))
elseif d <= N
rind = rinds[d]
rinds[d] = oftype(rind, (isempty(rind) ? rind : OneTo(1)))
rinds[d] = isempty(rind) ? rind : reduced_index(rind)
end
end
tuple(rinds...)::typeof(inds)
Expand All @@ -79,25 +92,6 @@ end
reducedim_initarray(A::AbstractArray, region, init, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), init)
reducedim_initarray(A::AbstractArray, region, init::T) where {T} = reducedim_initarray(A, region, init, T)

function reducedim_initarray0(A::AbstractArray{T}, region, f, ops) where T
ri = reduced_indices0(A, region)
if isempty(A)
if prod(length, reduced_indices(A, region)) != 0
reducedim_initarray0_empty(A, region, f, ops) # ops over empty slice of A
else
R = f == identity ? T : Core.Compiler.return_type(f, (T,))
similar(A, R, ri)
end
else
R = f == identity ? T : typeof(f(first(A)))
si = similar(A, R, ri)
mapfirst!(f, si, A)
end
end

reducedim_initarray0_empty(A::AbstractArray, region, f, ops) = mapslices(x->ops(f.(x)), A, dims = region)
reducedim_initarray0_empty(A::AbstractArray, region,::typeof(identity), ops) = mapslices(ops, A, dims = region)

# TODO: better way to handle reducedim initialization
#
# The current scheme is basically following Steven G. Johnson's original implementation
Expand All @@ -124,8 +118,33 @@ function _reducedim_init(f, op, fv, fop, A, region)
return reducedim_initarray(A, region, z, Tr)
end

reducedim_init(f, op::typeof(max), A::AbstractArray{T}, region) where {T} = reducedim_initarray0(A, region, f, maximum)
reducedim_init(f, op::typeof(min), A::AbstractArray{T}, region) where {T} = reducedim_initarray0(A, region, f, minimum)
# initialization when computing minima and maxima requires a little care
for (f1, f2, initval) in ((:min, :max, :Inf), (:max, :min, :(-Inf)))
@eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region)
# First compute the reduce indices. This will throw an ArgumentError
# if any region is invalid
ri = reduced_indices(A, region)

# Next, throw if reduction is over a region with length zero
any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()

# Make a view of the first slice of the region
A1 = view(A, ri...)

if isempty(A1)
# If the slice is empty just return non-view version as the initial array
return copy(A1)
else
# otherwise use the min/max of the first slice as initial value
v0 = mapreduce(f, $f2, A1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't seem to be correct to use the min/max of the first slice as the initial value for all slices. I'm probably missing something. (I wish I could have used something simpler than what I did for this at #28027).

BTW, typo below: "intial"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should be fine. If you are computing the maximum along a dimension then it should be fine to initialize with any value less than or equal to the first element in the slice. The minimum over the first slice will satisfy this, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I misunderstood "first slice". I thought it referred to the first slice along which reduction is performed. Carry on.


# but NaNs need to be avoided as intial values
v0 = v0 != v0 ? typeof(v0)($initval) : v0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT the mapreduce call above will give NaN whenever one of the values is NaN. So this code is going to use the fallback initval even if there are non-NaN values which could be used. Maybe it would be better to use a custom loop to go over the first slice and skip NaN values?

Actually, maybe something similar to the mapfirst! method I had to write at #27845 to skip missing values is needed.

Anyway, a few tests for these corner cases would be good. ;-)

Copy link
Member Author

@andreasnoack andreasnoack Jul 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is basically a check for floats but tries to be a little more generic. Using the initval is fine for floats and only floats will probably ever be NaN satisfy the test so I don't think there is an issue here.


return reducedim_initarray(A, region, v0)
end
end
end
reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
reducedim_initarray(A, region, zero(f(zero(T))))

Expand Down
2 changes: 0 additions & 2 deletions stdlib/SparseArrays/src/sparsematrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1608,8 +1608,6 @@ end
# non-trivial, so use Arrays for output
Base.reducedim_initarray(A::SparseMatrixCSC, region, v0, ::Type{R}) where {R} =
fill(v0, Base.reduced_indices(A,region))
Base.reducedim_initarray0(A::SparseMatrixCSC, region, v0, ::Type{R}) where {R} =
fill(v0, Base.reduced_indices0(A,region))

# General mapreduce
function _mapreducezeros(f, op, ::Type{T}, nzeros::Int, v0) where T
Expand Down
3 changes: 3 additions & 0 deletions stdlib/SparseArrays/test/higherorderfns.jl
Original file line number Diff line number Diff line change
Expand Up @@ -628,5 +628,8 @@ end
@test Diagonal(spzeros(5)) \ view(rand(10), 1:5) == [Inf,Inf,Inf,Inf,Inf]
end

@testset "Issue #27836" begin
@test minimum(sparse([1, 2], [1, 2], ones(Int32, 2)), dims = 1) isa Matrix
end

end # module
16 changes: 8 additions & 8 deletions test/offsetarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ targets2 = ["(1.0, 1.0)",
"([1.0], [1.0])",
"([1.0], [1.0])",
"([1.0], [1.0])"]
for n = 0:4
@testset "printing of OffsetArray with n=$n" for n = 0:4
a = OffsetArray(fill(1.,ntuple(d->1,n)), ntuple(identity,n))
show(IOContext(io, :limit => true), MIME("text/plain"), a)
@test String(take!(io)) == targets1[n+1]
Expand Down Expand Up @@ -339,9 +339,9 @@ A = OffsetArray(rand(4,4), (-3,5))
@test maximum(A) == maximum(parent(A))
@test minimum(A) == minimum(parent(A))
@test extrema(A) == extrema(parent(A))
@test maximum(A, dims=1) == OffsetArray(maximum(parent(A), dims=1), (0,A.offsets[2]))
@test maximum(A, dims=2) == OffsetArray(maximum(parent(A), dims=2), (A.offsets[1],0))
@test maximum(A, dims=1:2) == maximum(parent(A), dims=1:2)
@test maximum(A, dims=1) == OffsetArray(maximum(parent(A), dims=1), A.offsets)
@test maximum(A, dims=2) == OffsetArray(maximum(parent(A), dims=2), A.offsets)
@test maximum(A, dims=1:2) == OffsetArray(maximum(parent(A), dims=1:2), A.offsets)
C = similar(A)
cumsum!(C, A, dims=1)
@test parent(C) == cumsum(parent(A), dims=1)
Expand Down Expand Up @@ -373,11 +373,11 @@ I = findall(!iszero, z)
@test findall(x->x==0, h) == [2]
@test mean(A_3_3) == median(A_3_3) == 5
@test mean(x->2x, A_3_3) == 10
@test mean(A_3_3, dims=1) == median(A_3_3, dims=1) == OffsetArray([2 5 8], (0,A_3_3.offsets[2]))
@test mean(A_3_3, dims=2) == median(A_3_3, dims=2) == OffsetArray(reshape([4,5,6],(3,1)), (A_3_3.offsets[1],0))
@test mean(A_3_3, dims=1) == median(A_3_3, dims=1) == OffsetArray([2 5 8], A_3_3.offsets)
@test mean(A_3_3, dims=2) == median(A_3_3, dims=2) == OffsetArray(reshape([4,5,6],(3,1)), A_3_3.offsets)
@test var(A_3_3) == 7.5
@test std(A_3_3, dims=1) == OffsetArray([1 1 1], (0,A_3_3.offsets[2]))
@test std(A_3_3, dims=2) == OffsetArray(reshape([3,3,3], (3,1)), (A_3_3.offsets[1],0))
@test std(A_3_3, dims=1) == OffsetArray([1 1 1], A_3_3.offsets)
@test std(A_3_3, dims=2) == OffsetArray(reshape([3,3,3], (3,1)), A_3_3.offsets)
@test sum(OffsetArray(fill(1,3000), -1000)) == 3000

@test norm(v) ≈ norm(parent(v))
Expand Down
11 changes: 5 additions & 6 deletions test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@ safe_sumabs2(A::Array{T}, region) where {T} = safe_mapslices(sum, abs2.(A), regi
safe_maxabs(A::Array{T}, region) where {T} = safe_mapslices(maximum, abs.(A), region)
safe_minabs(A::Array{T}, region) where {T} = safe_mapslices(minimum, abs.(A), region)

Areduc = rand(3, 4, 5, 6)
for region in Any[
@testset "test reductions over region: $region" for region in Any[
1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
(1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)]
# println("region = $region")
Areduc = rand(3, 4, 5, 6)
r = fill(NaN, map(length, Base.reduced_indices(axes(Areduc), region)))
@test sum!(r, Areduc) ≈ safe_sum(Areduc, region)
@test prod!(r, Areduc) ≈ safe_prod(Areduc, region)
Expand Down Expand Up @@ -318,10 +317,10 @@ end
@test sum(Any[1 2;3 4], dims=1) == [4 6]
@test sum(Vector{Int}[[1,2],[4,3]], dims=1)[1] == [5,5]

# issue #10461
Areduc = rand(3, 4, 5, 6)
for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
@testset "Issue #10461. region=$region" for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
2 3], "hello"]
Areduc = rand(3, 4, 5, 6)

@test_throws ArgumentError sum(Areduc, dims=region)
@test_throws ArgumentError prod(Areduc, dims=region)
@test_throws ArgumentError maximum(Areduc, dims=region)
Expand Down