Skip to content

Commit

Permalink
Fix minimum/maximum over dimensions with missing values (#35323)
Browse files Browse the repository at this point in the history
`v0 != v0` returns `missing` for missing values. Use the
largest/smallest non-missing value to initialize the array. This is an
inefficient approach. Faster alternatives would be to avoid using an
initial value at all, and instead keep track of whether a value has been
set in a separate mask; or to use `typemax`/`typemin` for types that
support them.

Fixes #35308.
  • Loading branch information
nalimilan authored Dec 10, 2023
1 parent e2a26aa commit d90fa45
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
22 changes: 15 additions & 7 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min
T = _realtype(f, promote_union(eltype(A)))
Tr = v0 isa T ? T : typeof(v0)

# but NaNs and missing need to be avoided as initial values
# but NaNs, missing and unordered values need to be avoided as initial values
if v0 isa Number && isnan(v0)
# v0 is NaN
v0 = oftype(v0, $initval)
elseif isunordered(v0)
# v0 is missing or a third-party unordered value
Tnm = nonmissingtype(Tr)
# TODO: Some types, like BigInt, don't support typemin/typemax.
# So a Matrix{Union{BigInt, Missing}} can still error here.
v0 = $typeextreme(Tnm)
if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
v0 = $typeextreme(Tnm)
elseif !all(isunordered, A1)
v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
end
end
# v0 may have changed type.
Tr = v0 isa T ? T : typeof(v0)
Expand Down Expand Up @@ -186,12 +188,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray

# but NaNs and missing need to be avoided as initial values
if v0[1] isa Number && isnan(v0[1])
# v0 is NaN
v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
elseif isunordered(v0[1])
# v0 is missing or a third-party unordered value
# TODO: Some types, like BigInt, don't support typemin/typemax.
# So a Matrix{Union{BigInt, Missing}} can still error here.
v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax))
Tminnm = nonmissingtype(Tmin)
Tmaxnm = nonmissingtype(Tmax)
if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
v0 = (typemax(Tminnm), typemin(Tmaxnm))
elseif !all(isunordered, A1)
v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
end
end
# v0 may have changed type.
Tmin = v0[1] isa T ? T : typeof(v0[1])
Expand Down
15 changes: 11 additions & 4 deletions test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ end
end
@testset "NaN/missing test for extrema with dims #43599" begin
for sz = (3, 10, 100)
for T in (Int, Float64, BigFloat)
for T in (Int, Float64, BigFloat, BigInt)
Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz))
Aₘ[rand(1:sz*sz, sz)] .= missing
unordered_test_for_extrema(Aₘ)
Expand All @@ -622,9 +622,16 @@ end
end
end
end
@test_broken minimum([missing;BigInt(1)], dims = 1)
@test_broken maximum([missing;BigInt(1)], dims = 1)
@test_broken extrema([missing;BigInt(1)], dims = 1)

@testset "minimum/maximum over dims with missing (#35308)" begin
for T in (Int, Float64, BigInt, BigFloat)
x = Union{T, Missing}[1 missing; 2 missing]
@test isequal(minimum(x, dims=1), reshape([1, missing], 1, :))
@test isequal(maximum(x, dims=1), reshape([2, missing], 1, :))
@test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1))
@test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1))
end
end

# issue #26709
@testset "dimensional reduce with custom non-bitstype types" begin
Expand Down

4 comments on commit d90fa45

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The package evaluation job you requested has completed - possible new issues were detected.
The full report is available.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

Please sign in to comment.