From d90fa45c6ac5da8398e6864128579df48edbf00c Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sun, 10 Dec 2023 16:45:21 +0100 Subject: [PATCH] Fix minimum/maximum over dimensions with missing values (#35323) `v0 != v0` returns `missing` for missing values. Use the largest/smallest non-missing value to initialize the array. This is an inefficient approach. Faster alternatives would be to avoid using an initial value at all, and instead keep track of whether a value has been set in a separate mask; or to use `typemax`/`typemin` for types that support them. Fixes #35308. --- base/reducedim.jl | 22 +++++++++++++++------- test/reducedim.jl | 15 +++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index f5a22940310cf..21dff3b46ab37 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -146,16 +146,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min T = _realtype(f, promote_union(eltype(A))) Tr = v0 isa T ? T : typeof(v0) - # but NaNs and missing need to be avoided as initial values + # but NaNs, missing and unordered values need to be avoided as initial values if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) elseif isunordered(v0) # v0 is missing or a third-party unordered value Tnm = nonmissingtype(Tr) - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = $typeextreme(Tnm) + if Tnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = $typeextreme(Tnm) + elseif !all(isunordered, A1) + v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) + end end # v0 may have changed type. Tr = v0 isa T ? T : typeof(v0) @@ -186,12 +188,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray # but NaNs and missing need to be avoided as initial values if v0[1] isa Number && isnan(v0[1]) + # v0 is NaN v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) elseif isunordered(v0[1]) # v0 is missing or a third-party unordered value - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax)) + Tminnm = nonmissingtype(Tmin) + Tmaxnm = nonmissingtype(Tmax) + if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} && + Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = (typemax(Tminnm), typemin(Tmaxnm)) + elseif !all(isunordered, A1) + v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) + end end # v0 may have changed type. Tmin = v0[1] isa T ? T : typeof(v0[1]) diff --git a/test/reducedim.jl b/test/reducedim.jl index daa0a3fbe1f92..f4767dd7a472c 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -608,7 +608,7 @@ end end @testset "NaN/missing test for extrema with dims #43599" begin for sz = (3, 10, 100) - for T in (Int, Float64, BigFloat) + for T in (Int, Float64, BigFloat, BigInt) Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz)) Aₘ[rand(1:sz*sz, sz)] .= missing unordered_test_for_extrema(Aₘ) @@ -622,9 +622,16 @@ end end end end -@test_broken minimum([missing;BigInt(1)], dims = 1) -@test_broken maximum([missing;BigInt(1)], dims = 1) -@test_broken extrema([missing;BigInt(1)], dims = 1) + +@testset "minimum/maximum over dims with missing (#35308)" begin + for T in (Int, Float64, BigInt, BigFloat) + x = Union{T, Missing}[1 missing; 2 missing] + @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) + @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) + @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) + @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1)) + end +end # issue #26709 @testset "dimensional reduce with custom non-bitstype types" begin