Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement Statistics.median #973

Merged
merged 4 commits into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 38 additions & 2 deletions ext/StaticArraysStatisticsExt.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
module StaticArraysStatisticsExt

import Statistics: mean
import Statistics: mean, median

using Base.Order: Forward, ord
using Statistics: median!, middle

using StaticArrays
using StaticArrays: _InitialValue, _reduce, _mapreduce
using StaticArrays: BitonicSort, _InitialValue, _reduce, _mapreduce, _bitonic_sort_limit, _sort

# Denominator for a mean taken over every element of `a`.
function _mean_denom(a, ::Colon)
    return length(a)
end

# Denominator for a mean taken along the single dimension `dims` of `a`.
function _mean_denom(a, dims::Int)
    return size(a, dims)
end
Expand All @@ -12,4 +15,37 @@ _mean_denom(a, ::Val{D}) where {D} = size(a, D)
# Mean of the elements of `a`: the `+`-reduction over `dims` divided by the
# matching element count from `_mean_denom`.
@inline function mean(a::StaticArray; dims=:)
    total = _reduce(+, a, dims)
    return total / _mean_denom(a, dims)
end

# Mean of `f` applied to the elements of `a`: the `+`-reduction of the mapped
# values over `dims` divided by the matching element count from `_mean_denom`.
@inline function mean(f::Function, a::StaticArray; dims=:)
    total = _mapreduce(f, +, dims, _InitialValue(), Size(a), a)
    return total / _mean_denom(a, dims)
end

# Median of a static array.
#
# With `dims = :` (the default) the median of all elements is computed by
# calling `median` on `vec(a)`. Any other `dims` value is forwarded to `median`
# on `Array(a)`, so the reduction along dimensions goes through a plain `Array`.
@inline function median(a::StaticArray; dims = :)
    # Branch on the type of `dims` rather than on a value comparison with `==`:
    # the choice is then fixed by the argument type and can never produce a
    # non-`Bool` condition.
    dims isa Colon && return median(vec(a))

    # FIXME: Implement `mapslices` correctly on `StaticArray` to remove
    # this fallback.
    return median(Array(a); dims)
end

# Median of a static vector.
#
# Keyword `dims`: with `:` (the default) the median of all elements is returned.
# Any other value is forwarded to `median` on `Array(a)`.
#
# Immutable vectors of at most `_bitonic_sort_limit` elements are sorted with
# `BitonicSort`; every other vector is handed to `median!` on a mutable copy.
#
# Throws `ArgumentError` for an empty vector. Returns `missing` if the element
# type admits `Missing` and a `missing` is present; otherwise returns the first
# NaN found in `a`, if any.
@inline function median(a::StaticVector; dims = :)
    # Keyword arguments take no part in dispatch, so this more specific method
    # has to accept `dims` itself; without it `median(v; dims = 1)` on a static
    # vector raises a `MethodError` instead of using the `Array` fallback.
    dims isa Colon || return median(Array(a); dims)

    (isimmutable(a) && length(a) <= _bitonic_sort_limit) ||
        return median!(Base.copymutable(a))

    # following Statistics.median
    isempty(a) &&
        throw(ArgumentError("median of empty vector is undefined, $(repr(a))"))
    eltype(a) >: Missing && any(ismissing, a) &&
        return missing
    nanix = findfirst(x -> x isa Number && isnan(x), a)
    isnothing(nanix) ||
        return a[nanix]

    order = ord(isless, identity, nothing, Forward)
    sa = _sort(Tuple(a), BitonicSort, order)

    n = length(a)
    # sa is 1-indexed
    return isodd(n) ?
        middle(sa[n ÷ 2 + 1]) :
        middle(sa[n ÷ 2], sa[n ÷ 2 + 1])
end

end # module
3 changes: 2 additions & 1 deletion src/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ const BitonicSort = BitonicSortAlg()

# BitonicSort has non-optimal asymptotic behaviour, so we define a cutoff
# length. This also prevents compilation time from skyrocketing for larger vectors.
const _bitonic_sort_limit = 20
# Default sorting algorithm for a static vector: `BitonicSort` for immutable
# vectors of at most `_bitonic_sort_limit` elements, `QuickSort` otherwise.
defalg(a::StaticVector) =
    isimmutable(a) && length(a) <= _bitonic_sort_limit ? BitonicSort : QuickSort

@inline function sort(a::StaticVector;
alg::Algorithm = defalg(a),
Expand Down
77 changes: 77 additions & 0 deletions test/sort.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using StaticArrays, Test
using Statistics: Statistics, median, median!, middle

@testset "sort" begin

Expand Down Expand Up @@ -30,4 +31,80 @@ using StaticArrays, Test
@test sortperm(SA[1, 1, 1, 0]) == SA[4, 1, 2, 3]
end

@testset "median" begin
    @test_throws ArgumentError median(SA[])
    @test ismissing(median(SA[1, missing]))
    @test isnan(median(SA[1., NaN]))

    # Compare against the reference result on a plain `Vector`, for lengths on
    # both sides of the bitonic-sort cutoff (20).
    @testset for T in (Int, Float64)
        for N in (1, 2, 3, 10, 20, 30)
            v = rand(SVector{N,T})
            mref = median(Vector(v))

            # `@inferred` must wrap only the `median` call; wrapping the whole
            # comparison would check the inference of `==` instead.
            @test @inferred(median(v)) == mref
        end
    end

    # Tests based on upstream `Statistics.jl`.
    # https://github.com/JuliaStats/Statistics.jl/blob/d49c2bf4f81e1efb4980a35fe39c815ef8396297/test/runtests.jl#L31-L92
    @test median(SA[1.]) === 1.
    @test median(SA[1.,3]) === 2.
    @test median(SA[1.,3,2]) === 2.

    @test median(SA[1,3,2]) === 2.0
    @test median(SA[1,3,2,4]) === 2.5

    @test median(SA[0.0,Inf]) == Inf
    @test median(SA[0.0,-Inf]) == -Inf
    @test median(SA[0.,Inf,-Inf]) == 0.0
    @test median(SA[1.,-1.,Inf,-Inf]) == 0.0
    @test isnan(median(SA[-Inf,Inf]))

    X = SA[2 3 1 -1; 7 4 5 -4]
    @test all(median(X, dims=2) .== SA[1.5, 4.5])
    @test all(median(X, dims=1) .== SA[4.5 3.5 3.0 -2.5])
    @test X == SA[2 3 1 -1; 7 4 5 -4] # issue #17153

    @test_throws ArgumentError median(SA[])
    @test isnan(median(SA[NaN]))
    @test isnan(median(SA[0.0,NaN]))
    @test isnan(median(SA[NaN,0.0]))
    @test isnan(median(SA[NaN,0.0,1.0]))
    @test isnan(median(SA{Any}[NaN,0.0,1.0]))
    @test isequal(median(SA[NaN 0.0; 1.2 4.5], dims=2), reshape(SA[NaN; 2.85], 2, 1))

    # the specific NaN value is propagated from the input
    @test median(SA[NaN]) === NaN
    @test median(SA[0.0,NaN]) === NaN
    @test median(SA[0.0,NaN,NaN]) === NaN
    @test median(SA[-NaN]) === -NaN
    @test median(SA[0.0,-NaN]) === -NaN
    @test median(SA[0.0,-NaN,-NaN]) === -NaN

    @test ismissing(median(SA[1, missing]))
    @test ismissing(median(SA[1, 2, missing]))
    @test ismissing(median(SA[NaN, 2.0, missing]))
    @test ismissing(median(SA[NaN, missing]))
    @test ismissing(median(SA[missing, NaN]))
    @test ismissing(median(SA{Any}[missing, 2.0, 3.0, 4.0, NaN]))
    @test median(skipmissing(SA[1, missing, 2])) === 1.5

    @test median!(Base.copymutable(SA[1 2 3 4])) == 2.5
    @test median!(Base.copymutable(SA[1 2; 3 4])) == 2.5

    @test @inferred(median(SA{Float16}[1, 2, NaN])) === Float16(NaN)
    @test @inferred(median(SA{Float16}[1, 2, 3])) === Float16(2)
    @test @inferred(median(SA{Float32}[1, 2, NaN])) === NaN32
    @test @inferred(median(SA{Float32}[1, 2, 3])) === 2.0f0

    # custom type implementing minimal interface
    struct A
        x
    end
    Statistics.middle(x::A, y::A) = A(middle(x.x, y.x))
    Base.isless(x::A, y::A) = isless(x.x, y.x)
    @test median(SA[A(1), A(2)]) === A(1.5)
    @test median(SA{Any}[A(1), A(2)]) === A(1.5)
end

end
Loading