Skip to content

Commit

Permalink
implement Statistics.median
Browse files Browse the repository at this point in the history
This implements `Statistics.median` based on the existing bitonic
sorting, avoiding unnecessary allocation.
While it is generally suboptimal to sort the whole array, the compiler
manages to skip some branches since only the middle element(s) are used.
Thus `median` is generally faster than `sort`.

Using a dedicated median selection network could yield better
performance and might be considered for future improvement.
  • Loading branch information
stev47 committed Oct 29, 2024
1 parent b62e257 commit 2461b2f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
29 changes: 27 additions & 2 deletions ext/StaticArraysStatisticsExt.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
module StaticArraysStatisticsExt

import Statistics: mean
import Statistics: mean, median

using Base.Order: Forward, ord
using Statistics: median!, middle

using StaticArrays
using StaticArrays: _InitialValue, _reduce, _mapreduce
using StaticArrays: BitonicSort, _InitialValue, _reduce, _mapreduce, _bitonic_sort_limit, _sort

# Denominator used by `mean`: the number of elements that were summed up.
# Reducing over the whole array (`dims = :`) averages over every element.
function _mean_denom(a, ::Colon)
    return length(a)
end

# Reducing along a single integer dimension averages over that dimension's extent.
function _mean_denom(a, dims::Int)
    return size(a, dims)
end
Expand All @@ -12,4 +15,26 @@ _mean_denom(a, ::Val{D}) where {D} = size(a, D)
# Mean of the elements of `a` (over everything by default, or along `dims`):
# the `+`-reduction divided by the number of elements that were reduced.
@inline function mean(a::StaticArray; dims=:)
    total = _reduce(+, a, dims)
    return total / _mean_denom(a, dims)
end

# Same as above, but `f` is applied to every element before summation.
@inline function mean(f::Function, a::StaticArray; dims=:)
    total = _mapreduce(f, +, dims, _InitialValue(), Size(a), a)
    return total / _mean_denom(a, dims)
end

"""
    median(a::StaticVector; dims=:)

Compute the median of the static vector `a`.

Immutable vectors with at most `_bitonic_sort_limit` elements are converted to a
tuple, sorted via `_sort` with `BitonicSort`, and the middle element(s) are combined
with `Statistics.middle`. Every other vector is copied with `Base.copymutable` and
handed to `Statistics.median!`.

On the bitonic path the edge cases mirror `Statistics.median`:
- a vector whose element type admits `Missing` and that contains a `missing`
  yields `missing`;
- a vector containing a `NaN` number yields `convert(eltype(a), NaN)`.

# Keywords
- `dims`: with the default `:` the specialised implementation above is used. Any
  other value is forwarded unchanged to the generic
  `Statistics.median(::AbstractArray; dims)` method.

# Throws
- `ArgumentError` if `a` is empty (bitonic path; the fallback path relies on
  `median!` for its own checks).
"""
@inline function median(a::StaticVector; dims=:)
    # Keyword arguments do not take part in dispatch, so this method is also
    # selected for `median(a; dims=...)`. Without accepting `dims` here such calls
    # would fail with a `MethodError` instead of reaching the generic method.
    dims isa Colon ||
        return invoke(median, Tuple{AbstractArray}, a; dims=dims)

    # Mutable or long vectors: fall back to the in-place generic algorithm on a copy.
    (isimmutable(a) && length(a) <= _bitonic_sort_limit) ||
        return median!(Base.copymutable(a))

    # following Statistics.median
    isempty(a) &&
        throw(ArgumentError("median of empty vector is undefined, $(repr(a))"))
    eltype(a) >: Missing && any(ismissing, a) &&
        return missing
    any(x -> x isa Number && isnan(x), a) &&
        return convert(eltype(a), NaN)

    # Sort the whole vector as a tuple; only the middle element(s) are read below.
    order = ord(isless, identity, nothing, Forward)
    sa = _sort(Tuple(a), BitonicSort, order)

    n = length(a)
    # sa is 1-indexed
    return isodd(n) ?
        middle(sa[n ÷ 2 + 1]) :
        middle(sa[n ÷ 2], sa[n ÷ 2 + 1])
end

end # module
3 changes: 2 additions & 1 deletion src/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ const BitonicSort = BitonicSortAlg()

# BitonicSort has non-optimal asymptotic behaviour, so we define a cutoff
# length. This also prevents compilation time from skyrocketing for larger
# vectors.
const _bitonic_sort_limit = 20
# Default sorting algorithm for a static vector: `BitonicSort` when the vector is
# immutable and no longer than the cutoff, `QuickSort` otherwise.
# NOTE(review): this text is a diff view; the line using the literal `20` looks
# like the removed side and the `_bitonic_sort_limit` line its replacement — confirm.
defalg(a::StaticVector) =
isimmutable(a) && length(a) <= 20 ? BitonicSort : QuickSort
isimmutable(a) && length(a) <= _bitonic_sort_limit ? BitonicSort : QuickSort

@inline function sort(a::StaticVector;
alg::Algorithm = defalg(a),
Expand Down
16 changes: 16 additions & 0 deletions test/sort.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using StaticArrays, Test
using Statistics: median

@testset "sort" begin

Expand Down Expand Up @@ -30,4 +31,19 @@ using StaticArrays, Test
@test sortperm(SA[1, 1, 1, 0]) == SA[4, 1, 2, 3]
end

@testset "median" begin
    # Edge cases: empty input throws, `missing` and `NaN` propagate.
    @test_throws ArgumentError median(SA[])
    @test ismissing(median(SA[1, missing]))
    @test isnan(median(SA[1., NaN]))

    @testset for T in (Int, Float64)
        # Odd and even lengths; the sizes are presumably chosen to cover both the
        # bitonic path and the `median!` fallback for longer vectors.
        for N in (1, 2, 3, 10, 20, 30)
            v = rand(SVector{N,T})
            mref = median(Vector(v))

            # `@inferred` has to wrap the `median` call only. Wrapping the whole
            # comparison would check inference of `==` (always `Bool`) and never
            # exercise the return type of `median` itself.
            @test @inferred(median(v)) == mref
        end
    end
end

end

0 comments on commit 2461b2f

Please sign in to comment.