Skip to content

Commit

Permalink
implement Statistics.median
Browse files Browse the repository at this point in the history
This implements `Statistics.median` based on the existing bitonic
sorting, avoiding unnecessary allocation.
While it is generally suboptimal to sort the whole array, the compiler
manages to skip some branches since only the middle element(s) are used.
Thus `median` is generally faster than `sort`.

Using a dedicated median selection network could yield better
performance and might be considered for future improvement.
  • Loading branch information
stev47 committed Nov 29, 2021
1 parent fa17430 commit d1f0d28
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/StaticArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import Base: getindex, setindex!, size, similar, vec, show, length, convert, pro
iszero, sum, prod, count, any, all, minimum, maximum, extrema,
copy, read, read!, write, reverse

import Statistics: mean
import Statistics: mean, median, median!, middle

using Random
import Random: rand, randn, randexp, rand!, randn!, randexp!
Expand Down
26 changes: 25 additions & 1 deletion src/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ const BitonicSort = BitonicSortAlg()

# BitonicSort has non-optimal asymptotic behaviour, so we define a cutoff
# length. This also prevents compilation time from skyrocketing for larger vectors.
const _bitonic_sort_limit = 20
defalg(a::StaticVector) =
isimmutable(a) && length(a) <= 20 ? BitonicSort : QuickSort
isimmutable(a) && length(a) <= _bitonic_sort_limit ? BitonicSort : QuickSort

@inline function sort(a::StaticVector;
alg::Algorithm = defalg(a),
Expand Down Expand Up @@ -87,3 +88,26 @@ _sort(a::NTuple, alg, order) = sort!(Base.copymutable(a); alg=alg, order=order)
return ($(symlist...),)
end
end


"""
    median(a::StaticVector)

Compute the median of `a`.

Immutable vectors whose length does not exceed `_bitonic_sort_limit` are sorted
as a tuple with `BitonicSort` and the middle element(s) are combined with
`middle`. Every other vector is handed to `median!` on a mutable copy, so `a`
itself is not passed to the mutating routine.

On the bitonic path an `ArgumentError` is thrown for an empty vector, `missing`
is returned if any element is `missing`, and a `NaN` converted to `eltype(a)` is
returned if any numeric element is `NaN`.
"""
@inline function median(a::StaticVector)
    # Mutable or long vectors: defer to `median!` working on a copy.
    if !isimmutable(a) || length(a) > _bitonic_sort_limit
        return median!(Base.copymutable(a))
    end

    # The special cases below follow Statistics.median.
    if isempty(a)
        throw(ArgumentError("median of empty vector is undefined, $(repr(a))"))
    end
    if eltype(a) >: Missing && any(ismissing, a)
        return missing
    end
    if any(x -> x isa Number && isnan(x), a)
        return convert(eltype(a), NaN)
    end

    # Sort the elements as a tuple in ascending `isless` order.
    sorted = _sort(Tuple(a), BitonicSort, ord(isless, identity, nothing, Forward))

    # `sorted` is a tuple and therefore 1-indexed: `upper` is the middle element
    # for odd lengths and the upper of the two middle elements for even lengths.
    len = length(a)
    upper = len ÷ 2 + 1
    if isodd(len)
        return middle(sorted[upper])
    else
        return middle(sorted[upper - 1], sorted[upper])
    end
end
11 changes: 11 additions & 0 deletions test/sort.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using StaticArrays, Test
using Statistics: median

@testset "sort" begin

Expand Down Expand Up @@ -30,4 +31,14 @@ using StaticArrays, Test
@test sortperm(SA[1, 1, 1, 0]) == SA[4, 1, 2, 3]
end

@testset "median" begin
    # Short immutable vectors: handled by the bitonic-sort based method.
    @test @inferred(median(SA[1])) == 1.
    @test median(SA[1, 5]) == 3.
    @test median(SA[1, 5, 2]) == 2.
    # Even length with unsorted input: both middle elements must be found.
    @test median(SA[4, 1, 3, 2]) == 2.5

    # Special cases following Statistics.median.
    @test_throws ArgumentError median(SA[])
    @test ismissing(median(SA[1, missing]))
    @test isnan(median(SA[1., NaN]))

    # Mutable vectors take the `median!` fallback on a copy; the input must
    # come back unchanged.
    m = MVector(3, 1, 2)
    @test median(m) == 2.
    @test m == MVector(3, 1, 2)

    # An immutable vector with 25 elements (longer than the bitonic cutoff of
    # 20) also takes the fallback path.
    long = SVector(ntuple(i -> 26 - i, 25))
    @test median(long) == 13.
end

end

0 comments on commit d1f0d28

Please sign in to comment.