From 42c70a1a7f7b4ec4c12b4c99bb1cb8e22b7a6943 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 17 Oct 2022 17:56:59 +0600 Subject: [PATCH 01/29] initial functionality --- base/sort.jl | 946 +++++++++++++++++++++++++----------------------- test/sorting.jl | 34 +- 2 files changed, 515 insertions(+), 465 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index e7e767146abb6..edf946b6c24ab 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - sort!(v, _PartialQuickSort(k), o) + _sort!(v, _PartialQuickSort(k), o) maybeview(v, k) end @@ -407,36 +407,193 @@ function insorted end insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...)) insorted(x, r::AbstractRange) = in(x, r) -## sorting algorithms ## +## sorting algorithm components ## abstract type Algorithm end -struct InsertionSortAlg <: Algorithm end -struct MergeSortAlg <: Algorithm end -struct AdaptiveSortAlg <: Algorithm end -""" - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) -Indicate that a sorting function should use the partial quick sort algorithm. +# +# Missing values always go at the end +# +struct MissingOptimization{T <: Algorithm} <: Algorithm + next::T +end -Partial quick sort finds and sorts the elements that would end up in positions -`lo:hi` using [`QuickSort`](@ref). 
+struct WithoutMissingVector{T, U <: AbstractVector{Union{T, Missing}}} <: AbstractVector{T} + data::U + function WithoutMissingVector(data; unsafe=false) + if !unsafe && any(ismissing, data) + throw(ArgumentError("data must not contain missing values")) + end + new{nonmissingtype(eltype(data)), typeof(data)}(data) + end +end +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) + out = v.data[i] + # Parentheses are required: `!out isa Missing` parses as `(!out) isa Missing`. + @assert !(out isa Missing) + out::eltype(v) +end +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T + v.data[i] = x + v +end +Base.size(v::WithoutMissingVector) = size(v.data) -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +# TODO simplify this further, remove redundancy, try a reverse view. """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}} <: Algorithm - lo::L - hi::H + send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering; lo, hi) + +Send every element of `v[lo:hi]` for which `f` returns `true` to the end of that range +(to the start of the range when `o` is a `ReverseOrdering`) and return two `(first, last)` +index tuples: the span now holding the elements for which `f` returns `false`, followed +by the span holding the elements for which `f` returns `true`. + +Preserves the order of the elements that are not sent to the end. 
+""" +function send_to_end!(f::Function, v::AbstractVector, ::ReverseOrdering; lo, hi) + i = hi + @inbounds while lo <= i && !f(v[i]) + i -= 1 + end + j = i - 1 + @inbounds while lo <= j + if !f(v[j]) + v[i], v[j] = v[j], v[i] + i -= 1 + end + j -= 1 + end + return (i+1, hi), (lo, i) +end +function send_to_end!(f::Function, v::AbstractVector, ::ForwardOrdering; lo, hi) + i = lo + @inbounds while i <= hi && !f(v[i]) + i += 1 + end + j = i + 1 + @inbounds while j <= hi + if !f(v[j]) + v[i], v[j] = v[j], v[i] + i += 1 + end + j += 1 + end + return (lo, i-1), (i, hi) +end + +function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering; + lo=firstindex(v), hi=lastindex(v), kw...) + if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering + (lo, hi), _ = send_to_end!(ismissing, v, o; lo, hi) + _sort!(WithoutMissingVector(v, unsafe=true), a.next, o; lo, hi, kw...) + v + elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) + (lo, hi), _ = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)); lo, hi, kw...) + else + _sort!(v, a.next, o; lo, hi, kw...) + end +end + + + +# +# fast clever sorting for floats +# +struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm + next::T +end + +UIntType(::Type{Float16}) = UInt16 +UIntType(::Type{Float32}) = UInt32 +UIntType(::Type{Float64}) = UInt64 +# Classify by sign bit rather than by comparison with zero so that -0.0 is grouped with +# the negative values; its bit pattern (sign bit set) only orders correctly there. +after_zero(::ForwardOrdering, x) = !signbit(x) +after_zero(::ReverseOrdering, x) = signbit(x) +is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) +function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering; + lo=firstindex(v), hi=lastindex(v), kw...) 
+ if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering + _, (lo, hi) = send_to_end!(!isnan, v, ReverseOrdering(o); lo, hi) + iv = reinterpret(UIntType(eltype(v)), v) + (_, j), _ = send_to_end!(x -> after_zero(o, x), v, Forward; lo, hi) + _sort!(iv, a.next, Reverse; lo, hi=j, kw...) + _sort!(iv, a.next, Forward; lo=j+1, hi, kw...) + elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) + _, (lo, hi) = send_to_end!(i -> !isnan(@inbounds o.data[i]), v, ReverseOrdering(o.order); lo, hi) + ip = reinterpret(UIntType(eltype(o.data)), o.data) + (_, j), _ = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v, Forward; lo, hi) + _sort!(v, a.next, Perm(Reverse, ip); lo, hi=j, kw...) + _sort!(v, a.next, Perm(Forward, ip); lo=j+1, hi, kw...) + else + _sort!(v, a.next, o; lo, hi, kw...) + end + v +end + + + +# For AbstractVector{Bool}, counting sort is always best. +# This is an implementation of counting sort specialized for Bools. +# Accepts unused scratch to avoid method ambiguity. +struct BoolOptimization{T <: Algorithm} <: Algorithm + next::T +end +_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) +function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering; lo::Integer, hi::Integer, kw...) + first = lt(o, false, true) ? false : lt(o, true, false) ? 
true : return v + count = 0 + @inbounds for i in lo:hi + if v[i] == first + count += 1 + end + end + @inbounds v[lo:lo+count-1] .= first + @inbounds v[lo+count:hi] .= !first + v end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k)) -_PartialQuickSort(k::Integer) = PartialQuickSort(k, k) -_PartialQuickSort(k::OrdinalRange) = PartialQuickSort(k) + + + +# +# +# +struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm + yes::T + no::U +end +function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering; + U = UIntMappable(eltype(v), o), kw...) + if U !== nothing + _sort!(v, a.yes, o; U, kw...) + else + _sort!(v, a.no, o; kw...) + end +end + + + +# +# +# +struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm + small::T + big::U +end +Small{N}(big) where N = Small{N, typeof(SMALL_ALGORITHM), typeof(big)}(SMALL_ALGORITHM, big) +function _sort!(v::AbstractVector, a::Small{N}, o::Ordering; + lo::Integer=firstindex(v), hi::Integer=lastindex(v), lenm1 = hi-lo, kw...) where N + if lenm1 < N + _sort!(v, a.small, o; lo, hi, lenm1, kw...) + else + _sort!(v, a.big, o; lo, hi, lenm1, kw...) + end +end + + + +# +# +# +struct InsertionSortAlg <: Algorithm end """ InsertionSort @@ -455,79 +612,241 @@ Characteristics: it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() +const SMALL_ALGORITHM = InsertionSort +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering; + lo=firstindex(v), hi=lastindex(v), kw...) 
+ lo_plus_1 = (lo + 1)::Integer + @inbounds for i = lo_plus_1:hi + j = i + x = v[i] + while j > lo + y = v[j-1] + if !(lt(o, x, y)::Bool) + break + end + v[j] = y + j -= 1 + end + v[j] = x + end + return v +end + + + +# +# +# +struct CheckSorted{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; + lo=firstindex(v), hi=lastindex(v), lenm1 = hi-lo, kw...) + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large + # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted + # input and this guarantees presorted input will always be efficiently handled + _issorted(v, lo, hi, o) && return v + + # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) + if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + # If reversing is valid, do so. This does not violate stability + # because being UIntMappable implies a linear order. + reverse!(v, lo, hi) + return v + end + + _sort!(v, a.next, o; lo, hi, lenm1, kw...) +end + + + +# +# Prerequisite: region to be sorted [lo, hi] is nonempty +# +struct ComputeExtrema{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering; + lo=firstindex(v), hi=lastindex(v), kw...) + mn = mx = v[lo] + @inbounds for i in (lo+1):hi + vi = v[i] + lt(o, vi, mn) && (mn = vi) + lt(o, mx, vi) && (mx = vi) + end + mn, mx + + lt(o, mn, mx) || return v # all same + + _sort!(v, a.next, o; lo, hi, mn, mx, kw...) +end + + + +# +# Consider counting sort +# +struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm + counting::T + next::U +end +ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) +function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering; + lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, + U = UIntMappable(eltype(v), o), + mn, mx, range=maybe_unsigned(o === Reverse ? mn-mx : mx-mn), kw...) 
+ + if range < (sizeof(U) > 8 ? 5lenm1-100 : div(lenm1, 2)) + _sort!(v, a.counting, o; lo, hi, lenm1, mn, mx, range, kw...) + else + _sort!(v, a.next, o; lo, hi, lenm1, mn, mx, range, kw...) + end +end +_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) + + + +# +# Counting sort +# +struct CountingSort <: Algorithm end +maybe_reverse(o::ForwardOrdering, x) = x +maybe_reverse(o::ReverseOrdering, x) = reverse(x) +function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering; + lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, + mn, mx, range=maybe_unsigned(o === Reverse ? mn-mx : mx-mn), kw...) + offs = 1 - (o === Reverse ? mx : mn) + + counts = fill(0, range+1) + @inbounds for i = lo:hi + counts[v[i] + offs] += 1 + end + + idx = lo + @inbounds for i = maybe_reverse(o, 1:range+1) + lastidx = idx + counts[i] - 1 + val = i-offs + for j = idx:lastidx + v[j] = val + end + idx = lastidx + 1 + end + + v +end + + + +# +# Consider radix sort +# +struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm + radix::T + next::U +end +ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) +function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering; + lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, + U = UIntMappable(eltype(v), o), + mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, + bits = unsigned(8sizeof(urange) - leading_zeros(urange)), kw...) + if sizeof(U) <= 8 && bits+70 < 22log(lenm1) + _sort!(v, a.radix, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) + else + _sort!(v, a.next, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) 
+ end +end + + +# +# Radix sort +# +struct RadixSort <: Algorithm end +function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; + lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, + mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, + bits = unsigned(8sizeof(urange) - leading_zeros(urange)), + U = UIntMappable(eltype(v), o), scratch=nothing, kw...) + + # At this point, we are committed to radix sort. + u = uint_map!(v, lo, hi, o) + + # we subtract umn to avoid radixing over unnecessary bits. For example, + # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] + # which uses all 32 bits, but once we subtract umn = 0x7fffffff, we are left with + # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and + # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] + # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. + # the overhead for this subtraction is small enough that it is worthwhile in many cases. + + # this is faster than u[lo:hi] .-= umn as of v1.9.0-DEV.100 + @inbounds for i in lo:hi + u[i] -= umn + end + + len = lenm1 + 1 + if scratch !== nothing && checkbounds(Bool, scratch, lo:hi) # Fully preallocated and aligned scratch + u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, scratch)) + uint_unmap!(v, u2, lo, hi, o, umn) + elseif scratch !== nothing && (applicable(resize!, scratch, len) || length(scratch) >= len) # Viable scratch + length(scratch) >= len || resize!(scratch, len) + t1 = axes(scratch, 1) isa OneTo ? 
scratch : view(scratch, firstindex(scratch):lastindex(scratch)) + u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1)) + uint_unmap!(view(v, lo:hi), u2, 1, len, o, umn) + else # No viable scratch + u2 = radix_sort!(u, lo, hi, bits, similar(u)) + uint_unmap!(v, u2, lo, hi, o, umn) + end +end + + + +# +# Quicksort +# """ - QuickSort + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) -Indicate that a sorting function should use the quick sort algorithm. +Indicate that a sorting function should use the partial quick sort algorithm. -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. +Partial quick sort finds and sorts the elements that would end up in positions +`lo:hi` using [`QuickSort`](@ref). Characteristics: * *stable*: preserves the ordering of elements which compare equal (e.g. "a" and "A" in a sort of letters which ignores case). * *not in-place* in memory. * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for almost all large collections. - * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) """ -const QuickSort = PartialQuickSort(missing, missing) +struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm + lo::L + hi::H + next::T +end +PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(missing, k, SMALL_ALGORITHM)) +PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(first(k), last(k), SMALL_ALGORITHM)) +_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) +_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) """ - MergeSort + QuickSort -Indicate that a sorting function should use the merge sort algorithm. +Indicate that a sorting function should use the quick sort algorithm. 
-Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Quick sort picks a pivot element, partitions the array based on the pivot, +and then sorts the elements before and after the pivot recursively. Characteristics: - * *stable*: preserves the ordering of elements which compare - equal (e.g. "a" and "A" in a sort of letters which ignores - case). + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). * *not in-place* in memory. - * *divide-and-conquer* sort strategy. -""" -const MergeSort = MergeSortAlg() - -""" - AdaptiveSort - -Indicate that a sorting function should use the fastest available stable algorithm. - -Currently, AdaptiveSort uses - * [`InsertionSort`](@ref) for short vectors - * [`QuickSort`](@ref) for vectors that are not [`UIntMappable`](@ref) - * Radix sort for long vectors - * Counting sort for vectors of integers spanning a short range + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for almost all large collections. 
+ * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) """ -const AdaptiveSort = AdaptiveSortAlg() - -const DEFAULT_UNSTABLE = AdaptiveSort -const DEFAULT_STABLE = AdaptiveSort -const SMALL_ALGORITHM = InsertionSort -const SMALL_THRESHOLD = 20 - -function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, o::Ordering) - lo_plus_1 = (lo + 1)::Integer - @inbounds for i = lo_plus_1:hi - j = i - x = v[i] - while j > lo - y = v[j-1] - if !(lt(o, x, y)::Bool) - break - end - v[j] = y - j -= 1 - end - v[j] = x - end - return v -end +const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) # select a pivot for QuickSort # @@ -570,20 +889,10 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v: pivot, lo-trues end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, - o::Ordering, t::AbstractVector=similar(v), swap=false, rev=false; - check_presorted=true) - - if check_presorted && !rev && !swap - # Even if we are only sorting a short region, we can only short-circuit if the whole - # vector is presorted. A weaker condition is possible, but unlikely to be useful. - if _issorted(v, lo, hi, o) - return v - elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) - # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. - return reverse!(v, lo, hi) - end - end +function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering; + lo=firstindex(v), hi=lastindex(v), scratch=similar(v), + t=reinterpret(eltype(v), checkbounds(Bool, scratch, lo:hi) ? scratch : resize!(scratch, length(v))), + swap=false, rev=false, kw...) while lo < hi && hi - lo > SMALL_THRESHOLD pivot, j = swap ? 
partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) @@ -603,62 +912,41 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, elseif j-lo < hi-j # Sort the lower part recursively because it is smaller. Recursing on the # smaller part guarantees O(log(n)) stack space even on pathological inputs. - sort!(v, lo, j-1, a, o, t, swap, rev; check_presorted=false) + _sort!(v, a, o; lo, hi=j-1, scratch, t, swap, rev, kw...) lo = j+1 rev = !rev else # Sort the higher part recursively - sort!(v, j+1, hi, a, o, t, swap, !rev; check_presorted=false) + _sort!(v, a, o; lo=j+1, hi, scratch, t, swap, rev=!rev, kw...) hi = j-1 end end hi < lo && return v swap && copyto!(v, lo, t, lo, hi-lo+1) rev && reverse!(v, lo, hi) - sort!(v, lo, hi, SMALL_ALGORITHM, o) + _sort!(v, a.next, o; lo, hi, scratch, t, kw...) end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, - t0::Union{AbstractVector{T}, Nothing}=nothing) where T - @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - m = midpoint(lo, hi) - - t = t0 === nothing ? similar(v, m-lo+1) : t0 - length(t) < m-lo+1 && resize!(t, m-lo+1) - require_one_based_indexing(t) - - sort!(v, lo, m, a, o, t) - sort!(v, m+1, hi, a, o, t) - - i, j = 1, lo - while j <= m - t[i] = v[j] - i += 1 - j += 1 - end - i, k = 1, lo - while k < j <= hi - if lt(o, v[j], t[i]) - v[k] = v[j] - j += 1 - else - v[k] = t[i] - i += 1 - end - k += 1 - end - while k < j - v[k] = t[i] - k += 1 - i += 1 - end +# +# StableCheckSorted +# +struct StableCheckSorted{T<:Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering; + lo=firstindex(v), hi=lastindex(v), kw...) + if _issorted(v, lo, hi, o) + return v + elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) + # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. 
+ return reverse!(v, lo, hi) end - return v + _sort!(v, a.next, o; lo, hi, kw...) end + # This is a stable least significant bit first radix sort. # # That is, it first sorts the entire vector by the last chunk_size bits, then by the second @@ -725,23 +1013,6 @@ function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) UInt8(cld(bits, cld(bits, guess))) end -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools. -# Accepts unused scratch space to avoid method ambiguity. -function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{Bool}, Nothing}=nothing) - first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v - count = 0 - @inbounds for i in lo:hi - if v[i] == first - count += 1 - end - end - @inbounds v[lo:lo+count-1] .= first - @inbounds v[lo+count:hi] .= !first - v -end - maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt maybe_unsigned(x::BitSigned) = unsigned(x) function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) @@ -760,130 +1031,20 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) end true end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! - # so we skip straight to the fallback algorithm which is comparison based. 
- U = UIntMappable(eltype(v), o) - U === nothing && return sort!(v, lo, hi, QuickSort, o) - - # to avoid introducing excessive detection costs for the trivial sorting problem - # and to avoid overflow, we check for small inputs before any other runtime checks - hi <= lo && return v - lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow - # only count sort on a short range can compete with insertion sort when lenm1 < 40 - # and the optimization is not worth the detection cost, so we use insertion sort. - lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - - # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled - _issorted(v, lo, hi, o) && return v - - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) - if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) - # If reversing is valid, do so. This does not violate stability - # because being UIntMappable implies a linear order. - reverse!(v, lo, hi) - return v - end - - # UInt128 does not support fast bit shifting so we never - # dispatch to radix sort but we may still perform count sort - if sizeof(U) > 8 - if T <: Integer && o isa DirectOrdering - v_min, v_max = _extrema(v, lo, hi, Forward) - v_range = maybe_unsigned(v_max-v_min) - v_range == 0 && return v # all same - - # we know lenm1 ≥ 40, so this will never underflow. - # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback - if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small - return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? 
identity : reverse, lo, hi) - end - end - return sort!(v, lo, hi, QuickSort, o; check_presorted=false) - end - - v_min, v_max = _extrema(v, lo, hi, o) - lt(o, v_min, v_max) || return v # all same - if T <: Integer && o isa DirectOrdering - R = o === Reverse - v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min) - if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small - return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi) - end - end - - u_min, u_max = uint_map(v_min, o), uint_map(v_max, o) - u_range = maybe_unsigned(u_max-u_min) - if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small - u = uint_map!(v, lo, hi, o) - sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi) - return uint_unmap!(v, u, lo, hi, o) - end - - # if u's range is small, then once we subtract out v_min, we'll get a vector like - # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009] - # where we only need to radix over the last few bits (5, in the example). - bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) - - # radix sort runs in O(bits * lenm1), quick sort runs in O(lenm1 * log(lenm1)). - # dividing both sides by lenm1 and introducing empirical constant factors yields - # the following heuristic for when QuickSort is faster than RadixSort - if 22log(lenm1) < bits + 70 - return if lenm1 > 80 - sort!(v, lo, hi, QuickSort, o; check_presorted=false) - else - sort!(v, lo, hi, SMALL_ALGORITHM, o) - end - end - # At this point, we are committed to radix sort. - u = uint_map!(v, lo, hi, o) - - # we subtract u_min to avoid radixing over unnecessary bits. 
For example, - # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] - # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with - # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and - # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] - # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. - # the overhead for this subtraction is small enough that it is worthwhile in many cases. +## default sorting policy ## - # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100 - @inbounds for i in lo:hi - u[i] -= u_min - end +InitialOptimizations(x) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(x)))) +const DEFAULT_STABLE = InitialOptimizations(IsUIntMappable( + Small{40}(CheckSorted(ComputeExtrema(ConsiderCountingSort(ConsiderRadixSort(Small{80}(QuickSort)))))), + StableCheckSorted(QuickSort))) +const DEFAULT_UNSTABLE = DEFAULT_STABLE +const SMALL_THRESHOLD = 20 - len = lenm1 + 1 - if t !== nothing && checkbounds(Bool, t, lo:hi) # Fully preallocated and aligned scratch space - u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, t)) - uint_unmap!(v, u2, lo, hi, o, u_min) - elseif t !== nothing && (applicable(resize!, t, len) || length(t) >= len) # Viable scratch space - length(t) >= len || resize!(t, len) - t1 = axes(t, 1) isa OneTo ? 
t : view(t, firstindex(t):lastindex(t)) - u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1)) - uint_unmap!(view(v, lo:hi), u2, 1, len, o, u_min) - else # No viable scratch space - u2 = radix_sort!(u, lo, hi, bits, similar(u)) - uint_unmap!(v, u2, lo, hi, o, u_min) - end -end -## generic sorting methods ## defalg(v::AbstractArray) = DEFAULT_STABLE -function sort!(v::AbstractVector{T}, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, firstindex(v), lastindex(v), alg, order, t) -end - -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, lo, hi, alg, order) -end - """ sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -931,30 +1092,11 @@ function sort!(v::AbstractVector{T}; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, alg, ord(lt,by,rev,order), scratch) -end - -# sort! 
for vectors of few unique integers -function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse, - lo=firstindex(x), hi=lastindex(x)) - offs = 1 - minval - - counts = fill(0, rangelen) - @inbounds for i = lo:hi - counts[x[i] + offs] += 1 - end - - idx = lo - @inbounds for i = maybereverse(1:rangelen) - lastidx = idx + counts[i] - 1 - val = i-offs - for j = idx:lastidx - x[j] = val - end - idx = lastidx + 1 + if scratch === nothing # TODO: reduce redundancy + _sort!(v, alg, ord(lt,by,rev,order)) + else + _sort!(v, alg, ord(lt,by,rev,order); scratch) end - - return x end """ @@ -1080,7 +1222,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) maybeview(ix, k) end @@ -1288,13 +1430,22 @@ function sort(A::AbstractArray{T}; end end -@noinline function sort_chunks!(Av, n, alg, order, t) +@noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) - for s = first(inds):n:last(inds) - sort!(Av, s, s+n-1, alg, order, t) + for lo = first(inds):n:last(inds) + _sort!(Av, alg, order; lo, hi=lo+n-1, scratch) end Av end +# TODO: reduce redundancy +@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing) + inds = LinearIndices(Av) + for lo = first(inds):n:last(inds) + _sort!(Av, alg, order; lo, hi=lo+n-1) + end + Av +end + """ sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -1346,7 +1497,7 @@ function _sort!(A::AbstractArray{T}, ::Val{K}, remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd) for idx in CartesianIndices(remdims) Av = view(A, ntuple(i -> i == K ? Colon() : idx[i], nd)...) 
- sort!(Av, alg, order, scratch) + sort!(Av; alg, order, scratch) end A end @@ -1437,167 +1588,72 @@ function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::I end -## fast clever sorting for floats ## - -module Float -using ..Sort -using ...Order -using Base: IEEEFloat - -import Core.Intrinsics: slt_int -import ..Sort: sort!, UIntMappable, uint_map, uint_unmap -import ...Order: lt, DirectOrdering - -# fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. -# This type allows us to dispatch only when it is safe to do so. See #42739 for more info. -const FPSortable = Union{ - AbstractVector{Union{Float16, Missing}}, - AbstractVector{Union{Float32, Missing}}, - AbstractVector{Union{Float64, Missing}}, - AbstractVector{Float16}, - AbstractVector{Float32}, - AbstractVector{Float64}, - AbstractVector{Missing}} - -struct Left <: Ordering end -struct Right <: Ordering end - -left(::DirectOrdering) = Left() -right(::DirectOrdering) = Right() - -left(o::Perm) = Perm(left(o.order), o.data) -right(o::Perm) = Perm(right(o.order), o.data) - -lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y) - -uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u) -uint_map(x::Float16, ::Right) = reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Right) = reinterpret(Float16, u) -UIntMappable(::Type{Float16}, ::Union{Left, Right}) = UInt16 - -uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u) -uint_map(x::Float32, ::Right) = reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u) -UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32 - -uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Left) = 
reinterpret(Float64, ~u) -uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) -UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 - -isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x) -isnan(o::DirectOrdering, x::Missing) = false -isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) - -ismissing(o::DirectOrdering, x::IEEEFloat) = false -ismissing(o::DirectOrdering, x::Missing) = true -ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) - -allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing -allowsmissing(::AbstractVector{<:Integer}, - ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} = - T >: Missing - -function specials2left!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = lo - @inbounds while i <= hi && testf(o,v[i]) - i += 1 - end - j = i + 1 - @inbounds while j <= hi - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i += 1 - end - j += 1 - end - return i, hi + +### Unused ### + +struct MergeSortAlg{T <: Algorithm} <: Algorithm + next::T end -function specials2right!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = hi - @inbounds while lo <= i && testf(o,v[i]) - i -= 1 - end - j = i - 1 - @inbounds while lo <= j - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i -= 1 +""" + MergeSort + +Indicate that a sorting function should use the merge sort algorithm. + +Merge sort divides the collection into subcollections and +repeatedly merges them, sorting each subcollection at each step, +until the entire collection has been recombined in sorted form. + +Characteristics: + * *stable*: preserves the ordering of elements which compare + equal (e.g. "a" and "A" in a sort of letters which ignores + case). + * *not in-place* in memory. + * *divide-and-conquer* sort strategy. 
+""" +const MergeSort = MergeSortAlg(SMALL_ALGORITHM) + + +function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering; + lo=firstindex(v), hi=lastindex(v), scratch=nothing) + @inbounds if lo < hi + hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o; lo, hi) + + m = midpoint(lo, hi) + + t = scratch === nothing ? similar(v, m-lo+1) : scratch + length(t) < m-lo+1 && resize!(t, m-lo+1) + Base.require_one_based_indexing(t) + + _sort!(v, a, o; lo, hi=m, scratch=t) + _sort!(v, a, o; lo=m+1, hi, scratch=t) + + i, j = 1, lo + while j <= m + t[i] = v[j] + i += 1 + j += 1 end - j -= 1 - end - return lo, i -end -function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, lo, i-1, a, o) - return i, hi - else - return specials2left!(isnan, v, o, lo, hi) - end -end -function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, i+1, hi, a, o) - return lo, i - else - return specials2right!(isnan, v, o, lo, hi) + i, k = 1, lo + while k < j <= hi + if lt(o, v[j], t[i]) + v[k] = v[j] + j += 1 + else + v[k] = t[i] + i += 1 + end + k += 1 + end + while k < j + v[k] = t[i] + k += 1 + i += 1 + end end -end -specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) = - specials2left!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = - specials2left!(v, a, o) - -issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x)) -issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x)) 
-issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) - -function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). - # The overhead is O(n). For n < 10, it's not worth it. - length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t) - - i, j = lo, hi = specials2end!(v,a,o) - @inbounds while true - while i <= j && issignleft(o,v[i]); i += 1; end - while i <= j && !issignleft(o,v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 - end - sort!(v, lo, j, a, left(o), t) - sort!(v, i, hi, a, right(o), t) return v end -function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering, - t::Union{FPSortable, Nothing}=nothing) - fpsort!(v, a, o, t) -end -function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}, - t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} - fpsort!(v, a, o, t) -end - -end # module Sort.Float - end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 4a0299b2217c2..bd22c62bab6fc 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,8 +79,9 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729), PartialQuickSort(1729, missing)] + for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, + PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), + PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -534,11 +535,11 @@ end @test issorted(a) a = view([9:-1:0;], :)::SubArray - Base.Sort.sort_int_range!(a, 10, 0, identity) # test it supports non-Vector + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, mn=0, mx=9) # test it 
supports non-Vector @test issorted(a) a = OffsetArray([9:-1:0;], -5) - Base.Sort.sort_int_range!(a, 10, 0, identity) + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, mn=0, mx=9) @test issorted(a) end @@ -632,9 +633,9 @@ end @testset "uint mappings" begin #Construct value lists - floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, - prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))] - for T in [Float16, Float32, Float64]] + floats = [reinterpret(U, vcat(T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, + prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))], randnans(4))) + for (U, T) in [(UInt16, Float16), (UInt32, Float32), (UInt64, Float64)]] ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1] for T in Base.BitInteger_types] @@ -650,21 +651,18 @@ end UIntN(::Val{8}) = UInt64 UIntN(::Val{16}) = UInt128 map(vals) do x + x isa Base.ReinterpretArray && return T = eltype(x) U = UIntN(Val(sizeof(T))) append!(x, rand(T, 4)) append!(x, reinterpret.(T, rand(U, 4))) - if T <: AbstractFloat - mask = reinterpret(U, T(NaN)) - append!(x, reinterpret.(T, mask .| rand(U, 4))) - end end for x in vals T = eltype(x) U = UIntN(Val(sizeof(T))) - for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)] - if order isa Base.Order.By || ((T <: AbstractFloat) == (order isa DirectOrdering)) + for order in [Forward, Reverse, By(Forward, identity)] + if order isa Base.Order.By @test Base.Sort.UIntMappable(T, order) === nothing continue end @@ -681,10 +679,6 @@ end for a in x for b in x - if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right() - # Left and Right orderings guarantee homogeneous sign and no NaNs - (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue - end @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order)) end end 
@@ -705,7 +699,7 @@ end # Nevertheless, it still works... for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -716,7 +710,7 @@ end # this invalid lt order. perm = reverse(sortperm(v, rev=true)) for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @@ -724,7 +718,7 @@ end # lt can be very poorly behaved and sort will still permute its input in some way. for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 From 61e4006e0d1a37bf1d0e86d9de8920104deb1a36 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 29 Oct 2022 16:32:06 +0600 Subject: [PATCH 02/29] support 5- and 3-argument sort! for backwards compatability --- base/sort.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/sort.jl b/base/sort.jl index edf946b6c24ab..4d7c260324b14 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1655,5 +1655,8 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering; return v end +# Support 3- and 5-argument version of sort! 
for backwards compatability +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = _sort!(v, a, o) +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) = _sort!(v, a, o; lo, hi) end # module Sort From 901182cfb87d613fb790a69a79c568e474bd2e44 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 17 Oct 2022 18:34:53 +0600 Subject: [PATCH 03/29] test for bug that slipped through test suite --- test/sorting.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/sorting.jl b/test/sorting.jl index bd22c62bab6fc..95c303774b661 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -764,6 +764,10 @@ end end end +@testset "Unions with missing" begin + @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From e032ba6247bc56bca92ce2c90ec96cee58390d31 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 17 Oct 2022 18:34:59 +0600 Subject: [PATCH 04/29] fix bug --- base/sort.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 4d7c260324b14..9c914aeb88d4e 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -431,7 +431,7 @@ struct WithoutMissingVector{T, U <: AbstractVector{Union{T, Missing}}} <: Abstra end Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) out = v.data[i] - @assert !out isa Missing + @assert !(out isa Missing) out::eltype(v) end Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T @@ -750,7 +750,7 @@ function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering; U = UIntMappable(eltype(v), o), mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, bits = unsigned(8sizeof(urange) - leading_zeros(urange)), kw...) 
- if sizeof(U) <= 8 && bits+70 < 22log(lenm1) + if sizeof(U) <= 8 && bits+70 < 22log(lenm1) # TODO there are some unexpected allocations here _sort!(v, a.radix, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) else _sort!(v, a.next, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) From e6cfee0ce46fc992a45227ed98ef500471c1166c Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Thu, 20 Oct 2022 17:36:45 +0600 Subject: [PATCH 05/29] make send_to_end more human friendly (and less compiler friendly! introduces regressions.) --- base/sort.jl | 55 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 9c914aeb88d4e..d39818e87c8fd 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -440,31 +440,17 @@ Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x:: end Base.size(v::WithoutMissingVector) = size(v.data) -# TODO simplify this further, remove redundancy, try a reverse view. """ - send_to_end!(f::Function, v::AbstractVector) + send_to_end!(f::Function, v::AbstractVector; [lo, hi]) -Send every element of `v` for which `f` returns `true` to the end of that range and return -the number of elements index of the last element which for which `f` returns `false`. +Send every element of `v` for which `f` returns `true` to the end of the vector and return +the index of the last element for which `f` returns `false`. + +`send_to_end!(f, v; lo, hi)` is equivalent to `send_to_end!(f, view(v, lo:hi))+lo-1` Preserves the order of the elements that are not sent to the end.
""" -function send_to_end!(f::Function, v::AbstractVector, ::ReverseOrdering; lo, hi) - i = hi - @inbounds while lo <= i && !f(v[i]) - i -= 1 - end - j = i - 1 - @inbounds while lo <= j - if !f(v[j]) - v[i], v[j] = v[j], v[i] - i -= 1 - end - j -= 1 - end - return (i+1, hi), (lo, i) -end -function send_to_end!(f::Function, v::AbstractVector, ::ForwardOrdering; lo, hi) +function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function i = lo @inbounds while i <= hi && !f(v[i]) i += 1 @@ -477,17 +463,32 @@ function send_to_end!(f::Function, v::AbstractVector, ::ForwardOrdering; lo, hi) end j += 1 end - return (lo, i-1), (i, hi) + i - 1 end +""" + send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi) + +Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end. + +If `o isa ReverseOrdering` then the "end" of `v` is `v[lo]`. + +If `end_stable` is set, the elements that are sent to the end are stable instead of the +elements that are not +""" +@inline send_to_end!(f::F, v::AbstractVector, ::ForwardOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (lo, hi-send_to_end!(!f, view(v, hi:-1:lo))) : (lo, send_to_end!(f, v; lo, hi)) +@inline send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi) + function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering; lo=firstindex(v), hi=lastindex(v), kw...) if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering - (lo, hi), _ = send_to_end!(ismissing, v, o; lo, hi) + lo, hi = send_to_end!(ismissing, v, o; lo, hi) _sort!(WithoutMissingVector(v, unsafe=true), a.next, o; lo, hi, kw...) 
v elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) - (lo, hi), _ = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) + lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)); lo, hi, kw...) else _sort!(v, a.next, o; lo, hi, kw...) @@ -512,15 +513,15 @@ is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering; lo=firstindex(v), hi=lastindex(v), kw...) if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering - _, (lo, hi) = send_to_end!(!isnan, v, ReverseOrdering(o); lo, hi) + lo, hi = send_to_end!(isnan, v, o, true; lo, hi) iv = reinterpret(UIntType(eltype(v)), v) - (_, j), _ = send_to_end!(x -> after_zero(o, x), v, Forward; lo, hi) + j = send_to_end!(x -> after_zero(o, x), v; lo, hi) _sort!(iv, a.next, Reverse; lo, hi=j, kw...) _sort!(iv, a.next, Forward; lo=j+1, hi, kw...) elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) - _, (lo, hi) = send_to_end!(i -> !isnan(@inbounds o.data[i]), v, ReverseOrdering(o.order); lo, hi) + lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) ip = reinterpret(UIntType(eltype(o.data)), o.data) - (_, j), _ = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v, Forward; lo, hi) + j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) _sort!(v, a.next, Perm(Reverse, ip); lo, hi=j, kw...) _sort!(v, a.next, Perm(Forward, ip); lo=j+1, hi, kw...) 
else From f16058211d8dc677d279f1ad901a1e59ded623f8 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 29 Oct 2022 11:51:35 +0600 Subject: [PATCH 06/29] Give each sorting pass and DEFAULT_STABLE a docstring --- base/sort.jl | 286 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 210 insertions(+), 76 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index d39818e87c8fd..7216b97226f35 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -412,10 +412,15 @@ insorted(x, r::AbstractRange) = in(x, r) abstract type Algorithm end +""" + MissingOptimization(next) <: Algorithm -# -# Missing values always go at the end -# +Filter out missing values. + +Missing values are placed after other values according to `DirectOrdering`s. This pass puts +them there and passes on a view into the original vector that excludes the missing values. +This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`. +""" struct MissingOptimization{T <: Algorithm} <: Algorithm next::T end @@ -496,10 +501,16 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering; end +""" + IEEEFloatOptimization(next) <: Algorithm -# -# fast clever sorting for floats -# +Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. + +IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as +unsigned integers with the same bits, with a few exceptions that this pass handles. + +This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`. +""" struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm next::T end @@ -531,10 +542,14 @@ function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering; end +""" + BoolOptimization(next) <: Algorithm + +Sort `AbstractVector{Bool}`s using a specialized version of counting sort. -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools.
-# Accepts unused scratch to avoid method ambiguity. +Accesses each element at most twice (one read and one write), and performs at most two +comparisons. +""" struct BoolOptimization{T <: Algorithm} <: Algorithm next::T end @@ -553,10 +568,15 @@ function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering; lo::In end +""" + IsUIntMappable(yes, no) <: Algorithm -# -# -# +Determines if the elements of a vector can be mapped to unsigned integers while preserving +their order under the specified ordering. + +If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that +the elements may be mapped to. Otherwise dispatch to the `no` algorithm. +""" struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm yes::T no::U @@ -571,10 +591,12 @@ function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering; end +""" + Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm -# -# -# +Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` +algorithm. +""" struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm small::T big::U @@ -590,27 +612,21 @@ function _sort!(v::AbstractVector, a::Small{N}, o::Ordering; end - -# -# -# struct InsertionSortAlg <: Algorithm end - """ - InsertionSort + InsertionSort -Indicate that a sorting function should use the insertion sort algorithm. +Use the insertion sort algorithm. Insertion sort traverses the collection one element at a time, inserting each element into its correct, sorted position in the output vector. Characteristics: - * *stable*: preserves the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters - which ignores case). - * *in-place* in memory. - * *quadratic performance* in the number of elements to be sorted: - it is well-suited to small collections but should not be used for large ones. +* *stable*: preserves the ordering of elements which compare equal +(e.g. "a" and "A" in a sort of letters which ignores case).
+* *in-place* in memory. +* *quadratic performance* in the number of elements to be sorted: +it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() const SMALL_ALGORITHM = InsertionSort @@ -634,24 +650,24 @@ function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering; end +""" + CheckSorted(next) <: Algorithm -# -# -# +Check if the input is already sorted and for large inputs, also check if it is +reverse-sorted. The reverse-sorted check is unstable. +""" struct CheckSorted{T <: Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; lo=firstindex(v), hi=lastindex(v), lenm1 = hi-lo, kw...) # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled + # arrays it is essentially free (<1%). _issorted(v, lo, hi, o) && return v - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) + # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) - # If reversing is valid, do so. This does not violate stability - # because being UIntMappable implies a linear order. + # If reversing is valid, do so. This violates stability. reverse!(v, lo, hi) return v end @@ -660,10 +676,14 @@ function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; end +""" + ComputeExtrema(next) <: Algorithm -# -# Prerequisite: region to be sorted [lo, hi] is nonempty -# +Compute the extrema of the input under the provided order. + +If the minimum is no less than the maximum, then the input is already sorted. Otherwise, +dispatch to the `next` algorithm.
+""" struct ComputeExtrema{T <: Algorithm} <: Algorithm next::T end @@ -683,10 +703,16 @@ function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering; end +""" + ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm -# -# Consider counting sort -# +If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to +the `next` algorithm. + +For most types, the threshold is if the range is shorter than half the length, but for types +larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is +much more generous. +""" struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm counting::T next::U @@ -706,10 +732,15 @@ end _sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) +""" + CountingSort <: Algorithm -# -# Counting sort -# +Use the counting sort algorithm. + +`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and +space. It counts the number of occurrences of each value in the input and then iterates +through those counts repopulating the input with the values in sorted order. +""" struct CountingSort <: Algorithm end maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) @@ -737,10 +768,12 @@ function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering; end +""" + ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm -# -# Consider radix sort -# +If the number of bits in the input's range is small enough and the input supports efficient +bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. +""" struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm radix::T next::U @@ -759,10 +792,27 @@ function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering; end +""" + RadixSort <: Algorithm -# -# Radix sort -# +Use the radix sort algorithm. 
+ +`RadixSort` is a stable least significant bit first radix sort algorithm that runs in +`O(length * log(range))` time and linear space. + +It first sorts the entire vector by the last `chunk_size` bits, then by the second +to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements +that compare equal. This is essential so that the order introduced by earlier, +less significant passes is preserved by later passes. + +Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, it + * counts the number of entries that fall into each bucket + * uses those counts to compute the indices to move elements of those buckets into + * moves elements into the computed indices in the swap array + * switches the swap and working array + +`chunk_size` is larger for larger inputs and determined by an empirical heuristic. +""" struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, @@ -802,17 +852,13 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; end - -# -# Quicksort -# """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm Indicate that a sorting function should use the partial quick sort algorithm. -Partial quick sort finds and sorts the elements that would end up in positions -`lo:hi` using [`QuickSort`](@ref). +Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using +[`QuickSort`](@ref). 
It is recursive and uses the `next` algorithm for small chunks Characteristics: * *stable*: preserves the ordering of elements which compare equal @@ -928,10 +974,15 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering; end +""" + StableCheckSorted(next) <: Algorithm -# -# StableCheckSorted -# +Check if an input is sorted and/or reverse-sorted. + +The definition of reverse-sorted is that for every pair of adjacent elements, the latter is +less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs +of elements that compare equal. +""" struct StableCheckSorted{T<:Algorithm} <: Algorithm next::T end @@ -948,19 +999,6 @@ function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering; end -# This is a stable least significant bit first radix sort. -# -# That is, it first sorts the entire vector by the last chunk_size bits, then by the second -# to last chunk_size bits, and so on. Stability means that it will not reorder two elements -# that compare equal. This is essential so that the order introduced by earlier, -# less significant passes is preserved by later passes. -# -# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it -# * counts the number of entries that fall into each bucket -# * uses those counts to compute the indices to move elements of those buckets into -# * moves elements into the computed indices in the swap array -# * switches the swap and working array -# # In the case of an odd number of passes, the returned vector will === the input vector t, # not v. This is one of the many reasons radix_sort! is not exported. 
function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, @@ -1033,17 +1071,113 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) true end + ## default sorting policy ## -InitialOptimizations(x) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(x)))) +""" + InitialOptimizations(next) <: Algorithm + +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. + +`InitialOptimizations` is an implementation detail and subject to change or removal in +future versions of Julia. + +If `next` is stable, then `InitialOptimizations(next)` is also stable. + +The specific optimizations attempted by `InitialOptimizations` are +[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to +[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). +""" +InitialOptimizations(next) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(next)))) +""" + DEFAULT_STABLE + +The default sorting algorithm. + +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. + +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help +for the current dispatch system. + +# Extended Help + +`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid +of Radix, Insertion, Counting, and Quick sorts. + +We begin with MissingOptimization because it has no runtime cost when it is not +triggered and can enable other optimizations to be applied later. For example, +BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after +[`MissingOptimization`](@ref) is applied, that input will be converted into an +`AbstractVector{Bool}`.
+ +We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not +triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s +is quite easy). + +Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch +occurs before the [`IEEEFloatOptimization`](@ref) pass because the +[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs. + +To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref). + +After these optimizations, we branch on whether radix sort and related algorithms can be +applied to the input vector and ordering. We conduct this branch by testing if +`UIntMappable(eltype(v), order) !== nothing`. That is, we see if we know of a reversible mapping +from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after +the initial optimizations because they can change the input vector's type and ordering to +make them `UIntMappable`. + +If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch +to [`QuickSort`](@ref). + +Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then +perform a presorted check ([`CheckSorted`](@ref)). + +We check for short inputs before performing the presorted check to avoid the overhead of the +check for small inputs. Because the alternate dispatch is to [`InsertionSort`](@ref) which +has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small +inputs. + +We check if the input is reverse-sorted for long vectors (more than 500 elements) because +the check is essentially free unless the input is almost entirely reverse sorted.
+ +Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms +a [total order](https://en.wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to +perform an unstable sort because no two elements can compare equal unless they _are_ equal, +in which case switching them is undetectable. We utilize this fact to perform a more +aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`. + +After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the +input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until +it is necessary. + +Next, we [`ConsiderCountingSort`](@ref). If the range of the input is small compared to its +length, we apply [`CountingSort`](@ref). + +Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, +but we consider the number of _bits_ in the range, rather than the range itself. +Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this +stage. + +Finally, if the input has length at most 80, we dispatch to [`InsertionSort`](@ref) and +otherwise we dispatch to [`QuickSort`](@ref). +""" const DEFAULT_STABLE = InitialOptimizations(IsUIntMappable( Small{40}(CheckSorted(ComputeExtrema(ConsiderCountingSort(ConsiderRadixSort(Small{80}(QuickSort)))))), StableCheckSorted(QuickSort))) +""" + DEFAULT_UNSTABLE + +An efficient sorting algorithm. + +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in the future.
+""" const DEFAULT_UNSTABLE = DEFAULT_STABLE const SMALL_THRESHOLD = 20 - defalg(v::AbstractArray) = DEFAULT_STABLE """ From 15a44842e39493a8f5c00ea5056b67ad26b677b2 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 30 Oct 2022 14:20:56 +0600 Subject: [PATCH 07/29] add tests and fix typos they unveiled --- base/sort.jl | 4 ++-- test/sorting.jl | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 7216b97226f35..42b10d32486d8 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -720,7 +720,7 @@ end ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering; lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - U = UIntMapping(eltype(v), o), + U = UIntMappable(eltype(v), o), mn, mx, range=maybe_unsigned(o === Reverse ? mn-mx : mx-mn), kw...) if range < (sizeof(U) > 8 ? 5lenm1-100 : div(lenm1, 2)) @@ -816,7 +816,7 @@ Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, i struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - mn, mx, umn=uint_mapping(mn, o), umx=uint_mapping(mx, o), urange=umx-umn, + mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, bits = unsigned(8sizeof(urange) - leading_zeros(urange)), U = UIntMappable(eltype(v), o), scratch=nothing, kw...) 
diff --git a/test/sorting.jl b/test/sorting.jl index 95c303774b661..d0892b2afb407 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -733,7 +733,6 @@ end @test issorted(k[idx], rev=true) end -# This testset is at the end of the file because it is slow @testset "sort(x; scratch)" begin for n in [1,10,100,1000] v = rand(n) @@ -768,6 +767,54 @@ end @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) end +@testset "Specific algorithms" begin + let + requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort, + Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort, + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)} + + function test_alg(kw, alg, float=true) + for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)] + order isa Base.By && alg isa requires_uint_mappable && continue + for n in [1,7,179,1312] + + n == 1 && alg isa Base.Sort.RadixSort && continue + + x = rand(1:n+1, n) + y = sort(x; order) + @test y == Base.Sort._sort!(x, alg, order, (;kw(y)...)) === x + + alg isa requires_uint_mappable && continue + + x = randn(n) + y = sort(x; order) + @test y == Base.Sort._sort!(x, alg, order, (;kw(y)...)) === x + end + end + end + test_alg(alg) = test_alg(x -> (), alg) + + function test_alg_rec(alg, extrema=false) + if extrema + test_alg(alg) do y + (;mn=first(y),mx=last(y)) + end + else + test_alg(alg) + end + extrema |= alg isa Base.Sort.ComputeExtrema + for name in fieldnames(typeof(alg)) + a = getfield(alg, name) + a isa Base.Sort.Algorithm && test_alg_rec(a, extrema) + end + end + + test_alg_rec(Base.DEFAULT_STABLE) + end +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From d82b09095afd307c52ea0fd36ecee70d9aa29a97 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 1 Nov 2022 19:05:11 +0600 Subject: [PATCH 08/29] avoid potential name conflict --- base/sort.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 42b10d32486d8..e1fba0466a397 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1619,9 +1619,9 @@ function sort!(A::AbstractArray{T}; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T - _sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) + __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) end -function _sort!(A::AbstractArray{T}, ::Val{K}, +function __sort!(A::AbstractArray{T}, ::Val{K}, alg::Algorithm, order::Ordering, scratch::Union{AbstractVector{T}, Nothing}) where {K,T} From 029cbaed74fefbe260a91cc0a88781cd83a168ff Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 1 Nov 2022 19:11:39 +0600 Subject: [PATCH 09/29] switch to custom keyword handling FIXES UNEXPECTED ALLOCATIONS removes code that previously harbored bugs that slipped through the test suite --- base/sort.jl | 189 ++++++++++++++++++++++++++---------------------- test/sorting.jl | 4 +- 2 files changed, 104 insertions(+), 89 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index e1fba0466a397..1ab423f0404a9 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - _sort!(v, _PartialQuickSort(k), o) + _sort!(v, _PartialQuickSort(k), o, (;)) maybeview(v, k) end @@ -407,6 +407,42 @@ function insorted end insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...)) insorted(x, r::AbstractRange) = in(x, r) +## Alternative keyword management + +macro getkw(syms...) 
+ usyms = (Symbol(:_, sym) for sym in syms) + Expr(:block, (:($(esc(:((kw, $sym) = $usym(v, o, kw))))) for (sym, usym) in zip(syms, usyms))...) +end + +for (sym, deps, exp, type) in [ + (:lo, (), :(firstindex(v)), Integer), + (:hi, (), :(lastindex(v)), Integer), + (:U, (), :(UIntMappable(eltype(v), o)), Any), #type checking this comes at a runtime performance cost ??? + (:lenm1, (:lo, :hi), :(hi-lo), Integer), + (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), + (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), + (:range, (:mn, :mx), quote + o isa DirectOrdering || throw(ArgumentError("Cannot compute range under ordering $o")) + maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + end, Integer), + (:umn, (:mn,), :(uint_map(mn, o)), Unsigned), + (:umx, (:mx,), :(uint_map(mx, o)), Unsigned), + (:urange, (:umn, :umx), :(umx-umn), Unsigned), + (:bits, (:urange,), :(unsigned(8sizeof(urange) - leading_zeros(urange))), Unsigned), + (:scratch, (), nothing, :(Union{Nothing, AbstractVector})), # could have different eltype + (:t, (:lo, :hi, :scratch), quote + scratch === nothing ? similar(v) : reinterpret(eltype(v), checkbounds(Bool, scratch, lo:hi) ? scratch : resize!(scratch, length(v))) + end, :(AbstractVector{eltype(v)}))] + str = string(sym) + usym = Symbol(:_, sym) + @eval function $usym(v, o, kw) + Symbol($str) ∈ keys(kw) && return kw, kw[Symbol($str)]::$type # TODO this interpolation feels too complicated + @getkw $(deps...) + $sym = $exp + (;kw..., $sym), $sym::$type + end +end + ## sorting algorithm components ## abstract type Algorithm end @@ -486,17 +522,17 @@ elements that are not end_stable ? (send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi) -function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering; - lo=firstindex(v), hi=lastindex(v), kw...) 
+function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) + @getkw lo hi if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering lo, hi = send_to_end!(ismissing, v, o; lo, hi) - _sort!(WithoutMissingVector(v, unsafe=true), a.next, o; lo, hi, kw...) + _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) v elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) - _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)); lo, hi, kw...) + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) else - _sort!(v, a.next, o; lo, hi, kw...) + _sort!(v, a.next, o, kw) end end @@ -521,22 +557,22 @@ UIntType(::Type{Float64}) = UInt64 after_zero(::ForwardOrdering, x) = 0 <= x after_zero(::ReverseOrdering, x) = x < 0 is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) -function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering; - lo=firstindex(v), hi=lastindex(v), kw...) +function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) + @getkw lo hi if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering lo, hi = send_to_end!(isnan, v, o, true; lo, hi) iv = reinterpret(UIntType(eltype(v)), v) j = send_to_end!(x -> after_zero(o, x), v; lo, hi) - _sort!(iv, a.next, Reverse; lo, hi=j, kw...) - _sort!(iv, a.next, Forward; lo=j+1, hi, kw...) + _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi)) elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) ip = reinterpret(UIntType(eltype(o.data)), o.data) j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) - _sort!(v, a.next, Perm(Reverse, ip); lo, hi=j, kw...) 
- _sort!(v, a.next, Perm(Forward, ip); lo=j+1, hi, kw...) + _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi)) else - _sort!(v, a.next, o; lo, hi, kw...) + _sort!(v, a.next, o, kw) end v end @@ -553,9 +589,10 @@ comparisons. struct BoolOptimization{T <: Algorithm} <: Algorithm next::T end -_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) -function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering; lo::Integer, hi::Integer, kw...) +_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v + @getkw lo hi count = 0 @inbounds for i in lo:hi if v[i] == first @@ -581,12 +618,12 @@ struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm yes::T no::U end -function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering; - U = UIntMappable(eltype(v), o), kw...) +function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw) + @getkw U if U !== nothing - _sort!(v, a.yes, o; U, kw...) + _sort!(v, a.yes, o, kw) else - _sort!(v, a.no, o; kw...) + _sort!(v, a.no, o, kw) end end @@ -602,12 +639,12 @@ struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm big::U end Small{N}(big) where N = Small{N, typeof(SMALL_ALGORITHM), typeof(big)}(SMALL_ALGORITHM, big) -function _sort!(v::AbstractVector, a::Small{N}, o::Ordering; - lo::Integer=firstindex(v), hi::Integer=lastindex(v), lenm1 = hi-lo, kw...) where N +function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N + @getkw lenm1 if lenm1 < N - _sort!(v, a.small, o; lo, hi, lenm1, kw...) + _sort!(v, a.small, o, kw) else - _sort!(v, a.big, o; lo, hi, lenm1, kw...) 
+ _sort!(v, a.big, o, kw) end end @@ -630,8 +667,8 @@ it is well-suited to small collections but should not be used for large ones. """ const InsertionSort = InsertionSortAlg() const SMALL_ALGORITHM = InsertionSort -function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering; - lo=firstindex(v), hi=lastindex(v), kw...) +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) + @getkw lo hi lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi j = i @@ -659,8 +696,9 @@ reverse-sorted. The reverse-sorted check is unstable. struct CheckSorted{T <: Algorithm} <: Algorithm next::T end -function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; - lo=firstindex(v), hi=lastindex(v), lenm1 = hi-lo, kw...) +function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) + @getkw lo hi lenm1 + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large # arrays it is essentially free (<1%). _issorted(v, lo, hi, o) && return v @@ -672,7 +710,7 @@ function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering; return v end - _sort!(v, a.next, o; lo, hi, lenm1, kw...) + _sort!(v, a.next, o, kw) end @@ -687,8 +725,8 @@ dispatch to the `next` algorithm. struct ComputeExtrema{T <: Algorithm} <: Algorithm next::T end -function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering; - lo=firstindex(v), hi=lastindex(v), kw...) +function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) + @getkw lo hi mn = mx = v[lo] @inbounds for i in (lo+1):hi vi = v[i] @@ -699,7 +737,7 @@ function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering; lt(o, mn, mx) || return v # all same - _sort!(v, a.next, o; lo, hi, mn, mx, kw...) 
+ _sort!(v, a.next, o, (;kw..., mn, mx)) end @@ -718,18 +756,15 @@ struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm next::U end ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) -function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering; - lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - U = UIntMappable(eltype(v), o), - mn, mx, range=maybe_unsigned(o === Reverse ? mn-mx : mx-mn), kw...) - +function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) + @getkw lenm1 range U if range < (sizeof(U) > 8 ? 5lenm1-100 : div(lenm1, 2)) - _sort!(v, a.counting, o; lo, hi, lenm1, mn, mx, range, kw...) + _sort!(v, a.counting, o, kw) else - _sort!(v, a.next, o; lo, hi, lenm1, mn, mx, range, kw...) + _sort!(v, a.next, o, kw) end end -_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering; kw...) = _sort!(v, a.next, o; kw...) +_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, a.next, o, kw) """ @@ -744,9 +779,8 @@ through those counts repopulating the input with the values in sorted order. struct CountingSort <: Algorithm end maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) -function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering; - lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - mn, mx, range=maybe_unsigned(o === Reverse ? mn-mx : mx-mn), kw...) +function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx range offs = 1 - (o === Reverse ? 
mx : mn) counts = fill(0, range+1) @@ -779,15 +813,12 @@ struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm next::U end ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) -function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering; - lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - U = UIntMappable(eltype(v), o), - mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, - bits = unsigned(8sizeof(urange) - leading_zeros(urange)), kw...) - if sizeof(U) <= 8 && bits+70 < 22log(lenm1) # TODO there are some unexpected allocations here - _sort!(v, a.radix, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) +function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) + @getkw U bits lenm1 + if sizeof(U) <= 8 && bits+70 < 22log(lenm1) + _sort!(v, a.radix, o, kw) else - _sort!(v, a.next, o; lo, hi, lenm1, mn, mx, umn, umx, urange, bits, kw...) + _sort!(v, a.next, o, kw) end end @@ -814,11 +845,8 @@ Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, i `chunk_size` is larger for larger inputs and determined by an empirical heuristic. """ struct RadixSort <: Algorithm end -function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering; - lo=firstindex(v), hi=lastindex(v), lenm1=hi-lo, - mn, mx, umn=uint_map(mn, o), umx=uint_map(mx, o), urange=umx-umn, - bits = unsigned(8sizeof(urange) - leading_zeros(urange)), - U = UIntMappable(eltype(v), o), scratch=nothing, kw...) +function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) + @getkw lo hi umn U scratch lenm1 bits # At this point, we are committed to radix sort. u = uint_map!(v, lo, hi, o) @@ -936,10 +964,9 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v: pivot, lo-trues end -function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering; - lo=firstindex(v), hi=lastindex(v), scratch=similar(v), - t=reinterpret(eltype(v), checkbounds(Bool, scratch, lo:hi) ? 
scratch : resize!(scratch, length(v))), - swap=false, rev=false, kw...) +function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; + t=nothing, swap=false, rev=false) + @getkw lo hi t while lo < hi && hi - lo > SMALL_THRESHOLD pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) @@ -959,18 +986,18 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering; elseif j-lo < hi-j # Sort the lower part recursively because it is smaller. Recursing on the # smaller part guarantees O(log(n)) stack space even on pathological inputs. - _sort!(v, a, o; lo, hi=j-1, scratch, t, swap, rev, kw...) + _sort!(v, a, o, (;kw..., lo, hi=j-1); swap, rev) lo = j+1 rev = !rev else # Sort the higher part recursively - _sort!(v, a, o; lo=j+1, hi, scratch, t, swap, rev=!rev, kw...) + _sort!(v, a, o, (;kw..., lo=j+1, hi); swap, rev=!rev) hi = j-1 end end hi < lo && return v swap && copyto!(v, lo, t, lo, hi-lo+1) rev && reverse!(v, lo, hi) - _sort!(v, a.next, o; lo, hi, scratch, t, kw...) + _sort!(v, a.next, o, (;kw..., lo, hi)) end @@ -986,8 +1013,8 @@ of elements that compare equal. struct StableCheckSorted{T<:Algorithm} <: Algorithm next::T end -function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering; - lo=firstindex(v), hi=lastindex(v), kw...) +function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) + @getkw lo hi if _issorted(v, lo, hi, o) return v elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) @@ -995,7 +1022,7 @@ function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering; return reverse!(v, lo, hi) end - _sort!(v, a.next, o; lo, hi, kw...) 
+ _sort!(v, a.next, o, kw) end @@ -1227,11 +1254,7 @@ function sort!(v::AbstractVector{T}; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{AbstractVector{T}, Nothing}=nothing) where T - if scratch === nothing # TODO: reduce redundancy - _sort!(v, alg, ord(lt,by,rev,order)) - else - _sort!(v, alg, ord(lt,by,rev,order); scratch) - end + _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) end """ @@ -1357,7 +1380,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1568,15 +1591,7 @@ end @noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) for lo = first(inds):n:last(inds) - _sort!(Av, alg, order; lo, hi=lo+n-1, scratch) - end - Av -end -# TODO: reduce redundancy -@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing) - inds = LinearIndices(Av) - for lo = first(inds):n:last(inds) - _sort!(Av, alg, order; lo, hi=lo+n-1) + _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) end Av end @@ -1748,10 +1763,10 @@ Characteristics: const MergeSort = MergeSortAlg(SMALL_ALGORITHM) -function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering; - lo=firstindex(v), hi=lastindex(v), scratch=nothing) +function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw) + @getkw lo hi scratch @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o; lo, hi) + hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) m = midpoint(lo, hi) @@ -1759,8 +1774,8 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering; length(t) < m-lo+1 && resize!(t, m-lo+1) Base.require_one_based_indexing(t) - _sort!(v, a, o; lo, hi=m, scratch=t) - _sort!(v, a, o; lo=m+1, hi, scratch=t) + _sort!(v, a, o, (;kw..., hi=m, scratch=t)) + _sort!(v, a, o, (;kw..., lo=m+1, scratch=t)) i, j = 
1, lo while j <= m @@ -1791,7 +1806,7 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering; end # Support 3- and 5-argument version of sort! for backwards compatability -sort!(v::AbstractVector, a::Algorithm, o::Ordering) = _sort!(v, a, o) -sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) = _sort!(v, a, o; lo, hi) +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = _sort!(v, a, o, (;)) +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) = _sort!(v, a, o, (; lo, hi)) end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index d0892b2afb407..f9a3e5bd7438a 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -535,11 +535,11 @@ end @test issorted(a) a = view([9:-1:0;], :)::SubArray - Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, mn=0, mx=9) # test it supports non-Vector + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) # test it supports non-Vector @test issorted(a) a = OffsetArray([9:-1:0;], -5) - Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, mn=0, mx=9) + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) @test issorted(a) end From d3bdca3be5906f4a0898846b00d44cfb51dae504 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Wed, 2 Nov 2022 07:01:28 +0600 Subject: [PATCH 10/29] remove InsertionSortAlg and MergeSortAlg --- base/sort.jl | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 1ab423f0404a9..21b1a56d826d8 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -649,9 +649,8 @@ function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N end -struct InsertionSortAlg <: Algorithm end """ - InseritonSort + InsertionSort() Use the insertion sort algorithm. 
@@ -665,9 +664,10 @@ Characteristics: * *quadratic performance* in the number of elements to be sorted: it is well-suited to small collections but should not be used for large ones. """ -const InsertionSort = InsertionSortAlg() -const SMALL_ALGORITHM = InsertionSort -function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) +struct InsertionSort <: Algorithm end + +const SMALL_ALGORITHM = InsertionSort() +function _sort!(v::AbstractVector, ::InsertionSort, o::Ordering, kw) @getkw lo hi lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi @@ -1248,13 +1248,13 @@ julia> v = [(1, "c"), (3, "a"), (2, "b")]; sort!(v, by = x -> x[2]); v ``` """ function sort!(v::AbstractVector{T}; - alg::Algorithm=defalg(v), + alg::Union{Algorithm, Type{<:Algorithm}}=defalg(v), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{AbstractVector{T}, Nothing}=nothing) where T - _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) + _sort!(v, getalg(alg), ord(lt,by,rev,order), (;scratch)) end """ @@ -1432,7 +1432,7 @@ julia> sortperm(A, dims = 2) ``` """ function sortperm(A::AbstractArray; - alg::Algorithm=DEFAULT_UNSTABLE, + alg::Union{Algorithm, Type{<:Algorithm}}=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1492,7 +1492,7 @@ julia> sortperm!(p, A; dims=2); p ``` """ function sortperm!(ix::AbstractArray{T}, A::AbstractArray; - alg::Algorithm=DEFAULT_UNSTABLE, + alg::Union{Algorithm, Type{<:Algorithm}}=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1566,7 +1566,7 @@ julia> sort(A, dims = 2) """ function sort(A::AbstractArray{T}; dims::Integer, - alg::Algorithm=defalg(A), + alg::Union{Algorithm, Type{<:Algorithm}}=defalg(A), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1591,7 +1591,7 @@ end @noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) for lo = first(inds):n:last(inds) - _sort!(Av, alg, order, (; lo, 
hi=lo+n-1, scratch)) + _sort!(Av, getalg(alg), order, (; lo, hi=lo+n-1, scratch)) end Av end @@ -1628,16 +1628,16 @@ julia> sort!(A, dims = 2); A """ function sort!(A::AbstractArray{T}; dims::Integer, - alg::Algorithm=defalg(A), + alg::Union{Algorithm, Type{<:Algorithm}}=defalg(A), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T - __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) + __sort!(A, Val(dims), getalg(alg), ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, - alg::Algorithm, + alg::Union{Algorithm, Type{<:Algorithm}}, order::Ordering, scratch::Union{AbstractVector{T}, Nothing}) where {K,T} nd = ndims(A) @@ -1741,11 +1741,8 @@ end ### Unused ### -struct MergeSortAlg{T <: Algorithm} <: Algorithm - next::T -end """ - MergeSort + MergeSort() Indicate that a sorting function should use the merge sort algorithm. @@ -1760,10 +1757,12 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. """ -const MergeSort = MergeSortAlg(SMALL_ALGORITHM) - +struct MergeSort{T <: Algorithm} <: Algorithm + next::T +end +MergeSort() = MergeSort(SMALL_ALGORITHM) -function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw) +function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw) @getkw lo hi scratch @inbounds if lo < hi hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) @@ -1806,7 +1805,10 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw) end # Support 3- and 5-argument version of sort! 
for backwards compatability -sort!(v::AbstractVector, a::Algorithm, o::Ordering) = _sort!(v, a, o, (;)) -sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) = _sort!(v, a, o, (; lo, hi)) +sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (;)) +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; lo, hi)) +# Support alg=InsertionSort and alg=MergeSort for backwards compatability +getalg(a::Algorithm) = a +getalg(::Type{A}) where A <: Algorithm = A() end # module Sort From 2232cac9ca03ca3fbd9612b6e2d834df85ae05a4 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Wed, 2 Nov 2022 08:24:29 +0600 Subject: [PATCH 11/29] better algorithm display --- base/sort.jl | 45 ++++++++++++++++++++++++++++++++++++++++----- test/sorting.jl | 7 +++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 21b1a56d826d8..8cdb97f36d48f 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -638,7 +638,8 @@ struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm small::T big::U end -Small{N}(big) where N = Small{N, typeof(SMALL_ALGORITHM), typeof(big)}(SMALL_ALGORITHM, big) +Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big) +Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big) function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N @getkw lenm1 if lenm1 < N @@ -1115,7 +1116,11 @@ The specific optimizations attempted by `InitialOptimizations` are [`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). 
""" -InitialOptimizations(next) = MissingOptimization(BoolOptimization(Small{10}(IEEEFloatOptimization(next)))) +InitialOptimizations(next) = MissingOptimization( + BoolOptimization( + Small{10}( + IEEEFloatOptimization( + next)))) """ DEFAULT_STABLE @@ -1190,9 +1195,17 @@ stage. Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and otherwise we dispatch to [`QuickSort`](@ref). """ -const DEFAULT_STABLE = InitialOptimizations(IsUIntMappable( - Small{40}(CheckSorted(ComputeExtrema(ConsiderCountingSort(ConsiderRadixSort(Small{80}(QuickSort)))))), - StableCheckSorted(QuickSort))) +const DEFAULT_STABLE = InitialOptimizations( + IsUIntMappable( + Small{40}( + CheckSorted( + ComputeExtrema( + ConsiderCountingSort( + ConsiderRadixSort( + Small{80}( + QuickSort)))))), + StableCheckSorted( + QuickSort))) """ DEFAULT_UNSTABLE @@ -1204,6 +1217,28 @@ the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to chang const DEFAULT_UNSTABLE = DEFAULT_STABLE const SMALL_THRESHOLD = 20 +function Base.show(io::IO, alg::Algorithm) + print_tree(io, alg, 0) +end +function print_tree(io::IO, alg::Algorithm, cols::Int) + print(io, " "^cols) + show_type(io, alg) + print(io, '(') + for (i, name) in enumerate(fieldnames(typeof(alg))) + arg = getproperty(alg, name) + i > 1 && print(io, ',') + if arg isa Algorithm + println(io) + print_tree(io, arg, cols+1) + else + i > 1 && print(io, ' ') + print(io, arg) + end + end + print(io, ')') +end +show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name) +show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}") defalg(v::AbstractArray) = DEFAULT_STABLE diff --git a/test/sorting.jl b/test/sorting.jl index f9a3e5bd7438a..e6c6f9bd30874 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -815,6 +815,13 @@ end end end +@testset "show(::Algorithm)" begin + @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE + lines = 
split(string(Base.DEFAULT_STABLE), '\n') + @test 10 < maximum(length, lines) < 100 + @test 1 < length(lines) < 30 +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From a574c7f78d90f387af9180dc193e47d37fe653c6 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Wed, 2 Nov 2022 14:00:00 +0600 Subject: [PATCH 12/29] stop passing U around Fixes a few remaining unexpected allocations U can be statically computed from the type of v and order so there is no need. Further, U is inferred as ::DataType rather than Type{U} which causes type instabilities. --- base/sort.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 8cdb97f36d48f..4e92b601c03b1 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -417,7 +417,6 @@ end for (sym, deps, exp, type) in [ (:lo, (), :(firstindex(v)), Integer), (:hi, (), :(lastindex(v)), Integer), - (:U, (), :(UIntMappable(eltype(v), o)), Any), #type checking this comes at a runtime performance cost ??? (:lenm1, (:lo, :hi), :(hi-lo), Integer), (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), (:range, (:mn, :mx), quote @@ -619,8 +618,7 @@ struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm no::U end function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw) - @getkw U - if U !== nothing + if UIntMappable(eltype(v), o) !== nothing _sort!(v, a.yes, o, kw) else _sort!(v, a.no, o, kw) @@ -758,8 +756,8 @@ struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) - @getkw lenm1 range U - if range < (sizeof(U) > 8 ? 
5lenm1-100 : div(lenm1, 2)) + @getkw lenm1 range + if range < (sizeof(eltype(v)) > 8 ? 5lenm1-100 : div(lenm1, 2)) _sort!(v, a.counting, o, kw) else _sort!(v, a.next, o, kw) @@ -815,8 +813,8 @@ struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) - @getkw U bits lenm1 - if sizeof(U) <= 8 && bits+70 < 22log(lenm1) + @getkw bits lenm1 + if sizeof(eltype(v)) <= 8 && bits+70 < 22log(lenm1) _sort!(v, a.radix, o, kw) else _sort!(v, a.next, o, kw) @@ -847,7 +845,7 @@ Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, i """ struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) - @getkw lo hi umn U scratch lenm1 bits + @getkw lo hi umn scratch lenm1 bits # At this point, we are committed to radix sort. u = uint_map!(v, lo, hi, o) @@ -866,6 +864,7 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) end len = lenm1 + 1 + U = UIntMappable(eltype(v), o) if scratch !== nothing && checkbounds(Bool, scratch, lo:hi) # Fully preallocated and aligned scratch u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, scratch)) uint_unmap!(v, u2, lo, hi, o, umn) From 05de36ea4b1e82a235d1fdcfb47f3d30f7b0baa3 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 6 Nov 2022 10:14:01 +0600 Subject: [PATCH 13/29] remove lenm1 it is invalid to cache lenm1 because lo and hi may be redefined and we have no cache invalidation system --- base/sort.jl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 4e92b601c03b1..f5822db1108cb 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -417,7 +417,6 @@ end for (sym, deps, exp, type) in [ (:lo, (), :(firstindex(v)), Integer), (:hi, (), :(lastindex(v)), Integer), - (:lenm1, (:lo, :hi), :(hi-lo), Integer), (:mn, (), 
:(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), (:range, (:mn, :mx), quote @@ -639,8 +638,8 @@ end Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big) Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big) function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N - @getkw lenm1 - if lenm1 < N + @getkw lo hi + if (hi-lo) < N _sort!(v, a.small, o, kw) else _sort!(v, a.big, o, kw) @@ -696,14 +695,14 @@ struct CheckSorted{T <: Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) - @getkw lo hi lenm1 + @getkw lo hi # For most arrays, a presorted check is cheap (overhead < 5%) and for most large # arrays it is essentially free (<1%). _issorted(v, lo, hi, o) && return v # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) - if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) # If reversing is valid, do so. This does violates stability. reverse!(v, lo, hi) return v @@ -756,8 +755,8 @@ struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) - @getkw lenm1 range - if range < (sizeof(eltype(v)) > 8 ? 5lenm1-100 : div(lenm1, 2)) + @getkw lo hi range + if range < (sizeof(eltype(v)) > 8 ? 
5(hi-lo)-100 : div(hi-lo, 2)) _sort!(v, a.counting, o, kw) else _sort!(v, a.next, o, kw) @@ -813,8 +812,8 @@ struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) - @getkw bits lenm1 - if sizeof(eltype(v)) <= 8 && bits+70 < 22log(lenm1) + @getkw bits lo hi + if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo) _sort!(v, a.radix, o, kw) else _sort!(v, a.next, o, kw) @@ -845,7 +844,7 @@ Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, i """ struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) - @getkw lo hi umn scratch lenm1 bits + @getkw lo hi umn scratch bits # At this point, we are committed to radix sort. u = uint_map!(v, lo, hi, o) @@ -863,7 +862,7 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) u[i] -= umn end - len = lenm1 + 1 + len = hi-lo + 1 U = UIntMappable(eltype(v), o) if scratch !== nothing && checkbounds(Bool, scratch, lo:hi) # Fully preallocated and aligned scratch u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, scratch)) From 70290d65c16fb7be070e2193024f20de084ce4b0 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 7 Nov 2022 15:04:11 +0600 Subject: [PATCH 14/29] fix unexpected allocations in Radix Sort fixes #47474 in this PR rather than separate to avoid dealing with the merge --- base/sort.jl | 56 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index f5822db1108cb..207a6535444a5 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -864,17 +864,30 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) len = hi-lo + 1 U = UIntMappable(eltype(v), o) + # A large if-else chain to avoid type instabilities and dynamic dispatch if scratch !== nothing && checkbounds(Bool, 
scratch, lo:hi) # Fully preallocated and aligned scratch - u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, scratch)) - uint_unmap!(v, u2, lo, hi, o, umn) + t = reinterpret(U, scratch) + if radix_sort!(u, lo, hi, bits, t) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, t, lo, hi, o, umn) + end elseif scratch !== nothing && (applicable(resize!, scratch, len) || length(scratch) >= len) # Viable scratch length(scratch) >= len || resize!(scratch, len) t1 = axes(scratch, 1) isa OneTo ? scratch : view(scratch, firstindex(scratch):lastindex(scratch)) - u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1)) - uint_unmap!(view(v, lo:hi), u2, 1, len, o, umn) + t = reinterpret(U, t1) + if radix_sort!(view(u, lo:hi), 1, len, bits, t) + uint_unmap!(view(v, lo:hi), view(u, lo:hi), 1, len, o, umn) + else + uint_unmap!(view(v, lo:hi), t, 1, len, o, umn) + end else # No viable scratch - u2 = radix_sort!(u, lo, hi, bits, similar(u)) - uint_unmap!(v, u2, lo, hi, o, umn) + t = similar(u) + if radix_sort!(u, lo, hi, bits, t) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, t, lo, hi, o, umn) + end end end @@ -1025,16 +1038,28 @@ function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) end -# In the case of an odd number of passes, the returned vector will === the input vector t, -# not v. This is one of the many reasons radix_sort! is not exported. +# The return value indicates whether v is sorted (true) or t is sorted (false) +# This is one of the many reasons radix_sort! is not exported. function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned # bits is unsigned for performance reasons. 
- mask = UInt(1) << chunk_size - 1 - counts = Vector{Int}(undef, mask+2) - - @inbounds for shift in 0:chunk_size:bits-1 - + counts = Vector{Int}(undef, 1 << chunk_size + 1) + + shift = 0 + while true + @noinline radix_sort_pass!(t, lo, hi, counts, v, shift, chunk_size) + # the latest data resides in t + shift += chunk_size + shift < bits || return false + @noinline radix_sort_pass!(v, lo, hi, counts, t, shift, chunk_size) + # the latest data resides in v + shift += chunk_size + shift < bits || return true + end +end +function radix_sort_pass!(t, lo, hi, counts, v, shift, chunk_size) + mask = UInt(1) << chunk_size - 1 # mask is defined in pass so that the compiler + @inbounds begin # ↳ knows it's shape # counts[2:mask+2] will store the number of elements that fall into each bucket. # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff. counts .= 0 @@ -1058,12 +1083,7 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig t[j] = x # put the element where it belongs counts[i] = j + 1 # increment the target index for the next end # ↳ element in this bucket - - v, t = t, v # swap the now sorted destination vector t back into primary vector v - end - - v end function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) # chunk_size is the number of bits to radix over at once. From f06de1089edc3fc261875f201782f171dcecdcd9 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 7 Nov 2022 19:58:31 +0600 Subject: [PATCH 15/29] fix doctests? 
I have no idea how --- base/sort.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/sort.jl b/base/sort.jl index 207a6535444a5..ddc8ad70942f2 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1259,6 +1259,9 @@ show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name) show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}") defalg(v::AbstractArray) = DEFAULT_STABLE +defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE +defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation +defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation """ sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) From 38f4512d18bdb96d14b4b3e14d299b5d53affcd4 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Wed, 9 Nov 2022 10:25:53 +0600 Subject: [PATCH 16/29] support and test backwards compatibility with packages that depend on sorting internals --- base/sort.jl | 36 ++++++++++++++++++++++++++++-------- test/sorting.jl | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index ddc8ad70942f2..481c2bd4996c1 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -410,8 +410,8 @@ insorted(x, r::AbstractRange) = in(x, r) ## Alternative keyword management macro getkw(syms...) - usyms = (Symbol(:_, sym) for sym in syms) - Expr(:block, (:($(esc(:((kw, $sym) = $usym(v, o, kw))))) for (sym, usym) in zip(syms, usyms))...) + getters = (getproperty(Sort, Symbol(:_, sym)) for sym in syms) + Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...) end for (sym, deps, exp, type) in [ @@ -430,7 +430,8 @@ for (sym, deps, exp, type) in [ (:scratch, (), nothing, :(Union{Nothing, AbstractVector})), # could have different eltype (:t, (:lo, :hi, :scratch), quote scratch === nothing ?
similar(v) : reinterpret(eltype(v), checkbounds(Bool, scratch, lo:hi) ? scratch : resize!(scratch, length(v))) - end, :(AbstractVector{eltype(v)}))] + end, :(AbstractVector{eltype(v)})), + (:allow_legacy_dispatch, (), true, Bool)] str = string(sym) usym = Symbol(:_, sym) @eval function $usym(v, o, kw) @@ -1795,7 +1796,7 @@ end -### Unused ### +### Unused constructs for backward compatability ### """ MergeSort() @@ -1860,11 +1861,30 @@ function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw) return v end -# Support 3- and 5-argument version of sort! for backwards compatability -sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (;)) -sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; lo, hi)) -# Support alg=InsertionSort and alg=MergeSort for backwards compatability +# Support alg=InsertionSort and alg=MergeSort for backwards compatability (prefer InsertionSort() and MergeSort()) getalg(a::Algorithm) = a getalg(::Type{A}) where A <: Algorithm = A() +# Support 3- and 5-argument versions of sort! for calling into the internals in the old way +sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; allow_legacy_dispatch=false)) +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; lo, hi, allow_legacy_dispatch=false)) + +# Support dispatch on custom algorithms in the old way +# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... 
+function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) + @getkw lo hi allow_legacy_dispatch + if allow_legacy_dispatch + sort!(v, lo, hi, a, o) + else + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) + end +end + +# Keep old internal types so that people can keep dispatching with +# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... +const QuickSortAlg = typeof(QuickSort) +const MergeSortAlg = typeof(MergeSort) +const InsertionSortAlg = typeof(InsertionSort) + end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index e6c6f9bd30874..d321734cdb177 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -822,6 +822,38 @@ end @test 1 < length(lines) < 30 end +@testset "Defining new algorithms & backwards compatibility with packages that use sorting internals" begin + struct MyFirstAlg <: Base.Sort.Algorithm end + # The pre 1.9 dispatch method + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + v[lo:hi] .= 7 + end + @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) + + # Use the pre 1.9 hook into the internals + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + sort!(v, lo, hi, Base.DEFAULT_STABLE, o) + end + @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) + + # Another pre 1.9 hook into the internals + @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward)) + + struct MySecondAlg <: Base.Sort.Algorithm end + # A new dispatch method + function 
Base.Sort._sort!(v::AbstractVector, ::MySecondAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + v[lo:hi] .= 9 + end + @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9] + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From d8ae968bc31ef35297d89612511992f55a568c1f Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Thu, 10 Nov 2022 07:22:13 +0600 Subject: [PATCH 17/29] improve extensibility tests --- test/sorting.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/sorting.jl b/test/sorting.jl index d321734cdb177..f9faecd2ff808 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -824,12 +824,16 @@ end @testset "Defining new algorithms & backwards compatibility with packages that use sorting internals" begin struct MyFirstAlg <: Base.Sort.Algorithm end + + @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error + + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + # The pre 1.9 dispatch method function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) v[lo:hi] .= 7 end @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] - v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) # Use the pre 1.9 hook into the internals @@ -837,7 +841,6 @@ end sort!(v, lo, hi, Base.DEFAULT_STABLE, o) end @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] - v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) # Another pre 1.9 hook into the internals @@ -850,7 +853,6 @@ end v[lo:hi] .= 9 end @test sort([1,2,3], 
alg=MySecondAlg()) == [9,9,9] - v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) end From c633419d63bd726667f9c4c02f40b7ece02f05f8 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Fri, 11 Nov 2022 10:52:30 +0600 Subject: [PATCH 18/29] overhaul scratch space handling make _sort! return scratch space rather than sorted vector so that things like IEEEFloatOptimization can re-use the scratch space allocated on their first recursive call --- base/sort.jl | 264 ++++++++++++++++++++++++++++++------------------ test/sorting.jl | 10 +- 2 files changed, 172 insertions(+), 102 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 481c2bd4996c1..93e41ece28725 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -427,10 +427,7 @@ for (sym, deps, exp, type) in [ (:umx, (:mx,), :(uint_map(mx, o)), Unsigned), (:urange, (:umn, :umx), :(umx-umn), Unsigned), (:bits, (:urange,), :(unsigned(8sizeof(urange) - leading_zeros(urange))), Unsigned), - (:scratch, (), nothing, :(Union{Nothing, AbstractVector})), # could have different eltype - (:t, (:lo, :hi, :scratch), quote - scratch === nothing ? similar(v) : reinterpret(eltype(v), checkbounds(Bool, scratch, lo:hi) ? scratch : resize!(scratch, length(v))) - end, :(AbstractVector{eltype(v)})), + (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype (:allow_legacy_dispatch, (), true, Bool)] str = string(sym) usym = Symbol(:_, sym) @@ -442,8 +439,58 @@ for (sym, deps, exp, type) in [ end end +## Scratch space management + +""" + make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer) + +Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len` +that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`.
+ +This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it +is too short, and `reinterpret` `scratch` if its eltype is not `T`. +""" +function make_scratch(scratch::Nothing, T::Type, len::Integer) + s = Vector{T}(undef, len) + s, s +end +function make_scratch(scratch::Vector{T}, ::Type{T}, len::Integer) where T + len > length(scratch) && resize!(scratch, len) + scratch, scratch +end +function make_scratch(scratch::Vector, T::Type, len::Integer) + len_bytes = len * sizeof(T) + len_scratch = div(len_bytes, sizeof(eltype(scratch))) + len_scratch > length(scratch) && resize!(scratch, len_scratch) + scratch, reinterpret(T, scratch) +end + + ## sorting algorithm components ## +""" + _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset) + +An internal function that sorts `v` using the algorithm `a` under the ordering `o`, +subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only +sorts `view(v, lo:hi)`) + +Returns a scratch space if provided or constructed during the sort, or `nothing` if +no scratch space is present. + +!!! note + `_sort!` modifies but does not return `v`. + +A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There +are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space +may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. + +`t` is an appropriate scratch space for the algorithm at hand, to be accessed as +`t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in +internal or recursive calls. +""" +function _sort! 
end + abstract type Algorithm end @@ -526,7 +573,6 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering lo, hi = send_to_end!(ismissing, v, o; lo, hi) _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) - v elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) @@ -562,18 +608,25 @@ function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) lo, hi = send_to_end!(isnan, v, o, true; lo, hi) iv = reinterpret(UIntType(eltype(v)), v) j = send_to_end!(x -> after_zero(o, x), v; lo, hi) - _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) - _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi)) + scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + else + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + end elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) ip = reinterpret(UIntType(eltype(o.data)), o.data) j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) - _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) - _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi)) + scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + else + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + end else _sort!(v, a.next, o, kw) end - v end @@ -591,7 +644,7 @@ end _sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = 
_sort!(v, a.next, o, kw) function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v - @getkw lo hi + @getkw lo hi scratch count = 0 @inbounds for i in lo:hi if v[i] == first @@ -600,7 +653,7 @@ function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) end @inbounds v[lo:lo+count-1] .= first @inbounds v[lo+count:hi] .= !first - v + scratch end @@ -667,7 +720,7 @@ struct InsertionSort <: Algorithm end const SMALL_ALGORITHM = InsertionSort() function _sort!(v::AbstractVector, ::InsertionSort, o::Ordering, kw) - @getkw lo hi + @getkw lo hi scratch lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi j = i @@ -682,7 +735,7 @@ function _sort!(v::AbstractVector, ::InsertionSort, o::Ordering, kw) end v[j] = x end - return v + scratch end @@ -696,17 +749,17 @@ struct CheckSorted{T <: Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) - @getkw lo hi + @getkw lo hi scratch # For most arrays, a presorted check is cheap (overhead < 5%) and for most large # arrays it is essentially free (<1%). - _issorted(v, lo, hi, o) && return v + _issorted(v, lo, hi, o) && return scratch # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) # If reversing is valid, do so. This does violates stability. 
reverse!(v, lo, hi) - return v + return scratch end _sort!(v, a.next, o, kw) @@ -725,7 +778,7 @@ struct ComputeExtrema{T <: Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) - @getkw lo hi + @getkw lo hi scratch mn = mx = v[lo] @inbounds for i in (lo+1):hi vi = v[i] @@ -734,7 +787,7 @@ function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) end mn, mx - lt(o, mn, mx) || return v # all same + lt(o, mn, mx) || return scratch # all same _sort!(v, a.next, o, (;kw..., mn, mx)) end @@ -779,10 +832,10 @@ struct CountingSort <: Algorithm end maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) - @getkw lo hi mn mx range + @getkw lo hi mn mx range scratch offs = 1 - (o === Reverse ? mx : mn) - counts = fill(0, range+1) + counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) @inbounds for i = lo:hi counts[v[i] + offs] += 1 end @@ -797,7 +850,7 @@ function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, idx = lastidx + 1 end - v + scratch end @@ -865,31 +918,14 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) len = hi-lo + 1 U = UIntMappable(eltype(v), o) - # A large if-else chain to avoid type instabilities and dynamic dispatch - if scratch !== nothing && checkbounds(Bool, scratch, lo:hi) # Fully preallocated and aligned scratch - t = reinterpret(U, scratch) - if radix_sort!(u, lo, hi, bits, t) - uint_unmap!(v, u, lo, hi, o, umn) - else - uint_unmap!(v, t, lo, hi, o, umn) - end - elseif scratch !== nothing && (applicable(resize!, scratch, len) || length(scratch) >= len) # Viable scratch - length(scratch) >= len || resize!(scratch, len) - t1 = axes(scratch, 1) isa OneTo ? 
scratch : view(scratch, firstindex(scratch):lastindex(scratch)) - t = reinterpret(U, t1) - if radix_sort!(view(u, lo:hi), 1, len, bits, t) - uint_unmap!(view(v, lo:hi), view(u, lo:hi), 1, len, o, umn) - else - uint_unmap!(view(v, lo:hi), t, 1, len, o, umn) - end - else # No viable scratch - t = similar(u) - if radix_sort!(u, lo, hi, bits, t) - uint_unmap!(v, u, lo, hi, o, umn) - else - uint_unmap!(v, t, lo, hi, o, umn) - end + scratch, t = make_scratch(scratch, eltype(v), len) + tu = reinterpret(U, t) + if radix_sort!(u, lo, hi, bits, tu, 1-lo) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, tu, lo, hi, o, umn, 1-lo) end + scratch end @@ -948,67 +984,73 @@ select_pivot(lo::Integer, hi::Integer) = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + l # # returns (pivot, pivot_index) where pivot_index is the location the pivot # should end up, but does not set t[pivot_index] = pivot -function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v::AbstractVector, rev::Bool) +function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer, o::Ordering, v::AbstractVector, rev::Bool) pivot_index = select_pivot(lo, hi) - trues = 0 @inbounds begin pivot = v[pivot_index] while lo < pivot_index x = v[lo] fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? hi : lo) - offset] = x + offset += fx lo += 1 end while lo < hi x = v[lo+1] fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? 
hi : lo) - offset] = x + offset += fx lo += 1 end end - # pivot_index = lo-trues + # pivot_index = lo-offset # t[pivot_index] is whatever it was before # t[pivot_index] >* pivot, reverse stable - pivot, lo-trues + pivot, lo-offset end function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; - t=nothing, swap=false, rev=false) - @getkw lo hi t + t=nothing, offset=nothing, swap=false, rev=false) + @getkw lo hi scratch + + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + offset = 1-lo + kw = (;kw..., scratch) + end while lo < hi && hi - lo > SMALL_THRESHOLD - pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) + pivot, j = swap ? partition!(v, lo+offset, hi+offset, offset, o, t, rev) : partition!(t, lo, hi, -offset, o, v, rev) + j -= !swap*offset @inbounds v[j] = pivot swap = !swap # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part - swap && copyto!(v, lo, t, lo, j-lo) + swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) lo = j+1 rev = !rev elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part - swap && copyto!(v, j+1, t, j+1, hi-j) + swap && copyto!(v, j+1, t, j+1+offset, hi-j) rev || reverse!(v, j+1, hi) hi = j-1 elseif j-lo < hi-j # Sort the lower part recursively because it is smaller. Recursing on the # smaller part guarantees O(log(n)) stack space even on pathological inputs. 
- _sort!(v, a, o, (;kw..., lo, hi=j-1); swap, rev) + _sort!(v, a, o, (;kw..., lo, hi=j-1); t, offset, swap, rev) lo = j+1 rev = !rev else # Sort the higher part recursively - _sort!(v, a, o, (;kw..., lo=j+1, hi); swap, rev=!rev) + _sort!(v, a, o, (;kw..., lo=j+1, hi); t, offset, swap, rev=!rev) hi = j-1 end end - hi < lo && return v - swap && copyto!(v, lo, t, lo, hi-lo+1) + hi < lo && return scratch + swap && copyto!(v, lo, t, lo+offset, hi-lo+1) rev && reverse!(v, lo, hi) _sort!(v, a.next, o, (;kw..., lo, hi)) end @@ -1027,12 +1069,13 @@ struct StableCheckSorted{T<:Algorithm} <: Algorithm next::T end function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) - @getkw lo hi + @getkw lo hi scratch if _issorted(v, lo, hi, o) - return v + return scratch elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. - return reverse!(v, lo, hi) + reverse!(v, lo, hi) + return scratch end _sort!(v, a.next, o, kw) @@ -1042,23 +1085,24 @@ end # The return value indicates whether v is sorted (true) or t is sorted (false) # This is one of the many reasons radix_sort! is not exported. function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, - t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned + t::AbstractVector{U}, offset::Integer, + chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned # bits is unsigned for performance reasons. 
- counts = Vector{Int}(undef, 1 << chunk_size + 1) + counts = Vector{Int}(undef, 1 << chunk_size + 1) # TODO use scratch for this shift = 0 while true - @noinline radix_sort_pass!(t, lo, hi, counts, v, shift, chunk_size) + @noinline radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) # the latest data resides in t shift += chunk_size shift < bits || return false - @noinline radix_sort_pass!(v, lo, hi, counts, t, shift, chunk_size) + @noinline radix_sort_pass!(v, lo+offset, hi+offset, -offset, counts, t, shift, chunk_size) # the latest data resides in v shift += chunk_size shift < bits || return true end end -function radix_sort_pass!(t, lo, hi, counts, v, shift, chunk_size) +function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) mask = UInt(1) << chunk_size - 1 # mask is defined in pass so that the compiler @inbounds begin # ↳ knows it's shape # counts[2:mask+2] will store the number of elements that fall into each bucket. @@ -1081,7 +1125,7 @@ function radix_sort_pass!(t, lo, hi, counts, v, shift, chunk_size) x = v[k] # lookup the element i = (x >> shift)&mask + 1 # compute its bucket's index for this pass j = counts[i] # lookup the target index - t[j] = x # put the element where it belongs + t[j + offset] = x # put the element where it belongs counts[i] = j + 1 # increment the target index for the next end # ↳ element in this bucket end @@ -1310,8 +1354,9 @@ function sort!(v::AbstractVector{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=nothing) where T + scratch::Union{Vector{T}, Nothing}=nothing) where T _sort!(v, getalg(alg), ord(lt,by,rev,order), (;scratch)) + v end """ @@ -1494,7 +1539,7 @@ function sortperm(A::AbstractArray; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{<:Integer}, Nothing}=nothing, + scratch::Union{Vector{<:Integer}, Nothing}=nothing, dims...) 
#to optionally specify dims argument ordr = ord(lt,by,rev,order) if ordr === Forward && isa(A,Vector) && eltype(A)<:Integer @@ -1555,7 +1600,7 @@ function sortperm!(ix::AbstractArray{T}, A::AbstractArray; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, initialized::Bool=false, - scratch::Union{AbstractVector{T}, Nothing}=nothing, + scratch::Union{Vector{T}, Nothing}=nothing, dims...) where T <: Integer #to optionally specify dims argument (typeof(A) <: AbstractVector) == (:dims in keys(dims)) && throw(ArgumentError("Dims argument incorrect for type $(typeof(A))")) axes(ix) == axes(A) || throw(ArgumentError("index array must have the same size/axes as the source array, $(axes(ix)) != $(axes(A))")) @@ -1628,7 +1673,7 @@ function sort(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=nothing) where T dim = dims order = ord(lt,by,rev,order) n = length(axes(A, dim)) @@ -1636,19 +1681,31 @@ function sort(A::AbstractArray{T}; pdims = (dim, setdiff(1:ndims(A), dim)...) 
# put the selected dimension first Ap = permutedims(A, pdims) Av = vec(Ap) - sort_chunks!(Av, n, alg, order, scratch) + sort_chunks!(Av, n, getalg(alg), order, scratch) permutedims(Ap, invperm(pdims)) else Av = A[:] - sort_chunks!(Av, n, alg, order, scratch) + sort_chunks!(Av, n, getalg(alg), order, scratch) reshape(Av, axes(A)) end end @noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) - for lo = first(inds):n:last(inds) - _sort!(Av, getalg(alg), order, (; lo, hi=lo+n-1, scratch)) + sort_chunks!(Av, n, alg, order, scratch, first(inds), last(inds)) +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing, fst, lst) + for lo = fst:n:lst + s = _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + s !== nothing && return sort_chunks!(Av, n, alg, order, s, lo+n, lst) + end + Av +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::AbstractVector, fst, lst) + for lo = fst:n:lst + _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) end Av end @@ -1689,14 +1746,14 @@ function sort!(A::AbstractArray{T}; lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, - order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T + order::Ordering=Forward, # TODO stop eagerly over-allocating. 
+ scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T __sort!(A, Val(dims), getalg(alg), ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, alg::Union{Algorithm, Type{<:Algorithm}}, order::Ordering, - scratch::Union{AbstractVector{T}, Nothing}) where {K,T} + scratch::Union{Vector{T}, Nothing}) where {K,T} nd = ndims(A) 1 <= K <= nd || throw(ArgumentError("dimension out of range")) @@ -1787,9 +1844,10 @@ function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering) end function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer, - order::Ordering, offset::U=zero(U)) where U <: Unsigned + order::Ordering, offset::U=zero(U), + index_offset::Integer=0) where U <: Unsigned @inbounds for i in lo:hi - v[i] = uint_unmap(eltype(v), u[i]+offset, order) + v[i] = uint_unmap(eltype(v), u[i+index_offset]+offset, order) end v end @@ -1819,46 +1877,47 @@ struct MergeSort{T <: Algorithm} <: Algorithm end MergeSort() = MergeSort(SMALL_ALGORITHM) -function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw) +function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw; t=nothing, offset=nothing) @getkw lo hi scratch @inbounds if lo < hi hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) m = midpoint(lo, hi) - t = scratch === nothing ? 
similar(v, m-lo+1) : scratch - length(t) < m-lo+1 && resize!(t, m-lo+1) - Base.require_one_based_indexing(t) + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), m-lo+1) + offset = 1-lo + end - _sort!(v, a, o, (;kw..., hi=m, scratch=t)) - _sort!(v, a, o, (;kw..., lo=m+1, scratch=t)) + _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) + _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) i, j = 1, lo while j <= m - t[i] = v[j] + t[i+offset] = v[j] i += 1 j += 1 end i, k = 1, lo while k < j <= hi - if lt(o, v[j], t[i]) + if lt(o, v[j], t[i+offset]) v[k] = v[j] j += 1 else - v[k] = t[i] + v[k] = t[i+offset] i += 1 end k += 1 end while k < j - v[k] = t[i] + v[k] = t[i+offset] k += 1 i += 1 end end - return v + scratch end # Support alg=InsertionSort and alg=MergeSort for backwards compatability (prefer InsertionSort() and MergeSort()) @@ -1866,15 +1925,22 @@ getalg(a::Algorithm) = a getalg(::Type{A}) where A <: Algorithm = A() # Support 3- and 5-argument versions of sort! for calling into the internals in the old way -sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; allow_legacy_dispatch=false)) -sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) = _sort!(v, getalg(a), o, (; lo, hi, allow_legacy_dispatch=false)) +function sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) + _sort!(v, getalg(a), o, (; allow_legacy_dispatch=false)) + v +end +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) + _sort!(v, getalg(a), o, (; lo, hi, allow_legacy_dispatch=false)) + v +end # Support dispatch on custom algorithms in the old way # sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... 
function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) - @getkw lo hi allow_legacy_dispatch + @getkw lo hi scratch allow_legacy_dispatch if allow_legacy_dispatch sort!(v, lo, hi, a, o) + scratch else # This error prevents infinite recursion for unknown algorithms throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) diff --git a/test/sorting.jl b/test/sorting.jl index f9faecd2ff808..99d84a8211697 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -784,13 +784,15 @@ end x = rand(1:n+1, n) y = sort(x; order) - @test y == Base.Sort._sort!(x, alg, order, (;kw(y)...)) === x + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) alg isa requires_uint_mappable && continue x = randn(n) y = sort(x; order) - @test y == Base.Sort._sort!(x, alg, order, (;kw(y)...)) === x + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) end end end @@ -822,7 +824,9 @@ end @test 1 < length(lines) < 30 end -@testset "Defining new algorithms & backwards compatibility with packages that use sorting internals" begin +@testset "Extensibility" begin + # Defining new algorithms & backwards compatibility with packages that use sorting internals + struct MyFirstAlg <: Base.Sort.Algorithm end @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error From a2c264644437157e899afb438c48cd488111ab7a Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 15 Nov 2022 08:02:25 +0600 Subject: [PATCH 19/29] Consistency with other constructors --- base/sort.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 93e41ece28725..cabfdfeb77ff1 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -948,8 +948,8 @@ struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T< hi::H next::T end -PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(missing, k, SMALL_ALGORITHM)) 
-PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(first(k), last(k), SMALL_ALGORITHM)) +PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) +PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) _PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) _PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) From e752ea72d106e8acf76aa7621ce2e871f3518809 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Fri, 18 Nov 2022 07:34:43 +0600 Subject: [PATCH 20/29] pass around even fewer easily computed things in kw to reduce load on the compiler --- base/sort.jl | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index cabfdfeb77ff1..d610193b8eb8a 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -419,14 +419,6 @@ for (sym, deps, exp, type) in [ (:hi, (), :(lastindex(v)), Integer), (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), - (:range, (:mn, :mx), quote - o isa DirectOrdering || throw(ArgumentError("Cannot compute range under ordering $o")) - maybe_unsigned(o === Reverse ? 
mn-mx : mx-mn) - end, Integer), - (:umn, (:mn,), :(uint_map(mn, o)), Unsigned), - (:umx, (:mx,), :(uint_map(mx, o)), Unsigned), - (:urange, (:umn, :umx), :(umx-umn), Unsigned), - (:bits, (:urange,), :(unsigned(8sizeof(urange) - leading_zeros(urange))), Unsigned), (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype (:allow_legacy_dispatch, (), true, Bool)] str = string(sym) @@ -809,7 +801,9 @@ struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) - @getkw lo hi range + @getkw lo hi mn mx + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + if range < (sizeof(eltype(v)) > 8 ? 5(hi-lo)-100 : div(hi-lo, 2)) _sort!(v, a.counting, o, kw) else @@ -832,7 +826,8 @@ struct CountingSort <: Algorithm end maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) - @getkw lo hi mn mx range scratch + @getkw lo hi mn mx scratch + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) offs = 1 - (o === Reverse ? mx : mn) counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) @@ -866,7 +861,9 @@ struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm end ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) - @getkw bits lo hi + @getkw lo hi mn mx + urange = uint_map(mx, o)-uint_map(mn, o) + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo) _sort!(v, a.radix, o, kw) else @@ -898,7 +895,10 @@ Each pass divides the input into `2^chunk_size == mask+1` buckets. 
To do this, i """ struct RadixSort <: Algorithm end function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) - @getkw lo hi umn scratch bits + @getkw lo hi mn mx scratch + umn = uint_map(mn, o) + urange = uint_map(mx, o)-umn + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) # At this point, we are committed to radix sort. u = uint_map!(v, lo, hi, o) @@ -916,10 +916,8 @@ function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) u[i] -= umn end - len = hi-lo + 1 - U = UIntMappable(eltype(v), o) - scratch, t = make_scratch(scratch, eltype(v), len) - tu = reinterpret(U, t) + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + tu = reinterpret(eltype(u), t) if radix_sort!(u, lo, hi, bits, tu, 1-lo) uint_unmap!(v, u, lo, hi, o, umn) else From 69677ba4fa57d47b735a812727662936e5b793ad Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 19 Nov 2022 11:42:29 +0600 Subject: [PATCH 21/29] revert "remove InsertionSortAlg and MergeSortAlg" for backwards compatability with folks who use internals (DataFrames.jl) --- base/sort.jl | 59 +++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index d610193b8eb8a..745a155b6abb7 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -693,8 +693,10 @@ function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N end +struct InsertionSortAlg <: Algorithm end + """ - InsertionSort() + InsertionSort Use the insertion sort algorithm. @@ -708,10 +710,10 @@ Characteristics: * *quadratic performance* in the number of elements to be sorted: it is well-suited to small collections but should not be used for large ones. 
""" -struct InsertionSort <: Algorithm end +const InsertionSort = InsertionSortAlg() +const SMALL_ALGORITHM = InsertionSortAlg() -const SMALL_ALGORITHM = InsertionSort() -function _sort!(v::AbstractVector, ::InsertionSort, o::Ordering, kw) +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) @getkw lo hi scratch lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi @@ -1347,13 +1349,13 @@ julia> v = [(1, "c"), (3, "a"), (2, "b")]; sort!(v, by = x -> x[2]); v ``` """ function sort!(v::AbstractVector{T}; - alg::Union{Algorithm, Type{<:Algorithm}}=defalg(v), + alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, scratch::Union{Vector{T}, Nothing}=nothing) where T - _sort!(v, getalg(alg), ord(lt,by,rev,order), (;scratch)) + _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) v end @@ -1532,7 +1534,7 @@ julia> sortperm(A, dims = 2) ``` """ function sortperm(A::AbstractArray; - alg::Union{Algorithm, Type{<:Algorithm}}=DEFAULT_UNSTABLE, + alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1592,7 +1594,7 @@ julia> sortperm!(p, A; dims=2); p ``` """ function sortperm!(ix::AbstractArray{T}, A::AbstractArray; - alg::Union{Algorithm, Type{<:Algorithm}}=DEFAULT_UNSTABLE, + alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1666,7 +1668,7 @@ julia> sort(A, dims = 2) """ function sort(A::AbstractArray{T}; dims::Integer, - alg::Union{Algorithm, Type{<:Algorithm}}=defalg(A), + alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, @@ -1679,11 +1681,11 @@ function sort(A::AbstractArray{T}; pdims = (dim, setdiff(1:ndims(A), dim)...) 
# put the selected dimension first Ap = permutedims(A, pdims) Av = vec(Ap) - sort_chunks!(Av, n, getalg(alg), order, scratch) + sort_chunks!(Av, n, alg, order, scratch) permutedims(Ap, invperm(pdims)) else Av = A[:] - sort_chunks!(Av, n, getalg(alg), order, scratch) + sort_chunks!(Av, n, alg, order, scratch) reshape(Av, axes(A)) end end @@ -1740,16 +1742,16 @@ julia> sort!(A, dims = 2); A """ function sort!(A::AbstractArray{T}; dims::Integer, - alg::Union{Algorithm, Type{<:Algorithm}}=defalg(A), + alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, # TODO stop eagerly over-allocating. scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T - __sort!(A, Val(dims), getalg(alg), ord(lt, by, rev, order), scratch) + __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, - alg::Union{Algorithm, Type{<:Algorithm}}, + alg::Algorithm, order::Ordering, scratch::Union{Vector{T}, Nothing}) where {K,T} nd = ndims(A) @@ -1852,10 +1854,14 @@ end -### Unused constructs for backward compatability ### +### Unused constructs for backward compatibility ### + +struct MergeSortAlg{T <: Algorithm} <: Algorithm + next::T +end """ - MergeSort() + MergeSort Indicate that a sorting function should use the merge sort algorithm. @@ -1870,12 +1876,9 @@ Characteristics: * *not in-place* in memory. * *divide-and-conquer* sort strategy. 
""" -struct MergeSort{T <: Algorithm} <: Algorithm - next::T -end -MergeSort() = MergeSort(SMALL_ALGORITHM) +const MergeSort = MergeSortAlg(SMALL_ALGORITHM) -function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw; t=nothing, offset=nothing) +function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) @getkw lo hi scratch @inbounds if lo < hi hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) @@ -1918,17 +1921,13 @@ function _sort!(v::AbstractVector, a::MergeSort, o::Ordering, kw; t=nothing, off scratch end -# Support alg=InsertionSort and alg=MergeSort for backwards compatability (prefer InsertionSort() and MergeSort()) -getalg(a::Algorithm) = a -getalg(::Type{A}) where A <: Algorithm = A() - # Support 3- and 5-argument versions of sort! for calling into the internals in the old way -function sort!(v::AbstractVector, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) - _sort!(v, getalg(a), o, (; allow_legacy_dispatch=false)) +function sort!(v::AbstractVector, a::Algorithm, o::Ordering) + _sort!(v, a, o, (; allow_legacy_dispatch=false)) v end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Union{Algorithm, Type{<:Algorithm}}, o::Ordering) - _sort!(v, getalg(a), o, (; lo, hi, allow_legacy_dispatch=false)) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) + _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) v end @@ -1948,7 +1947,5 @@ end # Keep old internal types so that people can keep dispatching with # sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... 
const QuickSortAlg = typeof(QuickSort) -const MergeSortAlg = typeof(MergeSort) -const InsertionSortAlg = typeof(InsertionSort) end # module Sort From bc27dcaef56fc562e1e046efc47abc8c4b3e4f1c Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 20 Nov 2022 15:55:30 +0600 Subject: [PATCH 22/29] remove type constraint that is trickkky for the compiler to handle --- base/sort.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/sort.jl b/base/sort.jl index 745a155b6abb7..da9a25a5197b8 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -499,7 +499,7 @@ struct MissingOptimization{T <: Algorithm} <: Algorithm next::T end -struct WithoutMissingVector{T, U <: AbstractVector{Union{T, Missing}}} <: AbstractVector{T} +struct WithoutMissingVector{T, U} <: AbstractVector{T} data::U function WithoutMissingVector(data; unsafe=false) if !unsafe && any(ismissing, data) From ab549f5fe6555e9ec7a8ffea5fd6cf29c8e20c6e Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 20 Nov 2022 19:29:46 +0600 Subject: [PATCH 23/29] improve legacy dispatch system --- base/sort.jl | 5 +---- test/sorting.jl | 8 ++++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index da9a25a5197b8..a9e656720cc3f 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1922,10 +1922,7 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, end # Support 3- and 5-argument versions of sort! 
for calling into the internals in the old way -function sort!(v::AbstractVector, a::Algorithm, o::Ordering) - _sort!(v, a, o, (; allow_legacy_dispatch=false)) - v -end +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) v diff --git a/test/sorting.jl b/test/sorting.jl index 99d84a8211697..bcee3245df0cb 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -840,14 +840,18 @@ end @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) - # Use the pre 1.9 hook into the internals + # Using the old hook with old entry-point + @test sort!([3,1,2], MyFirstAlg(), Base.Forward) == [7,7,7] + @test sort!([3,1,2], 1, 3, MyFirstAlg(), Base.Forward) == [7,7,7] + + # Use the pre 1.9 entry-point into the internals function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) sort!(v, lo, hi, Base.DEFAULT_STABLE, o) end @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) - # Another pre 1.9 hook into the internals + # Another pre 1.9 entry-point into the internals @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward)) struct MySecondAlg <: Base.Sort.Algorithm end From 9400b9383bc86d1b1d96d67cf12a49e389256957 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 09:19:24 +0600 Subject: [PATCH 24/29] fix 5-arg MergeSort and add tests for 5-arg sort --- base/sort.jl | 9 ++++----- test/sorting.jl | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index a9e656720cc3f..837af5856d638 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1887,7 +1887,6 @@ function _sort!(v::AbstractVector, 
a::MergeSortAlg, o::Ordering, kw; t=nothing, if t === nothing scratch, t = make_scratch(scratch, eltype(v), m-lo+1) - offset = 1-lo end _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) @@ -1895,24 +1894,24 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, i, j = 1, lo while j <= m - t[i+offset] = v[j] + t[i] = v[j] i += 1 j += 1 end i, k = 1, lo while k < j <= hi - if lt(o, v[j], t[i+offset]) + if lt(o, v[j], t[i]) v[k] = v[j] j += 1 else - v[k] = t[i+offset] + v[k] = t[i] i += 1 end k += 1 end while k < j - v[k] = t[i+offset] + v[k] = t[i] k += 1 i += 1 end diff --git a/test/sorting.jl b/test/sorting.jl index bcee3245df0cb..8bad942fb9c81 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -864,6 +864,29 @@ end @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) end +@testset "sort!(v, lo, hi, alg, order)" begin + v = Vector{Float64}(undef, 4000) + for alg in [MergeSort, QuickSort, InsertionSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + rand!(v) + sort!(v, 1, 2000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test !issorted(v) + + sort!(v, 2001, 4000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test issorted(v[2001:4000]) + @test !issorted(v) + + sort!(v, 1001, 3000, alg, Base.Forward) + @test issorted(v[1:1000]) + @test issorted(v[1001:3000]) + @test issorted(v[3001:4000]) + @test !issorted(v[1:2000]) + @test !issorted(v[2001:4000]) + @test !issorted(v) + end +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From a73825afa120b08a04d7018e32858ada624b5917 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 10:03:06 +0600 Subject: [PATCH 25/29] cleanup interpolation to make JET.jl happy --- base/sort.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 837af5856d638..3e2c8aca1953d 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -421,10 +421,11 @@ for (sym, deps, exp, type) in [ (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype (:allow_legacy_dispatch, (), true, Bool)] - str = string(sym) usym = Symbol(:_, sym) @eval function $usym(v, o, kw) - Symbol($str) ∈ keys(kw) && return kw, kw[Symbol($str)]::$type # TODO this interpolation feels too complicated + # using missing instead of nothing because scratch could === nothing. + res = get(kw, $(Expr(:quote, sym)), missing) + res !== missing && return kw, res::$type @getkw $(deps...) 
$sym = $exp (;kw..., $sym), $sym::$type From fef85c011853331659af2d946222c81f030e1282 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 10:29:19 +0600 Subject: [PATCH 26/29] fix and test handling -0.0 in IEEEFloatOptimization --- base/sort.jl | 4 ++-- test/sorting.jl | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 3e2c8aca1953d..7f8ea621be80a 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -592,8 +592,8 @@ end UIntType(::Type{Float16}) = UInt16 UIntType(::Type{Float32}) = UInt32 UIntType(::Type{Float64}) = UInt64 -after_zero(::ForwardOrdering, x) = 0 <= x -after_zero(::ReverseOrdering, x) = x < 0 +after_zero(::ForwardOrdering, x) = !signbit(x) +after_zero(::ReverseOrdering, x) = signbit(x) is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) @getkw lo hi diff --git a/test/sorting.jl b/test/sorting.jl index 8bad942fb9c81..d8e422ff52aae 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -887,6 +887,13 @@ end end end +@testset "IEEEFloatOptimization with -0.0" begin + x = vcat(round.(100 .* randn(1000)) ./ 100) # Also test lots of duplicates + x[rand(1:1000, 5)] .= 0.0 + x[rand(1:1000, 5)] .= -0.0 # To be sure that -0.0 is present + @test issorted(sort!(x)) +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 9df25d4cae65129d6ea7ed97c7837a39b377570d Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 17:10:09 +0600 Subject: [PATCH 27/29] fix and test bug where countsort's correct overflow behavior triggers error due to unexpected promotion to UInt --- base/sort.jl | 2 +- test/sorting.jl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base/sort.jl b/base/sort.jl index 7f8ea621be80a..313786cee8fb3 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -830,7 +830,7 @@ maybe_reverse(o::ForwardOrdering, x) = x maybe_reverse(o::ReverseOrdering, x) = reverse(x) function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) @getkw lo hi mn mx scratch - range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + range = o === Reverse ? mn-mx : mx-mn offs = 1 - (o === Reverse ? mx : mn) counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) diff --git a/test/sorting.jl b/test/sorting.jl index d8e422ff52aae..37bad7d23c94b 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -894,6 +894,11 @@ end @test issorted(sort!(x)) end +@testset "Count sort near the edge of its range" begin + @test issorted(sort(rand(typemin(Int):typemin(Int)+100, 1000))) + @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 964e58fd6d9887c9ae2de5ba9f7f2e89ac9ec9d4 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 17:22:30 +0600 Subject: [PATCH 28/29] add type signature to reduce possible method ambiguities (e.g. 
with AbstractTrees v0.3.4's ImplicitRootState) --- base/sort.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/sort.jl b/base/sort.jl index 313786cee8fb3..a397ff49a2c24 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -509,7 +509,7 @@ struct WithoutMissingVector{T, U} <: AbstractVector{T} new{nonmissingtype(eltype(data)), typeof(data)}(data) end end -Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) out = v.data[i] @assert !(out isa Missing) out::eltype(v) From 037ae71042c576da3ee7078a474e2c761c70b571 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Mon, 21 Nov 2022 17:28:54 +0600 Subject: [PATCH 29/29] support 6-argument sort! because people do actually use it. --- base/sort.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/base/sort.jl b/base/sort.jl index a397ff49a2c24..086cbb859f641 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1921,12 +1921,17 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, scratch end -# Support 3- and 5-argument versions of sort! for calling into the internals in the old way +# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) v end +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) + _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false)) + v +end # Support dispatch on custom algorithms in the old way # sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ...