diff --git a/base/sort.jl b/base/sort.jl index e995a64a9f76f..932da36b9e1d6 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -86,7 +86,7 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - sort!(v, _PartialQuickSort(k), o) + _sort!(v, _PartialQuickSort(k), o, (;)) maybeview(v, k) end @@ -407,112 +407,315 @@ function insorted end insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...)) insorted(x, r::AbstractRange) = in(x, r) -## sorting algorithms ## +## Alternative keyword management -abstract type Algorithm end +macro getkw(syms...) + getters = (getproperty(Sort, Symbol(:_, sym)) for sym in syms) + Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...) +end -struct InsertionSortAlg <: Algorithm end -struct MergeSortAlg <: Algorithm end -struct AdaptiveSortAlg <: Algorithm end +for (sym, deps, exp, type) in [ + (:lo, (), :(firstindex(v)), Integer), + (:hi, (), :(lastindex(v)), Integer), + (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), + (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), + (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype + (:allow_legacy_dispatch, (), true, Bool)] + usym = Symbol(:_, sym) + @eval function $usym(v, o, kw) + # using missing instead of nothing because scratch could === nothing. + res = get(kw, $(Expr(:quote, sym)), missing) + res !== missing && return kw, res::$type + @getkw $(deps...) + $sym = $exp + (;kw..., $sym), $sym::$type + end +end + +## Scratch space management """ - PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) + make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer) -Indicate that a sorting function should use the partial quick sort algorithm. +Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len` +that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`. -Partial quick sort finds and sorts the elements that would end up in positions -`lo:hi` using [`QuickSort`](@ref). +This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it +is too short, and `reinterpret` `scratch` if its eltype is not `T`. +""" +function make_scratch(scratch::Nothing, T::Type, len::Integer) + s = Vector{T}(undef, len) + s, s +end +function make_scratch(scratch::Vector{T}, ::Type{T}, len::Integer) where T + len > length(scratch) && resize!(scratch, len) + scratch, scratch +end +function make_scratch(scratch::Vector, T::Type, len::Integer) + len_bytes = len * sizeof(T) + len_scratch = div(len_bytes, sizeof(eltype(scratch))) + len_scratch > length(scratch) && resize!(scratch, len_scratch) + scratch, reinterpret(T, scratch) +end + + +## sorting algorithm components ## -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). """ -struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}} <: Algorithm - lo::L - hi::H + _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset) + +An internal function that sorts `v` using the algorithm `a` under the ordering `o`, +subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only +sorts `view(v, lo:hi)`) + +Returns a scratch space if provided or constructed during the sort, or `nothing` if +no scratch space is present. + +!!! note + `_sort!` modifies but does not return `v`. + +A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There +are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space +may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. + +`t` is an appropriate scratch space for the algorithm at hand, to be accessed as +`t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in +internal or recursive calls. +""" +function _sort! end + +abstract type Algorithm end + + +""" + MissingOptimization(next) <: Algorithm + +Filter out missing values. + +Missing values are placed after other values according to `DirectOrdering`s. This pass puts +them there and passes on a view into the original vector that excludes the missing values. +This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`. +""" +struct MissingOptimization{T <: Algorithm} <: Algorithm + next::T end -PartialQuickSort(k::Integer) = PartialQuickSort(missing, k) -PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k)) -_PartialQuickSort(k::Integer) = PartialQuickSort(k, k) -_PartialQuickSort(k::OrdinalRange) = PartialQuickSort(k) + +struct WithoutMissingVector{T, U} <: AbstractVector{T} + data::U + function WithoutMissingVector(data; unsafe=false) + if !unsafe && any(ismissing, data) + throw(ArgumentError("data must not contain missing values")) + end + new{nonmissingtype(eltype(data)), typeof(data)}(data) + end +end +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) + out = v.data[i] + @assert !(out isa Missing) + out::eltype(v) +end +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T + v.data[i] = x + v +end +Base.size(v::WithoutMissingVector) = size(v.data) """ - InsertionSort + send_to_end!(f::Function, v::AbstractVector; [lo, hi]) -Indicate that a sorting function should use the insertion sort algorithm. +Send every element of `v` for which `f` returns `true` to the end of the vector and return +the index of the last element which for which `f` returns `false`. -Insertion sort traverses the collection one element at a time, inserting -each element into its correct, sorted position in the output vector. +`send_to_end!(f, v, lo, hi)` is equivalent to `send_to_end!(f, view(v, lo:hi))+lo-1` -Characteristics: - * *stable*: preserves the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters - which ignores case). - * *in-place* in memory. - * *quadratic performance* in the number of elements to be sorted: - it is well-suited to small collections but should not be used for large ones. +Preserves the order of the elements that are not sent to the end. """ -const InsertionSort = InsertionSortAlg() +function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function + i = lo + @inbounds while i <= hi && !f(v[i]) + i += 1 + end + j = i + 1 + @inbounds while j <= hi + if !f(v[j]) + v[i], v[j] = v[j], v[i] + i += 1 + end + j += 1 + end + i - 1 +end +""" + send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi) +Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end. + +If `o isa ReverseOrdering` then the "end" of `v` is `v[lo]`. + +If `end_stable` is set, the elements that are sent to the end are stable instead of the +elements that are not """ - QuickSort +@inline send_to_end!(f::F, v::AbstractVector, ::ForwardOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (lo, hi-send_to_end!(!f, view(v, hi:-1:lo))) : (lo, send_to_end!(f, v; lo, hi)) +@inline send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi) + + +function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) + @getkw lo hi + if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering + lo, hi = send_to_end!(ismissing, v, o; lo, hi) + _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) + elseif eltype(v) <: Integer && o isa Perm{DirectOrdering} && nonmissingtype(eltype(o.data)) != eltype(o.data) + lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o) + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) + else + _sort!(v, a.next, o, kw) + end +end -Indicate that a sorting function should use the quick sort algorithm. -Quick sort picks a pivot element, partitions the array based on the pivot, -and then sorts the elements before and after the pivot recursively. +""" + IEEEFloatOptimization(next) <: Algorithm -Characteristics: - * *stable*: preserves the ordering of elements which compare equal - (e.g. "a" and "A" in a sort of letters which ignores case). - * *not in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for almost all large collections. - * *quadratic worst case runtime* in pathological cases - (vanishingly rare for non-malicious input) +Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. + +IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as +unsigned integers with the bits with a few exceptions. This pass + +This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`. """ -const QuickSort = PartialQuickSort(missing, missing) -const QuickSortAlg = PartialQuickSort{Missing, Missing} # Exists for backward compatibility +struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm + next::T +end + +UIntType(::Type{Float16}) = UInt16 +UIntType(::Type{Float32}) = UInt32 +UIntType(::Type{Float64}) = UInt64 +after_zero(::ForwardOrdering, x) = !signbit(x) +after_zero(::ReverseOrdering, x) = signbit(x) +is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) +function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) + @getkw lo hi + if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering + lo, hi = send_to_end!(isnan, v, o, true; lo, hi) + iv = reinterpret(UIntType(eltype(v)), v) + j = send_to_end!(x -> after_zero(o, x), v; lo, hi) + scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + else + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + end + elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) + lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) + ip = reinterpret(UIntType(eltype(o.data)), o.data) + j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) + scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + else + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + end + else + _sort!(v, a.next, o, kw) + end +end + """ - MergeSort + BoolOptimization(next) <: Algorithm -Indicate that a sorting function should use the merge sort algorithm. +Sort `AbstractVector{Bool}`s using a specialized version of counting sort. -Merge sort divides the collection into subcollections and -repeatedly merges them, sorting each subcollection at each step, -until the entire collection has been recombined in sorted form. +Accesses each element at most twice (one read and one write), and performs at most two +comparisons. +""" +struct BoolOptimization{T <: Algorithm} <: Algorithm + next::T +end +_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) + first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v + @getkw lo hi scratch + count = 0 + @inbounds for i in lo:hi + if v[i] == first + count += 1 + end + end + @inbounds v[lo:lo+count-1] .= first + @inbounds v[lo+count:hi] .= !first + scratch +end -Characteristics: - * *stable*: preserves the ordering of elements which compare - equal (e.g. "a" and "A" in a sort of letters which ignores - case). - * *not in-place* in memory. - * *divide-and-conquer* sort strategy. + +""" + IsUIntMappable(yes, no) <: Algorithm + +Determines if the elements of a vector can be mapped to unsigned integers while preserving +their order under the specified ordering. + +If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that +the elements may be mapped to. Otherwise dispatch to the `no` algorithm. """ -const MergeSort = MergeSortAlg() +struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm + yes::T + no::U +end +function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw) + if UIntMappable(eltype(v), o) !== nothing + _sort!(v, a.yes, o, kw) + else + _sort!(v, a.no, o, kw) + end +end + """ - AdaptiveSort + Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm -Indicate that a sorting function should use the fastest available stable algorithm. +Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` +algorithm. +""" +struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm + small::T + big::U +end +Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big) +Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big) +function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N + @getkw lo hi + if (hi-lo) < N + _sort!(v, a.small, o, kw) + else + _sort!(v, a.big, o, kw) + end +end + + +struct InsertionSortAlg <: Algorithm end -Currently, AdaptiveSort uses - * [`InsertionSort`](@ref) for short vectors - * [`QuickSort`](@ref) for vectors that are not [`UIntMappable`](@ref) - * Radix sort for long vectors - * Counting sort for vectors of integers spanning a short range """ -const AdaptiveSort = AdaptiveSortAlg() + InsertionSort -const DEFAULT_UNSTABLE = AdaptiveSort -const DEFAULT_STABLE = AdaptiveSort -const SMALL_ALGORITHM = InsertionSort -const SMALL_THRESHOLD = 20 +Use the insertion sort algorithm. -function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, o::Ordering) +Insertion sort traverses the collection one element at a time, inserting +each element into its correct, sorted position in the output vector. + +Characteristics: +* *stable*: preserves the ordering of elements which compare equal +(e.g. "a" and "A" in a sort of letters which ignores case). +* *in-place* in memory. +* *quadratic performance* in the number of elements to be sorted: +it is well-suited to small collections but should not be used for large ones. +""" +const InsertionSort = InsertionSortAlg() +const SMALL_ALGORITHM = InsertionSortAlg() + +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) + @getkw lo hi scratch lo_plus_1 = (lo + 1)::Integer @inbounds for i = lo_plus_1:hi j = i @@ -527,9 +730,249 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, end v[j] = x end - return v + scratch +end + + +""" + CheckSorted(next) <: Algorithm + +Check if the input is already sorted and for large inputs, also check if it is +reverse-sorted. The reverse-sorted check is unstable. +""" +struct CheckSorted{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) + @getkw lo hi scratch + + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large + # arrays it is essentially free (<1%). + _issorted(v, lo, hi, o) && return scratch + + # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) + if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + # If reversing is valid, do so. This does violates stability. + reverse!(v, lo, hi) + return scratch + end + + _sort!(v, a.next, o, kw) +end + + +""" + ComputeExtrema(next) <: Algorithm + +Compute the extrema of the input under the provided order. + +If the minimum is no less than the maximum, then the input is already sorted. Otherwise, +dispatch to the `next` algorithm. +""" +struct ComputeExtrema{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) + @getkw lo hi scratch + mn = mx = v[lo] + @inbounds for i in (lo+1):hi + vi = v[i] + lt(o, vi, mn) && (mn = vi) + lt(o, mx, vi) && (mx = vi) + end + mn, mx + + lt(o, mn, mx) || return scratch # all same + + _sort!(v, a.next, o, (;kw..., mn, mx)) +end + + +""" + ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm + +If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to +the `next` algorithm. + +For most types, the threshold is if the range is shorter than half the length, but for types +larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is +much more generous. +""" +struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm + counting::T + next::U +end +ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) +function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + + if range < (sizeof(eltype(v)) > 8 ? 5(hi-lo)-100 : div(hi-lo, 2)) + _sort!(v, a.counting, o, kw) + else + _sort!(v, a.next, o, kw) + end +end +_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, a.next, o, kw) + + +""" + CountingSort <: Algorithm + +Use the counting sort algorithm. + +`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and +space. It counts the number of occurrences of each value in the input and then iterates +through those counts repopulating the input with the values in sorted order. +""" +struct CountingSort <: Algorithm end +maybe_reverse(o::ForwardOrdering, x) = x +maybe_reverse(o::ReverseOrdering, x) = reverse(x) +function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + range = o === Reverse ? mn-mx : mx-mn + offs = 1 - (o === Reverse ? mx : mn) + + counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) + @inbounds for i = lo:hi + counts[v[i] + offs] += 1 + end + + idx = lo + @inbounds for i = maybe_reverse(o, 1:range+1) + lastidx = idx + counts[i] - 1 + val = i-offs + for j = idx:lastidx + v[j] = val + end + idx = lastidx + 1 + end + + scratch +end + + +""" + ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm + +If the number of bits in the input's range is small enough and the input supports efficient +bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. +""" +struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm + radix::T + next::U +end +ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) +function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + urange = uint_map(mx, o)-uint_map(mn, o) + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) + if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo) + _sort!(v, a.radix, o, kw) + else + _sort!(v, a.next, o, kw) + end +end + + +""" + RadixSort <: Algorithm + +Use the radix sort algorithm. + +`RadixSort` is a stable least significant bit first radix sort algorithm that runs in +`O(length * log(range))` time and linear space. + +It first sorts the entire vector by the last `chunk_size` bits, then by the second +to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements +that compare equal. This is essential so that the order introduced by earlier, +less significant passes is preserved by later passes. + +Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, it + * counts the number of entries that fall into each bucket + * uses those counts to compute the indices to move elements of those buckets into + * moves elements into the computed indices in the swap array + * switches the swap and working array + +`chunk_size` is larger for larger inputs and determined by an empirical heuristic. +""" +struct RadixSort <: Algorithm end +function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + umn = uint_map(mn, o) + urange = uint_map(mx, o)-umn + bits = unsigned(8sizeof(urange) - leading_zeros(urange)) + + # At this point, we are committed to radix sort. + u = uint_map!(v, lo, hi, o) + + # we subtract umn to avoid radixing over unnecessary bits. For example, + # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] + # which uses all 32 bits, but once we subtract umn = 0x7fffffff, we are left with + # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and + # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] + # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. + # the overhead for this subtraction is small enough that it is worthwhile in many cases. + + # this is faster than u[lo:hi] .-= umn as of v1.9.0-DEV.100 + @inbounds for i in lo:hi + u[i] -= umn + end + + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + tu = reinterpret(eltype(u), t) + if radix_sort!(u, lo, hi, bits, tu, 1-lo) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, tu, lo, hi, o, umn, 1-lo) + end + scratch end + +""" + PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm + +Indicate that a sorting function should use the partial quick sort algorithm. + +Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using +[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks + +Characteristics: + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +""" +struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm + lo::L + hi::H + next::T +end +PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM) +PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM) +_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k)) +_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k)) + +""" + QuickSort + +Indicate that a sorting function should use the quick sort algorithm. + +Quick sort picks a pivot element, partitions the array based on the pivot, +and then sorts the elements before and after the pivot recursively. + +Characteristics: + * *stable*: preserves the ordering of elements which compare equal + (e.g. "a" and "A" in a sort of letters which ignores case). + * *not in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for almost all large collections. + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) +""" +const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM) + # select a pivot for QuickSort # # This method is redefined to rand(lo:hi) in Random.jl @@ -542,147 +985,127 @@ select_pivot(lo::Integer, hi::Integer) = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + l # # returns (pivot, pivot_index) where pivot_index is the location the pivot # should end up, but does not set t[pivot_index] = pivot -function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v::AbstractVector, rev::Bool) +function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer, o::Ordering, v::AbstractVector, rev::Bool) pivot_index = select_pivot(lo, hi) - trues = 0 @inbounds begin pivot = v[pivot_index] while lo < pivot_index x = v[lo] fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? hi : lo) - offset] = x + offset += fx lo += 1 end while lo < hi x = v[lo+1] fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot) - t[(fx ? hi : lo) - trues] = x - trues += fx + t[(fx ? hi : lo) - offset] = x + offset += fx lo += 1 end end - # pivot_index = lo-trues + # pivot_index = lo-offset # t[pivot_index] is whatever it was before # t[pivot_index] >* pivot, reverse stable - pivot, lo-trues + pivot, lo-offset end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, - o::Ordering, t::AbstractVector=similar(v), swap=false, rev=false; - check_presorted=true) +function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw; + t=nothing, offset=nothing, swap=false, rev=false) + @getkw lo hi scratch - if check_presorted && !rev && !swap - # Even if we are only sorting a short region, we can only short-circuit if the whole - # vector is presorted. A weaker condition is possible, but unlikely to be useful. - if _issorted(v, lo, hi, o) - return v - elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) - # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. - return reverse!(v, lo, hi) - end + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + offset = 1-lo + kw = (;kw..., scratch) end while lo < hi && hi - lo > SMALL_THRESHOLD - pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev) + pivot, j = swap ? partition!(v, lo+offset, hi+offset, offset, o, t, rev) : partition!(t, lo, hi, -offset, o, v, rev) + j -= !swap*offset @inbounds v[j] = pivot swap = !swap # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part - swap && copyto!(v, lo, t, lo, j-lo) + swap && copyto!(v, lo, t, lo+offset, j-lo) rev && reverse!(v, lo, j-1) lo = j+1 rev = !rev elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part - swap && copyto!(v, j+1, t, j+1, hi-j) + swap && copyto!(v, j+1, t, j+1+offset, hi-j) rev || reverse!(v, j+1, hi) hi = j-1 elseif j-lo < hi-j # Sort the lower part recursively because it is smaller. Recursing on the # smaller part guarantees O(log(n)) stack space even on pathological inputs. - sort!(v, lo, j-1, a, o, t, swap, rev; check_presorted=false) + _sort!(v, a, o, (;kw..., lo, hi=j-1); t, offset, swap, rev) lo = j+1 rev = !rev else # Sort the higher part recursively - sort!(v, j+1, hi, a, o, t, swap, !rev; check_presorted=false) + _sort!(v, a, o, (;kw..., lo=j+1, hi); t, offset, swap, rev=!rev) hi = j-1 end end - hi < lo && return v - swap && copyto!(v, lo, t, lo, hi-lo+1) + hi < lo && return scratch + swap && copyto!(v, lo, t, lo+offset, hi-lo+1) rev && reverse!(v, lo, hi) - sort!(v, lo, hi, SMALL_ALGORITHM, o) + _sort!(v, a.next, o, (;kw..., lo, hi)) end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, - t0::Union{AbstractVector{T}, Nothing}=nothing) where T - @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - m = midpoint(lo, hi) - - t = t0 === nothing ? similar(v, m-lo+1) : t0 - length(t) < m-lo+1 && resize!(t, m-lo+1) - require_one_based_indexing(t) +""" + StableCheckSorted(next) <: Algorithm - sort!(v, lo, m, a, o, t) - sort!(v, m+1, hi, a, o, t) +Check if an input is sorted and/or reverse-sorted. - i, j = 1, lo - while j <= m - t[i] = v[j] - i += 1 - j += 1 - end - - i, k = 1, lo - while k < j <= hi - if lt(o, v[j], t[i]) - v[k] = v[j] - j += 1 - else - v[k] = t[i] - i += 1 - end - k += 1 - end - while k < j - v[k] = t[i] - k += 1 - i += 1 - end +The definition of reverse-sorted is that for every pair of adjacent elements, the latter is +less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs +of elements that compare equal. +""" +struct StableCheckSorted{T<:Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) + @getkw lo hi scratch + if _issorted(v, lo, hi, o) + return scratch + elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) + # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. + reverse!(v, lo, hi) + return scratch end - return v + _sort!(v, a.next, o, kw) end -# This is a stable least significant bit first radix sort. -# -# That is, it first sorts the entire vector by the last chunk_size bits, then by the second -# to last chunk_size bits, and so on. Stability means that it will not reorder two elements -# that compare equal. This is essential so that the order introduced by earlier, -# less significant passes is preserved by later passes. -# -# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it -# * counts the number of entries that fall into each bucket -# * uses those counts to compute the indices to move elements of those buckets into -# * moves elements into the computed indices in the swap array -# * switches the swap and working array -# -# In the case of an odd number of passes, the returned vector will === the input vector t, -# not v. This is one of the many reasons radix_sort! is not exported. + +# The return value indicates whether v is sorted (true) or t is sorted (false) +# This is one of the many reasons radix_sort! is not exported. function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, - t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned + t::AbstractVector{U}, offset::Integer, + chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned # bits is unsigned for performance reasons. - mask = UInt(1) << chunk_size - 1 - counts = Vector{Int}(undef, mask+2) - - @inbounds for shift in 0:chunk_size:bits-1 - + counts = Vector{Int}(undef, 1 << chunk_size + 1) # TODO use scratch for this + + shift = 0 + while true + @noinline radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + # the latest data resides in t + shift += chunk_size + shift < bits || return false + @noinline radix_sort_pass!(v, lo+offset, hi+offset, -offset, counts, t, shift, chunk_size) + # the latest data resides in v + shift += chunk_size + shift < bits || return true + end +end +function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + mask = UInt(1) << chunk_size - 1 # mask is defined in pass so that the compiler + @inbounds begin # ↳ knows it's shape # counts[2:mask+2] will store the number of elements that fall into each bucket. # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff. counts .= 0 @@ -703,15 +1126,10 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig x = v[k] # lookup the element i = (x >> shift)&mask + 1 # compute its bucket's index for this pass j = counts[i] # lookup the target index - t[j] = x # put the element where it belongs + t[j + offset] = x # put the element where it belongs counts[i] = j + 1 # increment the target index for the next end # ↳ element in this bucket - - v, t = t, v # swap the now sorted destination vector t back into primary vector v - end - - v end function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) # chunk_size is the number of bits to radix over at once. @@ -726,23 +1144,6 @@ function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) UInt8(cld(bits, cld(bits, guess))) end -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools. -# Accepts unused scratch space to avoid method ambiguity. -function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{Bool}, Nothing}=nothing) - first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v - count = 0 - @inbounds for i in lo:hi - if v[i] == first - count += 1 - end - end - @inbounds v[lo:lo+count-1] .= first - @inbounds v[lo+count:hi] .= !first - v -end - maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt maybe_unsigned(x::BitSigned) = unsigned(x) function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) @@ -761,129 +1162,152 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) end true end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! - # so we skip straight to the fallback algorithm which is comparison based. - U = UIntMappable(eltype(v), o) - U === nothing && return sort!(v, lo, hi, QuickSort, o) - - # to avoid introducing excessive detection costs for the trivial sorting problem - # and to avoid overflow, we check for small inputs before any other runtime checks - hi <= lo && return v - lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow - # only count sort on a short range can compete with insertion sort when lenm1 < 40 - # and the optimization is not worth the detection cost, so we use insertion sort. - lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled - _issorted(v, lo, hi, o) && return v - - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) - if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) - # If reversing is valid, do so. This does not violate stability - # because being UIntMappable implies a linear order. - reverse!(v, lo, hi) - return v - end - # UInt128 does not support fast bit shifting so we never - # dispatch to radix sort but we may still perform count sort - if sizeof(U) > 8 - if T <: Integer && o isa DirectOrdering - v_min, v_max = _extrema(v, lo, hi, Forward) - v_range = maybe_unsigned(v_max-v_min) - v_range == 0 && return v # all same - - # we know lenm1 ≥ 40, so this will never underflow. - # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback - if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small - return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi) - end - end - return sort!(v, lo, hi, QuickSort, o; check_presorted=false) - end +## default sorting policy ## - v_min, v_max = _extrema(v, lo, hi, o) - lt(o, v_min, v_max) || return v # all same - if T <: Integer && o isa DirectOrdering - R = o === Reverse - v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min) - if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small - return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi) - end - end +""" + InitialOptimizations(next) <: Algorithm - u_min, u_max = uint_map(v_min, o), uint_map(v_max, o) - u_range = maybe_unsigned(u_max-u_min) - if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small - u = uint_map!(v, lo, hi, o) - sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi) - return uint_unmap!(v, u, lo, hi, o) - end +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. - # if u's range is small, then once we subtract out v_min, we'll get a vector like - # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009] - # where we only need to radix over the last few bits (5, in the example). - bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) - - # radix sort runs in O(bits * lenm1), quick sort runs in O(lenm1 * log(lenm1)). - # dividing both sides by lenm1 and introducing empirical constant factors yields - # the following heuristic for when QuickSort is faster than RadixSort - if 22log(lenm1) < bits + 70 - return if lenm1 > 80 - sort!(v, lo, hi, QuickSort, o; check_presorted=false) - else - sort!(v, lo, hi, SMALL_ALGORITHM, o) - end - end +`InitialOptimizations` is an implementation detail and subject to change or removal in +future versions of Julia. - # At this point, we are committed to radix sort. - u = uint_map!(v, lo, hi, o) +If `next` is stable, then `InitialOptimizations(next)` is also stable. - # we subtract u_min to avoid radixing over unnecessary bits. For example, - # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] - # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with - # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and - # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] - # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. - # the overhead for this subtraction is small enough that it is worthwhile in many cases. +The specific optimizations attempted by `InitialOptimizations` are +[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to +[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref). +""" +InitialOptimizations(next) = MissingOptimization( + BoolOptimization( + Small{10}( + IEEEFloatOptimization( + next)))) +""" + DEFAULT_STABLE - # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100 - @inbounds for i in lo:hi - u[i] -= u_min - end +The default sorting algorithm. - len = lenm1 + 1 - if t !== nothing && checkbounds(Bool, t, lo:hi) # Fully preallocated and aligned scratch space - u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, t)) - uint_unmap!(v, u2, lo, hi, o, u_min) - elseif t !== nothing && (applicable(resize!, t, len) || length(t) >= len) # Viable scratch space - length(t) >= len || resize!(t, len) - t1 = axes(t, 1) isa OneTo ? t : view(t, firstindex(t):lastindex(t)) - u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1)) - uint_unmap!(view(v, lo:hi), u2, 1, len, o, u_min) - else # No viable scratch space - u2 = radix_sort!(u, lo, hi, bits, similar(u)) - uint_unmap!(v, u2, lo, hi, o, u_min) - end -end +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. -## generic sorting methods ## +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help +for the current dispatch system. -defalg(v::AbstractArray) = DEFAULT_STABLE +# Extended Help -function sort!(v::AbstractVector{T}, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, firstindex(v), lastindex(v), alg, order, t) -end +`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid +of Radix, Insertion, Counting, Quick sorts. + +We begin with MissingOptimization because it has no runtime cost when it is not +triggered and can enable other optimizations to be applied later. For example, +BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after +[`MissingOptimization`](@ref) is applied, that input will be converted into am +`AbstractVector{Bool}`. + +We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not +triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s +is quite easy). + +Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch +occurs before the [`IEEEFloatOptimization`](@ref) pass because the +[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs. + +To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref). + +After these optimizations, we branch on whether radix sort and related algorithms can be +applied to the input vector and ordering. We conduct this branch by testing if +`UIntMappable(v, order) !== nothing`. That is, we see if we know of a reversible mapping +from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after +the initial optimizations because they can change the input vector's type and ordering to +make them `UIntMappable`. + +If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch +to [`QuickSort`](@ref). + +Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then +perform a presorted check ([`CheckSorted`](@ref)). + +We check for short inputs before performing the presorted check to avoid the overhead of the +check for small inputs. Because the alternate dispatch is to [`InseritonSort`](@ref) which +has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small +inputs. + +We check if the input is reverse-sorted for long vectors (more than 500 elements) because +the check is essentially free unless the input is almost entirely reverse sorted. + +Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms +a [total order](wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to +perform an unstable sort because no two elements can compare equal unless they _are_ equal, +in which case switching them is undetectable. We utilize this fact to perform a more +aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`. + +After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the +input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until +it is necessary. + +Next, we [`ConsiderCountingSort`](@ref). If the range the input is small compared to its +length, we apply [`CountingSort`](@ref). + +Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, +but we conside rthe number of _bits_ in the range, rather than the range itself. +Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this +stage. -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, lo, hi, alg, order) +Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and +otherwise we dispatch to [`QuickSort`](@ref). +""" +const DEFAULT_STABLE = InitialOptimizations( + IsUIntMappable( + Small{40}( + CheckSorted( + ComputeExtrema( + ConsiderCountingSort( + ConsiderRadixSort( + Small{80}( + QuickSort)))))), + StableCheckSorted( + QuickSort))) +""" + DEFAULT_UNSTABLE + +An efficient sorting algorithm. + +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. +""" +const DEFAULT_UNSTABLE = DEFAULT_STABLE +const SMALL_THRESHOLD = 20 + +function Base.show(io::IO, alg::Algorithm) + print_tree(io, alg, 0) end +function print_tree(io::IO, alg::Algorithm, cols::Int) + print(io, " "^cols) + show_type(io, alg) + print(io, '(') + for (i, name) in enumerate(fieldnames(typeof(alg))) + arg = getproperty(alg, name) + i > 1 && print(io, ',') + if arg isa Algorithm + println(io) + print_tree(io, arg, cols+1) + else + i > 1 && print(io, ' ') + print(io, arg) + end + end + print(io, ')') +end +show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name) +show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}") + +defalg(v::AbstractArray) = DEFAULT_STABLE +defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE +defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation +defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation """ sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -931,31 +1355,9 @@ function sort!(v::AbstractVector{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, alg, ord(lt,by,rev,order), scratch) -end - -# sort! for vectors of few unique integers -function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse, - lo=firstindex(x), hi=lastindex(x)) - offs = 1 - minval - - counts = fill(0, rangelen) - @inbounds for i = lo:hi - counts[x[i] + offs] += 1 - end - - idx = lo - @inbounds for i = maybereverse(1:rangelen) - lastidx = idx + counts[i] - 1 - val = i-offs - for j = idx:lastidx - x[j] = val - end - idx = lastidx + 1 - end - - return x + scratch::Union{Vector{T}, Nothing}=nothing) where T + _sort!(v, alg, ord(lt,by,rev,order), (;scratch)) + v end """ @@ -1081,7 +1483,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, end # do partial quicksort - sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1141,7 +1543,7 @@ function sortperm(A::AbstractArray; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{<:Integer}, Nothing}=nothing, + scratch::Union{Vector{<:Integer}, Nothing}=nothing, dims...) #to optionally specify dims argument ordr = ord(lt,by,rev,order) if ordr === Forward && isa(A,Vector) && eltype(A)<:Integer @@ -1205,7 +1607,7 @@ function sortperm!(ix::AbstractArray{T}, A::AbstractArray; rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, initialized::Bool=false, - scratch::Union{AbstractVector{T}, Nothing}=nothing, + scratch::Union{Vector{T}, Nothing}=nothing, dims...) where T <: Integer #to optionally specify dims argument (typeof(A) <: AbstractVector) == (:dims in keys(dims)) && throw(ArgumentError("Dims argument incorrect for type $(typeof(A))")) axes(ix) == axes(A) || throw(ArgumentError("index array must have the same size/axes as the source array, $(axes(ix)) != $(axes(A))")) @@ -1278,7 +1680,7 @@ function sort(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=nothing) where T dim = dims order = ord(lt,by,rev,order) n = length(axes(A, dim)) @@ -1295,14 +1697,27 @@ function sort(A::AbstractArray{T}; end end -@noinline function sort_chunks!(Av, n, alg, order, t) +@noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) - for s = first(inds):n:last(inds) - sort!(Av, s, s+n-1, alg, order, t) + sort_chunks!(Av, n, alg, order, scratch, first(inds), last(inds)) +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing, fst, lst) + for lo = fst:n:lst + s = _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + s !== nothing && return sort_chunks!(Av, n, alg, order, s, lo+n, lst) end Av end +@noinline function sort_chunks!(Av, n, alg, order, scratch::AbstractVector, fst, lst) + for lo = fst:n:lst + _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + end + Av +end + + """ sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) @@ -1338,14 +1753,14 @@ function sort!(A::AbstractArray{T}; lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, - order::Ordering=Forward, - scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T - _sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) + order::Ordering=Forward, # TODO stop eagerly over-allocating. + scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T + __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch) end -function _sort!(A::AbstractArray{T}, ::Val{K}, +function __sort!(A::AbstractArray{T}, ::Val{K}, alg::Algorithm, order::Ordering, - scratch::Union{AbstractVector{T}, Nothing}) where {K,T} + scratch::Union{Vector{T}, Nothing}) where {K,T} nd = ndims(A) 1 <= K <= nd || throw(ArgumentError("dimension out of range")) @@ -1353,7 +1768,7 @@ function _sort!(A::AbstractArray{T}, ::Val{K}, remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd) for idx in CartesianIndices(remdims) Av = view(A, ntuple(i -> i == K ? Colon() : idx[i], nd)...) - sort!(Av, alg, order, scratch) + sort!(Av; alg, order, scratch) end A end @@ -1436,175 +1851,109 @@ function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering) end function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer, - order::Ordering, offset::U=zero(U)) where U <: Unsigned + order::Ordering, offset::U=zero(U), + index_offset::Integer=0) where U <: Unsigned @inbounds for i in lo:hi - v[i] = uint_unmap(eltype(v), u[i]+offset, order) + v[i] = uint_unmap(eltype(v), u[i+index_offset]+offset, order) end v end -## fast clever sorting for floats ## - -module Float -using ..Sort -using ...Order -using Base: IEEEFloat - -import Core.Intrinsics: slt_int -import ..Sort: sort!, UIntMappable, uint_map, uint_unmap -import ...Order: lt, DirectOrdering - -# fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. -# This type allows us to dispatch only when it is safe to do so. See #42739 for more info. -const FPSortable = Union{ - AbstractVector{Union{Float16, Missing}}, - AbstractVector{Union{Float32, Missing}}, - AbstractVector{Union{Float64, Missing}}, - AbstractVector{Float16}, - AbstractVector{Float32}, - AbstractVector{Float64}, - AbstractVector{Missing}} -struct Left <: Ordering end -struct Right <: Ordering end +### Unused constructs for backward compatibility ### -left(::DirectOrdering) = Left() -right(::DirectOrdering) = Right() +struct MergeSortAlg{T <: Algorithm} <: Algorithm + next::T +end -left(o::Perm) = Perm(left(o.order), o.data) -right(o::Perm) = Perm(right(o.order), o.data) +""" + MergeSort -lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y) +Indicate that a sorting function should use the merge sort algorithm. -uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u) -uint_map(x::Float16, ::Right) = reinterpret(UInt16, x) -uint_unmap(::Type{Float16}, u::UInt16, ::Right) = reinterpret(Float16, u) -UIntMappable(::Type{Float16}, ::Union{Left, Right}) = UInt16 +Merge sort divides the collection into subcollections and +repeatedly merges them, sorting each subcollection at each step, +until the entire collection has been recombined in sorted form. -uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u) -uint_map(x::Float32, ::Right) = reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u) -UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32 +Characteristics: + * *stable*: preserves the ordering of elements which compare + equal (e.g. "a" and "A" in a sort of letters which ignores + case). + * *not in-place* in memory. + * *divide-and-conquer* sort strategy. +""" +const MergeSort = MergeSortAlg(SMALL_ALGORITHM) -uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u) -uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) -UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 +function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing) + @getkw lo hi scratch + @inbounds if lo < hi + hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw) -isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x) -isnan(o::DirectOrdering, x::Missing) = false -isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) + m = midpoint(lo, hi) -ismissing(o::DirectOrdering, x::IEEEFloat) = false -ismissing(o::DirectOrdering, x::Missing) = true -ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), m-lo+1) + end -allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing -allowsmissing(::AbstractVector{<:Integer}, - ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} = - T >: Missing + _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset) + _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset) -function specials2left!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = lo - @inbounds while i <= hi && testf(o,v[i]) - i += 1 - end - j = i + 1 - @inbounds while j <= hi - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] + i, j = 1, lo + while j <= m + t[i] = v[j] i += 1 + j += 1 end - j += 1 - end - return i, hi -end -function specials2right!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = hi - @inbounds while lo <= i && testf(o,v[i]) - i -= 1 - end - j = i - 1 - @inbounds while lo <= j - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i -= 1 + + i, k = 1, lo + while k < j <= hi + if lt(o, v[j], t[i]) + v[k] = v[j] + j += 1 + else + v[k] = t[i] + i += 1 + end + k += 1 + end + while k < j + v[k] = t[i] + k += 1 + i += 1 end - j -= 1 end - return lo, i + + scratch end -function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, lo, i-1, a, o) - return i, hi - else - return specials2left!(isnan, v, o, lo, hi) - end +# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) + _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) + v end -function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, i+1, hi, a, o) - return lo, i - else - return specials2right!(isnan, v, o, lo, hi) - end +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) + _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false)) + v end -specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) = - specials2left!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = - specials2left!(v, a, o) - -issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x)) -issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x)) -issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) - -function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). - # The overhead is O(n). For n < 10, it's not worth it. - length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t) - - i, j = lo, hi = specials2end!(v,a,o) - @inbounds while true - while i <= j && issignleft(o,v[i]); i += 1; end - while i <= j && !issignleft(o,v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 +# Support dispatch on custom algorithms in the old way +# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... +function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) + @getkw lo hi scratch allow_legacy_dispatch + if allow_legacy_dispatch + sort!(v, lo, hi, a, o) + scratch + else + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) end - sort!(v, lo, j, a, left(o), t) - sort!(v, i, hi, a, right(o), t) - return v -end - - -function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering, - t::Union{FPSortable, Nothing}=nothing) - fpsort!(v, a, o, t) -end -function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}, - t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} - fpsort!(v, a, o, t) end -end # module Sort.Float +# Keep old internal types so that people can keep dispatching with +# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ... +const QuickSortAlg = typeof(QuickSort) end # module Sort diff --git a/test/sorting.jl b/test/sorting.jl index 4a0299b2217c2..37bad7d23c94b 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -79,8 +79,9 @@ end end @testset "stability" begin - for Alg in [InsertionSort, MergeSort, QuickSort, Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, - PartialQuickSort(missing, 1729), PartialQuickSort(1729, missing)] + for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, + PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM), + PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)] @test issorted(sort(1:2000, alg=Alg, by=x->0)) @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) end @@ -534,11 +535,11 @@ end @test issorted(a) a = view([9:-1:0;], :)::SubArray - Base.Sort.sort_int_range!(a, 10, 0, identity) # test it supports non-Vector + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) # test it supports non-Vector @test issorted(a) a = OffsetArray([9:-1:0;], -5) - Base.Sort.sort_int_range!(a, 10, 0, identity) + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) @test issorted(a) end @@ -632,9 +633,9 @@ end @testset "uint mappings" begin #Construct value lists - floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, - prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))] - for T in [Float16, Float32, Float64]] + floats = [reinterpret(U, vcat(T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, + prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))], randnans(4))) + for (U, T) in [(UInt16, Float16), (UInt32, Float32), (UInt64, Float64)]] ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1] for T in Base.BitInteger_types] @@ -650,21 +651,18 @@ end UIntN(::Val{8}) = UInt64 UIntN(::Val{16}) = UInt128 map(vals) do x + x isa Base.ReinterpretArray && return T = eltype(x) U = UIntN(Val(sizeof(T))) append!(x, rand(T, 4)) append!(x, reinterpret.(T, rand(U, 4))) - if T <: AbstractFloat - mask = reinterpret(U, T(NaN)) - append!(x, reinterpret.(T, mask .| rand(U, 4))) - end end for x in vals T = eltype(x) U = UIntN(Val(sizeof(T))) - for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)] - if order isa Base.Order.By || ((T <: AbstractFloat) == (order isa DirectOrdering)) + for order in [Forward, Reverse, By(Forward, identity)] + if order isa Base.Order.By @test Base.Sort.UIntMappable(T, order) === nothing continue end @@ -681,10 +679,6 @@ end for a in x for b in x - if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right() - # Left and Right orderings guarantee homogeneous sign and no NaNs - (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue - end @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order)) end end @@ -705,7 +699,7 @@ end # Nevertheless, it still works... for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(v, alg=alg, lt = <=) == s end @test partialsort(v, 172, lt = <=) == s[172] @@ -716,7 +710,7 @@ end # this invalid lt order. perm = reverse(sortperm(v, rev=true)) for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm end @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] @@ -724,7 +718,7 @@ end # lt can be very poorly behaved and sort will still permute its input in some way. for alg in [InsertionSort, MergeSort, QuickSort, - Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s end @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 @@ -739,7 +733,6 @@ end @test issorted(k[idx], rev=true) end -# This testset is at the end of the file because it is slow @testset "sort(x; scratch)" begin for n in [1,10,100,1000] v = rand(n) @@ -770,6 +763,142 @@ end end end +@testset "Unions with missing" begin + @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) +end + +@testset "Specific algorithms" begin + let + requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort, + Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort, + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big), + typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)} + + function test_alg(kw, alg, float=true) + for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)] + order isa Base.By && alg isa requires_uint_mappable && continue + for n in [1,7,179,1312] + + n == 1 && alg isa Base.Sort.RadixSort && continue + + x = rand(1:n+1, n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + + alg isa requires_uint_mappable && continue + + x = randn(n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + end + end + end + test_alg(alg) = test_alg(x -> (), alg) + + function test_alg_rec(alg, extrema=false) + if extrema + test_alg(alg) do y + (;mn=first(y),mx=last(y)) + end + else + test_alg(alg) + end + extrema |= alg isa Base.Sort.ComputeExtrema + for name in fieldnames(typeof(alg)) + a = getfield(alg, name) + a isa Base.Sort.Algorithm && test_alg_rec(a, extrema) + end + end + + test_alg_rec(Base.DEFAULT_STABLE) + end +end + +@testset "show(::Algorithm)" begin + @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE + lines = split(string(Base.DEFAULT_STABLE), '\n') + @test 10 < maximum(length, lines) < 100 + @test 1 < length(lines) < 30 +end + +@testset "Extensibility" begin + # Defining new algorithms & backwards compatibility with packages that use sorting internals + + struct MyFirstAlg <: Base.Sort.Algorithm end + + @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error + + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + + # The pre 1.9 dispatch method + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + v[lo:hi] .= 7 + end + @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) + + # Using the old hook with old entry-point + @test sort!([3,1,2], MyFirstAlg(), Base.Forward) == [7,7,7] + @test sort!([3,1,2], 1, 3, MyFirstAlg(), Base.Forward) == [7,7,7] + + # Use the pre 1.9 entry-point into the internals + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + sort!(v, lo, hi, Base.DEFAULT_STABLE, o) + end + @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] + @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) + + # Another pre 1.9 entry-point into the internals + @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward)) + + struct MySecondAlg <: Base.Sort.Algorithm end + # A new dispatch method + function Base.Sort._sort!(v::AbstractVector, ::MySecondAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + v[lo:hi] .= 9 + end + @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) +end + +@testset "sort!(v, lo, hi, alg, order)" begin + v = Vector{Float64}(undef, 4000) + for alg in [MergeSort, QuickSort, InsertionSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + rand!(v) + sort!(v, 1, 2000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test !issorted(v) + + sort!(v, 2001, 4000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test issorted(v[2001:4000]) + @test !issorted(v) + + sort!(v, 1001, 3000, alg, Base.Forward) + @test issorted(v[1:1000]) + @test issorted(v[1001:3000]) + @test issorted(v[3001:4000]) + @test !issorted(v[1:2000]) + @test !issorted(v[2001:4000]) + @test !issorted(v) + end +end + +@testset "IEEEFloatOptimization with -0.0" begin + x = vcat(round.(100 .* randn(1000)) ./ 100) # Also test lots of duplicates + x[rand(1:1000, 5)] .= 0.0 + x[rand(1:1000, 5)] .= -0.0 # To be sure that -0.0 is present + @test issorted(sort!(x)) +end + +@testset "Count sort near the edge of its range" begin + @test issorted(sort(rand(typemin(Int):typemin(Int)+100, 1000))) + @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128,