diff --git a/base/sort.jl b/base/sort.jl
index e995a64a9f76f..932da36b9e1d6 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -86,7 +86,7 @@ issorted(itr;
     issorted(itr, ord(lt,by,rev,order))
 
 function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering)
-    sort!(v, _PartialQuickSort(k), o)
+    _sort!(v, _PartialQuickSort(k), o, (;))
     maybeview(v, k)
 end
 
@@ -407,112 +407,315 @@ function insorted end
 insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...))
 insorted(x, r::AbstractRange) = in(x, r)
 
-## sorting algorithms ##
+## Alternative keyword management
 
-abstract type Algorithm end
+macro getkw(syms...)
+    getters = (getproperty(Sort, Symbol(:_, sym)) for sym in syms)
+    Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...)
+end
 
-struct InsertionSortAlg <: Algorithm end
-struct MergeSortAlg     <: Algorithm end
-struct AdaptiveSortAlg  <: Algorithm end
+for (sym, deps, exp, type) in [
+        (:lo, (), :(firstindex(v)), Integer),
+        (:hi, (), :(lastindex(v)),  Integer),
+        (:mn, (), :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))),
+        (:mx, (), :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))),
+        (:scratch, (), nothing, :(Union{Nothing, Vector})), # could have different eltype
+        (:allow_legacy_dispatch, (), true, Bool)]
+    usym = Symbol(:_, sym)
+    @eval function $usym(v, o, kw)
+        # using missing instead of nothing because scratch could === nothing.
+        res = get(kw, $(Expr(:quote, sym)), missing)
+        res !== missing && return kw, res::$type
+        @getkw $(deps...)
+        $sym = $exp
+        (;kw..., $sym), $sym::$type
+    end
+end
+
+## Scratch space management
 
 """
-    PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing})
+    make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer)
 
-Indicate that a sorting function should use the partial quick sort algorithm.
+Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len`
+that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`.
 
-Partial quick sort finds and sorts the elements that would end up in positions
-`lo:hi` using [`QuickSort`](@ref).
+This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it
+is too short, and `reinterpret` `scratch` if its eltype is not `T`.
+"""
+function make_scratch(scratch::Nothing, T::Type, len::Integer)
+    s = Vector{T}(undef, len)
+    s, s
+end
+function make_scratch(scratch::Vector{T}, ::Type{T}, len::Integer) where T
+    len > length(scratch) && resize!(scratch, len)
+    scratch, scratch
+end
+function make_scratch(scratch::Vector, T::Type, len::Integer)
+    unit = lcm(sizeof(T), sizeof(eltype(scratch))) # smallest byte span that is whole in both eltypes
+    len_scratch = cld(len * sizeof(T), unit) * div(unit, sizeof(eltype(scratch))) # round up, never truncate
+    len_scratch > length(scratch) && resize!(scratch, len_scratch)
+    scratch, reinterpret(T, view(scratch, 1:len_scratch)) # exact-size prefix so the byte count divides sizeof(T)
+end
+
+
+## sorting algorithm components ##
 
-Characteristics:
-  * *stable*: preserves the ordering of elements which compare equal
-    (e.g. "a" and "A" in a sort of letters which ignores case).
-  * *not in-place* in memory.
-  * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
 """
-struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}} <: Algorithm
-    lo::L
-    hi::H
+    _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset)
+
+An internal function that sorts `v` using the algorithm `a` under the ordering `o`,
+subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only
+sorts `view(v, lo:hi)`).
+
+Returns a scratch space if provided or constructed during the sort, or `nothing` if
+no scratch space is present.
+
+!!! note
+    `_sort!` modifies but does not return `v`.
+
+A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There
+are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space
+may be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`.
+
+`t` is an appropriate scratch space for the algorithm at hand, to be accessed as
+`t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in
+internal or recursive calls.
+"""
+function _sort! end
+
+abstract type Algorithm end
+
+
+"""
+    MissingOptimization(next) <: Algorithm
+
+Filter out missing values.
+
+Missing values are placed after other values according to `DirectOrdering`s. This pass puts
+them there and passes on a view into the original vector that excludes the missing values.
+This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`.
+"""
+struct MissingOptimization{T <: Algorithm} <: Algorithm
+    next::T
 end
-PartialQuickSort(k::Integer) = PartialQuickSort(missing, k)
-PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k))
-_PartialQuickSort(k::Integer) = PartialQuickSort(k, k)
-_PartialQuickSort(k::OrdinalRange) = PartialQuickSort(k)
+
+struct WithoutMissingVector{T, U} <: AbstractVector{T}
+    data::U
+    function WithoutMissingVector(data; unsafe=false)
+        if !unsafe && any(ismissing, data)
+            throw(ArgumentError("data must not contain missing values"))
+        end
+        new{nonmissingtype(eltype(data)), typeof(data)}(data)
+    end
+end
+Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer)
+    out = v.data[i]
+    @assert !(out isa Missing)
+    out::eltype(v)
+end
+Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector{T}, x::T, i) where T
+    v.data[i] = x
+    v
+end
+Base.size(v::WithoutMissingVector) = size(v.data)
 
 """
-    InsertionSort
+    send_to_end!(f::Function, v::AbstractVector; [lo, hi])
 
-Indicate that a sorting function should use the insertion sort algorithm.
+Send every element of `v` for which `f` returns `true` to the end of the vector and return
+the index of the last element for which `f` returns `false`.
 
-Insertion sort traverses the collection one element at a time, inserting
-each element into its correct, sorted position in the output vector.
+`send_to_end!(f, v; lo, hi)` is equivalent to `send_to_end!(f, view(v, lo:hi))+lo-1`
 
-Characteristics:
-  * *stable*: preserves the ordering of elements which
-    compare equal (e.g. "a" and "A" in a sort of letters
-    which ignores case).
-  * *in-place* in memory.
-  * *quadratic performance* in the number of elements to be sorted:
-    it is well-suited to small collections but should not be used for large ones.
+Preserves the order of the elements that are not sent to the end.
 """
-const InsertionSort = InsertionSortAlg()
+function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function
+    i = lo
+    @inbounds while i <= hi && !f(v[i])
+        i += 1
+    end
+    j = i + 1
+    @inbounds while j <= hi
+        if !f(v[j])
+            v[i], v[j] = v[j], v[i]
+            i += 1
+        end
+        j += 1
+    end
+    i - 1
+end
+"""
+    send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi)
 
+Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end.
+
+If `o isa ReverseOrdering` then the "end" of `v` is `v[lo]`.
+
+If `end_stable` is set, the elements that are sent to the end are stable instead of the
+elements that are not.
 """
-    QuickSort
+@inline send_to_end!(f::F, v::AbstractVector, ::ForwardOrdering, end_stable=false; lo, hi) where F <: Function =
+    end_stable ? (lo, hi-send_to_end!(!f, view(v, hi:-1:lo))) : (lo, send_to_end!(f, v; lo, hi))
+@inline send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function =
+    end_stable ? (send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi)
+
+
+function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw)
+    @getkw lo hi
+    if nonmissingtype(eltype(v)) != eltype(v) && o isa DirectOrdering
+        lo, hi = send_to_end!(ismissing, v, o; lo, hi) # narrow lo:hi to the non-missing values
+        _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi))
+    elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && nonmissingtype(eltype(o.data)) != eltype(o.data) && _issorted(v, lo, hi, Forward)
+        lo, hi = send_to_end!(i -> ismissing(@inbounds o.data[i]), v, o.order, true; lo, hi) # end_stable: v was ascending, so the missing indices stay ascending
+        _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi))
+    else
+        _sort!(v, a.next, o, kw) # nothing to filter: pass through unchanged
+    end
+end
 
-Indicate that a sorting function should use the quick sort algorithm.
 
-Quick sort picks a pivot element, partitions the array based on the pivot,
-and then sorts the elements before and after the pivot recursively.
+"""
+    IEEEFloatOptimization(next) <: Algorithm
 
-Characteristics:
-  * *stable*: preserves the ordering of elements which compare equal
-    (e.g. "a" and "A" in a sort of letters which ignores case).
-  * *not in-place* in memory.
-  * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
-  * *good performance* for almost all large collections.
-  * *quadratic worst case runtime* in pathological cases
-    (vanishingly rare for non-malicious input)
+Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers.
+
+IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as unsigned
+integers with the same bits, except for NaN values and negative numbers, which this pass handles.
+
+This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`.
 """
-const QuickSort = PartialQuickSort(missing, missing)
-const QuickSortAlg = PartialQuickSort{Missing, Missing} # Exists for backward compatibility
+struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm
+    next::T
+end
+
+UIntType(::Type{Float16}) = UInt16
+UIntType(::Type{Float32}) = UInt32
+UIntType(::Type{Float64}) = UInt64
+after_zero(::ForwardOrdering, x) = !signbit(x)
+after_zero(::ReverseOrdering, x) = signbit(x)
+is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T)
+function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw)
+    @getkw lo hi
+    if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering
+        lo, hi = send_to_end!(isnan, v, o, true; lo, hi)
+        iv = reinterpret(UIntType(eltype(v)), v)
+        j = send_to_end!(x -> after_zero(o, x), v; lo, hi)
+        scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j))
+        if scratch === nothing # Union split
+            _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch))
+        else
+            _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch))
+        end
+    elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data))
+        lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi)
+        ip = reinterpret(UIntType(eltype(o.data)), o.data)
+        j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi)
+        scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j))
+        if scratch === nothing # Union split
+            _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch))
+        else
+            _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch))
+        end
+    else
+        _sort!(v, a.next, o, kw)
+    end
+end
+
 
 """
-    MergeSort
+    BoolOptimization(next) <: Algorithm
 
-Indicate that a sorting function should use the merge sort algorithm.
+Sort `AbstractVector{Bool}`s using a specialized version of counting sort.
 
-Merge sort divides the collection into subcollections and
-repeatedly merges them, sorting each subcollection at each step,
-until the entire collection has been recombined in sorted form.
+Accesses each element at most twice (one read and one write), and performs at most two
+comparisons.
+"""
+struct BoolOptimization{T <: Algorithm} <: Algorithm
+    next::T
+end
+_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw)
+function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw)
+    @getkw lo hi scratch
+    first = lt(o, false, true) ? false : lt(o, true, false) ? true : return scratch # false and true tie under o: nothing to do
+    count = 0 # number of elements equal to `first` in v[lo:hi]
+    @inbounds for i in lo:hi
+        if v[i] == first
+            count += 1
+        end
+    end
+    @inbounds v[lo:lo+count-1] .= first
+    @inbounds v[lo+count:hi] .= !first
+    scratch
+end
 
-Characteristics:
-  * *stable*: preserves the ordering of elements which compare
-    equal (e.g. "a" and "A" in a sort of letters which ignores
-    case).
-  * *not in-place* in memory.
-  * *divide-and-conquer* sort strategy.
+
+"""
+    IsUIntMappable(yes, no) <: Algorithm
+
+Determines if the elements of a vector can be mapped to unsigned integers while preserving
+their order under the specified ordering.
+
+If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that
+the elements may be mapped to. Otherwise dispatch to the `no` algorithm.
 """
-const MergeSort = MergeSortAlg()
+struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm
+    yes::T
+    no::U
+end
+function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw)
+    if UIntMappable(eltype(v), o) !== nothing
+        _sort!(v, a.yes, o, kw)
+    else
+        _sort!(v, a.no, o, kw)
+    end
+end
+
 
 """
-    AdaptiveSort
+    Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm
 
-Indicate that a sorting function should use the fastest available stable algorithm.
+Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big`
+algorithm.
+"""
+struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm
+    small::T
+    big::U
+end
+Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big)
+Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big)
+function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N
+    @getkw lo hi
+    if (hi-lo) < N
+        _sort!(v, a.small, o, kw)
+    else
+        _sort!(v, a.big, o, kw)
+    end
+end
+
+
+struct InsertionSortAlg <: Algorithm end
 
-Currently, AdaptiveSort uses
-  * [`InsertionSort`](@ref) for short vectors
-  * [`QuickSort`](@ref) for vectors that are not [`UIntMappable`](@ref)
-  * Radix sort for long vectors
-  * Counting sort for vectors of integers spanning a short range
 """
-const AdaptiveSort = AdaptiveSortAlg()
+    InsertionSort
 
-const DEFAULT_UNSTABLE = AdaptiveSort
-const DEFAULT_STABLE   = AdaptiveSort
-const SMALL_ALGORITHM  = InsertionSort
-const SMALL_THRESHOLD  = 20
+Use the insertion sort algorithm.
 
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, o::Ordering)
+Insertion sort traverses the collection one element at a time, inserting
+each element into its correct, sorted position in the output vector.
+
+Characteristics:
+  * *stable*: preserves the ordering of elements which compare equal
+    (e.g. "a" and "A" in a sort of letters which ignores case).
+  * *in-place* in memory.
+  * *quadratic performance* in the number of elements to be sorted:
+    it is well-suited to small collections but should not be used for large ones.
+"""
+const InsertionSort = InsertionSortAlg()
+const SMALL_ALGORITHM = InsertionSortAlg()
+
+function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw)
+    @getkw lo hi scratch
     lo_plus_1 = (lo + 1)::Integer
     @inbounds for i = lo_plus_1:hi
         j = i
@@ -527,9 +730,249 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg,
         end
         v[j] = x
     end
-    return v
+    scratch
+end
+
+
+"""
+    CheckSorted(next) <: Algorithm
+
+Check if the input is already sorted and for large inputs, also check if it is
+reverse-sorted. The reverse-sorted check is unstable.
+"""
+struct CheckSorted{T <: Algorithm} <: Algorithm
+    next::T
+end
+function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw)
+    @getkw lo hi scratch
+
+    # For most arrays, a presorted check is cheap (overhead < 5%) and for most large
+    # arrays it is essentially free (<1%).
+    _issorted(v, lo, hi, o) && return scratch
+
+    # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%)
+    if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o))
+        # If reversing is valid, do so. This violates stability.
+        reverse!(v, lo, hi)
+        return scratch
+    end
+
+    _sort!(v, a.next, o, kw)
+end
+
+
+"""
+    ComputeExtrema(next) <: Algorithm
+
+Compute the extrema of the input under the provided order.
+
+If the minimum is no less than the maximum, then the input is already sorted. Otherwise,
+dispatch to the `next` algorithm.
+"""
+struct ComputeExtrema{T <: Algorithm} <: Algorithm
+    next::T
+end
+function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw)
+    @getkw lo hi scratch
+    lo > hi && return scratch # empty range: already sorted, and v[lo] would be out of bounds
+    mn = mx = v[lo]
+    @inbounds for i in (lo+1):hi
+        vi = v[i]
+        lt(o, vi, mn) && (mn = vi)
+        lt(o, mx, vi) && (mx = vi)
+    end
+
+    lt(o, mn, mx) || return scratch # all same
+
+    _sort!(v, a.next, o, (;kw..., mn, mx)) # record the extrema in kw for later passes
+end
+
+
+"""
+    ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm
+
+If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to
+the `next` algorithm.
+
+For most types, the threshold is if the range is shorter than half the length, but for types
+larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is
+much more generous.
+"""
+struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm
+    counting::T
+    next::U
+end
+ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next)
+function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw)
+    @getkw lo hi mn mx
+    range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn)
+
+    if range < (sizeof(eltype(v)) > 8 ? 5(hi-lo)-100 : div(hi-lo, 2))
+        _sort!(v, a.counting, o, kw)
+    else
+        _sort!(v, a.next, o, kw)
+    end
+end
+_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, a.next, o, kw)
+
+
+"""
+    CountingSort <: Algorithm
+
+Use the counting sort algorithm.
+
+`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and
+space. It counts the number of occurrences of each value in the input and then iterates
+through those counts repopulating the input with the values in sorted order.
+"""
+struct CountingSort <: Algorithm end
+maybe_reverse(o::ForwardOrdering, x) = x
+maybe_reverse(o::ReverseOrdering, x) = reverse(x)
+function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw)
+    @getkw lo hi mn mx scratch
+    range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) # unsigned so a wrapped difference (e.g. Int8) is still the true span
+    offs = 1 - (o === Reverse ? mx : mn)
+
+    counts = fill(0, range+1) # TODO use scratch (but be aware of type stability)
+    @inbounds for i = lo:hi
+        counts[v[i] + offs] += 1
+    end
+
+    idx = lo
+    @inbounds for i = maybe_reverse(o, 1:range+1)
+        lastidx = idx + counts[i] - 1
+        val = i-offs
+        for j = idx:lastidx
+            v[j] = val isa Unsigned && eltype(v) <: Signed ? signed(val) : val # undo the unsigned promotion of `range`
+        end
+        idx = lastidx + 1
+    end
+
+    scratch
+end
+
+
+"""
+    ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm
+
+If the number of bits in the input's range is small enough and the input supports efficient
+bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm.
+"""
+struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm
+    radix::T
+    next::U
+end
+ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next)
+function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw)
+    @getkw lo hi mn mx
+    urange = uint_map(mx, o)-uint_map(mn, o)
+    bits = unsigned(8sizeof(urange) - leading_zeros(urange))
+    if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo)
+        _sort!(v, a.radix, o, kw)
+    else
+        _sort!(v, a.next, o, kw)
+    end
+end
+
+
+"""
+    RadixSort <: Algorithm
+
+Use the radix sort algorithm.
+
+`RadixSort` is a stable least significant bit first radix sort algorithm that runs in
+`O(length * log(range))` time and linear space.
+
+It first sorts the entire vector by the last `chunk_size` bits, then by the second
+to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements
+that compare equal. This is essential so that the order introduced by earlier,
+less significant passes is preserved by later passes.
+
+Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, it
+ * counts the number of entries that fall into each bucket
+ * uses those counts to compute the indices to move elements of those buckets into
+ * moves elements into the computed indices in the swap array
+ * switches the swap and working array
+
+`chunk_size` is larger for larger inputs and determined by an empirical heuristic.
+"""
+struct RadixSort <: Algorithm end
+function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw)
+    @getkw lo hi mn mx scratch
+    umn = uint_map(mn, o)
+    urange = uint_map(mx, o)-umn
+    bits = unsigned(8sizeof(urange) - leading_zeros(urange))
+
+    # At this point, we are committed to radix sort.
+    u = uint_map!(v, lo, hi, o)
+
+    # we subtract umn to avoid radixing over unnecessary bits. For example,
+    # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002]
+    # which uses all 32 bits, but once we subtract umn = 0x7fffffff, we are left with
+    # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and
+    # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4]
+    # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits.
+    # the overhead for this subtraction is small enough that it is worthwhile in many cases.
+
+    # this is faster than u[lo:hi] .-= umn as of v1.9.0-DEV.100
+    @inbounds for i in lo:hi
+        u[i] -= umn
+    end
+
+    scratch, t = make_scratch(scratch, eltype(v), hi-lo+1)
+    tu = reinterpret(eltype(u), t)
+    if radix_sort!(u, lo, hi, bits, tu, 1-lo)
+        uint_unmap!(v, u, lo, hi, o, umn)
+    else
+        uint_unmap!(v, tu, lo, hi, o, umn, 1-lo)
+    end
+    scratch
 end
 
+
+"""
+    PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm
+
+Indicate that a sorting function should use the partial quick sort algorithm.
+
+Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using
+[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks.
+
+Characteristics:
+  * *stable*: preserves the ordering of elements which compare equal
+    (e.g. "a" and "A" in a sort of letters which ignores case).
+  * *not in-place* in memory.
+  * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
+"""
+struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm
+    lo::L
+    hi::H
+    next::T
+end
+PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM)
+PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM)
+_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k))
+_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k))
+
+"""
+    QuickSort
+
+Indicate that a sorting function should use the quick sort algorithm.
+
+Quick sort picks a pivot element, partitions the array based on the pivot,
+and then sorts the elements before and after the pivot recursively.
+
+Characteristics:
+  * *stable*: preserves the ordering of elements which compare equal
+    (e.g. "a" and "A" in a sort of letters which ignores case).
+  * *not in-place* in memory.
+  * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
+  * *good performance* for almost all large collections.
+  * *quadratic worst case runtime* in pathological cases
+    (vanishingly rare for non-malicious input)
+"""
+const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM)
+
 # select a pivot for QuickSort
 #
 # This method is redefined to rand(lo:hi) in Random.jl
@@ -542,147 +985,127 @@ select_pivot(lo::Integer, hi::Integer) = typeof(hi-lo)(hash(lo) % (hi-lo+1)) + l
 #
 # returns (pivot, pivot_index) where pivot_index is the location the pivot
 # should end up, but does not set t[pivot_index] = pivot
-function partition!(t::AbstractVector, lo::Integer, hi::Integer, o::Ordering, v::AbstractVector, rev::Bool)
+function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer, o::Ordering, v::AbstractVector, rev::Bool)
     pivot_index = select_pivot(lo, hi)
-    trues = 0
     @inbounds begin
         pivot = v[pivot_index]
         while lo < pivot_index
             x = v[lo]
             fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x)
-            t[(fx ? hi : lo) - trues] = x
-            trues += fx
+            t[(fx ? hi : lo) - offset] = x
+            offset += fx
             lo += 1
         end
         while lo < hi
             x = v[lo+1]
             fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot)
-            t[(fx ? hi : lo) - trues] = x
-            trues += fx
+            t[(fx ? hi : lo) - offset] = x
+            offset += fx
             lo += 1
         end
     end
 
-    # pivot_index = lo-trues
+    # pivot_index = lo-offset
     # t[pivot_index] is whatever it was before
     # t[<pivot_index] <* pivot, stable
     # t[>pivot_index] >* pivot, reverse stable
 
-    pivot, lo-trues
+    pivot, lo-offset
 end
 
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
-               o::Ordering, t::AbstractVector=similar(v), swap=false, rev=false;
-               check_presorted=true)
+function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw;
+                t=nothing, offset=nothing, swap=false, rev=false)
+    @getkw lo hi scratch
 
-    if check_presorted && !rev && !swap
-        # Even if we are only sorting a short region, we can only short-circuit if the whole
-        # vector is presorted. A weaker condition is possible, but unlikely to be useful.
-        if _issorted(v, lo, hi, o)
-            return v
-        elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y)))
-            # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability.
-            return reverse!(v, lo, hi)
-        end
+    if t === nothing
+        scratch, t = make_scratch(scratch, eltype(v), hi-lo+1)
+        offset = 1-lo
+        kw = (;kw..., scratch)
     end
 
     while lo < hi && hi - lo > SMALL_THRESHOLD
-        pivot, j = swap ? partition!(v, lo, hi, o, t, rev) : partition!(t, lo, hi, o, v, rev)
+        pivot, j = swap ? partition!(v, lo+offset, hi+offset, offset, o, t, rev) : partition!(t, lo, hi, -offset, o, v, rev)
+        j -= !swap*offset
         @inbounds v[j] = pivot
         swap = !swap
 
         # For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped
         if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part
-            swap && copyto!(v, lo, t, lo, j-lo)
+            swap && copyto!(v, lo, t, lo+offset, j-lo)
             rev && reverse!(v, lo, j-1)
             lo = j+1
             rev = !rev
         elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part
-            swap && copyto!(v, j+1, t, j+1, hi-j)
+            swap && copyto!(v, j+1, t, j+1+offset, hi-j)
             rev || reverse!(v, j+1, hi)
             hi = j-1
         elseif j-lo < hi-j
             # Sort the lower part recursively because it is smaller. Recursing on the
             # smaller part guarantees O(log(n)) stack space even on pathological inputs.
-            sort!(v, lo, j-1, a, o, t, swap, rev; check_presorted=false)
+            _sort!(v, a, o, (;kw..., lo, hi=j-1); t, offset, swap, rev)
             lo = j+1
             rev = !rev
         else # Sort the higher part recursively
-            sort!(v, j+1, hi, a, o, t, swap, !rev; check_presorted=false)
+            _sort!(v, a, o, (;kw..., lo=j+1, hi); t, offset, swap, rev=!rev)
             hi = j-1
         end
     end
-    hi < lo && return v
-    swap && copyto!(v, lo, t, lo, hi-lo+1)
+    hi < lo && return scratch
+    swap && copyto!(v, lo, t, lo+offset, hi-lo+1)
     rev && reverse!(v, lo, hi)
-    sort!(v, lo, hi, SMALL_ALGORITHM, o)
+    _sort!(v, a.next, o, (;kw..., lo, hi))
 end
 
-function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering,
-        t0::Union{AbstractVector{T}, Nothing}=nothing) where T
-    @inbounds if lo < hi
-        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
 
-        m = midpoint(lo, hi)
-
-        t = t0 === nothing ? similar(v, m-lo+1) : t0
-        length(t) < m-lo+1 && resize!(t, m-lo+1)
-        require_one_based_indexing(t)
+"""
+    StableCheckSorted(next) <: Algorithm
 
-        sort!(v, lo,  m,  a, o, t)
-        sort!(v, m+1, hi, a, o, t)
+Check if an input is sorted and/or reverse-sorted.
 
-        i, j = 1, lo
-        while j <= m
-            t[i] = v[j]
-            i += 1
-            j += 1
-        end
-
-        i, k = 1, lo
-        while k < j <= hi
-            if lt(o, v[j], t[i])
-                v[k] = v[j]
-                j += 1
-            else
-                v[k] = t[i]
-                i += 1
-            end
-            k += 1
-        end
-        while k < j
-            v[k] = t[i]
-            k += 1
-            i += 1
-        end
+The definition of reverse-sorted is that for every pair of adjacent elements, the latter is
+less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs
+of elements that compare equal.
+"""
+struct StableCheckSorted{T<:Algorithm} <: Algorithm
+    next::T
+end
+function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw)
+    @getkw lo hi scratch
+    if _issorted(v, lo, hi, o)
+        return scratch
+    elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y)))
+        # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability.
+        reverse!(v, lo, hi)
+        return scratch
     end
 
-    return v
+    _sort!(v, a.next, o, kw)
 end
 
-# This is a stable least significant bit first radix sort.
-#
-# That is, it first sorts the entire vector by the last chunk_size bits, then by the second
-# to last chunk_size bits, and so on. Stability means that it will not reorder two elements
-# that compare equal. This is essential so that the order introduced by earlier,
-# less significant passes is preserved by later passes.
-#
-# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it
-#  * counts the number of entries that fall into each bucket
-#  * uses those counts to compute the indices to move elements of those buckets into
-#  * moves elements into the computed indices in the swap array
-#  * switches the swap and working array
-#
-# In the case of an odd number of passes, the returned vector will === the input vector t,
-# not v. This is one of the many reasons radix_sort! is not exported.
+
+# The return value indicates whether v is sorted (true) or t is sorted (false)
+# This is one of the many reasons radix_sort! is not exported.
 function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned,
-                     t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned
+                     t::AbstractVector{U}, offset::Integer,
+                     chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned
     # bits is unsigned for performance reasons.
-    mask = UInt(1) << chunk_size - 1
-    counts = Vector{Int}(undef, mask+2)
-
-    @inbounds for shift in 0:chunk_size:bits-1
-
+    counts = Vector{Int}(undef, 1 << chunk_size + 1) # TODO use scratch for this
+
+    shift = 0
+    while true
+        @noinline radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size)
+        # the latest data resides in t
+        shift += chunk_size
+        shift < bits || return false
+        @noinline radix_sort_pass!(v, lo+offset, hi+offset, -offset, counts, t, shift, chunk_size)
+        # the latest data resides in v
+        shift += chunk_size
+        shift < bits || return true
+    end
+end
+function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size)
+    mask = UInt(1) << chunk_size - 1  # mask is defined in pass so that the compiler
+    @inbounds begin                   #  ↳ knows its shape
         # counts[2:mask+2] will store the number of elements that fall into each bucket.
         # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff.
         counts .= 0
@@ -703,15 +1126,10 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig
             x = v[k]                  # lookup the element
             i = (x >> shift)&mask + 1 # compute its bucket's index for this pass
             j = counts[i]             # lookup the target index
-            t[j] = x                  # put the element where it belongs
+            t[j + offset] = x         # put the element where it belongs
             counts[i] = j + 1         # increment the target index for the next
         end                           #  ↳ element in this bucket
-
-        v, t = t, v # swap the now sorted destination vector t back into primary vector v
-
     end
-
-    v
 end
 function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned)
     # chunk_size is the number of bits to radix over at once.
@@ -726,23 +1144,6 @@ function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned)
     UInt8(cld(bits, cld(bits, guess)))
 end
 
-# For AbstractVector{Bool}, counting sort is always best.
-# This is an implementation of counting sort specialized for Bools.
-# Accepts unused scratch space to avoid method ambiguity.
-function sort!(v::AbstractVector{Bool}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering,
-        t::Union{AbstractVector{Bool}, Nothing}=nothing)
-    first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v
-    count = 0
-    @inbounds for i in lo:hi
-        if v[i] == first
-            count += 1
-        end
-    end
-    @inbounds v[lo:lo+count-1] .= first
-    @inbounds v[lo+count:hi] .= !first
-    v
-end
-
 maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt
 maybe_unsigned(x::BitSigned) = unsigned(x)
 function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
@@ -761,129 +1162,152 @@ function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
     end
     true
 end
-function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, ::AdaptiveSortAlg, o::Ordering,
-               t::Union{AbstractVector{T}, Nothing}=nothing) where T
-    # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range!
-    # so we skip straight to the fallback algorithm which is comparison based.
-    U = UIntMappable(eltype(v), o)
-    U === nothing && return sort!(v, lo, hi, QuickSort, o)
-
-    # to avoid introducing excessive detection costs for the trivial sorting problem
-    # and to avoid overflow, we check for small inputs before any other runtime checks
-    hi <= lo && return v
-    lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow
-    # only count sort on a short range can compete with insertion sort when lenm1 < 40
-    # and the optimization is not worth the detection cost, so we use insertion sort.
-    lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
 
-    # For most arrays, a presorted check is cheap (overhead < 5%) and for most large
-    # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted
-    # input and this guarantees presorted input will always be efficiently handled
-    _issorted(v, lo, hi, o) && return v
-
-    # For large arrays, a reverse-sorted check is essentially free (overhead < 1%)
-    if lenm1 >= 500 && _issorted(v, lo, hi, ReverseOrdering(o))
-        # If reversing is valid, do so. This does not violate stability
-        # because being UIntMappable implies a linear order.
-        reverse!(v, lo, hi)
-        return v
-    end
 
-    # UInt128 does not support fast bit shifting so we never
-    # dispatch to radix sort but we may still perform count sort
-    if sizeof(U) > 8
-        if T <: Integer && o isa DirectOrdering
-            v_min, v_max = _extrema(v, lo, hi, Forward)
-            v_range = maybe_unsigned(v_max-v_min)
-            v_range == 0 && return v # all same
-
-            # we know lenm1 ≥ 40, so this will never underflow.
-            # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback
-            if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small
-                return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi)
-            end
-        end
-        return sort!(v, lo, hi, QuickSort, o; check_presorted=false)
-    end
+## default sorting policy ##
 
-    v_min, v_max = _extrema(v, lo, hi, o)
-    lt(o, v_min, v_max) || return v # all same
-    if T <: Integer && o isa DirectOrdering
-        R = o === Reverse
-        v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min)
-        if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small
-            return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi)
-        end
-    end
+"""
+    InitialOptimizations(next) <: Algorithm
 
-    u_min, u_max = uint_map(v_min, o), uint_map(v_max, o)
-    u_range = maybe_unsigned(u_max-u_min)
-    if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small
-        u = uint_map!(v, lo, hi, o)
-        sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi)
-        return uint_unmap!(v, u, lo, hi, o)
-    end
+Attempt to apply a suite of low-cost optimizations to the input vector before sorting.
 
-    # if u's range is small, then once we subtract out v_min, we'll get a vector like
-    # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009]
-    # where we only need to radix over the last few bits (5, in the example).
-    bits = unsigned(8sizeof(u_range) - leading_zeros(u_range))
-
-    # radix sort runs in O(bits * lenm1), quick sort runs in O(lenm1 * log(lenm1)).
-    # dividing both sides by lenm1 and introducing empirical constant factors yields
-    # the following heuristic for when QuickSort is faster than RadixSort
-    if 22log(lenm1) < bits + 70
-        return if lenm1 > 80
-            sort!(v, lo, hi, QuickSort, o; check_presorted=false)
-        else
-            sort!(v, lo, hi, SMALL_ALGORITHM, o)
-        end
-    end
+`InitialOptimizations` is an implementation detail and subject to change or removal in
+future versions of Julia.
 
-    # At this point, we are committed to radix sort.
-    u = uint_map!(v, lo, hi, o)
+If `next` is stable, then `InitialOptimizations(next)` is also stable.
 
-    # we subtract u_min to avoid radixing over unnecessary bits. For example,
-    # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002]
-    # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with
-    # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and
-    # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4]
-    # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits.
-    # the overhead for this subtraction is small enough that it is worthwhile in many cases.
+The specific optimizations attempted by `InitialOptimizations` are
+[`MissingOptimization`](@ref), [`BoolOptimization`](@ref), dispatch to
+[`InsertionSort`](@ref) for inputs with `length <= 10`, and [`IEEEFloatOptimization`](@ref).
+"""
+InitialOptimizations(next) = MissingOptimization(
+    BoolOptimization(
+        Small{10}(
+            IEEEFloatOptimization(
+                next))))
+"""
+    DEFAULT_STABLE
 
-    # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100
-    @inbounds for i in lo:hi
-        u[i] -= u_min
-    end
+The default sorting algorithm.
 
-    len = lenm1 + 1
-    if t !== nothing && checkbounds(Bool, t, lo:hi) # Fully preallocated and aligned scratch space
-        u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, t))
-        uint_unmap!(v, u2, lo, hi, o, u_min)
-    elseif t !== nothing && (applicable(resize!, t, len) || length(t) >= len) # Viable scratch space
-        length(t) >= len || resize!(t, len)
-        t1 = axes(t, 1) isa OneTo ? t : view(t, firstindex(t):lastindex(t))
-        u2 = radix_sort!(view(u, lo:hi), 1, len, bits, reinterpret(U, t1))
-        uint_unmap!(view(v, lo:hi), u2, 1, len, o, u_min)
-    else # No viable scratch space
-        u2 = radix_sort!(u, lo, hi, bits, similar(u))
-        uint_unmap!(v, u2, lo, hi, o, u_min)
-    end
-end
+This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare
+equal). It makes an effort to be fast for most inputs.
 
-## generic sorting methods ##
+The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help
+for the current dispatch system.
 
-defalg(v::AbstractArray) = DEFAULT_STABLE
+# Extended Help
 
-function sort!(v::AbstractVector{T}, alg::Algorithm,
-               order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T
-    sort!(v, firstindex(v), lastindex(v), alg, order, t)
-end
+`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid
+of Radix, Insertion, Counting, and Quick sorts.
+
+We begin with MissingOptimization because it has no runtime cost when it is not
+triggered and can enable other optimizations to be applied later. For example,
+BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after
+[`MissingOptimization`](@ref) is applied, that input will be converted into an
+`AbstractVector{Bool}`.
+
+We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not
+triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s
+is quite easy).
+
+Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch
+occurs before the [`IEEEFloatOptimization`](@ref) pass because the
+[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs.
+
+To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref).
+
+After these optimizations, we branch on whether radix sort and related algorithms can be
+applied to the input vector and ordering. We conduct this branch by testing if
+`UIntMappable(v, order) !== nothing`. That is, we see if we know of a reversible mapping
+from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after
+the initial optimizations because they can change the input vector's type and ordering to
+make them `UIntMappable`.
+
+If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch
+to [`QuickSort`](@ref).
+
+Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then
+perform a presorted check ([`CheckSorted`](@ref)).
+
+We check for short inputs before performing the presorted check to avoid the overhead of the
+check for small inputs. Because the alternate dispatch is to [`InsertionSort`](@ref) which
+has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small
+inputs.
+
+We check if the input is reverse-sorted for long vectors (more than 500 elements) because
+the check is essentially free unless the input is almost entirely reverse sorted.
+
+Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms
+a [total order](https://en.wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to
+perform an unstable sort because no two elements can compare equal unless they _are_ equal,
+in which case switching them is undetectable. We utilize this fact to perform a more
+aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`.
+
+After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the
+input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until
+it is necessary.
+
+Next, we [`ConsiderCountingSort`](@ref). If the range of the input is small compared to its
+length, we apply [`CountingSort`](@ref).
+
+Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort,
+but we consider the number of _bits_ in the range, rather than the range itself.
+Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this
+stage.
 
-function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm,
-               order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T
-    sort!(v, lo, hi, alg, order)
+Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and
+otherwise we dispatch to [`QuickSort`](@ref).
+"""
+const DEFAULT_STABLE = InitialOptimizations(
+    IsUIntMappable(
+        Small{40}(
+            CheckSorted(
+                ComputeExtrema(
+                    ConsiderCountingSort(
+                        ConsiderRadixSort(
+                            Small{80}(
+                                QuickSort)))))),
+        StableCheckSorted(
+            QuickSort)))
+"""
+    DEFAULT_UNSTABLE
+
+An efficient sorting algorithm.
+
+The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently
+the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future.
+"""
+const DEFAULT_UNSTABLE = DEFAULT_STABLE
+const SMALL_THRESHOLD  = 20
+
+function Base.show(io::IO, alg::Algorithm)
+    print_tree(io, alg, 0)
 end
+function print_tree(io::IO, alg::Algorithm, cols::Int)
+    print(io, "    "^cols)
+    show_type(io, alg)
+    print(io, '(')
+    for (i, name) in enumerate(fieldnames(typeof(alg)))
+        arg = getproperty(alg, name)
+        i > 1 && print(io, ',')
+        if arg isa Algorithm
+            println(io)
+            print_tree(io, arg, cols+1)
+        else
+            i > 1 && print(io, ' ')
+            print(io, arg)
+        end
+    end
+    print(io, ')')
+end
+show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name)
+show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}")
+
+defalg(v::AbstractArray) = DEFAULT_STABLE
+defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE
+defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
+defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
 
 """
     sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
@@ -931,31 +1355,9 @@ function sort!(v::AbstractVector{T};
                by=identity,
                rev::Union{Bool,Nothing}=nothing,
                order::Ordering=Forward,
-               scratch::Union{AbstractVector{T}, Nothing}=nothing) where T
-    sort!(v, alg, ord(lt,by,rev,order), scratch)
-end
-
-# sort! for vectors of few unique integers
-function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse,
-                         lo=firstindex(x), hi=lastindex(x))
-    offs = 1 - minval
-
-    counts = fill(0, rangelen)
-    @inbounds for i = lo:hi
-        counts[x[i] + offs] += 1
-    end
-
-    idx = lo
-    @inbounds for i = maybereverse(1:rangelen)
-        lastidx = idx + counts[i] - 1
-        val = i-offs
-        for j = idx:lastidx
-            x[j] = val
-        end
-        idx = lastidx + 1
-    end
-
-    return x
+               scratch::Union{Vector{T}, Nothing}=nothing) where T
+    _sort!(v, alg, ord(lt,by,rev,order), (;scratch))
+    v
 end
 
 """
@@ -1081,7 +1483,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector,
     end
 
     # do partial quicksort
-    sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v))
+    _sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;))
 
     maybeview(ix, k)
 end
@@ -1141,7 +1543,7 @@ function sortperm(A::AbstractArray;
                   by=identity,
                   rev::Union{Bool,Nothing}=nothing,
                   order::Ordering=Forward,
-                  scratch::Union{AbstractVector{<:Integer}, Nothing}=nothing,
+                  scratch::Union{Vector{<:Integer}, Nothing}=nothing,
                   dims...) #to optionally specify dims argument
     ordr = ord(lt,by,rev,order)
     if ordr === Forward && isa(A,Vector) && eltype(A)<:Integer
@@ -1205,7 +1607,7 @@ function sortperm!(ix::AbstractArray{T}, A::AbstractArray;
                    rev::Union{Bool,Nothing}=nothing,
                    order::Ordering=Forward,
                    initialized::Bool=false,
-                   scratch::Union{AbstractVector{T}, Nothing}=nothing,
+                   scratch::Union{Vector{T}, Nothing}=nothing,
                    dims...) where T <: Integer #to optionally specify dims argument
     (typeof(A) <: AbstractVector) == (:dims in keys(dims)) && throw(ArgumentError("Dims argument incorrect for type $(typeof(A))"))
     axes(ix) == axes(A) || throw(ArgumentError("index array must have the same size/axes as the source array, $(axes(ix)) != $(axes(A))"))
@@ -1278,7 +1680,7 @@ function sort(A::AbstractArray{T};
               by=identity,
               rev::Union{Bool,Nothing}=nothing,
               order::Ordering=Forward,
-              scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T
+              scratch::Union{Vector{T}, Nothing}=nothing) where T
     dim = dims
     order = ord(lt,by,rev,order)
     n = length(axes(A, dim))
@@ -1295,14 +1697,27 @@ function sort(A::AbstractArray{T};
     end
 end
 
-@noinline function sort_chunks!(Av, n, alg, order, t)
+@noinline function sort_chunks!(Av, n, alg, order, scratch)
     inds = LinearIndices(Av)
-    for s = first(inds):n:last(inds)
-        sort!(Av, s, s+n-1, alg, order, t)
+    sort_chunks!(Av, n, alg, order, scratch, first(inds), last(inds))
+end
+
+@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing, fst, lst)
+    for lo = fst:n:lst
+        s = _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch))
+        s !== nothing && return sort_chunks!(Av, n, alg, order, s, lo+n, lst)
     end
     Av
 end
 
+@noinline function sort_chunks!(Av, n, alg, order, scratch::AbstractVector, fst, lst)
+    for lo = fst:n:lst
+        _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch))
+    end
+    Av
+end
+
+
 """
     sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
@@ -1338,14 +1753,14 @@ function sort!(A::AbstractArray{T};
                lt=isless,
                by=identity,
                rev::Union{Bool,Nothing}=nothing,
-               order::Ordering=Forward,
-               scratch::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T
-    _sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch)
+               order::Ordering=Forward, # TODO stop eagerly over-allocating.
+               scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T
+    __sort!(A, Val(dims), alg, ord(lt, by, rev, order), scratch)
 end
-function _sort!(A::AbstractArray{T}, ::Val{K},
+function __sort!(A::AbstractArray{T}, ::Val{K},
                 alg::Algorithm,
                 order::Ordering,
-                scratch::Union{AbstractVector{T}, Nothing}) where {K,T}
+                scratch::Union{Vector{T}, Nothing}) where {K,T}
     nd = ndims(A)
 
     1 <= K <= nd || throw(ArgumentError("dimension out of range"))
@@ -1353,7 +1768,7 @@ function _sort!(A::AbstractArray{T}, ::Val{K},
     remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd)
     for idx in CartesianIndices(remdims)
         Av = view(A, ntuple(i -> i == K ? Colon() : idx[i], nd)...)
-        sort!(Av, alg, order, scratch)
+        sort!(Av; alg, order, scratch)
     end
     A
 end
@@ -1436,175 +1851,109 @@ function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering)
 end
 
 function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer,
-                     order::Ordering, offset::U=zero(U)) where U <: Unsigned
+                     order::Ordering, offset::U=zero(U),
+                     index_offset::Integer=0) where U <: Unsigned
     @inbounds for i in lo:hi
-        v[i] = uint_unmap(eltype(v), u[i]+offset, order)
+        v[i] = uint_unmap(eltype(v), u[i+index_offset]+offset, order)
     end
     v
 end
 
 
-## fast clever sorting for floats ##
-
-module Float
-using ..Sort
-using ...Order
-using Base: IEEEFloat
-
-import Core.Intrinsics: slt_int
-import ..Sort: sort!, UIntMappable, uint_map, uint_unmap
-import ...Order: lt, DirectOrdering
-
-# fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}.
-# This type allows us to dispatch only when it is safe to do so. See #42739 for more info.
-const FPSortable = Union{
-    AbstractVector{Union{Float16, Missing}},
-    AbstractVector{Union{Float32, Missing}},
-    AbstractVector{Union{Float64, Missing}},
-    AbstractVector{Float16},
-    AbstractVector{Float32},
-    AbstractVector{Float64},
-    AbstractVector{Missing}}
 
-struct Left <: Ordering end
-struct Right <: Ordering end
+### Unused constructs for backward compatibility ###
 
-left(::DirectOrdering) = Left()
-right(::DirectOrdering) = Right()
+struct MergeSortAlg{T <: Algorithm} <: Algorithm
+    next::T
+end
 
-left(o::Perm) = Perm(left(o.order), o.data)
-right(o::Perm) = Perm(right(o.order), o.data)
+"""
+    MergeSort
 
-lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x)
-lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y)
+Indicate that a sorting function should use the merge sort algorithm.
 
-uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x)
-uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u)
-uint_map(x::Float16, ::Right) = reinterpret(UInt16, x)
-uint_unmap(::Type{Float16}, u::UInt16, ::Right) = reinterpret(Float16, u)
-UIntMappable(::Type{Float16}, ::Union{Left, Right}) = UInt16
+Merge sort divides the collection into subcollections and
+repeatedly merges them, sorting each subcollection at each step,
+until the entire collection has been recombined in sorted form.
 
-uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x)
-uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u)
-uint_map(x::Float32, ::Right) = reinterpret(UInt32, x)
-uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u)
-UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32
+Characteristics:
+  * *stable*: preserves the ordering of elements which compare
+    equal (e.g. "a" and "A" in a sort of letters which ignores
+    case).
+  * *not in-place* in memory.
+  * *divide-and-conquer* sort strategy.
+"""
+const MergeSort = MergeSortAlg(SMALL_ALGORITHM)
 
-uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x)
-uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u)
-uint_map(x::Float64, ::Right) = reinterpret(UInt64, x)
-uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u)
-UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64
+function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing)
+    @getkw lo hi scratch
+    @inbounds if lo < hi
+        hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw)
 
-isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x)
-isnan(o::DirectOrdering, x::Missing) = false
-isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i])
+        m = midpoint(lo, hi)
 
-ismissing(o::DirectOrdering, x::IEEEFloat) = false
-ismissing(o::DirectOrdering, x::Missing) = true
-ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i])
+        if t === nothing
+            scratch, t = make_scratch(scratch, eltype(v), m-lo+1)
+        end
 
-allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing
-allowsmissing(::AbstractVector{<:Integer},
-              ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} =
-    T >: Missing
+        _sort!(v, a, o, (;kw..., hi=m, scratch); t, offset)
+        _sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset)
 
-function specials2left!(testf::Function, v::AbstractVector, o::Ordering,
-                        lo::Integer=firstindex(v), hi::Integer=lastindex(v))
-    i = lo
-    @inbounds while i <= hi && testf(o,v[i])
-        i += 1
-    end
-    j = i + 1
-    @inbounds while j <= hi
-        if testf(o,v[j])
-            v[i], v[j] = v[j], v[i]
+        i, j = 1, lo
+        while j <= m
+            t[i] = v[j]
             i += 1
+            j += 1
         end
-        j += 1
-    end
-    return i, hi
-end
-function specials2right!(testf::Function, v::AbstractVector, o::Ordering,
-                         lo::Integer=firstindex(v), hi::Integer=lastindex(v))
-    i = hi
-    @inbounds while lo <= i && testf(o,v[i])
-        i -= 1
-    end
-    j = i - 1
-    @inbounds while lo <= j
-        if testf(o,v[j])
-            v[i], v[j] = v[j], v[i]
-            i -= 1
+
+        i, k = 1, lo
+        while k < j <= hi
+            if lt(o, v[j], t[i])
+                v[k] = v[j]
+                j += 1
+            else
+                v[k] = t[i]
+                i += 1
+            end
+            k += 1
+        end
+        while k < j
+            v[k] = t[i]
+            k += 1
+            i += 1
         end
-        j -= 1
     end
-    return lo, i
+
+    scratch
 end
 
-function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering)
-    lo, hi = firstindex(v), lastindex(v)
-    if allowsmissing(v, o)
-        i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
-        sort!(v, lo, i-1, a, o)
-        return i, hi
-    else
-        return specials2left!(isnan, v, o, lo, hi)
-    end
+# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way
+sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o)
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering)
+    _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false))
+    v
 end
-function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering)
-    lo, hi = firstindex(v), lastindex(v)
-    if allowsmissing(v, o)
-        _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
-        sort!(v, i+1, hi, a, o)
-        return lo, i
-    else
-        return specials2right!(isnan, v, o, lo, hi)
-    end
+sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o)
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector)
+    _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false))
+    v
 end
 
-specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) =
-    specials2right!(v, a, o)
-specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) =
-    specials2left!(v, a, o)
-specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) =
-    specials2right!(v, a, o)
-specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) =
-    specials2left!(v, a, o)
-
-issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x))
-issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x))
-issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i])
-
-function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering,
-                 t::Union{AbstractVector{T}, Nothing}=nothing) where T
-    # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn).
-    # The overhead is O(n). For n < 10, it's not worth it.
-    length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t)
-
-    i, j = lo, hi = specials2end!(v,a,o)
-    @inbounds while true
-        while i <= j &&  issignleft(o,v[i]); i += 1; end
-        while i <= j && !issignleft(o,v[j]); j -= 1; end
-        i <= j || break
-        v[i], v[j] = v[j], v[i]
-        i += 1; j -= 1
+# Support dispatch on custom algorithms in the old way
+# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ...
+function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
+    @getkw lo hi scratch allow_legacy_dispatch
+    if allow_legacy_dispatch
+        sort!(v, lo, hi, a, o)
+        scratch
+    else
+        # This error prevents infinite recursion for unknown algorithms
+        throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined"))
     end
-    sort!(v, lo, j,  a, left(o), t)
-    sort!(v, i,  hi, a, right(o), t)
-    return v
-end
-
-
-function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering,
-               t::Union{FPSortable, Nothing}=nothing)
-    fpsort!(v, a, o, t)
-end
-function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable},
-               t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned}
-    fpsort!(v, a, o, t)
 end
 
-end # module Sort.Float
+# Keep old internal types so that people can keep dispatching with
+# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ...
+const QuickSortAlg = typeof(QuickSort)
 
 end # module Sort
diff --git a/test/sorting.jl b/test/sorting.jl
index 4a0299b2217c2..37bad7d23c94b 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -79,8 +79,9 @@ end
 end
 
 @testset "stability" begin
-    for Alg in [InsertionSort, MergeSort, QuickSort, Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE,
-        PartialQuickSort(missing, 1729), PartialQuickSort(1729, missing)]
+    for Alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE,
+            PartialQuickSort(missing, 1729, Base.Sort.SMALL_ALGORITHM),
+            PartialQuickSort(1729, missing, Base.Sort.SMALL_ALGORITHM)]
         @test issorted(sort(1:2000, alg=Alg, by=x->0))
         @test issorted(sort(1:2000, alg=Alg, by=x->x÷100))
     end
@@ -534,11 +535,11 @@ end
     @test issorted(a)
 
     a = view([9:-1:0;], :)::SubArray
-    Base.Sort.sort_int_range!(a, 10, 0, identity)  # test it supports non-Vector
+    Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9))  # test it supports non-Vector
     @test issorted(a)
 
     a = OffsetArray([9:-1:0;], -5)
-    Base.Sort.sort_int_range!(a, 10, 0, identity)
+    Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9))
     @test issorted(a)
 end
 
@@ -632,9 +633,9 @@ end
 @testset "uint mappings" begin
 
     #Construct value lists
-    floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN,
-                prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))]
-        for T in [Float16, Float32, Float64]]
+    floats = [reinterpret(U, vcat(T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN,
+                prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))], randnans(4)))
+        for (U, T) in [(UInt16, Float16), (UInt32, Float32), (UInt64, Float64)]]
 
     ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1]
         for T in Base.BitInteger_types]
@@ -650,21 +651,18 @@ end
     UIntN(::Val{8}) = UInt64
     UIntN(::Val{16}) = UInt128
     map(vals) do x
+        x isa Base.ReinterpretArray && return
         T = eltype(x)
         U = UIntN(Val(sizeof(T)))
         append!(x, rand(T, 4))
         append!(x, reinterpret.(T, rand(U, 4)))
-        if T <: AbstractFloat
-            mask = reinterpret(U, T(NaN))
-            append!(x, reinterpret.(T, mask .| rand(U, 4)))
-        end
     end
 
     for x in vals
         T = eltype(x)
         U = UIntN(Val(sizeof(T)))
-        for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)]
-            if order isa Base.Order.By || ((T <: AbstractFloat) == (order isa DirectOrdering))
+        for order in [Forward, Reverse, By(Forward, identity)]
+            if order isa Base.Order.By
                 @test Base.Sort.UIntMappable(T, order) === nothing
                 continue
             end
@@ -681,10 +679,6 @@ end
 
             for a in x
                 for b in x
-                    if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right()
-                        # Left and Right orderings guarantee homogeneous sign and no NaNs
-                        (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue
-                    end
                     @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order))
                 end
             end
@@ -705,7 +699,7 @@ end
 
     # Nevertheless, it still works...
     for alg in [InsertionSort, MergeSort, QuickSort,
-            Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
+            Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
         @test sort(v, alg=alg, lt = <=) == s
     end
     @test partialsort(v, 172, lt = <=) == s[172]
@@ -716,7 +710,7 @@ end
     # this invalid lt order.
     perm = reverse(sortperm(v, rev=true))
     for alg in [InsertionSort, MergeSort, QuickSort,
-            Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
+            Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
         @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm
     end
     @test partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172]
@@ -724,7 +718,7 @@ end
 
     # lt can be very poorly behaved and sort will still permute its input in some way.
     for alg in [InsertionSort, MergeSort, QuickSort,
-            Base.Sort.AdaptiveSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
+            Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE]
         @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s
     end
     @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5
@@ -739,7 +733,6 @@ end
     @test issorted(k[idx], rev=true)
 end
 
-# This testset is at the end of the file because it is slow
 @testset "sort(x; scratch)" begin
     for n in [1,10,100,1000]
         v = rand(n)
@@ -770,6 +763,142 @@ end
     end
 end
 
+@testset "Unions with missing" begin
+    @test issorted(sort(shuffle!([fill(missing, 10); rand(Int, 100)])))  # mixed Int/missing input
+end
+
+@testset "Specific algorithms" begin
+    let
+        requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort,
+            Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort,
+            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes),
+            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big),
+            typeof(Base.Sort.DEFAULT_STABLE.next.next.big.next.yes.big.next)}
+        # ^ Algorithms for which test_alg skips `By` orderings and the Float64 input.
+        function test_alg(kw, alg, float=true)  # NOTE(review): `float` is never read
+            for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)]
+                order isa Base.By && alg isa requires_uint_mappable && continue
+                for n in [1,7,179,1312]
+                    # NOTE(review): RadixSort is skipped for n == 1; the reason is not stated here.
+                    n == 1 && alg isa Base.Sort.RadixSort && continue
+                    # Integer input: `y` is the expected result and `kw(y)` supplies extra keywords.
+                    x = rand(1:n+1, n)
+                    y = sort(x; order)
+                    @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x  # return value is not `x` itself
+                    @test all(y .=== x)  # `x` itself now holds the sorted values
+
+                    alg isa requires_uint_mappable && continue
+                    # Float64 input, checked the same way as the integer input above.
+                    x = randn(n)
+                    y = sort(x; order)
+                    @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x
+                    @test all(y .=== x)
+                end
+            end
+        end
+        test_alg(alg) = test_alg(x -> (), alg)  # convenience method: no extra keywords
+        # Test `alg`, then recurse into every field of `alg` that is itself an Algorithm.
+        function test_alg_rec(alg, extrema=false)
+            if extrema
+                test_alg(alg) do y
+                    (;mn=first(y),mx=last(y))  # `y` is sorted under `order`, so its ends are the extrema
+                end
+            else
+                test_alg(alg)
+            end
+            extrema |= alg isa Base.Sort.ComputeExtrema  # algorithms nested below get mn/mx keywords
+            for name in fieldnames(typeof(alg))
+                a = getfield(alg, name)
+                a isa Base.Sort.Algorithm && test_alg_rec(a, extrema)
+            end
+        end
+        # Walk the whole default algorithm, starting without mn/mx keywords.
+        test_alg_rec(Base.DEFAULT_STABLE)
+    end
+end
+
+@testset "show(::Algorithm)" begin
+    shown = string(Base.DEFAULT_STABLE)  # printed form, checked for round-trip and layout
+    @test eval(Meta.parse(shown)) === Base.DEFAULT_STABLE
+    @test 10 < maximum(length, split(shown, '\n')) < 100
+    @test 1 < length(split(shown, '\n')) < 30
+end
+
+@testset "Extensibility" begin
+    # Defining new algorithms & backwards compatibility with packages that use sorting internals
+    # (covers both the five-argument `sort!` hook and the four-argument `_sort!` hook).
+    struct MyFirstAlg <: Base.Sort.Algorithm end
+    # No sorting method has been defined for MyFirstAlg yet, so this must fail cleanly:
+    @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error
+    # Mixed Int/missing input, used with the InitialOptimizations wrapper below.
+    v = shuffle(vcat(fill(missing, 10), rand(Int, 100)))
+
+    # The pre-1.9 dispatch method
+    function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering)
+        v[lo:hi] .= 7  # deliberately not a sort: makes it visible that this method ran
+    end
+    @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7]
+    @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10)))
+
+    # Using the old hook with old entry-point
+    @test sort!([3,1,2], MyFirstAlg(), Base.Forward) == [7,7,7]
+    @test sort!([3,1,2], 1, 3, MyFirstAlg(), Base.Forward) == [7,7,7]
+
+    # Use the pre-1.9 entry-point into the internals (replaces the method defined above)
+    function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering)
+        sort!(v, lo, hi, Base.DEFAULT_STABLE, o)
+    end
+    @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3]
+    @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())))
+
+    # Another pre-1.9 entry-point into the internals
+    @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward))
+
+    struct MySecondAlg <: Base.Sort.Algorithm end
+    # A new dispatch method
+    function Base.Sort._sort!(v::AbstractVector, ::MySecondAlg, o::Base.Order.Ordering, kw)
+        Base.Sort.@getkw lo hi  # binds `lo` and `hi` from `kw`
+        v[lo:hi] .= 9
+    end
+    @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9]
+    @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10)))
+end
+
+@testset "sort!(v, lo, hi, alg, order)" begin
+    data = Vector{Float64}(undef, 4000)
+    for alg in (MergeSort, QuickSort, InsertionSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE)
+        rand!(data)
+        sort!(data, 1, 2000, alg, Base.Forward)  # sort only the first half
+        @test issorted(view(data, 1:2000))
+        @test !issorted(data)
+        # Sorting the second half must leave the first half sorted.
+        sort!(data, 2001, 4000, alg, Base.Forward)
+        @test issorted(view(data, 1:2000))
+        @test issorted(view(data, 2001:4000))
+        @test !issorted(data)
+        # Sorting the middle half leaves three sorted runs and breaks up the two old halves.
+        sort!(data, 1001, 3000, alg, Base.Forward)
+        @test issorted(view(data, 1:1000))
+        @test issorted(view(data, 1001:3000))
+        @test issorted(view(data, 3001:4000))
+        @test !issorted(view(data, 1:2000))
+        @test !issorted(view(data, 2001:4000))
+        @test !issorted(data)
+    end
+end
+
+@testset "IEEEFloatOptimization with -0.0" begin
+    x = round.(100 .* randn(1000)) ./ 100  # Also test lots of duplicates
+    x[rand(1:1000, 5)] .= 0.0
+    x[rand(1:1000, 5)] .= -0.0  # To be sure that -0.0 is present
+    @test issorted(sort!(x))
+end
+
+@testset "Count sort near the edge of its range" begin
+    edges = (typemin(Int):typemin(Int)+100, typemax(Int)-100:typemax(Int))  # both ends of Int
+    for r in edges; @test issorted(sort(rand(r, 1000))); end
+end
+
 # This testset is at the end of the file because it is slow.
 @testset "searchsorted" begin
     numTypes = [ Int8,  Int16,  Int32,  Int64,  Int128,