diff --git a/base/array.jl b/base/array.jl
index c1b44a9a94e25..4a059886b674a 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -1302,25 +1302,23 @@ function empty!(a::Vector)
     return a
 end
 
+_memcmp(a, b, len) = ccall(:memcmp, Int32, (Ptr{Void}, Ptr{Void}, Csize_t), a, b, len) % Int
+
 # use memcmp for lexcmp on byte arrays
 function lexcmp(a::Array{UInt8,1}, b::Array{UInt8,1})
-    c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
-              a, b, min(length(a),length(b)))
+    c = _memcmp(a, b, min(length(a),length(b)))
     return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))
 end
 
 const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N
 # use memcmp for == on bit integer types
-function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray
-    size(a) == size(b) && 0 == ccall(
-        :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * length(a))
-end
+==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} =
+    size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a))
 
 # this is ~20% faster than the generic implementation above for very small arrays
 function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1}
     len = length(a)
-    len == length(b) && 0 == ccall(
-        :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * len)
+    len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len)
 end
 
 """
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 2dbf192adc2c5..93db5579d59d6 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -82,7 +82,7 @@ IndexStyle(::Type{<:BitArray}) = IndexLinear()
 ## aux functions ##
 
 const _msk64 = ~UInt64(0)
-@inline _div64(l) = l >>> 6
+@inline _div64(l) = l >> 6 # arithmetic shift: floor division by 64, also for negative signed l
 @inline _mod64(l) = l & 63
 @inline _msk_end(l::Integer) = _msk64 >>> _mod64(-l)
 @inline _msk_end(B::BitArray) = _msk_end(length(B))
@@ -636,6 +636,10 @@ end
 
 @inline function unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i::Int)
     i1, i2 = get_chunks_id(i)
+    _unsafe_bitsetindex!(Bc, x, i1, i2)
+end
+
+@inline function _unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i1::Int, i2::Int)
     u = UInt64(1) << i2
     @inbounds begin
         c = Bc[i1]
@@ -1438,22 +1442,17 @@ circshift!(B::BitVector, i::Integer) = circshift!(B, B, i)
 
 ## count & find ##
 
-function count(B::BitArray)
+function bitcount(Bc::Vector{UInt64}) # total number of set bits over all chunks of Bc
     n = 0
-    Bc = B.chunks
     @inbounds for i = 1:length(Bc)
         n += count_ones(Bc[i])
     end
     return n
 end
 
-# returns the index of the next non-zero element, or 0 if all zeros
-function findnext(B::BitArray, start::Integer)
-    start > 0 || throw(BoundsError(B, start))
-    start > length(B) && return 0
-
-    Bc = B.chunks
+count(B::BitArray) = bitcount(B.chunks)
 
+function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Integer)
     chunk_start = _div64(start-1)+1
     within_chunk_start = _mod64(start-1)
     mask = _msk64 << within_chunk_start
@@ -1471,6 +1470,14 @@ function findnext(B::BitArray, start::Integer)
     end
     return 0
 end
+
+# returns the index of the next non-zero element, or 0 if all zeros
+function findnext(B::BitArray, start::Integer)
+    start > 0 || throw(BoundsError(B, start))
+    start > length(B) && return 0
+    unsafe_bitfindnext(B.chunks, start)
+end
+
 #findfirst(B::BitArray) = findnext(B, 1)  ## defined in array.jl
 
 # aux function: same as findnext(~B, start), but performed without temporaries
@@ -1527,13 +1534,7 @@ function findnext(testf::Function, B::BitArray, start::Integer)
 end
 #findfirst(testf::Function, B::BitArray) = findnext(testf, B, 1)  ## defined in array.jl
 
-# returns the index of the previous non-zero element, or 0 if all zeros
-function findprev(B::BitArray, start::Integer)
-    start > 0 || return 0
-    start > length(B) && throw(BoundsError(B, start))
-
-    Bc = B.chunks
-
+function unsafe_bitfindprev(Bc::Vector{UInt64}, start::Integer)
     chunk_start = _div64(start-1)+1
     mask = _msk_end(start)
 
@@ -1551,6 +1552,13 @@ function findprev(B::BitArray, start::Integer)
     return 0
 end
 
+# returns the index of the previous non-zero element, or 0 if all zeros
+function findprev(B::BitArray, start::Integer)
+    start > 0 || return 0
+    start > length(B) && throw(BoundsError(B, start))
+    unsafe_bitfindprev(B.chunks, start)
+end
+
 function findprevnot(B::BitArray, start::Integer)
     start > 0 || return 0
     start > length(B) && throw(BoundsError(B, start))
diff --git a/base/bitset.jl b/base/bitset.jl
index 46132374ce115..b01e5b09a095b 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -1,20 +1,35 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct BitSet <: AbstractSet{Int}
-    bits::BitVector
-    BitSet() = new(sizehint!(falses(0), 256))
+const Bits = Vector{UInt64}
+const CHK0 = zero(UInt64)
+const NO_OFFSET = Int === Int64 ? -one(Int) << 60 : -one(Int) << 29
+# + NO_OFFSET must be small enough to stay < 0 when added to any offset.
+#   An offset is in the range -2^57:2^57 (64-bit architectures)
+#   or -2^26:2^26 (32-bit architectures)
+# + when the offset is NO_OFFSET, the bits field *must* be empty
+# + NO_OFFSET could be made to be > 0, but a negative one allows
+#   a small optimization in in(x, ::BitSet)
+
+mutable struct BitSet <: AbstractSet{Int}
+    bits::Vector{UInt64}
+    # 1st stored Int equals 64*offset
+    offset::Int
+
+    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET)
 end
 
 """
     BitSet([itr])
 
-Construct a sorted set of positive `Int`s generated by the given iterable object, or an
-empty set. Implemented as a bit string, and therefore designed for dense integer sets. Only
-`Int`s greater than 0 can be stored. If the set will be sparse (for example holding a few
+Construct a sorted set of `Int`s generated by the given iterable object, or an
+empty set. Implemented as a bit string, and therefore designed for dense integer sets.
+If the set will be sparse (for example holding a few
 very large integers), use [`Set`](@ref) instead.
 """
 BitSet(itr) = union!(BitSet(), itr)
 
+@inline intoffset(s::BitSet) = s.offset << 6
+
 eltype(::Type{BitSet}) = Int
 similar(s::BitSet) = BitSet()
 copy(s1::BitSet) = copy!(BitSet(), s1)
@@ -30,92 +45,211 @@ See also [`copyto!`](@ref).
 function copy!(dest::BitSet, src::BitSet)
     resize!(dest.bits, length(src.bits))
     copyto!(dest.bits, src.bits)
+    dest.offset = src.offset
     dest
 end
+
 eltype(s::BitSet) = Int
-sizehint!(s::BitSet, n::Integer) = (n > length(s.bits) && _resize0!(s.bits, n); s)
 
-# An internal function for setting the inclusion bit for a given integer n >= 0
-@inline function _setint!(s::BitSet, idx::Integer, b::Bool)
-    if idx > length(s.bits)
+sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
+
+function _bits_getindex(b::Bits, n::Int, offset::Int) # is the bit for integer n set in b?
+    ci = _div64(n) - offset + 1 # 1-based index in b of the chunk that would hold n
+    1 <= ci <= length(b) || return false # no such chunk: the bit counts as unset
+    @inbounds r = (b[ci] & (one(UInt64) << _mod64(n))) != 0 # test bit (n mod 64) of that chunk
+    r
+end
+
+function _bits_findnext(b::Bits, start::Int)
+    # start is 0-based
+    # @assert start >= 0
+    _div64(start) + 1 > length(b) && return -1
+    unsafe_bitfindnext(b, start+1) - 1
+end
+
+function _bits_findprev(b::Bits, start::Int)
+    # start is 0-based
+    # @assert start <= 64 * length(b) - 1
+    start >= 0 || return -1
+    unsafe_bitfindprev(b, start+1) - 1
+end
+
+# An internal function for setting the inclusion bit for a given integer
+@inline function _setint!(s::BitSet, idx::Int, b::Bool)
+    cidx = _div64(idx) # number of the 64-bit chunk holding idx
+    len = length(s.bits)
+    diff = cidx - s.offset # 0-based position of that chunk relative to the first stored chunk
+    if diff >= len
         b || return s # setting a bit to zero outside the set's bits is a no-op
-        _resize0!(s.bits, idx)
+
+        # we put the following test within one of the two branches,
+        # with the NO_OFFSET trick, to avoid having to perform it at
+        # each and every call to _setint!
+        if s.offset == NO_OFFSET # initialize the offset
+            # we assume isempty(s.bits)
+            s.offset = cidx
+            diff = 0
+        end
+        _growend0!(s.bits, diff - len + 1) # append zeroed chunks so that index diff+1 exists
+    elseif diff < 0
+        b || return s # clearing a bit below the stored range is a no-op as well
+        _growbeg0!(s.bits, -diff) # prepend -diff zeroed chunks
+        s.offset += diff # the first stored chunk is now chunk number cidx
+        diff = 0
     end
-    @inbounds s.bits[idx] = b
+    _unsafe_bitsetindex!(s.bits, b, diff+1, _mod64(idx))
     s
 end
 
-# An internal function to resize a bitarray and ensure the newly allocated
+
+# An internal function to grow a Bits object at its end and ensure the newly allocated
 # elements are zeroed (will become unnecessary if this behavior changes)
-@inline function _resize0!(b::BitVector, newlen::Integer)
+@inline function _growend0!(b::Bits, nchunks::Int)
     len = length(b)
-    newlen = ((newlen+63) >> 6) << 6 # smallest multiple of 64 >= newlen
-    resize!(b, newlen)
-    len < newlen && @inbounds b[len+1:newlen] = false # resize! gives dirty memory
-    b
+    _growend!(b, nchunks)
+    @inbounds b[len+1:end] = CHK0 # _growend! gives dirty memory
+end
+
+@inline function _growbeg0!(b::Bits, nchunks::Int) # prepend nchunks zeroed chunks to b
+    _growbeg!(b, nchunks)
+    @inbounds b[1:nchunks] = CHK0 # zero the prepended chunks, as _growend0! does for appended ones
+end
+
+function _matched_map!(f, s1::BitSet, s2::BitSet) # combine s2 into s1 with f; returns s1
+    left_false_is_false = f(false, false) == f(false, true) == false # f(false, x) is always false
+    right_false_is_false = f(false, false) == f(true, false) == false # f(x, false) is always false
+
+    # we must first handle the NO_OFFSET case; we could test for
+    # isempty(s1) but it can be costly, so the user has to call
+    # empty!(s1) herself before-hand to re-initialize to NO_OFFSET
+    if s1.offset == NO_OFFSET
+        return left_false_is_false ? s1 : copy!(s1, s2) # s1 stays empty, or becomes a copy of s2
+    elseif s2.offset == NO_OFFSET
+        return right_false_is_false ? empty!(s1) : s1 # s1 is emptied, or left untouched
+    end
+    s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset,
+                              left_false_is_false, right_false_is_false)
+    s1
 end
 
 # An internal function that takes a pure function `f` and maps across two BitArrays
-# allowing the lengths to be different and altering b1 with the result
+# allowing the lengths and offsets to be different and altering a1 with the result
 # WARNING: the assumptions written in the else clauses must hold
-function _matched_map!(f, b1::BitArray, b2::BitArray)
-    l1, l2 = length(b1), length(b2)
-    _bit_map!(f, b1, b2)
-    if l1 < l2
-        if f(false, false) == f(false, true) == false
+function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
+                       left_false_is_false::Bool, right_false_is_false::Bool)
+    l1, l2 = length(a1), length(a2)
+    bdiff = b2 - b1
+    e1, e2 = l1+b1, l2+b2
+    ediff = e2 - e1
+
+    # map! over the common indices
+    @inbounds for i = max(1, 1+bdiff):min(l1, l2+bdiff)
+        a1[i] = f(a1[i], a2[i-bdiff])
+    end
+
+    if ediff > 0
+        if left_false_is_false
             # We don't need to worry about the trailing bits — they're all false
         else # @assert f(false, x) == x
-            resize!(b1, l2)
-            chk_offs = 1+l1>>6
-            unsafe_copyto!(b1.chunks, chk_offs, b2.chunks, chk_offs, 1+l2>>6-chk_offs)
+            _growend!(a1, ediff)
+            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
+            for outer l1 = l1+1:bdiff
+                @inbounds a1[l1] = CHK0
+            end
+            # update ediff in case l1 was updated
+            ediff = e2 - l1 - b1
+            # copy actual chunks from a2
+            unsafe_copyto!(a1, l1+1, a2, l2+1-ediff, ediff)
+            l1 = length(a1)
         end
-    elseif l1 > l2
-        if f(false, false) == f(true, false) == false
+    elseif ediff < 0
+        if right_false_is_false
             # We don't need to worry about the trailing bits — they're all false
-            resize!(b1, l2)
+            _deleteend!(a1, min(l1, -ediff))
+            # no need to update l1, as if bdiff > 0 (case below), then bdiff will
+            # be smaller anyway than an updated l1
         else # @assert f(x, false) == x
             # We don't need to worry about the trailing bits — they already have the
             # correct value
         end
     end
-    b1
-end
 
-# similar to bit_map! in bitarray.jl, but lengths are multiple of 64,
-# and may not match
-function _bit_map!(f, b1::BitArray, b2::BitArray)
-    b1c, b2c = b1.chunks, b2.chunks
-    l = min(length(b1c), length(b2c))
-    @inbounds for i = 1:l
-        b1c[i] = f(b1c[i], b2c[i])
+    if bdiff < 0
+        if left_false_is_false
+            # We don't need to worry about the leading bits — they're all false
+        else # @assert f(false, x) == x
+            _growbeg!(a1, -bdiff)
+            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
+            for i = l2+1:-bdiff
+                @inbounds a1[i] = CHK0
+            end
+            b1 += bdiff # updated return value
+
+            # copy actual chunks from a2
+            unsafe_copyto!(a1, 1, a2, 1, min(-bdiff, l2))
+        end
+    elseif bdiff > 0
+        if right_false_is_false
+            # We don't need to worry about the leading bits — they're all false
+            _deletebeg!(a1, min(l1, bdiff))
+            b1 += bdiff
+        else # @assert f(x, false) == x
+            # We don't need to worry about the leading bits — they already have the
+            # correct value
+        end
     end
-    b1
+    b1 # the new offset
 end
 
-@noinline _throw_bitset_bounds_err() = throw(ArgumentError("elements of BitSet must be between 1 and typemax(Int)"))
+
+@noinline _throw_bitset_bounds_err() =
+    throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)"))
+
+@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int)
+
+@inline _check_bitset_bounds(n) =
+    _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err()
+
+@inline _check_bitset_bounds(n::Int) = n
+
 @noinline _throw_keyerror(n) = throw(KeyError(n))
 
-@inline function push!(s::BitSet, n::Integer)
-    0 < n <= typemax(Int) || _throw_bitset_bounds_err()
-    _setint!(s, n, true)
-end
+@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true)
+
 push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s)
 
-@inline function pop!(s::BitSet)
-    pop!(s, last(s))
-end
+@inline pop!(s::BitSet) = pop!(s, last(s))
+
 @inline function pop!(s::BitSet, n::Integer)
-    n in s ? (_delete!(s, n); n) : _throw_keyerror(n)
+    if n in s
+        delete!(s, n)
+        n
+    else
+        _throw_keyerror(n)
+    end
 end
+
 @inline function pop!(s::BitSet, n::Integer, default)
-    n in s ? (_delete!(s, n); n) : default
+    if n in s
+        delete!(s, n)
+        n
+    else
+        default
+    end
 end
-@inline _delete!(s::BitSet, n::Integer) = _setint!(s, n, false)
-@inline delete!(s::BitSet, n::Integer) = n > 0 ? _delete!(s, n) : s
+
+@inline delete!(s::BitSet, n::Int) = _setint!(s, n, false)
+@inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? delete!(s, Int(n)) : s
+
 shift!(s::BitSet) = pop!(s, first(s))
 
-empty!(s::BitSet) = (fill!(s.bits, false); s)
-isempty(s::BitSet) = !any(s.bits)
+function empty!(s::BitSet)
+    empty!(s.bits)
+    s.offset = NO_OFFSET
+    s
+end
+
+isempty(s::BitSet) = _check0(s.bits, 1, length(s.bits))
 
 # Mathematical set functions: union!, intersect!, setdiff!, symdiff!
 
@@ -124,10 +258,7 @@ union(s1::BitSet, s2::BitSet) = union!(copy(s1), s2)
 union(s1::BitSet, ss::BitSet...) = union(s1, union(ss...))
 union(s::BitSet, ns) = union!(copy(s), ns)
 union!(s::BitSet, ns) = (for n in ns; push!(s, n); end; s)
-function union!(s1::BitSet, s2::BitSet)
-    _matched_map!(|, s1.bits, s2.bits)
-    s1
-end
+union!(s1::BitSet, s2::BitSet) = _matched_map!(|, s1, s2)
 
 intersect(s1::BitSet) = copy(s1)
 intersect(s1::BitSet, ss::BitSet...) = intersect(s1, intersect(ss...))
@@ -146,17 +277,11 @@ intersect(s1::BitSet, s2::BitSet) =
 Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed, `s1`
 will be expanded to the size of `s2`.
 """
-function intersect!(s1::BitSet, s2::BitSet)
-    _matched_map!(&, s1.bits, s2.bits)
-    s1
-end
+intersect!(s1::BitSet, s2::BitSet) = _matched_map!(&, s1, s2)
 
 setdiff(s::BitSet, ns) = setdiff!(copy(s), ns)
 setdiff!(s::BitSet, ns) = (for n in ns; delete!(s, n); end; s)
-function setdiff!(s1::BitSet, s2::BitSet)
-    _matched_map!((p, q) -> p & ~q, s1.bits, s2.bits)
-    s1
-end
+setdiff!(s1::BitSet, s2::BitSet) = _matched_map!((p, q) -> p & ~q, s1, s2)
 
 symdiff(s::BitSet, ns) = symdiff!(copy(s), ns)
 """
@@ -173,40 +298,41 @@ The set `s` is destructively modified to toggle the inclusion of integer `n`.
 symdiff!(s::BitSet, n::Integer) = int_symdiff!(s, n)
 
 function int_symdiff!(s::BitSet, n::Integer)
-    0 < n < typemax(Int) || _throw_bitset_bounds_err()
-    val = !(n in s)
-    _setint!(s, n, val)
+    n0 = _check_bitset_bounds(n)
+    val = !(n0 in s)
+    _setint!(s, n0, val)
     s
 end
-function symdiff!(s1::BitSet, s2::BitSet)
-    _matched_map!(xor, s1.bits, s2.bits)
-    s1
-end
 
-@inline in(n::Integer, s::BitSet) = get(s.bits, n, false)
+symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2)
+
+@inline in(n::Int, s::BitSet) = _bits_getindex(s.bits, n, s.offset)
+@inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false
 
 # Use the next-set index as the state to prevent looking it up again in done
-start(s::BitSet) = next(s, 0)[2]
-function next(s::BitSet, i)
-    nextidx = i == typemax(Int) ? 0 : findnext(s.bits, i+1)
-    (i, nextidx)
+start(s::BitSet) = _bits_findnext(s.bits, 0)
+
+function next(s::BitSet, i::Int)
+    nextidx = _bits_findnext(s.bits, i+1)
+    (i+intoffset(s), nextidx)
 end
-done(s::BitSet, i) = i <= 0
 
+done(s::BitSet, i) = i == -1
 
-@noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty"))
+@noinline _throw_bitset_notempty_error() =
+    throw(ArgumentError("collection must be non-empty"))
 
 function first(s::BitSet)
-    idx = findfirst(s.bits)
-    idx == 0 ? _throw_bitset_notempty_error() : idx
+    idx = _bits_findnext(s.bits, 0)
+    idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
 function last(s::BitSet)
-    idx = findprev(s.bits, length(s.bits))
-    idx == 0 ? _throw_bitset_notempty_error() : idx
+    idx = _bits_findprev(s.bits, (length(s.bits) << 6) - 1)
+    idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
-length(s::BitSet) = sum(s.bits)
+length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, 0, s.bits)
 
 function show(io::IO, s::BitSet)
     print(io, "BitSet([")
@@ -219,27 +345,40 @@ function show(io::IO, s::BitSet)
     print(io, "])")
 end
 
+function _check0(a::Vector{UInt64}, b::Int, e::Int)
+    @inbounds for i in b:e
+        a[i] == CHK0 || return false
+    end
+    true
+end
+
 function ==(s1::BitSet, s2::BitSet)
-    l1 = length(s1.bits)
-    l2 = length(s2.bits)
-    # If the lengths are the same, simply punt to bitarray comparison
-    l1 == l2 && return s1.bits == s2.bits
-
-    # Swap so s1 is always longer
-    if l1 < l2
-        s2, s1 = s1, s2
-        l2, l1 = l1, l2
+    # Swap so that s1 always has the smaller offset
+    if s1.offset > s2.offset
+        s1, s2 = s2, s1
     end
-    # Iteratively check the chunks of the bitarrays
-    c1 = s1.bits.chunks
-    c2 = s2.bits.chunks
-    @inbounds for i in 1:length(c2)
-        c1[i] == c2[i] || return false
+    a1 = s1.bits
+    a2 = s2.bits
+    b1, b2 = s1.offset, s2.offset
+    l1, l2 = length(a1), length(a2)
+    e1 = l1+b1
+    overlap0 = max(0, e1 - b2)
+    included = overlap0 >= l2  # whether a2's indices are included in a1's
+    overlap  = included ? l2 : overlap0
+
+    # Ensure non-overlap chunks are zero (unlikely)
+    _check0(a1, 1, l1-overlap0) || return false # chunks of a1 before a2's first chunk
+    if included
+        _check0(a1, b2-b1+l2+1, l1) || return false # chunks of a1 after a2's last chunk
+    else
+        _check0(a2, 1+overlap, l2) || return false # chunks of a2 after a1's last chunk
     end
-    # Ensure remaining chunks are zero
-    @inbounds for i in length(c2)+1:length(c1)
-        c1[i] == UInt64(0) || return false
+
+    # compare the overlapping chunks bytewise (overlap<<3 is their size in bytes)
+    if overlap > 0
+        _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false
     end
+
     return true
 end
 
@@ -250,15 +389,23 @@ issubset(a::BitSet, b::BitSet) = isequal(a, intersect(a,b))
 const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d
 function hash(s::BitSet, h::UInt)
     h ⊻= hashis_seed
-    bc = s.bits.chunks
-    i = length(bc)
-    while i > 0 && bc[i] == UInt64(0)
+    bc = s.bits
+    i = 1
+    j = length(bc)
+
+    while j > 0 && bc[j] == CHK0
         # Skip trailing empty bytes to prevent extra space from changing the hash
-        i -= 1
+        j -= 1
+    end
+    while i <= j && bc[i] == CHK0
+        # Skip leading empty chunks to prevent extra space from changing the hash
+        i += 1
     end
-    while i > 0
-        h = hash(bc[i], h)
-        i -= 1
+    i > j && return h # empty set: only the seed has been mixed in
+    h = hash(i+s.offset, h) # normalized offset: fixed by the first non-zero chunk
+    while j >= i
+        h = hash(bc[j], h)
+        j -= 1
     end
     h
 end
diff --git a/base/precompile.jl b/base/precompile.jl
index 3ab4aff158793..e4ba7ffbf6e55 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -822,7 +822,6 @@ precompile(Tuple{typeof(Core.Inference.copy_exprargs), Array{Any, 1}})
 precompile(Tuple{typeof(Core.Inference.copy), Expr})
 precompile(Tuple{typeof(Core.Inference.copyto!), Array{Any, 1}, Core.Inference.Generator{Array{Any, 1}, typeof(Core.Inference.copy_exprs)}})
 precompile(Tuple{typeof(Core.Inference._widen_all_consts!), Expr, Array{Bool, 1}})
-precompile(Tuple{typeof(Core.Inference._delete!), Core.Inference.BitSet, Int64})
 precompile(Tuple{typeof(Core.Inference.promote_type), Type{Float16}, Type{Int64}})
 precompile(Tuple{typeof(Core.Inference.mk_tuplecall), Array{Any, 1}, Core.Inference.InferenceState})
 precompile(Tuple{typeof(Core.Inference.annotate_slot_load!), Expr, Array{Any, 1}, Core.Inference.InferenceState, Array{Bool, 1}})
diff --git a/base/random/generation.jl b/base/random/generation.jl
index 33cfa8d48edf2..3b4aa83469b7f 100644
--- a/base/random/generation.jl
+++ b/base/random/generation.jl
@@ -344,14 +344,13 @@ rand(rng::AbstractRNG, sp::SamplerTag{Set,<:Sampler}) = rand(rng, sp.data).first
 
 function Sampler(rng::AbstractRNG, t::BitSet, n::Repetition)
     isempty(t) && throw(ArgumentError("collection must be non-empty"))
-    SamplerSimple(t, Sampler(rng, linearindices(t.bits), Val(Inf)))
+    SamplerSimple(t, Sampler(rng, minimum(t):maximum(t), Val(Inf)))
 end
 
 function rand(rng::AbstractRNG, sp::SamplerSimple{BitSet,<:Sampler})
     while true
         n = rand(rng, sp.data)
-        @inbounds b = sp[].bits[n]
-        b && return n
+        n in sp[] && return n
     end
 end
 
diff --git a/test/bitset.jl b/test/bitset.jl
index a7b4e3e061f29..86e6c96b09ce5 100644
--- a/test/bitset.jl
+++ b/test/bitset.jl
@@ -55,9 +55,11 @@ end
     @test hash(BitSet([1])) != hash(BitSet([33]))
     @test hash(BitSet([1])) != hash(BitSet([65]))
     @test hash(BitSet([1])) != hash(BitSet([129]))
+    # test with a different internal structure
+    s = BitSet([129])
+    pop!(push!(s, 65), 65)
+    @test hash(BitSet([1])) != hash(s)
 
-    # issue #7851
-    @test_throws ArgumentError BitSet(-1)
     @test !(-1 in BitSet(1:10))
 end
 
@@ -79,7 +81,9 @@ end
     empty!(i)
     @test length(i) === 0
 
-    @test_throws ArgumentError symdiff!(i, -3)
+    @test symdiff!(i, -3) == BitSet([-3])
+    @test symdiff!(i, -3) == BitSet([])
+
     @test symdiff!(i, 3) == BitSet([3])
     @test symdiff!(i, 257) == BitSet([3, 257])
     @test symdiff!(i, [3, 6]) == BitSet([6, 257])
@@ -87,9 +91,8 @@ end
     i = BitSet(1:6)
     @test symdiff!(i, BitSet([6, 513])) == BitSet([1:5; 513])
 
-    # issue #23099 : these tests should not segfault
-    @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), 0)
-    @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), [0, 2, 4])
+    @test 0 ∈ symdiff!(BitSet(rand(1:100, 30)), 0)
+    @test BitSet(0:2:4) ⊆ symdiff!(BitSet(rand(5:100, 30)), [0, 2, 4])
 
     # issue #23557 :
     @test_throws MethodError symdiff!(BitSet([1]), ['a']) # should no stack-overflow
@@ -128,16 +131,17 @@ end
     @test union(i, j, k) == BitSet(1:9)
 
     s1 = BitSet()
-    @test_throws ArgumentError push!(s1, -1)
-    push!(s1, 1, 10, 100, 1000)
-    @test collect(s1) == [1, 10, 100, 1000]
+    @test push!(s1, -1) == BitSet([-1])
+    push!(s1, -10, 1, 10, 100, 1000)
+    @test collect(s1) == [-10, -1, 1, 10, 100, 1000]
     push!(s1, 606)
-    @test collect(s1) == [1, 10, 100, 606, 1000]
+    @test collect(s1) == [-10, -1, 1, 10, 100, 606, 1000]
+
     s2 = BitSet()
     @test s2 === union!(s2, s1)
-    s3 = BitSet([1, 10, 100])
-    union!(s3, [1, 606, 1000])
-    s4 = union(BitSet([1, 100, 1000]), BitSet([10, 100, 606]))
+    s3 = BitSet([-1, 1, 10, 100])
+    union!(s3, [-10, 1, 606, 1000])
+    s4 = union(BitSet([-1, 1, 100, 1000]), BitSet([-10, 10, 100, 606]))
     @test s1 == s2 == s3 == s4
 end
 
@@ -313,3 +317,16 @@ end
     @test M == last(s)  == maximum(s) == maximum(a)
     @test issorted(s)
 end
+
+@testset "extreme values" begin
+    @test pop!(BitSet(typemin(Int))) == typemin(Int)
+    @test pop!(BitSet(typemax(Int))) == typemax(Int)
+end
+
+@testset "sizehint! returns a BitSet" begin
+    # see #25029
+    @test sizehint!(BitSet(), 100) isa BitSet
+    # TODO: test that we don't delegate sizehint! to the underlying bits
+    # field without dividing by 64 (i.e. the 100 above should allocate
+    # only 2 UInt64 words)
+end
diff --git a/test/sets.jl b/test/sets.jl
index ef46d7fec7564..8d025b773eeab 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -344,3 +344,73 @@ end
     @test typeof(cssset) == Set{String}
     @test cssset == Set(["foo", "bar"])
 end
+
+@testset "fuzzy testing Set & BitSet" begin
+    b1, b2 = rand(-1000:1000, 2)
+    e1 = rand(b1-9:1000) # -9 to have an empty list sometimes
+    e2 = rand(b2-9:1000)
+    l1, l2 = rand(1:1000, 2)
+    a1 = b1 <= e1 ? rand(b1:e1, l1) : Int[]
+    a2 = b2 <= e2 ? rand(b2:e2, l2) : Int[]
+    s1, s2 = Set(a1), Set(a2) # reference implementation
+    t1, t2 = BitSet(a1), BitSet(a2) # same elements, stored as BitSet
+
+    for (s, t) = ((s1, t1), (s2, t2))
+        @test length(s) == length(t)
+        @test issubset(s, t)
+        @test issubset(t, s)
+        @test isempty(s) == isempty(t)
+        isempty(s) && continue
+        @test maximum(s) == maximum(t)
+        @test minimum(s) == minimum(t)
+        @test extrema(s) == extrema(t)
+        rs, rt = rand(s), rand(t)
+        @test rs in s
+        @test rt in s
+        @test rs in t
+        @test rt in t
+        for y in (rs, rt)
+            ss = copy(s)
+            tt = copy(t)
+            pop!(ss, y)
+            pop!(tt, y)
+            @test BitSet(ss) == tt
+            @test Set(tt) == ss
+            z = rand(1001:1100) # z ∉ s or t
+            push!(ss, z)
+            push!(tt, z)
+            @test BitSet(ss) == tt
+            @test Set(tt) == ss
+        end
+    end
+
+    res = Dict{String,Union{Bool,Vector{Int}}}() # first result recorded per description
+    function check(desc, val) # every later result for `desc` must equal the recorded one
+        n = val isa Bool ? val : sort!(collect(val))
+        r = get!(res, desc, n)
+        if n isa Bool || r !== n
+            @test r == n
+        end
+    end
+    asbitset(x) = x isa BitSet ? x : BitSet(x)
+    asset(x) = x isa Set ? x : Set(x)
+
+    for x1 = (s1, t1), x2 = (s2, t2)
+        check("union", union(x1, x2))
+        check("intersect", intersect(x1, x2))
+        check("symdiff", symdiff(x1, x2))
+        check("setdiff", setdiff(x1, x2))
+        check("== as Bitset", asbitset(x1) == asbitset(x2))
+        check("== as Set", asset(x1) == asset(x2))
+        check("issubset", issubset(x1, x2))
+        if typeof(x1) == typeof(x2)
+            check("<", x1 < x2)
+            check(">", x1 > x2) # the description is the key in `res`; it names the operation tested
+            check("union!", union!(copy(x1), x2))
+            check("setdiff!", setdiff!(copy(x1), x2))
+            x1 isa Set && continue
+            check("intersect!", intersect!(copy(x1), x2))
+            check("symdiff!", symdiff!(copy(x1), x2))
+        end
+    end
+end