From d5d12ad601756058a18d95b4edf80c6965f269a6 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Sun, 10 Dec 2017 15:42:17 +0100
Subject: [PATCH 1/9] BitSet: swap the underlying BitVector for Vector{UInt64}

The BitVector had already started to show its limits for BitSet needs.
Juggling the 3 layers (BitSet, BitVector, and Vector{UInt64}) was
becoming confusing at times. Updating this code to handle negative
integers only increased the confusion. Some modest performance
improvements (~20%) could be achieved in the process for some functions.
---
 base/array.jl             |  14 ++---
 base/bitarray.jl          |  34 +++++++-----
 base/bitset.jl            | 113 +++++++++++++++++++++-----------
 base/random/generation.jl |  15 -----
 4 files changed, 87 insertions(+), 89 deletions(-)

diff --git a/base/array.jl b/base/array.jl
index c1b44a9a94e25..4a059886b674a 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -1302,25 +1302,23 @@ function empty!(a::Vector)
     return a
 end
 
+_memcmp(a, b, len) = ccall(:memcmp, Int32, (Ptr{Void}, Ptr{Void}, Csize_t), a, b, len) % Int
+
 # use memcmp for lexcmp on byte arrays
 function lexcmp(a::Array{UInt8,1}, b::Array{UInt8,1})
-    c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
-              a, b, min(length(a),length(b)))
+    c = _memcmp(a, b, min(length(a),length(b)))
     return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))
 end
 
 const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N
 
 # use memcmp for == on bit integer types
-function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray
-    size(a) == size(b) && 0 == ccall(
-        :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * length(a))
-end
+==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} =
+    size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a))
 
 # this is ~20% faster than the generic implementation above for very small arrays
 function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1}
     len = length(a)
-    len == length(b) && 0 == ccall(
-        :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * len)
+    len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len)
 end
 
 """
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 2dbf192adc2c5..d243a38256a8f 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -1438,22 +1438,17 @@ circshift!(B::BitVector, i::Integer) = circshift!(B, B, i)
 
 ## count & find ##
 
-function count(B::BitArray)
+function bitcount(Bc::Vector{UInt64})
     n = 0
-    Bc = B.chunks
     @inbounds for i = 1:length(Bc)
         n += count_ones(Bc[i])
     end
     return n
 end
 
-# returns the index of the next non-zero element, or 0 if all zeros
-function findnext(B::BitArray, start::Integer)
-    start > 0 || throw(BoundsError(B, start))
-    start > length(B) && return 0
-
-    Bc = B.chunks
+count(B::BitArray) = bitcount(B.chunks)
 
+function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Integer)
     chunk_start = _div64(start-1)+1
     within_chunk_start = _mod64(start-1)
     mask = _msk64 << within_chunk_start
@@ -1471,6 +1466,14 @@ function findnext(B::BitArray, start::Integer)
     end
     return 0
 end
+
+# returns the index of the next non-zero element, or 0 if all zeros
+function findnext(B::BitArray, start::Integer)
+    start > 0 || throw(BoundsError(B, start))
+    start > length(B) && return 0
+    unsafe_bitfindnext(B.chunks, start)
+end
+
 #findfirst(B::BitArray) = findnext(B, 1) ## defined in array.jl
 
 # aux function: same as findnext(~B, start), but performed without temporaries
@@ -1527,13 +1530,7 @@ function findnext(testf::Function, B::BitArray, start::Integer)
 end
 #findfirst(testf::Function, B::BitArray) = findnext(testf, B, 1) ## defined in array.jl
-# returns the index of the previous non-zero element, or 0 if all zeros
-function findprev(B::BitArray, start::Integer)
-    start > 0 || return 0
-    start > length(B) && throw(BoundsError(B, start))
-
-    Bc = B.chunks
-
+function unsafe_bitfindprev(Bc::Vector{UInt64}, start::Integer)
     chunk_start = _div64(start-1)+1
     mask = _msk_end(start)
 
@@ -1551,6 +1548,13 @@ function findprev(B::BitArray, start::Integer)
     return 0
 end
 
+# returns the index of the previous non-zero element, or 0 if all zeros
+function findprev(B::BitArray, start::Integer)
+    start > 0 || return 0
+    start > length(B) && throw(BoundsError(B, start))
+    unsafe_bitfindprev(B.chunks, start)
+end
+
 function findprevnot(B::BitArray, start::Integer)
     start > 0 || return 0
     start > length(B) && throw(BoundsError(B, start))
diff --git a/base/bitset.jl b/base/bitset.jl
index 46132374ce115..0c2494eaaa3ee 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -1,8 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const Bits = Vector{UInt64}
+const Chk0 = zero(UInt64)
+
 struct BitSet <: AbstractSet{Int}
-    bits::BitVector
-    BitSet() = new(sizehint!(falses(0), 256))
+    bits::Vector{UInt64}
+    BitSet() = new(sizehint!(zeros(UInt64, 0), 4))
 end
 
 """
@@ -35,39 +38,67 @@ end
 eltype(s::BitSet) = Int
 sizehint!(s::BitSet, n::Integer) = (n > length(s.bits) && _resize0!(s.bits, n); s)
 
+# given an integer i, return the chunk which stores it
+chk_index(i::Integer) = _div64(Int(i)+63)
+# return the bit offset of i within chk_index(i)
+chk_offset(i::Int) = _mod64(i-1)
+
+unsafe_bitsetindex!(a, b::Bool, n::Integer) = unsafe_bitsetindex!(a, b, Int(n))
+
+function _bits_getindex(b::Bits, i::Int)
+    ci = chk_index(i)
+    ci > length(b) && return false
+    @inbounds r = (b[ci] & (one(UInt64) << chk_offset(i))) != 0
+    r
+end
+
+function _bits_findnext(b::Bits, start::Int)
+    start = max(1, start)
+    chk_index(start) > length(b) && return 0
+    unsafe_bitfindnext(b, start)
+end
+
+function _bits_findprev(b::Bits, start::Int)
+    start = min(length(b)*64, start)
+    start > 0 || return 0
+    unsafe_bitfindprev(b, start)
+end
+
 # An internal function for setting the inclusion bit for a given integer n >= 0
 @inline function _setint!(s::BitSet, idx::Integer, b::Bool)
-    if idx > length(s.bits)
+    cidx = chk_index(idx)
+    if cidx > length(s.bits)
         b || return s # setting a bit to zero outside the set's bits is a no-op
-        _resize0!(s.bits, idx)
+        _resize0!(s.bits, cidx)
     end
-    @inbounds s.bits[idx] = b
+    unsafe_bitsetindex!(s.bits, b, idx)
     s
 end
 
-# An internal function to resize a bitarray and ensure the newly allocated
+# An internal function to resize a Bits object and ensure the newly allocated
 # elements are zeroed (will become unnecessary if this behavior changes)
-@inline function _resize0!(b::BitVector, newlen::Integer)
+@inline function _resize0!(b::Bits, newlen::Int)
     len = length(b)
-    newlen = ((newlen+63) >> 6) << 6 # smallest multiple of 64 >= newlen
     resize!(b, newlen)
-    len < newlen && @inbounds b[len+1:newlen] = false # resize! gives dirty memory
-    b
+    len < newlen && @inbounds b[len+1:newlen] = Chk0 # resize! gives dirty memory
+    nothing
 end
 
 # An internal function that takes a pure function `f` and maps across two BitArrays
 # allowing the lengths to be different and altering b1 with the result
 # WARNING: the assumptions written in the else clauses must hold
-function _matched_map!(f, b1::BitArray, b2::BitArray)
+function _matched_map!(f, b1::Bits, b2::Bits)
     l1, l2 = length(b1), length(b2)
-    _bit_map!(f, b1, b2)
+
+    # map! over the common indices
+    map!(f, b1, b1, b2)
+
     if l1 < l2
         if f(false, false) == f(false, true) == false
             # We don't need to worry about the trailing bits — they're all false
         else # @assert f(false, x) == x
             resize!(b1, l2)
-            chk_offs = 1+l1>>6
-            unsafe_copyto!(b1.chunks, chk_offs, b2.chunks, chk_offs, 1+l2>>6-chk_offs)
+            unsafe_copyto!(b1, l1+1, b2, l1+1, l2-l1)
         end
     elseif l1 > l2
         if f(false, false) == f(true, false) == false
@@ -81,17 +112,6 @@ function _matched_map!(f, b1::BitArray, b2::BitArray)
     b1
 end
 
-# similar to bit_map! in bitarray.jl, but lengths are multiple of 64,
-# and may not match
-function _bit_map!(f, b1::BitArray, b2::BitArray)
-    b1c, b2c = b1.chunks, b2.chunks
-    l = min(length(b1c), length(b2c))
-    @inbounds for i = 1:l
-        b1c[i] = f(b1c[i], b2c[i])
-    end
-    b1
-end
-
 @noinline _throw_bitset_bounds_err() = throw(ArgumentError("elements of BitSet must be between 1 and typemax(Int)"))
 @noinline _throw_keyerror(n) = throw(KeyError(n))
 
@@ -114,8 +134,8 @@ end
 @inline delete!(s::BitSet, n::Integer) = n > 0 ? _delete!(s, n) : s
 shift!(s::BitSet) = pop!(s, first(s))
 
-empty!(s::BitSet) = (fill!(s.bits, false); s)
-isempty(s::BitSet) = !any(s.bits)
+empty!(s::BitSet) = (fill!(s.bits, Chk0); s)
+isempty(s::BitSet) = all(equalto(Chk0), s.bits)
 
 # Mathematical set functions: union!, intersect!, setdiff!, symdiff!
 
@@ -183,30 +203,30 @@ function symdiff!(s1::BitSet, s2::BitSet)
     s1
 end
 
-@inline in(n::Integer, s::BitSet) = get(s.bits, n, false)
+@inline in(n::Integer, s::BitSet) = _bits_getindex(s.bits, Int(n))
 
 # Use the next-set index as the state to prevent looking it up again in done
 start(s::BitSet) = next(s, 0)[2]
-function next(s::BitSet, i)
-    nextidx = i == typemax(Int) ? 0 : findnext(s.bits, i+1)
+function next(s::BitSet, i::Int)
+    nextidx = i == typemax(Int) ? 0 : _bits_findnext(s.bits, i+1)
     (i, nextidx)
 end
-done(s::BitSet, i) = i <= 0
+done(s::BitSet, i) = i == 0
 
 @noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty"))
 
 function first(s::BitSet)
-    idx = findfirst(s.bits)
+    idx = _bits_findnext(s.bits, 1)
     idx == 0 ? _throw_bitset_notempty_error() : idx
 end
 
 function last(s::BitSet)
-    idx = findprev(s.bits, length(s.bits))
+    idx = _bits_findprev(s.bits, typemax(Int))
     idx == 0 ?
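For orientation, the storage convention adopted in this first patch
reduces membership testing to plain word arithmetic. The sketch below
is illustrative only; word_index, bit_offset, and hasbit are
hypothetical names mirroring chk_index, chk_offset, and
_bits_getindex, not Base functions.

    # Illustrative sketch of the 1-based chunk layout used at this stage:
    # integer i (i >= 1) lives in word (i+63)>>>6, at bit position (i-1)&63.
    word_index(i::Int) = (i + 63) >>> 6
    bit_offset(i::Int) = (i - 1) & 63

    function hasbit(chunks::Vector{UInt64}, i::Int)
        ci = word_index(i)
        ci <= length(chunks) || return false
        chunks[ci] & (UInt64(1) << bit_offset(i)) != 0
    end

    chunks = zeros(UInt64, 2)
    chunks[2] |= UInt64(1) << bit_offset(65)   # store the integer 65
    @assert hasbit(chunks, 65) && !hasbit(chunks, 64)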
_throw_bitset_notempty_error() : idx
 end
 
-length(s::BitSet) = sum(s.bits)
+length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, 0, s.bits)
 
 function show(io::IO, s::BitSet)
     print(io, "BitSet([")
@@ -220,25 +240,16 @@ function show(io::IO, s::BitSet)
     print(io, "])")
 end
 
 function ==(s1::BitSet, s2::BitSet)
-    l1 = length(s1.bits)
-    l2 = length(s2.bits)
-    # If the lengths are the same, simply punt to bitarray comparison
-    l1 == l2 && return s1.bits == s2.bits
-
-    # Swap so s1 is always longer
-    if l1 < l2
-        s2, s1 = s1, s2
-        l2, l1 = l1, l2
-    end
-    # Iteratively check the chunks of the bitarrays
-    c1 = s1.bits.chunks
-    c2 = s2.bits.chunks
-    @inbounds for i in 1:length(c2)
-        c1[i] == c2[i] || return false
+    c1 = s1.bits
+    c2 = s2.bits
+    # Swap so c1 is always longer
+    if length(c1) < length(c2)
+        c1, c2 = c2, c1
     end
+    _memcmp(c1, c2, length(c2)) == 0 || return false
     # Ensure remaining chunks are zero
     @inbounds for i in length(c2)+1:length(c1)
-        c1[i] == UInt64(0) || return false
+        c1[i] == Chk0 || return false
     end
     return true
 end
@@ -250,7 +261,7 @@ issubset(a::BitSet, b::BitSet) = isequal(a, intersect(a,b))
 const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d
 function hash(s::BitSet, h::UInt)
     h ⊻= hashis_seed
-    bc = s.bits.chunks
+    bc = s.bits
     i = length(bc)
     while i > 0 && bc[i] == UInt64(0)
         # Skip trailing empty bytes to prevent extra space from changing the hash
diff --git a/base/random/generation.jl b/base/random/generation.jl
index 33cfa8d48edf2..961092e00817c 100644
--- a/base/random/generation.jl
+++ b/base/random/generation.jl
@@ -340,21 +340,6 @@ Sampler(rng::AbstractRNG, t::Set, n::Repetition) = SamplerTag{Set}(Sampler(rng,
 
 rand(rng::AbstractRNG, sp::SamplerTag{Set,<:Sampler}) = rand(rng, sp.data).first
 
-## random values from BitSet
-
-function Sampler(rng::AbstractRNG, t::BitSet, n::Repetition)
-    isempty(t) && throw(ArgumentError("collection must be non-empty"))
-    SamplerSimple(t, Sampler(rng, linearindices(t.bits), Val(Inf)))
-end
-
-function rand(rng::AbstractRNG, sp::SamplerSimple{BitSet,<:Sampler})
-    while true
-        n = rand(rng, sp.data)
-        @inbounds b = sp[].bits[n]
-        b && return n
-    end
-end
-
 ## random values from AbstractDict/AbstractSet
 
 # we defer to _Sampler to avoid ambiguities with a call like Sampler(rng, Set(1), Val(1))

From cb49741cd62842dbfa3f31c7ed373b18fb044969 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Wed, 2 Aug 2017 17:24:15 +0200
Subject: [PATCH 2/9] morph BitSet into a real Int Set

Add an offset field to BitSet acting as an infimum of stored values,
which can be adjusted on-the-fly. BitSet then becomes able to store
negative integers, and doesn't have to have elements centered around 0.
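To illustrate the idea with a hedged sketch (TinyBitSet, word_number, and
hasvalue below are hypothetical names, not the patch's code): if 64-bit
word k, for possibly negative k, covers the integers 64k+1 through
64k+64, then the offset is just the word number of bits[1], and
membership becomes a subtraction plus a bit test.

    # Sketch of the offset scheme: word k (k may be negative) covers the
    # integers 64k+1 : 64k+64, and `offset` is the word number of bits[1].
    # The Int128 detour mirrors the patch's guard near typemax(Int).
    word_number(n::Int) = Int((Int128(n) + 63) >> 6) - 1

    struct TinyBitSet
        bits::Vector{UInt64}
        offset::Int
    end

    function hasvalue(s::TinyBitSet, n::Int)
        i = word_number(n) - s.offset + 1      # index into s.bits
        1 <= i <= length(s.bits) || return false
        s.bits[i] & (UInt64(1) << mod(n - 1, 64)) != 0
    end

    s = TinyBitSet([UInt64(1) << mod(-65, 64)], -2)  # word -2 covers -127:-64
    @assert hasvalue(s, -64) && !hasvalue(s, -65)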
---
 base/bitset.jl     | 279 ++++++++++++++++++++++++++++++---------------
 base/precompile.jl |   1 -
 test/bitset.jl     |  43 ++++---
 3 files changed, 218 insertions(+), 105 deletions(-)

diff --git a/base/bitset.jl b/base/bitset.jl
index 0c2494eaaa3ee..9ef83d63c9dfc 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -1,23 +1,28 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 const Bits = Vector{UInt64}
-const Chk0 = zero(UInt64)
+const CHK0 = zero(UInt64)
 
-struct BitSet <: AbstractSet{Int}
+mutable struct BitSet <: AbstractSet{Int}
     bits::Vector{UInt64}
-    BitSet() = new(sizehint!(zeros(UInt64, 0), 4))
+    # 1st stored Int equals 64*offset+1
+    offset::Int
+
+    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), 0)
 end
 
 """
     BitSet([itr])
 
-Construct a sorted set of positive `Int`s generated by the given iterable object, or an
-empty set. Implemented as a bit string, and therefore designed for dense integer sets. Only
-`Int`s greater than 0 can be stored. If the set will be sparse (for example holding a few
+Construct a sorted set of `Int`s generated by the given iterable object, or an
+empty set. Implemented as a bit string, and therefore designed for dense integer sets.
+If the set will be sparse (for example holding a few
 very large integers), use [`Set`](@ref) instead.
 """
 BitSet(itr) = union!(BitSet(), itr)
 
+@inline intoffset(s::BitSet) = s.offset << 6
+
 eltype(::Type{BitSet}) = Int
 similar(s::BitSet) = BitSet()
 copy(s1::BitSet) = copy!(BitSet(), s1)
@@ -33,109 +38,181 @@ See also [`copyto!`](@ref).
 function copy!(dest::BitSet, src::BitSet)
     resize!(dest.bits, length(src.bits))
     copyto!(dest.bits, src.bits)
+    dest.offset = src.offset
     dest
 end
 
-eltype(s::BitSet) = Int
-sizehint!(s::BitSet, n::Integer) = (n > length(s.bits) && _resize0!(s.bits, n); s)
+eltype(s::BitSet) = Int
 
-# given an integer i, return the chunk which stores it
-chk_index(i::Integer) = _div64(Int(i)+63)
-# return the bit offset of i within chk_index(i)
-chk_offset(i::Int) = _mod64(i-1)
+sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
 
-unsafe_bitsetindex!(a, b::Bool, n::Integer) = unsafe_bitsetindex!(a, b, Int(n))
+# given an integer i, return the chunk which stores it
+chk_indice(i::Int) = (((i % Int128)+63) >> 6) % Int # Int128 necessary when i close to typemax(Int)
+# return the bit offset of i within chk_indice(i)
+chk_offset(i::Int) = ((i % Int128)-1) & 63 % Int
 
-function _bits_getindex(b::Bits, i::Int)
-    ci = chk_index(i)
-    ci > length(b) && return false
-    @inbounds r = (b[ci] & (one(UInt64) << chk_offset(i))) != 0
+function _bits_getindex(b::Bits, n::Int, offset::Int)
+    ci = chk_indice(n) - offset
+    1 <= ci <= length(b) || return false
+    @inbounds r = (b[ci] & (one(UInt64) << chk_offset(n))) != 0
     r
 end
 
 function _bits_findnext(b::Bits, start::Int)
     start = max(1, start)
-    chk_index(start) > length(b) && return 0
+    chk_indice(start) > length(b) && return 0
     unsafe_bitfindnext(b, start)
 end
 
 function _bits_findprev(b::Bits, start::Int)
-    start = min(length(b)*64, start)
+    start = min(length(b) << 6, start)
     start > 0 || return 0
     unsafe_bitfindprev(b, start)
 end
 
-# An internal function for setting the inclusion bit for a given integer n >= 0
-@inline function _setint!(s::BitSet, idx::Integer, b::Bool)
-    cidx = chk_index(idx)
-    if cidx > length(s.bits)
+# An internal function for setting the inclusion bit for a given integer
+@inline function _setint!(s::BitSet, idx::Int, b::Bool)
+    cidx = chk_indice(idx)
+    len = length(s.bits)
+    if len == 0 # initialize the offset
+        b || return s
+        s.offset = cidx - 1
+    end
+    diff = cidx - s.offset
+    if diff > len
         b || return s # setting a bit to zero outside the set's bits is a no-op
-        _resize0!(s.bits, cidx)
+        _growend0!(s.bits, diff - len)
+    elseif diff <= 0
+        b || return s
+        nchunks = 1 - diff
+        _growbeg0!(s.bits, nchunks)
+        s.offset -= nchunks
     end
-    unsafe_bitsetindex!(s.bits, b, idx)
+    unsafe_bitsetindex!(s.bits, b, idx-intoffset(s))
     s
 end
 
 # An internal function to resize a Bits object and ensure the newly allocated
 # elements are zeroed (will become unnecessary if this behavior changes)
-@inline function _resize0!(b::Bits, newlen::Int)
+@inline function _growend0!(b::Bits, nchunks::Int)
     len = length(b)
-    resize!(b, newlen)
-    len < newlen && @inbounds b[len+1:newlen] = Chk0 # resize! gives dirty memory
-    nothing
+    _growend!(b, nchunks)
+    @inbounds b[len+1:end] = CHK0 # resize! gives dirty memory
+end
+
+@inline function _growbeg0!(b::Bits, nchunks::Int)
+    _growbeg!(b, nchunks)
+    @inbounds b[1:nchunks] = CHK0
+end
+
+function _matched_map!(f, s1::BitSet, s2::BitSet)
+    s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset)
+    s1
 end
 
 # An internal function that takes a pure function `f` and maps across two BitArrays
-# allowing the lengths to be different and altering b1 with the result
+# allowing the lengths and offsets to be different and altering b1 with the result
 # WARNING: the assumptions written in the else clauses must hold
-function _matched_map!(f, b1::Bits, b2::Bits)
-    l1, l2 = length(b1), length(b2)
+function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int)
+    l1, l2 = length(a1), length(a2)
+    bdiff = b2 - b1
+    e1, e2 = l1+b1, l2+b2
+    ediff = e2 - e1
 
     # map! over the common indices
-    map!(f, b1, b1, b2)
+    @inbounds for i = max(1, 1+bdiff):min(l1, l2+bdiff)
+        a1[i] = f(a1[i], a2[i-bdiff])
+    end
 
-    if l1 < l2
+    if ediff > 0
         if f(false, false) == f(false, true) == false
             # We don't need to worry about the trailing bits — they're all false
         else # @assert f(false, x) == x
-            resize!(b1, l2)
-            unsafe_copyto!(b1, l1+1, b2, l1+1, l2-l1)
+            _growend!(a1, ediff)
+            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
+            for outer l1 = l1+1:bdiff
+                @inbounds a1[l1] = CHK0
+            end
+            # update ediff in case l1 was updated
+            ediff = e2 - l1 - b1
+            # copy actual chunks from a2
+            unsafe_copyto!(a1, l1+1, a2, l2+1-ediff, ediff)
+            l1 = length(a1)
         end
-    elseif l1 > l2
+    elseif ediff < 0
         if f(false, false) == f(true, false) == false
             # We don't need to worry about the trailing bits — they're all false
-            resize!(b1, l2)
+            _deleteend!(a1, min(l1, -ediff))
+            # no need to update l1, as if bdiff > 0 (case below), then bdiff will
+            # be smaller anyway than an updated l1
         else # @assert f(x, false) == x
             # We don't need to worry about the trailing bits — they already have the
             # correct value
         end
     end
-    b1
+
+    if bdiff < 0
+        if f(false, false) == f(false, true) == false
+            # We don't need to worry about the leading bits — they're all false
+        else # @assert f(false, x) == x
+            _growbeg!(a1, -bdiff)
+            # if a1 and a2 are not overlapping, we infer implied "false" values from a2
+            for i = l2+1:-bdiff
+                @inbounds a1[i] = CHK0
+            end
+            b1 += bdiff # updated return value
+
+            # copy actual chunks from a2
+            unsafe_copyto!(a1, 1, a2, 1, min(-bdiff, l2))
+        end
+    elseif bdiff > 0
+        if f(false, false) == f(true, false) == false
+            # We don't need to worry about the trailing bits — they're all false
+            _deletebeg!(a1, min(l1, bdiff))
+            b1 += bdiff
+        else # @assert f(x, false) == x
+            # We don't need to worry about the trailing bits — they already have the
+            # correct value
+        end
+    end
+    b1 # the new offset
 end
 
-@noinline _throw_bitset_bounds_err() = throw(ArgumentError("elements of BitSet must be between 1 and typemax(Int)"))
+@noinline _throw_bitset_bounds_err() =
+    throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)"))
+
+@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int)
+
+@inline _check_bitset_bounds(n) =
+    _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err()
+
+@inline _check_bitset_bounds(n::Int) = n
+
 @noinline _throw_keyerror(n) = throw(KeyError(n))
 
-@inline function push!(s::BitSet, n::Integer)
-    0 < n <= typemax(Int) || _throw_bitset_bounds_err()
-    _setint!(s, n, true)
-end
+@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true)
+
 push!(s::BitSet, ns::Integer...)
= (for n in ns; push!(s, n); end; s)
 
-@inline function pop!(s::BitSet)
-    pop!(s, last(s))
-end
+@inline pop!(s::BitSet) = pop!(s, last(s))
+
 @inline function pop!(s::BitSet, n::Integer)
-    n in s ? (_delete!(s, n); n) : _throw_keyerror(n)
+    n in s ? (delete!(s, n); n) : _throw_keyerror(n)
 end
+
 @inline function pop!(s::BitSet, n::Integer, default)
-    n in s ? (_delete!(s, n); n) : default
+    n in s ? (delete!(s, n); n) : default
 end
-@inline _delete!(s::BitSet, n::Integer) = _setint!(s, n, false)
-@inline delete!(s::BitSet, n::Integer) = n > 0 ? _delete!(s, n) : s
+
+@inline delete!(s::BitSet, n::Int) = _setint!(s, n, false)
+@inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? delete!(s, Int(n)) : s
+
 shift!(s::BitSet) = pop!(s, first(s))
 
-empty!(s::BitSet) = (fill!(s.bits, Chk0); s)
-isempty(s::BitSet) = all(equalto(Chk0), s.bits)
+empty!(s::BitSet) = (empty!(s.bits); s)
+isempty(s::BitSet) = all(equalto(CHK0), s.bits)
 
 # Mathematical set functions: union!, intersect!, setdiff!, symdiff!
 
@@ -144,10 +221,7 @@ union(s1::BitSet, s2::BitSet) = union!(copy(s1), s2)
 union(s1::BitSet, ss::BitSet...) = union(s1, union(ss...))
 union(s::BitSet, ns) = union!(copy(s), ns)
 union!(s::BitSet, ns) = (for n in ns; push!(s, n); end; s)
-function union!(s1::BitSet, s2::BitSet)
-    _matched_map!(|, s1.bits, s2.bits)
-    s1
-end
+union!(s1::BitSet, s2::BitSet) = _matched_map!(|, s1, s2)
 
 intersect(s1::BitSet) = copy(s1)
 intersect(s1::BitSet, ss::BitSet...) = intersect(s1, intersect(ss...))
@@ -166,17 +240,11 @@ intersect(s1::BitSet, s2::BitSet) =
 Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed,
 `s1` will be expanded to the size of `s2`.
 """
-function intersect!(s1::BitSet, s2::BitSet)
-    _matched_map!(&, s1.bits, s2.bits)
-    s1
-end
+intersect!(s1::BitSet, s2::BitSet) = _matched_map!(&, s1, s2)
 
 setdiff(s::BitSet, ns) = setdiff!(copy(s), ns)
 setdiff!(s::BitSet, ns) = (for n in ns; delete!(s, n); end; s)
-function setdiff!(s1::BitSet, s2::BitSet)
-    _matched_map!((p, q) -> p & ~q, s1.bits, s2.bits)
-    s1
-end
+setdiff!(s1::BitSet, s2::BitSet) = _matched_map!((p, q) -> p & ~q, s1, s2)
 
 symdiff(s::BitSet, ns) = symdiff!(copy(s), ns)
 """
@@ -193,37 +261,37 @@ The set `s` is destructively modified to toggle the inclusion of integer `n`.
 symdiff!(s::BitSet, n::Integer) = int_symdiff!(s, n)
 
 function int_symdiff!(s::BitSet, n::Integer)
-    0 < n < typemax(Int) || _throw_bitset_bounds_err()
-    val = !(n in s)
-    _setint!(s, n, val)
+    n0 = _check_bitset_bounds(n)
+    val = !(n0 in s)
+    _setint!(s, n0, val)
     s
 end
-function symdiff!(s1::BitSet, s2::BitSet)
-    _matched_map!(xor, s1.bits, s2.bits)
-    s1
-end
 
-@inline in(n::Integer, s::BitSet) = _bits_getindex(s.bits, Int(n))
+symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2)
+
+@inline in(n::Int, s::BitSet) = _bits_getindex(s.bits, n, s.offset)
+@inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false
 
 # Use the next-set index as the state to prevent looking it up again in done
 start(s::BitSet) = next(s, 0)[2]
+
 function next(s::BitSet, i::Int)
     nextidx = i == typemax(Int) ? 0 : _bits_findnext(s.bits, i+1)
-    (i, nextidx)
+    (i+intoffset(s), nextidx)
 end
-done(s::BitSet, i) = i == 0
 
+done(s::BitSet, i) = i == 0
 
 @noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty"))
 
 function first(s::BitSet)
     idx = _bits_findnext(s.bits, 1)
-    idx == 0 ? _throw_bitset_notempty_error() : idx
+    idx == 0 ?
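As a usage note for the API reshaped above: after this patch, push!,
delete!, and symdiff! accept any Int, and out-of-range Integers are
either converted or handled as no-ops rather than throwing, as the test
changes further down illustrate.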
_throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
 function last(s::BitSet)
     idx = _bits_findprev(s.bits, typemax(Int))
-    idx == 0 ? _throw_bitset_notempty_error() : idx
+    idx == 0 ? _throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
 length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, 0, s.bits)
 
 function show(io::IO, s::BitSet)
     print(io, "BitSet([")
@@ -220,25 +240,16 @@ function show(io::IO, s::BitSet)
     print(io, "])")
 end
 
+function _check0(a::Vector{UInt64}, b::Int, e::Int)
+    @inbounds for i in b:e
+        a[i] == CHK0 || return false
+    end
+    true
+end
+
 function ==(s1::BitSet, s2::BitSet)
-    c1 = s1.bits
-    c2 = s2.bits
-    # Swap so c1 is always longer
-    if length(c1) < length(c2)
-        c1, c2 = c2, c1
+    # Swap so s1 has always the smallest offset
+    if s1.offset > s2.offset
+        s1, s2 = s2, s1
+    end
+    a1 = s1.bits
+    a2 = s2.bits
+    b1, b2 = s1.offset, s2.offset
+    l1, l2 = length(a1), length(a2)
+    e1 = l1+b1
+    overlap0 = max(0, e1 - b2)
+    included = overlap0 >= l2 # whether a2's indices are included in a1's
+    overlap = included ? l2 : overlap0
+
+    # compare overlap values
+    if overlap > 0
+        _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false
     end
-    _memcmp(c1, c2, length(c2)) == 0 || return false
+
     # Ensure remaining chunks are zero
-    @inbounds for i in length(c2)+1:length(c1)
-        c1[i] == Chk0 || return false
+    _check0(a1, 1, l1-overlap0) || return false
+    if included
+        _check0(a1, b2-b1+l2+1, l1) || return false
+    else
+        _check0(a2, 1+overlap, l2) || return false
     end
     return true
 end
@@ -250,7 +261,7 @@ issubset(a::BitSet, b::BitSet) = isequal(a, intersect(a,b))
 const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d
 function hash(s::BitSet, h::UInt)
     h ⊻= hashis_seed
     bc = s.bits
-    i = length(bc)
-    while i > 0 && bc[i] == UInt64(0)
+    i = 1
+    j = length(bc)
+
+    while j > 0 && bc[j] == CHK0
         # Skip trailing empty bytes to prevent extra space from changing the hash
-        i -= 1
+        j -= 1
+    end
+    while i <= j && bc[i] == CHK0
+        # Skip leading empty bytes to prevent extra space from changing the hash
+        i += 1
     end
-    while i > 0
-        h = hash(bc[i], h)
-        i -= 1
+    i > j && return h # empty
+    h = hash(i+s.offset, h) # normalized offset
+    while j >= i
+        h = hash(bc[j], h)
+        j -= 1
     end
     h
 end
diff --git a/base/precompile.jl b/base/precompile.jl
index 3ab4aff158793..e4ba7ffbf6e55 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -822,7 +822,6 @@ precompile(Tuple{typeof(Core.Inference.copy_exprargs), Array{Any, 1}})
 precompile(Tuple{typeof(Core.Inference.copy), Expr})
 precompile(Tuple{typeof(Core.Inference.copyto!), Array{Any, 1}, Core.Inference.Generator{Array{Any, 1}, typeof(Core.Inference.copy_exprs)}})
 precompile(Tuple{typeof(Core.Inference._widen_all_consts!), Expr, Array{Bool, 1}})
-precompile(Tuple{typeof(Core.Inference._delete!), Core.Inference.BitSet, Int64})
 precompile(Tuple{typeof(Core.Inference.promote_type), Type{Float16}, Type{Int64}})
 precompile(Tuple{typeof(Core.Inference.mk_tuplecall), Array{Any, 1}, Core.Inference.InferenceState})
 precompile(Tuple{typeof(Core.Inference.annotate_slot_load!), Expr, Array{Any, 1}, Core.Inference.InferenceState, Array{Bool, 1}})
diff --git a/test/bitset.jl b/test/bitset.jl
index a7b4e3e061f29..86e6c96b09ce5 100644
--- a/test/bitset.jl
+++ b/test/bitset.jl
@@ -55,9 +55,11 @@
     @test hash(BitSet([1])) != hash(BitSet([33]))
     @test hash(BitSet([1])) != hash(BitSet([65]))
     @test hash(BitSet([1])) != hash(BitSet([129]))
+    # test with a different internal structure
+    s = BitSet([129])
+    pop!(push!(s, 65), 65)
+    @test hash(BitSet([1])) != hash(s)
 
-    # issue #7851
-    @test_throws ArgumentError BitSet(-1)
     @test !(-1 in BitSet(1:10))
 end
 
@@ -79,7 +81,9 @@ end
     empty!(i)
     @test length(i) === 0
 
-    @test_throws ArgumentError symdiff!(i, -3)
+    @test symdiff!(i, -3) == BitSet([-3])
+    @test symdiff!(i, -3) == BitSet([])
+    @test symdiff!(i, 3) == BitSet([3])
     @test symdiff!(i, 257) == BitSet([3, 257])
     @test symdiff!(i, [3, 6]) == BitSet([6, 257])
 
     i = BitSet(1:6)
     @test symdiff!(i, BitSet([6, 513])) == BitSet([1:5; 513])
 
-    # issue #23099 : these tests should not segfault
-    @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), 0)
-    @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), [0, 2, 4])
+    @test 0 ∈ symdiff!(BitSet(rand(1:100, 30)), 0)
+    @test BitSet(0:2:4) ⊆ symdiff!(BitSet(rand(5:100, 30)), [0, 2, 4])
 
     # issue #23557 :
     @test_throws MethodError symdiff!(BitSet([1]), ['a']) # should not stack-overflow
 
@@ -128,16 +131,17 @@
     @test union(i, j, k) == BitSet(1:9)
 
     s1 = BitSet()
-    @test_throws ArgumentError push!(s1, -1)
-    push!(s1, 1, 10, 100, 1000)
-    @test collect(s1) == [1, 10, 100, 1000]
+    @test push!(s1, -1) == BitSet([-1])
+    push!(s1, -10, 1, 10, 100, 1000)
+    @test collect(s1) == [-10, -1, 1, 10, 100, 1000]
     push!(s1, 606)
-    @test collect(s1) == [1, 10, 100, 606, 1000]
+    @test collect(s1) == [-10, -1, 1, 10, 100, 606, 1000]
+
     s2 = BitSet()
     @test s2 === union!(s2, s1)
-    s3 = BitSet([1, 10, 100])
-    union!(s3, [1, 606, 1000])
-    s4 = union(BitSet([1, 100, 1000]), BitSet([10, 100, 606]))
+    s3 = BitSet([-1, 1, 10, 100])
+    union!(s3, [-10, 1, 606, 1000])
+    s4 = union(BitSet([-1, 1, 100, 1000]), BitSet([-10, 10, 100, 606]))
     @test s1 == s2 == s3 == s4
 end
 
@@ -313,3 +317,16 @@
     @test M == last(s) == maximum(s) == maximum(a)
     @test issorted(s)
 end
+
+@testset "extreme values" begin
+    @test pop!(BitSet(typemin(Int))) == typemin(Int)
+    @test pop!(BitSet(typemax(Int))) == typemax(Int)
+end
+
+@testset "sizehint! returns a BitSet" begin
+    # see #25029
+    @test sizehint!(BitSet(), 100) isa BitSet
+    # TODO: test that we don't delegate sizehint! to the underlying bits
+    # field without dividing by 64 (i.e. the 100 above should allocate
+    # only 2 UInt64 words)
+end

From ed692cd34447b7833dffe3177d42885ce6dfee30 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Mon, 11 Dec 2017 15:37:21 +0100
Subject: [PATCH 3/9] BitSet: use 0-based bit-indexing

The 1-based indexing forced promoting Int to Int128 in chk_indice &
chk_offset, to allow correct computation with extreme values
(e.g. BitSet([typemin(Int)])). 1-based indexing was a left-over from
the former BitVector-based implementation. Switching to 0-based
indexing allows both a more direct mapping between the model and the
implementation, and better performance.
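The overflow is easy to reproduce in isolation; here is a hedged sketch
(assuming a 64-bit Int; the function names are illustrative, not the
patch's):

    # With 1-based indexing, the chunk of i is derived from i+63, which
    # wraps around for i near typemax(Int); hence the Int128 promotion.
    chk_1based_naive(i::Int) = (i + 63) >> 6                  # wrong for large i
    chk_1based_wide(i::Int)  = Int(((i % Int128) + 63) >> 6)  # correct, but slower
    # 0-based indexing is a single arithmetic shift, safe on the whole Int range:
    chk_0based(i::Int) = i >> 6

    i = typemax(Int)
    @assert chk_1based_naive(i) < 0      # overflowed
    @assert chk_1based_wide(i) == 2^57   # the intended chunk number
    @assert chk_0based(i) == 2^57 - 1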
---
 base/bitset.jl | 51 +++++++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/base/bitset.jl b/base/bitset.jl
index 9ef83d63c9dfc..1379e29931d09 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -5,7 +5,7 @@ const CHK0 = zero(UInt64)
 
 mutable struct BitSet <: AbstractSet{Int}
     bits::Vector{UInt64}
-    # 1st stored Int equals 64*offset+1
+    # 1st stored Int equals 64*offset
     offset::Int
 
     BitSet() = new(sizehint!(zeros(UInt64, 0), 4), 0)
 end
@@ -47,27 +47,29 @@ eltype(s::BitSet) = Int
 sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
 
 # given an integer i, return the chunk which stores it
-chk_indice(i::Int) = (((i % Int128)+63) >> 6) % Int # Int128 necessary when i close to typemax(Int)
+chk_indice(i::Int) = i >> 6
 # return the bit offset of i within chk_indice(i)
-chk_offset(i::Int) = ((i % Int128)-1) & 63 % Int
+chk_offset(i::Int) = i & 63
 
 function _bits_getindex(b::Bits, n::Int, offset::Int)
-    ci = chk_indice(n) - offset
+    ci = chk_indice(n) - offset + 1
     1 <= ci <= length(b) || return false
     @inbounds r = (b[ci] & (one(UInt64) << chk_offset(n))) != 0
     r
 end
 
 function _bits_findnext(b::Bits, start::Int)
-    start = max(1, start)
-    chk_indice(start) > length(b) && return 0
-    unsafe_bitfindnext(b, start)
+    # start is 0-based
+    # @assert start >= 0
+    chk_indice(start) + 1 > length(b) && return -1
+    unsafe_bitfindnext(b, start+1) - 1
 end
 
 function _bits_findprev(b::Bits, start::Int)
-    start = min(length(b) << 6, start)
-    start > 0 || return 0
-    unsafe_bitfindprev(b, start)
+    # start is 0-based
+    # @assert start <= 64 * length(b) - 1
+    start >= 0 || return -1
+    unsafe_bitfindprev(b, start+1) - 1
 end
 
 # An internal function for setting the inclusion bit for a given integer
@@ -76,19 +78,18 @@ end
     len = length(s.bits)
     if len == 0 # initialize the offset
         b || return s
-        s.offset = cidx - 1
+        s.offset = cidx
     end
     diff = cidx - s.offset
-    if diff > len
+    if diff >= len
         b || return s # setting a bit to zero outside the set's bits is a no-op
-        _growend0!(s.bits, diff - len)
-    elseif diff <= 0
+        _growend0!(s.bits, diff - len + 1)
+    elseif diff < 0
         b || return s
-        nchunks = 1 - diff
-        _growbeg0!(s.bits, nchunks)
-        s.offset -= nchunks
+        _growbeg0!(s.bits, -diff)
+        s.offset += diff
     end
-    unsafe_bitsetindex!(s.bits, b, idx-intoffset(s))
+    unsafe_bitsetindex!(s.bits, b, 1+idx-intoffset(s))
     s
 end
 
@@ -273,25 +274,25 @@ symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2)
 @inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false
 
 # Use the next-set index as the state to prevent looking it up again in done
-start(s::BitSet) = next(s, 0)[2]
+start(s::BitSet) = _bits_findnext(s.bits, 0)
 
 function next(s::BitSet, i::Int)
-    nextidx = i == typemax(Int) ? 0 : _bits_findnext(s.bits, i+1)
+    nextidx = _bits_findnext(s.bits, i+1)
     (i+intoffset(s), nextidx)
 end
 
-done(s::BitSet, i) = i == 0
+done(s::BitSet, i) = i == -1
 
 @noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty"))
 
 function first(s::BitSet)
-    idx = _bits_findnext(s.bits, 1)
-    idx == 0 ? _throw_bitset_notempty_error() : idx + intoffset(s)
+    idx = _bits_findnext(s.bits, 0)
+    idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
 function last(s::BitSet)
-    idx = _bits_findprev(s.bits, typemax(Int))
-    idx == 0 ? _throw_bitset_notempty_error() : idx + intoffset(s)
+    idx = _bits_findprev(s.bits, (length(s.bits) << 6) - 1)
+    idx == -1 ?
_throw_bitset_notempty_error() : idx + intoffset(s)
 end
 
 length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, 0, s.bits)

From 13cdaeb0095d16db08b9a3e735ef876e345e1d09 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Mon, 11 Dec 2017 17:51:27 +0100
Subject: [PATCH 4/9] BitSet push! & pop!: don't check for emptiness every time

When a BitSet is empty, its offset must be initialized. But checking
for emptiness in each invocation of _setint! was costly, so we put the
check in a branch which is called less often.
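The trick is a sentinel pattern: the sentinel offset is chosen so that
the ordinary grow-the-buffer range check already routes the empty case
to the slow path, where the one-time initialization happens. A reduced
sketch of the idiom under that assumption (Buf, NO_OFF, and setbit! are
illustrative names; growing at the front, which the real patch also
handles, is elided):

    const NO_OFF = -1 << 60   # sentinel: makes cidx - NO_OFF huge, forcing the slow path

    mutable struct Buf
        words::Vector{UInt64}
        off::Int               # NO_OFF while empty
    end
    Buf() = Buf(UInt64[], NO_OFF)

    function setbit!(b::Buf, cidx::Int)   # set bit 0 of (0-based) word cidx
        d = cidx - b.off
        if d >= length(b.words)           # slow path: grow, and initialize if needed
            if b.off == NO_OFF            # the emptiness check lives only here
                b.off = cidx
                d = 0
            end
            append!(b.words, zeros(UInt64, d - length(b.words) + 1))
        end
        b.words[d + 1] |= UInt64(1)       # fast path: no per-call emptiness test
        b
    end

    b = setbit!(setbit!(Buf(), 3), 4)
    @assert b.off == 3 && b.words == [UInt64(1), UInt64(1)]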
---
 base/bitset.jl | 68 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/base/bitset.jl b/base/bitset.jl
index 1379e29931d09..69db49ad1e9b9 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -2,13 +2,20 @@
 
 const Bits = Vector{UInt64}
 const CHK0 = zero(UInt64)
+const NO_OFFSET = Int === Int64 ? -one(Int) << 60 : -one(Int) << 29
+# + NO_OFFSET must be small enough to stay < 0 when added with any offset.
+#   An offset is in the range -2^57:2^57 (64-bits architectures)
+#   or -2^26:2^26 (32-bits architectures)
+# + when the offset is NO_OFFSET, the bits field *must* be empty
+# + NO_OFFSET could be made to be > 0, but a negative one allows
+#   a small optimization in the in(x, ::BitSet)
 
 mutable struct BitSet <: AbstractSet{Int}
     bits::Vector{UInt64}
     # 1st stored Int equals 64*offset
     offset::Int
 
-    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), 0)
+    BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET)
 end
 
 """
@@ -76,13 +83,18 @@ end
 @inline function _setint!(s::BitSet, idx::Int, b::Bool)
     cidx = chk_indice(idx)
     len = length(s.bits)
-    if len == 0 # initialize the offset
-        b || return s
-        s.offset = cidx
-    end
     diff = cidx - s.offset
     if diff >= len
         b || return s # setting a bit to zero outside the set's bits is a no-op
+
+        # we put the following test within one of the two branches,
+        # with the NO_OFFSET trick, to avoid having to perform it at
+        # each and every call to _setint!
+        if s.offset == NO_OFFSET # initialize the offset
+            # we assume isempty(s.bits)
+            s.offset = cidx
+            diff = 0
+        end
         _growend0!(s.bits, diff - len + 1)
     elseif diff < 0
         b || return s
@@ -120,14 +132,27 @@ end
 end
 
 function _matched_map!(f, s1::BitSet, s2::BitSet)
-    s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset)
+    left_false_is_false = f(false, false) == f(false, true) == false
+    right_false_is_false = f(false, false) == f(true, false) == false
+
+    # we must first handle the NO_OFFSET case; we could test for
+    # isempty(s1) but it can be costly, so the user has to call
+    # empty!(s1) herself before-hand to re-initialize to NO_OFFSET
+    if s1.offset == NO_OFFSET
+        return left_false_is_false ? s1 : copy!(s1, s2)
+    elseif s2.offset == NO_OFFSET
+        return right_false_is_false ? empty!(s1) : s1
+    end
+    s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset,
+                              left_false_is_false, right_false_is_false)
     s1
 end
 
 # An internal function that takes a pure function `f` and maps across two BitArrays
 # allowing the lengths and offsets to be different and altering b1 with the result
 # WARNING: the assumptions written in the else clauses must hold
-function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int)
+function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
+                       left_false_is_false::Bool, right_false_is_false::Bool)
     l1, l2 = length(a1), length(a2)
     bdiff = b2 - b1
     e1, e2 = l1+b1, l2+b2
@@ -127,7 +152,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
     end
 
     if ediff > 0
-        if f(false, false) == f(false, true) == false
+        if left_false_is_false
             # We don't need to worry about the trailing bits — they're all false
         else # @assert f(false, x) == x
             _growend!(a1, ediff)
@@ -142,7 +167,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
             l1 = length(a1)
         end
     elseif ediff < 0
-        if f(false, false) == f(true, false) == false
+        if right_false_is_false
             # We don't need to worry about the trailing bits — they're all false
             _deleteend!(a1, min(l1, -ediff))
             # no need to update l1, as if bdiff > 0 (case below), then bdiff will
@@ -154,7 +179,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
     end
 
     if bdiff < 0
-        if f(false, false) == f(false, true) == false
+        if left_false_is_false
             # We don't need to worry about the leading bits — they're all false
         else # @assert f(false, x) == x
             _growbeg!(a1, -bdiff)
@@ -168,7 +193,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int,
             unsafe_copyto!(a1, 1, a2, 1, min(-bdiff, l2))
         end
     elseif bdiff > 0
-        if f(false, false) == f(true, false) == false
+        if right_false_is_false
             # We don't need to worry about the trailing bits — they're all false
             _deletebeg!(a1, min(l1, bdiff))
             b1 += bdiff
@@ -225,11 +250,21 @@ push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s)
 @inline pop!(s::BitSet) = pop!(s, last(s))
 
 @inline function pop!(s::BitSet, n::Integer)
-    n in s ? (delete!(s, n); n) : _throw_keyerror(n)
+    if n in s
+        delete!(s, n)
+        n
+    else
+        _throw_keyerror(n)
+    end
 end
 
 @inline function pop!(s::BitSet, n::Integer, default)
-    n in s ? (delete!(s, n); n) : default
+    if n in s
+        delete!(s, n)
+        n
+    else
+        default
+    end
 end
 
 @inline delete!(s::BitSet, n::Int) = _setint!(s, n, false)
@@ -247,7 +282,12 @@ end
 
 shift!(s::BitSet) = pop!(s, first(s))
 
-empty!(s::BitSet) = (empty!(s.bits); s)
+function empty!(s::BitSet)
+    empty!(s.bits)
+    s.offset = NO_OFFSET
+    s
+end
+
 isempty(s::BitSet) = all(equalto(CHK0), s.bits)
 
 # Mathematical set functions: union!, intersect!, setdiff!, symdiff!

From 7f44009c2dbee98cd1cf68a6b1ddd84efc608601 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Mon, 11 Dec 2017 18:24:10 +0100
Subject: [PATCH 5/9] optimize updating a value in BitSet

_setint! was delegating to the bitarray code, but then some work was
being redone in get_chunks_id. So we split this functionality into
lower-level functions.
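The pattern is hoisting a shared computation out of a reused helper: a
lower-level entry point takes the already-computed chunk coordinates,
so a caller that knows them does not pay for the division twice. In
sketch form (set_linear! and set_split! are illustrative names; the
real helpers are unsafe_bitsetindex! and _unsafe_bitsetindex!):

    # Before: the wrapper recomputes the chunk coordinates from the linear
    # index, even when the caller (like _setint!) has already derived them.
    function set_linear!(chunks::Vector{UInt64}, x::Bool, i::Int)
        i1, i2 = (i - 1) >>> 6 + 1, (i - 1) & 63   # same split as get_chunks_id
        set_split!(chunks, x, i1, i2)
    end

    # After: a lower-level entry point takes the precomputed pair directly.
    function set_split!(chunks::Vector{UInt64}, x::Bool, i1::Int, i2::Int)
        u = UInt64(1) << i2
        @inbounds chunks[i1] = x ? chunks[i1] | u : chunks[i1] & ~u
        chunks
    end

    c = zeros(UInt64, 2)
    set_linear!(c, true, 65)     # convenience path
    set_split!(c, true, 1, 0)    # hot path with already-known coordinates
    @assert c == [UInt64(1), UInt64(1)]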
---
 base/bitarray.jl | 4 ++++
 base/bitset.jl   | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/base/bitarray.jl b/base/bitarray.jl
index d243a38256a8f..a80f1a85dc82a 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -636,6 +636,10 @@ end
 
 @inline function unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i::Int)
     i1, i2 = get_chunks_id(i)
+    _unsafe_bitsetindex!(Bc, x, i1, i2)
+end
+
+@inline function _unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i1::Int, i2::Int)
     u = UInt64(1) << i2
     @inbounds begin
         c = Bc[i1]
diff --git a/base/bitset.jl b/base/bitset.jl
index 69db49ad1e9b9..6ff01f61319b0 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -100,8 +100,9 @@ end
         b || return s
         _growbeg0!(s.bits, -diff)
         s.offset += diff
+        diff = 0
     end
-    unsafe_bitsetindex!(s.bits, b, 1+idx-intoffset(s))
+    _unsafe_bitsetindex!(s.bits, b, diff+1, chk_offset(idx))
     s
 end
 
@@ -323,7 +324,8 @@ end
 
 done(s::BitSet, i) = i == -1
 
-@noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty"))
+@noinline _throw_bitset_notempty_error() =
+    throw(ArgumentError("collection must be non-empty"))
 
 function first(s::BitSet)
     idx = _bits_findnext(s.bits, 0)

From dd6a94ab21dac82b2157730bbf3399f499050fc7 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Tue, 12 Dec 2017 11:33:00 +0100
Subject: [PATCH 6/9] re-implement rand(::BitSet)

---
 base/random/generation.jl | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/base/random/generation.jl b/base/random/generation.jl
index 961092e00817c..3b4aa83469b7f 100644
--- a/base/random/generation.jl
+++ b/base/random/generation.jl
@@ -340,6 +340,20 @@ Sampler(rng::AbstractRNG, t::Set, n::Repetition) = SamplerTag{Set}(Sampler(rng,
 
 rand(rng::AbstractRNG, sp::SamplerTag{Set,<:Sampler}) = rand(rng, sp.data).first
 
+## random values from BitSet
+
+function Sampler(rng::AbstractRNG, t::BitSet, n::Repetition)
+    isempty(t) && throw(ArgumentError("collection must be non-empty"))
+    SamplerSimple(t, Sampler(rng, minimum(t):maximum(t), Val(Inf)))
+end
+
+function rand(rng::AbstractRNG, sp::SamplerSimple{BitSet,<:Sampler})
+    while true
+        n = rand(rng, sp.data)
+        n in sp[] && return n
+    end
+end
+
 ## random values from AbstractDict/AbstractSet
 
 # we defer to _Sampler to avoid ambiguities with a call like Sampler(rng, Set(1), Val(1))
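The new sampler is rejection sampling from the dense envelope
minimum(t):maximum(t): draw uniformly from that range and retry until
the draw is a member. A standalone sketch of the same idea, outside
Base's Sampler machinery (rand_from_set is a hypothetical helper):

    # Rejection sampling from a set through its min:max envelope. Cheap when
    # the set is dense in its range (BitSet's intended use case); each draw
    # succeeds with probability length(s) / (maximum(s) - minimum(s) + 1).
    function rand_from_set(s)
        isempty(s) && throw(ArgumentError("collection must be non-empty"))
        lo, hi = extrema(s)
        while true
            n = rand(lo:hi)
            n in s && return n
        end
    end

    s = BitSet([-5, -2, 0, 3])
    @assert rand_from_set(s) in s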
From 55968e74b61dfcd501197c2f7e82cdf26cc03940 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Tue, 12 Dec 2017 11:36:47 +0100
Subject: [PATCH 7/9] BitSet: use _div64 & _mod64 from bitarray.jl

---
 base/bitarray.jl |  2 +-
 base/bitset.jl   | 15 +++++----------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/base/bitarray.jl b/base/bitarray.jl
index a80f1a85dc82a..93db5579d59d6 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -82,7 +82,7 @@ IndexStyle(::Type{<:BitArray}) = IndexLinear()
 ## aux functions ##
 
 const _msk64 = ~UInt64(0)
-@inline _div64(l) = l >>> 6
+@inline _div64(l) = l >> 6
 @inline _mod64(l) = l & 63
 @inline _msk_end(l::Integer) = _msk64 >>> _mod64(-l)
 @inline _msk_end(B::BitArray) = _msk_end(length(B))
diff --git a/base/bitset.jl b/base/bitset.jl
index 6ff01f61319b0..bb0e9110d20b3 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -53,22 +53,17 @@ eltype(s::BitSet) = Int
 
 sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
 
-# given an integer i, return the chunk which stores it
-chk_indice(i::Int) = i >> 6
-# return the bit offset of i within chk_indice(i)
-chk_offset(i::Int) = i & 63
-
 function _bits_getindex(b::Bits, n::Int, offset::Int)
-    ci = chk_indice(n) - offset + 1
+    ci = _div64(n) - offset + 1
     1 <= ci <= length(b) || return false
-    @inbounds r = (b[ci] & (one(UInt64) << chk_offset(n))) != 0
+    @inbounds r = (b[ci] & (one(UInt64) << _mod64(n))) != 0
     r
 end
 
 function _bits_findnext(b::Bits, start::Int)
     # start is 0-based
     # @assert start >= 0
-    chk_indice(start) + 1 > length(b) && return -1
+    _div64(start) + 1 > length(b) && return -1
     unsafe_bitfindnext(b, start+1) - 1
 end
 
@@ -81,7 +76,7 @@ end
 
 # An internal function for setting the inclusion bit for a given integer
 @inline function _setint!(s::BitSet, idx::Int, b::Bool)
-    cidx = chk_indice(idx)
+    cidx = _div64(idx)
     len = length(s.bits)
     diff = cidx - s.offset
     if diff >= len
@@ -102,7 +97,7 @@ end
         s.offset += diff
         diff = 0
     end
-    _unsafe_bitsetindex!(s.bits, b, diff+1, chk_offset(idx))
+    _unsafe_bitsetindex!(s.bits, b, diff+1, _mod64(idx))
     s
 end

From a4371b28f2c1bc4b15ed181658cd8dc148e21a75 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Tue, 12 Dec 2017 11:41:50 +0100
Subject: [PATCH 8/9] BitSet: add a couple small optimizations

* isempty: using _check0 instead of all makes it 35% faster

* ==: checking the non-overlapping parts first is more likely to be
  faster, as the lower and upper parts of the bits field are unlikely
  to be zero (at least for a freshly created BitSet)
---
 base/bitset.jl | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/base/bitset.jl b/base/bitset.jl
index bb0e9110d20b3..b01e5b09a095b 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -249,7 +249,7 @@ function empty!(s::BitSet)
     s
 end
 
-isempty(s::BitSet) = all(equalto(CHK0), s.bits)
+isempty(s::BitSet) = _check0(s.bits, 1, length(s.bits))
 
 # Mathematical set functions: union!, intersect!, setdiff!, symdiff!
 
@@ -366,18 +366,19 @@ function ==(s1::BitSet, s2::BitSet)
     included = overlap0 >= l2 # whether a2's indices are included in a1's
     overlap = included ? l2 : overlap0
 
-    # compare overlap values
-    if overlap > 0
-        _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false
-    end
-
-    # Ensure remaining chunks are zero
+    # Ensure non-overlap chunks are zero (unlikely)
     _check0(a1, 1, l1-overlap0) || return false
     if included
         _check0(a1, b2-b1+l2+1, l1) || return false
     else
         _check0(a2, 1+overlap, l2) || return false
     end
+
+    # compare overlap values
+    if overlap > 0
+        _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false
+    end
+
     return true
 end
From 55852ee48fe33a63c555ed48dfb636c48b30e4a9 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet
Date: Thu, 14 Dec 2017 12:06:59 +0100
Subject: [PATCH 9/9] add fuzzy testing for some Set and BitSet functions

---
 test/sets.jl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/test/sets.jl b/test/sets.jl
index ef46d7fec7564..8d025b773eeab 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -344,3 +344,73 @@ end
     @test typeof(cssset) == Set{String}
     @test cssset == Set(["foo", "bar"])
 end
+
+@testset "fuzzy testing Set & BitSet" begin
+    b1, b2 = rand(-1000:1000, 2)
+    e1 = rand(b1-9:1000) # -9 to have an empty list sometimes
+    e2 = rand(b2-9:1000)
+    l1, l2 = rand(1:1000, 2)
+    a1 = b1 <= e1 ? rand(b1:e1, l1) : Int[]
+    a2 = b2 <= e2 ? rand(b2:e2, l2) : Int[]
+    s1, s2 = Set(a1), Set(a2)
+    t1, t2 = BitSet(a1), BitSet(a2)
+
+    for (s, t) = ((s1, t1), (s2, t2))
+        @test length(s) == length(t)
+        @test issubset(s, t)
+        @test issubset(t, s)
+        @test isempty(s) == isempty(t)
+        isempty(s) && continue
+        @test maximum(s) == maximum(t)
+        @test minimum(s) == minimum(t)
+        @test extrema(s) == extrema(t)
+        rs, rt = rand(s), rand(t)
+        @test rs in s
+        @test rt in s
+        @test rs in t
+        @test rt in t
+        for y in (rs, rt)
+            ss = copy(s)
+            tt = copy(t)
+            pop!(ss, y)
+            pop!(tt, y)
+            @test BitSet(ss) == tt
+            @test Set(tt) == ss
+            z = rand(1001:1100) # z ∉ s or t
+            push!(ss, z)
+            push!(tt, z)
+            @test BitSet(ss) == tt
+            @test Set(tt) == ss
+        end
+    end
+
+    res = Dict{String,Union{Bool,Vector{Int}}}()
+    function check(desc, val)
+        n = val isa Bool ? val : sort!(collect(val))
+        r = get!(res, desc, n)
+        if n isa Bool || r !== n
+            @test r == n
+        end
+    end
+    asbitset(x) = x isa BitSet ? x : BitSet(x)
+    asset(x) = x isa Set ? x : Set(x)
+
+    for x1 = (s1, t1), x2 = (s2, t2)
+        check("union", union(x1, x2))
+        check("intersect", intersect(x1, x2))
+        check("symdiff", symdiff(x1, x2))
+        check("setdiff", setdiff(x1, x2))
+        check("== as Bitset", asbitset(x1) == asbitset(x2))
+        check("== as Set", asset(x1) == asset(x2))
+        check("issubset", issubset(x1, x2))
+        if typeof(x1) == typeof(x2)
+            check("<", x1 < x2)
+            check(">", x1 > x2)
+            check("union!", union!(copy(x1), x2))
+            check("setdiff!", setdiff!(copy(x1), x2))
+            x1 isa Set && continue
+            check("intersect!", intersect!(copy(x1), x2))
+            check("symdiff!", symdiff!(copy(x1), x2))
+        end
+    end
+end
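Taken together, the series makes sessions like the following work,
where the pre-series BitSet rejected any integer below 1 (a quick
sanity sketch of the end state, not part of the patches):

    s = BitSet([-300, -1, 0, 64, 1000])    # negative and spread-out Ints
    @assert -300 in s && !(-301 in s)
    @assert first(s) == -300 && last(s) == 1000
    @assert collect(s) == [-300, -1, 0, 64, 1000]   # iteration stays sorted

    t = union(s, BitSet([-10_000]))        # the offset moves down as needed
    @assert minimum(t) == -10_000
    @assert symdiff!(copy(s), s) == BitSet()   # xor with itself empties the set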