diff --git a/base/array.jl b/base/array.jl index c1b44a9a94e25..4a059886b674a 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1302,25 +1302,23 @@ function empty!(a::Vector) return a end +_memcmp(a, b, len) = ccall(:memcmp, Int32, (Ptr{Void}, Ptr{Void}, Csize_t), a, b, len) % Int + # use memcmp for lexcmp on byte arrays function lexcmp(a::Array{UInt8,1}, b::Array{UInt8,1}) - c = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), - a, b, min(length(a),length(b))) + c = _memcmp(a, b, min(length(a),length(b))) return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b)) end const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N # use memcmp for == on bit integer types -function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray - size(a) == size(b) && 0 == ccall( - :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * length(a)) -end +==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} = + size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a)) # this is ~20% faster than the generic implementation above for very small arrays function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1} len = length(a) - len == length(b) && 0 == ccall( - :memcmp, Int32, (Ptr{Void}, Ptr{Void}, UInt), a, b, sizeof(eltype(Arr)) * len) + len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len) end """ diff --git a/base/bitarray.jl b/base/bitarray.jl index 2dbf192adc2c5..93db5579d59d6 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -82,7 +82,7 @@ IndexStyle(::Type{<:BitArray}) = IndexLinear() ## aux functions ## const _msk64 = ~UInt64(0) -@inline _div64(l) = l >>> 6 +@inline _div64(l) = l >> 6 @inline _mod64(l) = l & 63 @inline _msk_end(l::Integer) = _msk64 >>> _mod64(-l) @inline _msk_end(B::BitArray) = _msk_end(length(B)) @@ -636,6 +636,10 @@ end @inline function unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i::Int) i1, i2 = get_chunks_id(i) + _unsafe_bitsetindex!(Bc, x, i1, i2) +end + +@inline function _unsafe_bitsetindex!(Bc::Array{UInt64}, x::Bool, i1::Int, i2::Int) u = UInt64(1) << i2 @inbounds begin c = Bc[i1] @@ -1438,22 +1442,17 @@ circshift!(B::BitVector, i::Integer) = circshift!(B, B, i) ## count & find ## -function count(B::BitArray) +function bitcount(Bc::Vector{UInt64}) n = 0 - Bc = B.chunks @inbounds for i = 1:length(Bc) n += count_ones(Bc[i]) end return n end -# returns the index of the next non-zero element, or 0 if all zeros -function findnext(B::BitArray, start::Integer) - start > 0 || throw(BoundsError(B, start)) - start > length(B) && return 0 - - Bc = B.chunks +count(B::BitArray) = bitcount(B.chunks) +function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Integer) chunk_start = _div64(start-1)+1 within_chunk_start = _mod64(start-1) mask = _msk64 << within_chunk_start @@ -1471,6 +1470,14 @@ function findnext(B::BitArray, start::Integer) end return 0 end + +# returns the index of the next non-zero element, or 0 if all zeros +function findnext(B::BitArray, start::Integer) + start > 0 || throw(BoundsError(B, start)) + start > length(B) && return 0 + unsafe_bitfindnext(B.chunks, start) +end + #findfirst(B::BitArray) = findnext(B, 1) ## defined in array.jl # aux function: same as findnext(~B, start), but performed without temporaries @@ -1527,13 +1534,7 @@ function findnext(testf::Function, B::BitArray, start::Integer) end #findfirst(testf::Function, B::BitArray) = findnext(testf, B, 1) ## defined in array.jl -# returns the index of the previous non-zero element, or 0 if all zeros -function findprev(B::BitArray, start::Integer) - start > 0 || return 0 - start > length(B) && throw(BoundsError(B, start)) - - Bc = B.chunks - +function unsafe_bitfindprev(Bc::Vector{UInt64}, start::Integer) chunk_start = _div64(start-1)+1 mask = _msk_end(start) @@ -1551,6 +1552,13 @@ function findprev(B::BitArray, start::Integer) return 0 end +# returns the index of the previous non-zero element, or 0 if all zeros +function findprev(B::BitArray, start::Integer) + start > 0 || return 0 + start > length(B) && throw(BoundsError(B, start)) + unsafe_bitfindprev(B.chunks, start) +end + function findprevnot(B::BitArray, start::Integer) start > 0 || return 0 start > length(B) && throw(BoundsError(B, start)) diff --git a/base/bitset.jl b/base/bitset.jl index 46132374ce115..b01e5b09a095b 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -1,20 +1,35 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -struct BitSet <: AbstractSet{Int} - bits::BitVector - BitSet() = new(sizehint!(falses(0), 256)) +const Bits = Vector{UInt64} +const CHK0 = zero(UInt64) +const NO_OFFSET = Int === Int64 ? -one(Int) << 60 : -one(Int) << 29 +# + NO_OFFSET must be small enough to stay < 0 when added with any offset. +# An offset is in the range -2^57:2^57 (64-bits architectures) +# or -2^26:2^26 (32-bits architectures) +# + when the offset is NO_OFFSET, the bits field *must* be empty +# + NO_OFFSET could be made to be > 0, but a negative one allows +# a small optimization in the in(x, ::BitSet) + +mutable struct BitSet <: AbstractSet{Int} + bits::Vector{UInt64} + # 1st stored Int equals 64*offset + offset::Int + + BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET) end """ BitSet([itr]) -Construct a sorted set of positive `Int`s generated by the given iterable object, or an -empty set. Implemented as a bit string, and therefore designed for dense integer sets. Only -`Int`s greater than 0 can be stored. If the set will be sparse (for example holding a few +Construct a sorted set of `Int`s generated by the given iterable object, or an +empty set. Implemented as a bit string, and therefore designed for dense integer sets. +If the set will be sparse (for example holding a few very large integers), use [`Set`](@ref) instead. """ BitSet(itr) = union!(BitSet(), itr) +@inline intoffset(s::BitSet) = s.offset << 6 + eltype(::Type{BitSet}) = Int similar(s::BitSet) = BitSet() copy(s1::BitSet) = copy!(BitSet(), s1) @@ -30,92 +45,211 @@ See also [`copyto!`](@ref). function copy!(dest::BitSet, src::BitSet) resize!(dest.bits, length(src.bits)) copyto!(dest.bits, src.bits) + dest.offset = src.offset dest end + eltype(s::BitSet) = Int -sizehint!(s::BitSet, n::Integer) = (n > length(s.bits) && _resize0!(s.bits, n); s) -# An internal function for setting the inclusion bit for a given integer n >= 0 -@inline function _setint!(s::BitSet, idx::Integer, b::Bool) - if idx > length(s.bits) +sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s) + +function _bits_getindex(b::Bits, n::Int, offset::Int) + ci = _div64(n) - offset + 1 + 1 <= ci <= length(b) || return false + @inbounds r = (b[ci] & (one(UInt64) << _mod64(n))) != 0 + r +end + +function _bits_findnext(b::Bits, start::Int) + # start is 0-based + # @assert start >= 0 + _div64(start) + 1 > length(b) && return -1 + unsafe_bitfindnext(b, start+1) - 1 +end + +function _bits_findprev(b::Bits, start::Int) + # start is 0-based + # @assert start <= 64 * length(b) - 1 + start >= 0 || return -1 + unsafe_bitfindprev(b, start+1) - 1 +end + +# An internal function for setting the inclusion bit for a given integer +@inline function _setint!(s::BitSet, idx::Int, b::Bool) + cidx = _div64(idx) + len = length(s.bits) + diff = cidx - s.offset + if diff >= len b || return s # setting a bit to zero outside the set's bits is a no-op - _resize0!(s.bits, idx) + + # we put the following test within one of the two branches, + # with the NO_OFFSET trick, to avoid having to perform it at + # each and every call to _setint! + if s.offset == NO_OFFSET # initialize the offset + # we assume isempty(s.bits) + s.offset = cidx + diff = 0 + end + _growend0!(s.bits, diff - len + 1) + elseif diff < 0 + b || return s + _growbeg0!(s.bits, -diff) + s.offset += diff + diff = 0 end - @inbounds s.bits[idx] = b + _unsafe_bitsetindex!(s.bits, b, diff+1, _mod64(idx)) s end -# An internal function to resize a bitarray and ensure the newly allocated + +# An internal function to resize a Bits object and ensure the newly allocated # elements are zeroed (will become unnecessary if this behavior changes) -@inline function _resize0!(b::BitVector, newlen::Integer) +@inline function _growend0!(b::Bits, nchunks::Int) len = length(b) - newlen = ((newlen+63) >> 6) << 6 # smallest multiple of 64 >= newlen - resize!(b, newlen) - len < newlen && @inbounds b[len+1:newlen] = false # resize! gives dirty memory - b + _growend!(b, nchunks) + @inbounds b[len+1:end] = CHK0 # resize! gives dirty memory +end + +@inline function _growbeg0!(b::Bits, nchunks::Int) + _growbeg!(b, nchunks) + @inbounds b[1:nchunks] = CHK0 +end + +function _matched_map!(f, s1::BitSet, s2::BitSet) + left_false_is_false = f(false, false) == f(false, true) == false + right_false_is_false = f(false, false) == f(true, false) == false + + # we must first handle the NO_OFFSET case; we could test for + # isempty(s1) but it can be costly, so the user has to call + # empty!(s1) herself before-hand to re-initialize to NO_OFFSET + if s1.offset == NO_OFFSET + return left_false_is_false ? s1 : copy!(s1, s2) + elseif s2.offset == NO_OFFSET + return right_false_is_false ? empty!(s1) : s1 + end + s1.offset = _matched_map!(f, s1.bits, s1.offset, s2.bits, s2.offset, + left_false_is_false, right_false_is_false) + s1 end # An internal function that takes a pure function `f` and maps across two BitArrays -# allowing the lengths to be different and altering b1 with the result +# allowing the lengths and offsets to be different and altering b1 with the result # WARNING: the assumptions written in the else clauses must hold -function _matched_map!(f, b1::BitArray, b2::BitArray) - l1, l2 = length(b1), length(b2) - _bit_map!(f, b1, b2) - if l1 < l2 - if f(false, false) == f(false, true) == false +function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int, + left_false_is_false::Bool, right_false_is_false::Bool) + l1, l2 = length(a1), length(a2) + bdiff = b2 - b1 + e1, e2 = l1+b1, l2+b2 + ediff = e2 - e1 + + # map! over the common indices + @inbounds for i = max(1, 1+bdiff):min(l1, l2+bdiff) + a1[i] = f(a1[i], a2[i-bdiff]) + end + + if ediff > 0 + if left_false_is_false # We don't need to worry about the trailing bits — they're all false else # @assert f(false, x) == x - resize!(b1, l2) - chk_offs = 1+l1>>6 - unsafe_copyto!(b1.chunks, chk_offs, b2.chunks, chk_offs, 1+l2>>6-chk_offs) + _growend!(a1, ediff) + # if a1 and a2 are not overlapping, we infer implied "false" values from a2 + for outer l1 = l1+1:bdiff + @inbounds a1[l1] = CHK0 + end + # update ediff in case l1 was updated + ediff = e2 - l1 - b1 + # copy actual chunks from a2 + unsafe_copyto!(a1, l1+1, a2, l2+1-ediff, ediff) + l1 = length(a1) end - elseif l1 > l2 - if f(false, false) == f(true, false) == false + elseif ediff < 0 + if right_false_is_false # We don't need to worry about the trailing bits — they're all false - resize!(b1, l2) + _deleteend!(a1, min(l1, -ediff)) + # no need to update l1, as if bdiff > 0 (case below), then bdiff will + # be smaller anyway than an updated l1 else # @assert f(x, false) == x # We don't need to worry about the trailing bits — they already have the # correct value end end - b1 -end -# similar to bit_map! in bitarray.jl, but lengths are multiple of 64, -# and may not match -function _bit_map!(f, b1::BitArray, b2::BitArray) - b1c, b2c = b1.chunks, b2.chunks - l = min(length(b1c), length(b2c)) - @inbounds for i = 1:l - b1c[i] = f(b1c[i], b2c[i]) + if bdiff < 0 + if left_false_is_false + # We don't need to worry about the leading bits — they're all false + else # @assert f(false, x) == x + _growbeg!(a1, -bdiff) + # if a1 and a2 are not overlapping, we infer implied "false" values from a2 + for i = l2+1:-bdiff + @inbounds a1[i] = CHK0 + end + b1 += bdiff # updated return value + + # copy actual chunks from a2 + unsafe_copyto!(a1, 1, a2, 1, min(-bdiff, l2)) + end + elseif bdiff > 0 + if right_false_is_false + # We don't need to worry about the trailing bits — they're all false + _deletebeg!(a1, min(l1, bdiff)) + b1 += bdiff + else # @assert f(x, false) == x + # We don't need to worry about the trailing bits — they already have the + # correct value + end end - b1 + b1 # the new offset end -@noinline _throw_bitset_bounds_err() = throw(ArgumentError("elements of BitSet must be between 1 and typemax(Int)")) + +@noinline _throw_bitset_bounds_err() = + throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)")) + +@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int) + +@inline _check_bitset_bounds(n) = + _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err() + +@inline _check_bitset_bounds(n::Int) = n + @noinline _throw_keyerror(n) = throw(KeyError(n)) -@inline function push!(s::BitSet, n::Integer) - 0 < n <= typemax(Int) || _throw_bitset_bounds_err() - _setint!(s, n, true) -end +@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true) + push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s) -@inline function pop!(s::BitSet) - pop!(s, last(s)) -end +@inline pop!(s::BitSet) = pop!(s, last(s)) + @inline function pop!(s::BitSet, n::Integer) - n in s ? (_delete!(s, n); n) : _throw_keyerror(n) + if n in s + delete!(s, n) + n + else + _throw_keyerror(n) + end end + @inline function pop!(s::BitSet, n::Integer, default) - n in s ? (_delete!(s, n); n) : default + if n in s + delete!(s, n) + n + else + default + end end -@inline _delete!(s::BitSet, n::Integer) = _setint!(s, n, false) -@inline delete!(s::BitSet, n::Integer) = n > 0 ? _delete!(s, n) : s + +@inline delete!(s::BitSet, n::Int) = _setint!(s, n, false) +@inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? delete!(s, Int(n)) : s + shift!(s::BitSet) = pop!(s, first(s)) -empty!(s::BitSet) = (fill!(s.bits, false); s) -isempty(s::BitSet) = !any(s.bits) +function empty!(s::BitSet) + empty!(s.bits) + s.offset = NO_OFFSET + s +end + +isempty(s::BitSet) = _check0(s.bits, 1, length(s.bits)) # Mathematical set functions: union!, intersect!, setdiff!, symdiff! @@ -124,10 +258,7 @@ union(s1::BitSet, s2::BitSet) = union!(copy(s1), s2) union(s1::BitSet, ss::BitSet...) = union(s1, union(ss...)) union(s::BitSet, ns) = union!(copy(s), ns) union!(s::BitSet, ns) = (for n in ns; push!(s, n); end; s) -function union!(s1::BitSet, s2::BitSet) - _matched_map!(|, s1.bits, s2.bits) - s1 -end +union!(s1::BitSet, s2::BitSet) = _matched_map!(|, s1, s2) intersect(s1::BitSet) = copy(s1) intersect(s1::BitSet, ss::BitSet...) = intersect(s1, intersect(ss...)) @@ -146,17 +277,11 @@ intersect(s1::BitSet, s2::BitSet) = Intersects sets `s1` and `s2` and overwrites the set `s1` with the result. If needed, `s1` will be expanded to the size of `s2`. """ -function intersect!(s1::BitSet, s2::BitSet) - _matched_map!(&, s1.bits, s2.bits) - s1 -end +intersect!(s1::BitSet, s2::BitSet) = _matched_map!(&, s1, s2) setdiff(s::BitSet, ns) = setdiff!(copy(s), ns) setdiff!(s::BitSet, ns) = (for n in ns; delete!(s, n); end; s) -function setdiff!(s1::BitSet, s2::BitSet) - _matched_map!((p, q) -> p & ~q, s1.bits, s2.bits) - s1 -end +setdiff!(s1::BitSet, s2::BitSet) = _matched_map!((p, q) -> p & ~q, s1, s2) symdiff(s::BitSet, ns) = symdiff!(copy(s), ns) """ @@ -173,40 +298,41 @@ The set `s` is destructively modified to toggle the inclusion of integer `n`. symdiff!(s::BitSet, n::Integer) = int_symdiff!(s, n) function int_symdiff!(s::BitSet, n::Integer) - 0 < n < typemax(Int) || _throw_bitset_bounds_err() - val = !(n in s) - _setint!(s, n, val) + n0 = _check_bitset_bounds(n) + val = !(n0 in s) + _setint!(s, n0, val) s end -function symdiff!(s1::BitSet, s2::BitSet) - _matched_map!(xor, s1.bits, s2.bits) - s1 -end -@inline in(n::Integer, s::BitSet) = get(s.bits, n, false) +symdiff!(s1::BitSet, s2::BitSet) = _matched_map!(xor, s1, s2) + +@inline in(n::Int, s::BitSet) = _bits_getindex(s.bits, n, s.offset) +@inline in(n::Integer, s::BitSet) = _is_convertible_Int(n) ? in(Int(n), s) : false # Use the next-set index as the state to prevent looking it up again in done -start(s::BitSet) = next(s, 0)[2] -function next(s::BitSet, i) - nextidx = i == typemax(Int) ? 0 : findnext(s.bits, i+1) - (i, nextidx) +start(s::BitSet) = _bits_findnext(s.bits, 0) + +function next(s::BitSet, i::Int) + nextidx = _bits_findnext(s.bits, i+1) + (i+intoffset(s), nextidx) end -done(s::BitSet, i) = i <= 0 +done(s::BitSet, i) = i == -1 -@noinline _throw_bitset_notempty_error() = throw(ArgumentError("collection must be non-empty")) +@noinline _throw_bitset_notempty_error() = + throw(ArgumentError("collection must be non-empty")) function first(s::BitSet) - idx = findfirst(s.bits) - idx == 0 ? _throw_bitset_notempty_error() : idx + idx = _bits_findnext(s.bits, 0) + idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s) end function last(s::BitSet) - idx = findprev(s.bits, length(s.bits)) - idx == 0 ? _throw_bitset_notempty_error() : idx + idx = _bits_findprev(s.bits, (length(s.bits) << 6) - 1) + idx == -1 ? _throw_bitset_notempty_error() : idx + intoffset(s) end -length(s::BitSet) = sum(s.bits) +length(s::BitSet) = bitcount(s.bits) # = mapreduce(count_ones, +, 0, s.bits) function show(io::IO, s::BitSet) print(io, "BitSet([") @@ -219,27 +345,40 @@ function show(io::IO, s::BitSet) print(io, "])") end +function _check0(a::Vector{UInt64}, b::Int, e::Int) + @inbounds for i in b:e + a[i] == CHK0 || return false + end + true +end + function ==(s1::BitSet, s2::BitSet) - l1 = length(s1.bits) - l2 = length(s2.bits) - # If the lengths are the same, simply punt to bitarray comparison - l1 == l2 && return s1.bits == s2.bits - - # Swap so s1 is always longer - if l1 < l2 - s2, s1 = s1, s2 - l2, l1 = l1, l2 + # Swap so s1 has always the smallest offset + if s1.offset > s2.offset + s1, s2 = s2, s1 end - # Iteratively check the chunks of the bitarrays - c1 = s1.bits.chunks - c2 = s2.bits.chunks - @inbounds for i in 1:length(c2) - c1[i] == c2[i] || return false + a1 = s1.bits + a2 = s2.bits + b1, b2 = s1.offset, s2.offset + l1, l2 = length(a1), length(a2) + e1 = l1+b1 + overlap0 = max(0, e1 - b2) + included = overlap0 >= l2 # whether a2's indices are included in a1's + overlap = included ? l2 : overlap0 + + # Ensure non-overlap chunks are zero (unlikely) + _check0(a1, 1, l1-overlap0) || return false + if included + _check0(a1, b2-b1+l2+1, l1) || return false + else + _check0(a2, 1+overlap, l2) || return false end - # Ensure remaining chunks are zero - @inbounds for i in length(c2)+1:length(c1) - c1[i] == UInt64(0) || return false + + # compare overlap values + if overlap > 0 + _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false end + return true end @@ -250,15 +389,23 @@ issubset(a::BitSet, b::BitSet) = isequal(a, intersect(a,b)) const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d function hash(s::BitSet, h::UInt) h ⊻= hashis_seed - bc = s.bits.chunks - i = length(bc) - while i > 0 && bc[i] == UInt64(0) + bc = s.bits + i = 1 + j = length(bc) + + while j > 0 && bc[j] == CHK0 # Skip trailing empty bytes to prevent extra space from changing the hash - i -= 1 + j -= 1 + end + while i <= j && bc[i] == CHK0 + # Skip leading empty bytes to prevent extra space from changing the hash + i += 1 end - while i > 0 - h = hash(bc[i], h) - i -= 1 + i > j && return h # empty + h = hash(i+s.offset, h) # normalized offset + while j >= i + h = hash(bc[j], h) + j -= 1 end h end diff --git a/base/precompile.jl b/base/precompile.jl index 3ab4aff158793..e4ba7ffbf6e55 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -822,7 +822,6 @@ precompile(Tuple{typeof(Core.Inference.copy_exprargs), Array{Any, 1}}) precompile(Tuple{typeof(Core.Inference.copy), Expr}) precompile(Tuple{typeof(Core.Inference.copyto!), Array{Any, 1}, Core.Inference.Generator{Array{Any, 1}, typeof(Core.Inference.copy_exprs)}}) precompile(Tuple{typeof(Core.Inference._widen_all_consts!), Expr, Array{Bool, 1}}) -precompile(Tuple{typeof(Core.Inference._delete!), Core.Inference.BitSet, Int64}) precompile(Tuple{typeof(Core.Inference.promote_type), Type{Float16}, Type{Int64}}) precompile(Tuple{typeof(Core.Inference.mk_tuplecall), Array{Any, 1}, Core.Inference.InferenceState}) precompile(Tuple{typeof(Core.Inference.annotate_slot_load!), Expr, Array{Any, 1}, Core.Inference.InferenceState, Array{Bool, 1}}) diff --git a/base/random/generation.jl b/base/random/generation.jl index 33cfa8d48edf2..3b4aa83469b7f 100644 --- a/base/random/generation.jl +++ b/base/random/generation.jl @@ -344,14 +344,13 @@ rand(rng::AbstractRNG, sp::SamplerTag{Set,<:Sampler}) = rand(rng, sp.data).first function Sampler(rng::AbstractRNG, t::BitSet, n::Repetition) isempty(t) && throw(ArgumentError("collection must be non-empty")) - SamplerSimple(t, Sampler(rng, linearindices(t.bits), Val(Inf))) + SamplerSimple(t, Sampler(rng, minimum(t):maximum(t), Val(Inf))) end function rand(rng::AbstractRNG, sp::SamplerSimple{BitSet,<:Sampler}) while true n = rand(rng, sp.data) - @inbounds b = sp[].bits[n] - b && return n + n in sp[] && return n end end diff --git a/test/bitset.jl b/test/bitset.jl index a7b4e3e061f29..86e6c96b09ce5 100644 --- a/test/bitset.jl +++ b/test/bitset.jl @@ -55,9 +55,11 @@ end @test hash(BitSet([1])) != hash(BitSet([33])) @test hash(BitSet([1])) != hash(BitSet([65])) @test hash(BitSet([1])) != hash(BitSet([129])) + # test with a different internal structure + s = BitSet([129]) + pop!(push!(s, 65), 65) + @test hash(BitSet([1])) != hash(s) - # issue #7851 - @test_throws ArgumentError BitSet(-1) @test !(-1 in BitSet(1:10)) end @@ -79,7 +81,9 @@ end empty!(i) @test length(i) === 0 - @test_throws ArgumentError symdiff!(i, -3) + @test symdiff!(i, -3) == BitSet([-3]) + @test symdiff!(i, -3) == BitSet([]) + @test symdiff!(i, 3) == BitSet([3]) @test symdiff!(i, 257) == BitSet([3, 257]) @test symdiff!(i, [3, 6]) == BitSet([6, 257]) @@ -87,9 +91,8 @@ end i = BitSet(1:6) @test symdiff!(i, BitSet([6, 513])) == BitSet([1:5; 513]) - # issue #23099 : these tests should not segfault - @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), 0) - @test_throws ArgumentError symdiff!(BitSet(rand(1:100, 30)), [0, 2, 4]) + @test 0 ∈ symdiff!(BitSet(rand(1:100, 30)), 0) + @test BitSet(0:2:4) ⊆ symdiff!(BitSet(rand(5:100, 30)), [0, 2, 4]) # issue #23557 : @test_throws MethodError symdiff!(BitSet([1]), ['a']) # should no stack-overflow @@ -128,16 +131,17 @@ end @test union(i, j, k) == BitSet(1:9) s1 = BitSet() - @test_throws ArgumentError push!(s1, -1) - push!(s1, 1, 10, 100, 1000) - @test collect(s1) == [1, 10, 100, 1000] + @test push!(s1, -1) == BitSet([-1]) + push!(s1, -10, 1, 10, 100, 1000) + @test collect(s1) == [-10, -1, 1, 10, 100, 1000] push!(s1, 606) - @test collect(s1) == [1, 10, 100, 606, 1000] + @test collect(s1) == [-10, -1, 1, 10, 100, 606, 1000] + s2 = BitSet() @test s2 === union!(s2, s1) - s3 = BitSet([1, 10, 100]) - union!(s3, [1, 606, 1000]) - s4 = union(BitSet([1, 100, 1000]), BitSet([10, 100, 606])) + s3 = BitSet([-1, 1, 10, 100]) + union!(s3, [-10, 1, 606, 1000]) + s4 = union(BitSet([-1, 1, 100, 1000]), BitSet([-10, 10, 100, 606])) @test s1 == s2 == s3 == s4 end @@ -313,3 +317,16 @@ end @test M == last(s) == maximum(s) == maximum(a) @test issorted(s) end + +@testset "extreme values" begin + @test pop!(BitSet(typemin(Int))) == typemin(Int) + @test pop!(BitSet(typemax(Int))) == typemax(Int) +end + +@testset "sizehint! returns a BitSet" begin + # see #25029 + @test sizehint!(BitSet(), 100) isa BitSet + # TODO: test that we don't delegate sizehint! to the underlying bits + # field without dividing by 64 (i.e. the 100 above should allocate + # only 2 UInt64 words +end diff --git a/test/sets.jl b/test/sets.jl index ef46d7fec7564..8d025b773eeab 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -344,3 +344,73 @@ end @test typeof(cssset) == Set{String} @test cssset == Set(["foo", "bar"]) end + +@testset "fuzzy testing Set & BitSet" begin + b1, b2 = rand(-1000:1000, 2) + e1 = rand(b1-9:1000) # -9 to have an empty list sometimes + e2 = rand(b2-9:1000) + l1, l2 = rand(1:1000, 2) + a1 = b1 <= e1 ? rand(b1:e1, l1) : Int[] + a2 = b2 <= e2 ? rand(b2:e2, l2) : Int[] + s1, s2 = Set(a1), Set(a2) + t1, t2 = BitSet(a1), BitSet(a2) + + for (s, t) = ((s1, t1), (s2, t2)) + @test length(s) == length(t) + @test issubset(s, t) + @test issubset(t, s) + @test isempty(s) == isempty(t) + isempty(s) && continue + @test maximum(s) == maximum(t) + @test minimum(s) == minimum(t) + @test extrema(s) == extrema(t) + rs, rt = rand(s), rand(t) + @test rs in s + @test rt in s + @test rs in t + @test rt in t + for y in (rs, rt) + ss = copy(s) + tt = copy(t) + pop!(ss, y) + pop!(tt, y) + @test BitSet(ss) == tt + @test Set(tt) == ss + z = rand(1001:1100) # z ∉ s or t + push!(ss, z) + push!(tt, z) + @test BitSet(ss) == tt + @test Set(tt) == ss + end + end + + res = Dict{String,Union{Bool,Vector{Int}}}() + function check(desc, val) + n = val isa Bool ? val : sort!(collect(val)) + r = get!(res, desc, n) + if n isa Bool || r !== n + @test r == n + end + end + asbitset(x) = x isa BitSet ? x : BitSet(x) + asset(x) = x isa Set ? x : Set(x) + + for x1 = (s1, t1), x2 = (s2, t2) + check("union", union(x1, x2)) + check("intersect", intersect(x1, x2)) + check("symdiff", symdiff(x1, x2)) + check("setdiff", setdiff(x1, x2)) + check("== as Bitset", asbitset(x1) == asbitset(x2)) + check("== as Set", asset(x1) == asset(x2)) + check("issubset", issubset(x1, x2)) + if typeof(x1) == typeof(x2) + check("<", x1 < x2) + check("<=", x1 > x2) + check("union!", union!(copy(x1), x2)) + check("setdiff!", setdiff!(copy(x1), x2)) + x1 isa Set && continue + check("intersect!", intersect!(copy(x1), x2)) + check("symdiff!", symdiff!(copy(x1), x2)) + end + end +end