From 59de210119f50cedaa42d175dc88b6335fcf63f6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 29 Apr 2021 10:22:05 -0400 Subject: [PATCH 01/11] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 4aa2af0..97e8307 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,6 @@ *Go language library to map between non-negative integers and boolean values* [![Test](https://github.com/bits-and-blooms/bitset/workflows/Test/badge.svg)](https://github.com/willf/bitset/actions?query=workflow%3ATest) -[![Master Coverage Status](https://coveralls.io/repos/bits-and-blooms/bitset/badge.svg?branch=master&service=github)](https://coveralls.io/github/willf/bitset?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset) [![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc) From d10d8d6ab8b7bc89c53e05ef7a52a831d49d0e20 Mon Sep 17 00:00:00 2001 From: Christian Stewart Date: Sun, 20 Jun 2021 17:32:52 -0700 Subject: [PATCH 02/11] bitset: add from with length func Allowing to reconstruct a bitset from the outputs of Len() and Bytes(). Signed-off-by: Christian Stewart --- bitset.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bitset.go b/bitset.go index d688806..7f2fe50 100644 --- a/bitset.go +++ b/bitset.go @@ -89,7 +89,12 @@ func (b *BitSet) safeSet() []uint64 { // From is a constructor used to create a BitSet from an array of integers func From(buf []uint64) *BitSet { - return &BitSet{uint(len(buf)) * 64, buf} + return FromWithLength(uint(len(buf))*64, buf) +} + +// FromWithLength constructs from an array of integers and length. +func FromWithLength(len uint, set []uint64) *BitSet { + return &BitSet{len, set} } // Bytes returns the bitset as array of integers From 5a829244ffd64e4120015ce1bf8285b0b6168d55 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 9 Sep 2021 14:11:48 -0400 Subject: [PATCH 03/11] Minor fix to Shrink. --- bitset.go | 4 +++- bitset_test.go | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/bitset.go b/bitset.go index d688806..271ccdb 100644 --- a/bitset.go +++ b/bitset.go @@ -254,7 +254,9 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet { copy(shrunk, b.set[:idx]) b.set = shrunk b.length = length - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + if length < 64 { + b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + } return b } diff --git a/bitset_test.go b/bitset_test.go index f386d4e..7346b44 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -564,6 +564,13 @@ func TestAll(t *testing.T) { } func TestShrink(t *testing.T) { + bs := New(10) + bs.Set(0) + bs.Shrink(63) + if !bs.Test(0) { + t.Error("0 should be set") + return + } b := New(0) b.Set(0) From 036663ca52f26f722ace3256acbf84a943fd375f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 10 Jan 2022 15:59:13 -0500 Subject: [PATCH 04/11] Better Copy documentation/comment --- bitset.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bitset.go b/bitset.go index 271ccdb..bc7faf8 100644 --- a/bitset.go +++ b/bitset.go @@ -524,9 +524,10 @@ func (b *BitSet) Clone() *BitSet { return c } -// Copy into a destination BitSet -// Returning the size of the destination BitSet -// like array copy +// Copy into a destination BitSet using the Go array copy semantics: +// the number of bits copied is the minimum of the number of bits in the current +// BitSet (Len()) and the destination Bitset. +// We return the number of bits copied in the destination BitSet. func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return From d02bae258ee785e44652f9bf05862d4facfc5126 Mon Sep 17 00:00:00 2001 From: Christian Stewart Date: Mon, 10 Jan 2022 17:06:06 -0800 Subject: [PATCH 05/11] bitset: fix indentation of shrink Signed-off-by: Christian Stewart --- bitset.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitset.go b/bitset.go index bc7faf8..15b1556 100644 --- a/bitset.go +++ b/bitset.go @@ -255,8 +255,8 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet { b.set = shrunk b.length = length if length < 64 { - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) - } + b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + } return b } From c5208c19792289732e184524ff68da9de4d36b48 Mon Sep 17 00:00:00 2001 From: Christian Stewart Date: Mon, 10 Jan 2022 17:01:53 -0800 Subject: [PATCH 06/11] CopyFull: add func to duplicate a bitset to a target Signed-off-by: Christian Stewart --- bitset.go | 21 +++++++++++++++++++++ bitset_test.go | 16 ++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/bitset.go b/bitset.go index 15b1556..8eff273 100644 --- a/bitset.go +++ b/bitset.go @@ -542,6 +542,27 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { return } +// CopyFull copies into a destination BitSet such that the destination is +// identical to the source after the operation, allocating memory if necessary. +func (b *BitSet) CopyFull(c *BitSet) { + if c == nil { + return + } + c.length = b.length + if len(b.set) == 0 { + if c.set != nil { + c.set = c.set[:0] + } + } else { + if cap(c.set) < len(b.set) { + c.set = make([]uint64, len(b.set)) + } else { + c.set = c.set[:len(b.set)] + } + copy(c.set, b.set) + } +} + // Count (number of set bits). // Also known as "popcount" or "population count". func (b *BitSet) Count() uint { diff --git a/bitset_test.go b/bitset_test.go index 7346b44..7df653f 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -1425,6 +1425,22 @@ func TestCopy(t *testing.T) { } } +func TestCopyFull(t *testing.T) { + a := New(10) + b := &BitSet{} + a.CopyFull(b) + if b.length != a.length || len(b.set) != len(a.set) { + t.Error("Expected full length copy") + return + } + for i, v := range a.set { + if v != b.set[i] { + t.Error("Unexpected value") + return + } + } +} + func TestNextSetError(t *testing.T) { b := new(BitSet) c, d := b.NextSet(1) From a1cbd25ceaff1ab298754d3855009d2d0c6980c5 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 4 Mar 2022 17:04:56 -0500 Subject: [PATCH 07/11] Verifying and checking issue 92. --- bitset.go | 4 +++- bitset_test.go | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/bitset.go b/bitset.go index 8eff273..d60eaf2 100644 --- a/bitset.go +++ b/bitset.go @@ -226,7 +226,9 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { for i := startWord; i < endWord; i++ { b.set[i] = ^b.set[i] } - b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) + if end & (wordSize - 1) != 0 { + b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) + } return b } diff --git a/bitset_test.go b/bitset_test.go index 7df653f..a4abf72 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -1409,6 +1409,18 @@ func TestFlipRange(t *testing.T) { t.Error("Unexpected value: ", d.length) return } + // + for i := uint(0); i < 256; i++ { + for j := uint(0); j <= i; j++ { + bits := New(i) + bits.FlipRange(0, j) + c := bits.Count() + if c != j { + t.Error("Unexpected value: ", c, " expected: ", j) + return + } + } + } } func TestCopy(t *testing.T) { From 389c477faa7ee7f708ac8461f1b0f1fd886553fb Mon Sep 17 00:00:00 2001 From: Thanh Pham Kieu Date: Thu, 17 Mar 2022 00:18:19 +0700 Subject: [PATCH 08/11] use bufio.Writer and bufio.Reader for smaller memory footprint while serializing --- bitset.go | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/bitset.go b/bitset.go index 164544b..06b7482 100644 --- a/bitset.go +++ b/bitset.go @@ -895,8 +895,22 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { } // Write set - err = binary.Write(stream, binaryOrder, b.set) - return int64(b.BinaryStorageSize()), err + // current implementation of bufio.Writer is more memory efficient than + // binary.Write for large set + writer := bufio.NewWriter(stream) + var item = make([]byte, 8) // for serializing uint64 + var n = binary.Size(uint64(0)) // number of bytes written + for _, x := range b.set { + binaryOrder.PutUint64(item, x) + nn, err := writer.Write(item) + if err != nil { + return int64(n+nn), err + } + n += nn + } + + err = writer.Flush() + return int64(n), err } // ReadFrom reads a BitSet from a stream written using WriteTo @@ -915,9 +929,20 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { } // Read remaining bytes as set - err = binary.Read(stream, binaryOrder, newset.set) - if err != nil { - return 0, err + // current implementation bufio.Reader is more memory efficient than + // binary.Read for large set + reader := bufio.NewReader(stream) + i := 0 + var item = make([]byte, 8) // one uint64 + for { + if _, err := reader.Read(item); err != nil { + if err == io.EOF { + break // done + } + return 0, err + } + newset.set[i] = binaryOrder.Uint64(item) + i++ } *b = *newset @@ -927,25 +952,18 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { // MarshalBinary encodes a BitSet into a binary form and returns the result. func (b *BitSet) MarshalBinary() ([]byte, error) { var buf bytes.Buffer - writer := bufio.NewWriter(&buf) - - _, err := b.WriteTo(writer) + _, err := b.WriteTo(&buf) if err != nil { return []byte{}, err } - err = writer.Flush() - return buf.Bytes(), err } // UnmarshalBinary decodes the binary form generated by MarshalBinary. func (b *BitSet) UnmarshalBinary(data []byte) error { buf := bytes.NewReader(data) - reader := bufio.NewReader(buf) - - _, err := b.ReadFrom(reader) - + _, err := b.ReadFrom(buf) return err } From ff2336f485fb982acee67a57c7f79c267ce48cf2 Mon Sep 17 00:00:00 2001 From: Thanh Pham Kieu Date: Fri, 18 Mar 2022 08:55:22 +0700 Subject: [PATCH 09/11] clean --- bitset.go | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/bitset.go b/bitset.go index 06b7482..d4e243a 100644 --- a/bitset.go +++ b/bitset.go @@ -898,19 +898,16 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { // current implementation of bufio.Writer is more memory efficient than // binary.Write for large set writer := bufio.NewWriter(stream) - var item = make([]byte, 8) // for serializing uint64 - var n = binary.Size(uint64(0)) // number of bytes written - for _, x := range b.set { - binaryOrder.PutUint64(item, x) - nn, err := writer.Write(item) - if err != nil { - return int64(n+nn), err + var item = make([]byte, binary.Size(uint64(0))) // for serializing one uint64 + for i := range b.set { + binaryOrder.PutUint64(item, b.set[i]) + if nn, err := writer.Write(item); err != nil { + return int64(i*binary.Size(uint64(0)) + nn), err } - n += nn } err = writer.Flush() - return int64(n), err + return int64(b.BinaryStorageSize()), err } // ReadFrom reads a BitSet from a stream written using WriteTo @@ -932,9 +929,8 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { // current implementation bufio.Reader is more memory efficient than // binary.Read for large set reader := bufio.NewReader(stream) - i := 0 - var item = make([]byte, 8) // one uint64 - for { + var item = make([]byte, binary.Size(uint64(0))) // one uint64 + for i := uint64(0); i < length; i++ { if _, err := reader.Read(item); err != nil { if err == io.EOF { break // done @@ -942,7 +938,6 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { return 0, err } newset.set[i] = binaryOrder.Uint64(item) - i++ } *b = *newset From 02db5c7985ced6ad29d89d7206a3cc8a40a23a85 Mon Sep 17 00:00:00 2001 From: Mercurio Date: Thu, 21 Apr 2022 18:40:20 +0100 Subject: [PATCH 10/11] add SetBitsetFrom() to resolve #85 --- bitset.go | 6 ++++++ bitset_test.go | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/bitset.go b/bitset.go index be2f5fb..b423743 100644 --- a/bitset.go +++ b/bitset.go @@ -87,6 +87,12 @@ func (b *BitSet) safeSet() []uint64 { return b.set } +// SetBitsetFrom fills the bitset with an array of integers without creating a new BitSet instance +func (b *BitSet) SetBitsetFrom(buf []uint64) { + b.length = uint(len(buf)) * 64 + b.set = buf +} + // From is a constructor used to create a BitSet from an array of integers func From(buf []uint64) *BitSet { return FromWithLength(uint(len(buf))*64, buf) diff --git a/bitset_test.go b/bitset_test.go index a4abf72..b8eee43 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -1308,6 +1308,18 @@ func TestSafeSet(t *testing.T) { } } +func TestSetBitsetFrom(t *testing.T) { + u := []uint64{2, 3, 5, 7, 11} + b := new(BitSet) + b.SetBitsetFrom(u) + outType := fmt.Sprintf("%T", b) + expType := "*bitset.BitSet" + if outType != expType { + t.Error("Expecting type: ", expType, ", gotf:", outType) + return + } +} + func TestFrom(t *testing.T) { u := []uint64{2, 3, 5, 7, 11} b := From(u) From 3bba5f79b666b1874b50ff64a0bff168bbe354b1 Mon Sep 17 00:00:00 2001 From: SimFG Date: Thu, 21 Jul 2022 20:54:32 +0800 Subject: [PATCH 11/11] Extract `wordsIndex` to calculate the index of words in a `uint64`, and use it to replace `i % wordSize`. In addition, change the `InsertAt` function to reduce one bit operation. --- bitset.go | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/bitset.go b/bitset.go index b423743..49fe4bc 100644 --- a/bitset.go +++ b/bitset.go @@ -116,6 +116,11 @@ func wordsNeeded(i uint) int { return int((i + (wordSize - 1)) >> log2WordSize) } +// wordsIndex calculates the index of words in a `uint64` +func wordsIndex(i uint) uint { + return i & (wordSize - 1) +} + // New creates a new BitSet with a hint that length bits will be required func New(length uint) (bset *BitSet) { defer func() { @@ -171,7 +176,7 @@ func (b *BitSet) Test(i uint) bool { if i >= b.length { return false } - return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 + return b.set[i>>log2WordSize]&(1<>log2WordSize] |= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] |= 1 << wordsIndex(i) return b } @@ -191,7 +196,7 @@ func (b *BitSet) Clear(i uint) *BitSet { if i >= b.length { return b } - b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] &^= 1 << wordsIndex(i) return b } @@ -216,7 +221,7 @@ func (b *BitSet) Flip(i uint) *BitSet { if i >= b.length { return b.Set(i) } - b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) + b.set[i>>log2WordSize] ^= 1 << wordsIndex(i) return b } @@ -233,13 +238,13 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { b.extendSetMaybe(end - 1) var startWord uint = start >> log2WordSize var endWord uint = end >> log2WordSize - b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1))) + b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) for i := startWord; i < endWord; i++ { b.set[i] = ^b.set[i] } - if end & (wordSize - 1) != 0 { - b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1)) - } + if end&(wordSize-1) != 0 { + b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) + } return b } @@ -268,7 +273,7 @@ func (b *BitSet) Shrink(lastbitindex uint) *BitSet { b.set = shrunk b.length = length if length < 64 { - b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1)))) + b.set[idx-1] &= allBits >> uint64(64-wordsIndex(length)) } return b } @@ -298,7 +303,7 @@ func (b *BitSet) Compact() *BitSet { // this method could be extremely slow and in some cases might cause the entire BitSet // to be recopied. func (b *BitSet) InsertAt(idx uint) *BitSet { - insertAtElement := (idx >> log2WordSize) + insertAtElement := idx >> log2WordSize // if length of set is a multiple of wordSize we need to allocate more space first if b.isLenExactMultiple() { @@ -317,13 +322,13 @@ func (b *BitSet) InsertAt(idx uint) *BitSet { // generate a mask to extract the data that we need to shift left // within the element where we insert a bit - dataMask := ^(uint64(1)<> (i & (wordSize - 1)) + w = w >> wordsIndex(i) if w != 0 { return i + trailingZeroes64(w), true } @@ -453,7 +458,7 @@ func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) { if x >= len(b.set) || capacity == 0 { return 0, myanswer[:0] } - skip := i & (wordSize - 1) + skip := wordsIndex(i) word := b.set[x] >> skip myanswer = myanswer[:capacity] size := int(0) @@ -496,8 +501,8 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { return 0, false } w := b.set[x] - w = w >> (i & (wordSize - 1)) - wA := allBits >> (i & (wordSize - 1)) + w = w >> wordsIndex(i) + wA := allBits >> wordsIndex(i) index := i + trailingZeroes64(^w) if w != wA && index < b.length { return index, true @@ -810,17 +815,17 @@ func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { // Is the length an exact multiple of word sizes? func (b *BitSet) isLenExactMultiple() bool { - return b.length%wordSize == 0 + return wordsIndex(b.length) == 0 } // Clean last word by setting unused bits to 0 func (b *BitSet) cleanLastWord() { if !b.isLenExactMultiple() { - b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize) + b.set[len(b.set)-1] &= allBits >> (wordSize - wordsIndex(b.length)) } } -// Complement computes the (local) complement of a biset (up to length bits) +// Complement computes the (local) complement of a bitset (up to length bits) func (b *BitSet) Complement() (result *BitSet) { panicIfNull(b) result = New(b.length) @@ -848,7 +853,6 @@ func (b *BitSet) None() bool { return false } } - return true } return true }