From f4c002cb86bd8cf001f9a0333e9cc6e5d5a93dde Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 14 Dec 2024 19:02:03 +0100 Subject: [PATCH 1/7] make constants untyped saves a lot of converting from/to uint/int --- bitset.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bitset.go b/bitset.go index f252bc2..f0069f7 100644 --- a/bitset.go +++ b/bitset.go @@ -49,7 +49,7 @@ import ( ) // the wordSize of a bit set -const wordSize = uint(64) +const wordSize = 64 // the wordSize of a bit set in bytes const wordBytes = wordSize / 8 @@ -58,7 +58,7 @@ const wordBytes = wordSize / 8 const wordMask = wordSize - 1 // log2WordSize is lg(wordSize) -const log2WordSize = uint(6) +const log2WordSize = 6 // allBits has every bit set const allBits uint64 = 0xffffffffffffffff @@ -150,7 +150,7 @@ func wordsNeeded(i uint) int { // wordsNeededUnbound calculates the number of words needed for i bits, possibly exceeding the capacity. // This function is useful if you know that the capacity cannot be exceeded (e.g., you have an existing BitSet). func wordsNeededUnbound(i uint) int { - return int((i + (wordSize - 1)) >> log2WordSize) + return (int(i) + (wordSize - 1)) >> log2WordSize } // wordsIndex calculates the index of words in a `uint64` @@ -611,7 +611,7 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { } for x < len(b.set) { if b.set[x] != allBits { - index = uint(x)*wordSize + uint(bits.TrailingZeros64(^b.set[x])) + index = uint(x*wordSize + bits.TrailingZeros64(^b.set[x])) if index < b.length { return index, true } @@ -1107,18 +1107,18 @@ func (b *BitSet) DumpAsBits() string { // BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes. 
func (b *BitSet) BinaryStorageSize() int { - return int(wordBytes + wordBytes*uint(b.wordCount())) + return wordBytes + wordBytes*b.wordCount() } func readUint64Array(reader io.Reader, data []uint64) error { length := len(data) bufferSize := 128 - buffer := make([]byte, bufferSize*int(wordBytes)) + buffer := make([]byte, bufferSize*wordBytes) for i := 0; i < length; i += bufferSize { end := i + bufferSize if end > length { end = length - buffer = buffer[:wordBytes*uint(end-i)] + buffer = buffer[:wordBytes*(end-i)] } chunk := data[i:end] if _, err := io.ReadFull(reader, buffer); err != nil { @@ -1133,12 +1133,12 @@ func readUint64Array(reader io.Reader, data []uint64) error { func writeUint64Array(writer io.Writer, data []uint64) error { bufferSize := 128 - buffer := make([]byte, bufferSize*int(wordBytes)) + buffer := make([]byte, bufferSize*wordBytes) for i := 0; i < len(data); i += bufferSize { end := i + bufferSize if end > len(data) { end = len(data) - buffer = buffer[:wordBytes*uint(end-i)] + buffer = buffer[:wordBytes*(end-i)] } chunk := data[i:end] for i, x := range chunk { @@ -1343,7 +1343,7 @@ func (b *BitSet) top() (uint, bool) { return 0, false } - return uint(idx)*wordSize + uint(bits.Len64(b.set[idx])) - 1, true + return uint(idx*wordSize+bits.Len64(b.set[idx])) - 1, true } // ShiftLeft shifts the bitset like << operation would do. 
From f5240a875565de24301dae888dd960ee2dec11eb Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 14 Dec 2024 19:05:28 +0100 Subject: [PATCH 2/7] simplify code with wordMask since we have now wordMask = wordSize -1 we should use it, makes the code simpler to read --- bitset.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bitset.go b/bitset.go index f0069f7..a5e7f79 100644 --- a/bitset.go +++ b/bitset.go @@ -141,21 +141,21 @@ func (b *BitSet) Words() []uint64 { // wordsNeeded calculates the number of words needed for i bits func wordsNeeded(i uint) int { - if i > (Cap() - wordSize + 1) { + if i > (Cap() - wordMask) { return int(Cap() >> log2WordSize) } - return int((i + (wordSize - 1)) >> log2WordSize) + return int((i + wordMask) >> log2WordSize) } // wordsNeededUnbound calculates the number of words needed for i bits, possibly exceeding the capacity. // This function is useful if you know that the capacity cannot be exceeded (e.g., you have an existing BitSet). func wordsNeededUnbound(i uint) int { - return (int(i) + (wordSize - 1)) >> log2WordSize + return (int(i) + wordMask) >> log2WordSize } // wordsIndex calculates the index of words in a `uint64` func wordsIndex(i uint) uint { - return i & (wordSize - 1) + return i & wordMask } // New creates a new BitSet with a hint that length bits will be required. 
@@ -322,7 +322,7 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { data[i] = ^data[i] } } - if end&(wordSize-1) != 0 { + if end&wordMask != 0 { b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) } return b From c33ad75095c895c873eac30cd116a2960f723d2b Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 14 Dec 2024 22:21:22 +0100 Subject: [PATCH 3/7] comment and simplify FlipRange --- bitset.go | 55 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/bitset.go b/bitset.go index a5e7f79..166f6a8 100644 --- a/bitset.go +++ b/bitset.go @@ -308,23 +308,54 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { if start >= end { return b } + if end-1 >= b.length { // if we need more bits, make 'em b.extendSet(end - 1) } - var startWord uint = start >> log2WordSize - var endWord uint = end >> log2WordSize + + startWord := int(start >> log2WordSize) + endWord := int(end >> log2WordSize) + + // b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) + // e.g: + // start = 71, + // startWord = 1 + // wordsIndex(start) = 71 % 64 = 7 + // (^uint64(0) << 7) = 0b111111....11110000000 + // + // mask = ^(^uint64(0) << 7) = 0b000000....00001111111 + // + // flips the first 7 bits in b.set[1] and + // in the range loop, the b.set[1] gets again flipped + // so the two expressions flip results in a flip + // in b.set[1] from [7,63] + // + // handle starWword special, get's reflipped in range loop b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) - if endWord > 0 { - // bounds check elimination - data := b.set - _ = data[endWord-1] - for i := startWord; i < endWord; i++ { - data[i] = ^data[i] - } - } - if end&wordMask != 0 { - b.set[endWord] ^= ^uint64(0) >> wordsIndex(-end) + + for idx := range b.set[startWord:endWord] { + b.set[startWord+idx] = ^b.set[startWord+idx] + } + + // handle endWord special + // e.g.
+ // end = 135 + // endWord = 2 + // + // wordsIndex(-135) = 57 + // see the golang spec: + // "For unsigned integer values, the operations +, -, *, and << are computed + // modulo 2^n, where n is the bit width of the unsigned integer's type." + // + // mask = ^uint64(0) >> 57 = 0b00000....0001111111 + // + // flips in b.set[2] from [0,6] + // + // is end at word boundary? + if idx := wordsIndex(-end); idx != 0 { + b.set[endWord] ^= ^uint64(0) >> wordsIndex(idx) } + return b } From 411d4d539e85432ffa3b09bdbf3bc7dc4b10d2ed Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 14 Dec 2024 22:22:42 +0100 Subject: [PATCH 4/7] comment and simplify NextClear --- bitset.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/bitset.go b/bitset.go index 166f6a8..2ee2213 100644 --- a/bitset.go +++ b/bitset.go @@ -628,27 +628,29 @@ func (b *BitSet) NextClear(i uint) (uint, bool) { if x >= len(b.set) { return 0, false } - w := b.set[x] - w = w >> wordsIndex(i) - wA := allBits >> wordsIndex(i) - index := i + uint(bits.TrailingZeros64(^w)) - if w != wA && index < b.length { + + // process first (maybe partial) word + word := b.set[x] + word = word >> wordsIndex(i) + wordAll := allBits >> wordsIndex(i) + + index := i + uint(bits.TrailingZeros64(^word)) + if word != wordAll && index < b.length { return index, true } + + // process the following full words until next bit is cleared + // x < len(b.set), no out-of-bounds panic in following slice expression x++ - // bounds check elimination in the loop - if x < 0 { - return 0, false - } - for x < len(b.set) { - if b.set[x] != allBits { - index = uint(x*wordSize + bits.TrailingZeros64(^b.set[x])) + for idx, word := range b.set[x:] { + if word != allBits { + index = uint((x+idx)*wordSize + bits.TrailingZeros64(^word)) if index < b.length { return index, true } } - x++ } + return 0, false } From dde4879352db00c101b525aa514f779d4b39c7f5 Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 
14 Dec 2024 22:23:45 +0100 Subject: [PATCH 5/7] harmonize var names in PreviousClear --- bitset.go | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/bitset.go b/bitset.go index 2ee2213..5d0cd2e 100644 --- a/bitset.go +++ b/bitset.go @@ -662,16 +662,18 @@ func (b *BitSet) PreviousSet(i uint) (uint, bool) { if x >= len(b.set) { return 0, false } - w := b.set[x] + word := b.set[x] + // Clear the bits above the index - w = w & ((1 << (wordsIndex(i) + 1)) - 1) - if w != 0 { - return uint(x<= 0; x-- { - w = b.set[x] - if w != 0 { - return uint(x<= len(b.set) { return 0, false } - w := b.set[x] + word := b.set[x] + // Flip all bits and find the highest one bit - w = ^w + word = ^word + // Clear the bits above the index - w = w & ((1 << (wordsIndex(i) + 1)) - 1) - if w != 0 { - return uint(x<= 0; x-- { - w = b.set[x] - w = ^w - if w != 0 { - return uint(x< Date: Sat, 14 Dec 2024 22:26:50 +0100 Subject: [PATCH 6/7] linter: non-constant format string in Errorf --- bitset_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bitset_test.go b/bitset_test.go index 9f91df6..ae92a70 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -1530,13 +1530,13 @@ func TestMarshalUnmarshalBinaryByLittleEndian(t *testing.T) { func copyBinary(t *testing.T, from encoding.BinaryMarshaler, to encoding.BinaryUnmarshaler) { data, err := from.MarshalBinary() if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } err = to.UnmarshalBinary(data) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } } @@ -1547,14 +1547,14 @@ func TestMarshalUnmarshalJSON(t *testing.T) { a.Set(10).Set(1001) data, err := json.Marshal(a) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } b := new(BitSet) err = json.Unmarshal(data, b) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } @@ -1568,14 +1568,14 @@ func TestMarshalUnmarshalJSON(t 
*testing.T) { a := New(1010).Set(10).Set(1001) data, err := json.Marshal(a) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } b := new(BitSet) err = json.Unmarshal(data, b) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } @@ -1591,7 +1591,7 @@ func TestMarshalUnmarshalJSONWithTrailingData(t *testing.T) { a := New(1010).Set(10).Set(1001) data, err := json.Marshal(a) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } @@ -1602,7 +1602,7 @@ func TestMarshalUnmarshalJSONWithTrailingData(t *testing.T) { b := new(BitSet) err = json.Unmarshal(data, b) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } @@ -1619,14 +1619,14 @@ func TestMarshalUnmarshalJSONByStdEncoding(t *testing.T) { a := New(1010).Set(10).Set(1001) data, err := json.Marshal(a) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } b := new(BitSet) err = json.Unmarshal(data, b) if err != nil { - t.Errorf(err.Error()) + t.Error(err.Error()) return } From cbfc1f7558a39125c46225dc139cb2742c19e0df Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Mon, 16 Dec 2024 16:39:43 +0100 Subject: [PATCH 7/7] typo --- bitset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitset.go b/bitset.go index 5d0cd2e..4339fea 100644 --- a/bitset.go +++ b/bitset.go @@ -330,7 +330,7 @@ func (b *BitSet) FlipRange(start, end uint) *BitSet { // so the two expressions flip results in a flip // in b.set[1] from [7,63] // - // handle starWword special, get's reflipped in range loop + // handle startWord special, get's reflipped in range loop b.set[startWord] ^= ^(^uint64(0) << wordsIndex(start)) for idx := range b.set[startWord:endWord] {