From f73d7bcf14459e628e51fe59bf9fbc27f3d6877d Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 10 Feb 2014 08:48:06 -0500 Subject: [PATCH 1/3] Readme should document how to iterate over set bits. --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 7c2581b..740e9e7 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ Example use: if b.Test(1000) { b.Clear(1000) } + for i := b.NextSet(int64(0)); i >= 0; i = b.NextSet(i + 1) { + frmt.Println("The following bit is set:",i); + } if B.Intersection(bitset.New(100).Set(10)).Count() > 1 { fmt.Println("Intersection works.") } From a0f359b3d11f7a2869aef4d471ff315b7d41b0fe Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 11 Feb 2014 09:36:11 -0500 Subject: [PATCH 2/3] Updated the iteration routines --- bitset.go | 24 +++++++++++++----------- bitset_test.go | 8 ++++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/bitset.go b/bitset.go index 20faafa..d04fdde 100644 --- a/bitset.go +++ b/bitset.go @@ -77,7 +77,8 @@ func (b *BitSet) safeSet() []uint64 { } func wordsNeeded(i uint) uint { - if i == math.MaxUint64 { + if i > (math.MaxUint64 - wordSize + 1 ) { // safer? 
+ // if i == math.MaxUint64 { return math.MaxUint64 >> log2WordSize } else if i == 0 { return 1 @@ -157,25 +158,25 @@ func (b *BitSet) Flip(i uint) *BitSet { // return the next bit set from the specified index, including possibly the current index // returns -1 if none is found // inspired by the Java API: for i:=int64(0); i>=0; i = NextSet(i) {...} -func (b *BitSet) NextSet(i int64) int64 { - x := uint(i) >> log2WordSize +func (b *BitSet) NextSet(i uint) (uint,bool) { + x := i >> log2WordSize if x >= b.length { - return -1 + return 0, false } w := b.set[x] - w = w >> (uint(i) & (wordSize - 1)) + w = w >> (i & (wordSize - 1)) if w != 0 { - return int64(i) + int64(trailingZeroes64(w)) + return i + trailingZeroes64(w),true } x = x + 1 for x < wordsNeeded(b.length) { if b.set[x] != 0 { - return int64(x*wordSize) + int64(trailingZeroes64(b.set[x])) + return x * wordSize + trailingZeroes64(b.set[x]),true } x = x + 1 } - return -1 + return 0, false } // Clear entire BitSet @@ -250,12 +251,13 @@ func (b *BitSet) Count() uint { return 0 } -func trailingZeroes64(v uint64) uint64 { +// computes the number of trailing zeroes on the assumption that v is non-zero +func trailingZeroes64(v uint64) uint { // NOTE: if 0 == v, then c = 63. 
if v&0x1 != 0 { return 0 } - c := uint64(1) + c := uint(1) if (v & 0xffffffff) == 0 { v >>= 32 c += 32 @@ -276,7 +278,7 @@ func trailingZeroes64(v uint64) uint64 { v >>= 2 c += 2 } - c -= v & 0x1 + c -= uint(v & 0x1) return c } diff --git a/bitset_test.go b/bitset_test.go index cbaeb97..a48e34a 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -111,8 +111,8 @@ func TestIterate(t *testing.T) { v.Set(2) data := make([]uint, 3) c := 0 - for i := v.NextSet(int64(0)); i >= 0; i = v.NextSet(i + 1) { - data[c] = uint(i) + for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) { + data[c] = i c++ } if data[0] != 0 { @@ -128,8 +128,8 @@ func TestIterate(t *testing.T) { v.Set(2000) data = make([]uint, 5) c = 0 - for i := v.NextSet(int64(0)); i >= 0; i = v.NextSet(i + 1) { - data[c] = uint(i) + for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) { + data[c] = i c++ } if data[0] != 0 { From dcd7017eaf49499e0fb924dce50db735775ea124 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 11 Feb 2014 11:03:27 -0500 Subject: [PATCH 3/3] Now with inplace operations --- README.md | 2 +- bitset.go | 148 ++++++++++++++++++++++++++++++++++++++++--------- bitset_test.go | 108 +++++++++++++++++++++++++++++++++++- 3 files changed, 229 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 740e9e7..dc042ed 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Example use: if b.Test(1000) { b.Clear(1000) } - for i := b.NextSet(int64(0)); i >= 0; i = b.NextSet(i + 1) { + for i,e := b.NextSet(0); e; i,e = b.NextSet(i + 1) { frmt.Println("The following bit is set:",i); } if B.Intersection(bitset.New(100).Set(10)).Count() > 1 { diff --git a/bitset.go b/bitset.go index d04fdde..7b8e29b 100644 --- a/bitset.go +++ b/bitset.go @@ -51,6 +51,15 @@ import ( "math" ) + + +///////////// +// Design issue: I think that a slice/array in Go has a length of type int +// as per the spec http://golang.org/ref/spec#Length_and_capacity +// yet this code assumes that the length is uint. 
I think that this is wrong +////////// + + // Word size of a bit set const wordSize = uint(64) @@ -71,11 +80,12 @@ type BitSetError string // fixup b.set to be non-nil and return the field value func (b *BitSet) safeSet() []uint64 { if b.set == nil { - b.set = make([]uint64, wordsNeeded(0)) + b.set = make([]uint64, wordsNeeded(0)) } return b.set } +// Daniel: I think this should return an int since this is the type used for array lengths in Go func wordsNeeded(i uint) uint { if i > (math.MaxUint64 - wordSize + 1 ) { // safer? // if i == math.MaxUint64 { @@ -156,8 +166,8 @@ func (b *BitSet) Flip(i uint) *BitSet { } // return the next bit set from the specified index, including possibly the current index -// returns -1 if none is found -// inspired by the Java API: for i:=int64(0); i>=0; i = NextSet(i) {...} +// along with an error code (true = valid, false = no set bit found) +// for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} func (b *BitSet) NextSet(i uint) (uint,bool) { x := i >> log2WordSize if x >= b.length { @@ -169,7 +179,7 @@ func (b *BitSet) NextSet(i uint) (uint,bool) { return i + trailingZeroes64(w),true } x = x + 1 - for x < wordsNeeded(b.length) { + for x < uint(len(b.set)) { if b.set[x] != 0 { return x * wordSize + trailingZeroes64(b.set[x]),true } @@ -189,6 +199,7 @@ func (b *BitSet) ClearAll() *BitSet { return b } +// Daniel: should return an int // Query words used in a bit set func (b *BitSet) wordCount() uint { return wordsNeeded(b.length) @@ -197,7 +208,9 @@ func (b *BitSet) wordCount() uint { // Clone this BitSet func (b *BitSet) Clone() *BitSet { c := New(b.length) - copy(c.set, b.safeSet()) + if b.set != nil {// Clone should not modify current object + copy(c.set, b.set) + } return c } @@ -208,7 +221,9 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return } - copy(c.set, b.safeSet()) + if b.set != nil {// Copy should not modify current object + copy(c.set, b.set) + } count = c.length if b.length < c.length { count = 
b.length @@ -292,7 +307,11 @@ func (b *BitSet) Equal(c *BitSet) bool { if b.length != c.length { return false } - for p, v := range b.safeSet() { + if b.length == 0 { // if they have both length == 0, then could have nil set + return true + } + // testing for equality should not transform the bitset (no call to safeSet) + for p, v := range b.set { if c.set[p] != v { return false } @@ -312,16 +331,32 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) - szl := compare.wordCount() - for i, word := range b.safeSet() { - if uint(i) >= szl { - break - } - result.set[i] = word &^ compare.set[i] + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l ; i++ { + result.set[i] = b.set[i] &^ compare.set[i] } return } +// Difference of base set and other set +// This is the BitSet equivalent of &^ (and not) +func (b *BitSet) InPlaceDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l ; i++ { + b.set[i] &^= compare.set[i] + } +} + + + // Convenience function: return two bitsets ordered by // increasing length. 
Note: neither can be nil func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { @@ -340,12 +375,35 @@ func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { panicIfNull(compare) b, compare = sortByLength(b, compare) result = New(b.length) - for i, word := range b.safeSet() { + for i, word := range b.set { result.set[i] = word & compare.set[i] } return } +// Intersection of base set and other set +// This is the BitSet equivalent of & (and) +func (b *BitSet) InPlaceIntersection(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + for i := 0; i < l ; i++ { + b.set[i] &= compare.set[i] + } + for i := l; i < len(b.set) ; i++ { + b.set[i] = 0 + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + return +} + + + // Union of base set and other set // This is the BitSet equivalent of | (or) func (b *BitSet) Union(compare *BitSet) (result *BitSet) { @@ -353,16 +411,35 @@ func (b *BitSet) Union(compare *BitSet) (result *BitSet) { panicIfNull(compare) b, compare = sortByLength(b, compare) result = compare.Clone() - szl := compare.wordCount() - for i, word := range b.safeSet() { - if uint(i) >= szl { - break - } + for i, word := range b.set { result.set[i] = word | compare.set[i] } return } + +// Union of base set and other set +// This is the BitSet equivalent of | (or) +func (b *BitSet) InPlaceUnion(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l ; i++ { + b.set[i] |= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set) ; i++ { + b.set[i] = compare.set[i] + } + } +} + // SymmetricDifference of base set and other set // This is the BitSet equivalent of ^ (xor) func (b *BitSet) SymmetricDifference(compare *BitSet) 
(result *BitSet) { @@ -371,16 +448,34 @@ func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { b, compare = sortByLength(b, compare) // compare is bigger, so clone it result = compare.Clone() - szl := b.wordCount() - for i, word := range b.safeSet() { - if uint(i) >= szl { - break - } + for i, word := range b.set { result.set[i] = word ^ compare.set[i] } return } +// SymmetricDifference of base set and other set +// This is the BitSet equivalent of ^ (xor) +func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) + } + for i := 0; i < l ; i++ { + b.set[i] ^= compare.set[i] + } + if len(compare.set) > l { + for i := l; i < len(compare.set) ; i++ { + b.set[i] = compare.set[i] + } + } +} + // Is the length an exact multiple of word sizes? func (b *BitSet) isEven() bool { return b.length%wordSize == 0 @@ -397,7 +492,7 @@ func (b *BitSet) cleanLastWord() { func (b *BitSet) Complement() (result *BitSet) { panicIfNull(b) result = New(b.length) - for i, word := range b.safeSet() { + for i, word := range b.set { result.set[i] = ^word } result.cleanLastWord() @@ -431,9 +526,10 @@ func (b *BitSet) Any() bool { } // Dump as bits +// if the bitset is empty, one word is automatically allocated func (b *BitSet) DumpAsBits() string { buffer := bytes.NewBufferString("") - b.safeSet() + b.safeSet() // it is a bit odd that dumping as bits should modify the bitset! 
i := int(wordsNeeded(b.length) - 1) for ; i >= 0; i-- { fmt.Fprintf(buffer, "%064b.", b.set[i]) diff --git a/bitset_test.go b/bitset_test.go index a48e34a..b1390df 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -438,6 +438,32 @@ func TestUnion(t *testing.T) { } } +func TestInPlaceUnion(t *testing.T) { + a := New(100) + b := New(200) + for i := uint(1); i < 100; i += 2 { + a.Set(i) + b.Set(i - 1) + } + for i := uint(100); i < 200; i++ { + b.Set(i) + } + c := a.Clone() + c.InPlaceUnion(b) + d := b.Clone() + d.InPlaceUnion(a) + if c.Count() != 200 { + t.Errorf("Union should have 200 bits set, but had %d", c.Count()) + } + if d.Count() != 200 { + t.Errorf("Union should have 200 bits set, but had %d", d.Count()) + } + if !c.Equal(d) { + t.Errorf("Union should be symmetric") + } +} + + func TestIntersection(t *testing.T) { a := New(100) b := New(200) @@ -458,6 +484,33 @@ func TestIntersection(t *testing.T) { } } + +func TestInplaceIntersection(t *testing.T) { + a := New(100) + b := New(200) + for i := uint(1); i < 100; i += 2 { + a.Set(i) + b.Set(i - 1).Set(i) + } + for i := uint(100); i < 200; i++ { + b.Set(i) + } + c := a.Clone() + c.InPlaceIntersection(b) + d := b.Clone() + d.InPlaceIntersection(a) + if c.Count() != 50 { + t.Errorf("Intersection should have 50 bits set, but had %d", c.Count()) + } + if d.Count() != 50 { + t.Errorf("Intersection should have 50 bits set, but had %d", d.Count()) + } + if !c.Equal(d) { + t.Errorf("Intersection should be symmetric") + } +} + + func TestDifference(t *testing.T) { a := New(100) b := New(200) @@ -474,7 +527,33 @@ func TestDifference(t *testing.T) { t.Errorf("a-b Difference should have 50 bits set, but had %d", c.Count()) } if d.Count() != 150 { - t.Errorf("b-a Difference should have 150 bits set, but had %d", c.Count()) + t.Errorf("b-a Difference should have 150 bits set, but had %d", d.Count()) + } + if c.Equal(d) { + t.Errorf("Difference, here, should not be symmetric") + } +} + + +func TestInPlaceDifference(t 
*testing.T) { + a := New(100) + b := New(200) + for i := uint(1); i < 100; i += 2 { + a.Set(i) + b.Set(i - 1) + } + for i := uint(100); i < 200; i++ { + b.Set(i) + } + c := a.Clone() + c.InPlaceDifference(b) + d := b.Clone() + d.InPlaceDifference(a) + if c.Count() != 50 { + t.Errorf("a-b Difference should have 50 bits set, but had %d", c.Count()) + } + if d.Count() != 150 { + t.Errorf("b-a Difference should have 150 bits set, but had %d", d.Count()) } if c.Equal(d) { t.Errorf("Difference, here, should not be symmetric") @@ -497,7 +576,32 @@ func TestSymmetricDifference(t *testing.T) { t.Errorf("a^b Difference should have 150 bits set, but had %d", c.Count()) } if d.Count() != 150 { - t.Errorf("b^a Difference should have 150 bits set, but had %d", c.Count()) + t.Errorf("b^a Difference should have 150 bits set, but had %d", d.Count()) + } + if !c.Equal(d) { + t.Errorf("SymmetricDifference should be symmetric") + } +} + +func TestInPlaceSymmetricDifference(t *testing.T) { + a := New(100) + b := New(200) + for i := uint(1); i < 100; i += 2 { + a.Set(i) // 01010101010 ... 0000000 + b.Set(i - 1).Set(i) // 11111111111111111000000 + } + for i := uint(100); i < 200; i++ { + b.Set(i) + } + c := a.Clone() + c.InPlaceSymmetricDifference(b) + d := b.Clone() + d.InPlaceSymmetricDifference(a) + if c.Count() != 150 { + t.Errorf("a^b Difference should have 150 bits set, but had %d", c.Count()) + } + if d.Count() != 150 { + t.Errorf("b^a Difference should have 150 bits set, but had %d", d.Count()) } if !c.Equal(d) { t.Errorf("SymmetricDifference should be symmetric")