From 246eb4959fe1d1abae41acb618e052a179d79a44 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 00:28:14 -0500 Subject: [PATCH 1/6] Improved performance by rewriting key functions in C. Fixed a bug in the iterator function. --- bitset.go | 119 ++++++++++++++++++++++++++----------------------- bitset_test.go | 36 +++++++++++++++ 2 files changed, 100 insertions(+), 55 deletions(-) diff --git a/bitset.go b/bitset.go index d5f6b94..1238d06 100644 --- a/bitset.go +++ b/bitset.go @@ -42,6 +42,62 @@ */ package bitset +/* +#cgo CFLAGS: + +#if defined(__GNUC__) +int pop(unsigned long x) { + return __builtin_popcountl(x); +} +#else +int pop(unsigned long v) { + v = v - ((v >> 1) & 0x5555555555555555); + v = (v & 0x3333333333333333) + + ((v >> 2) & 0x3333333333333333); + v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F); + return (int)((v*(0x0101010101010101))>>56); +} +#endif + + + +#if defined(__GNUC__) +int ctz(unsigned long n) { + return __builtin_ctzl(n); +} +#else +int ctz(unsigned long n) { + uint32_t i = 1; + if ((n & 4294967295UL) == 0) { + n >>= 32; + i += 32; + } + if ((n & 0x0000FFFF) == 0) { + n >>= 16; + i += 16; + } + + if ((n & 0x000000FFUL) == 0) { + n >>= 8; + i += 8; + } + + if ((n & 0x0000000FUL) == 0) { + n >>= 4; + i += 4; + } + + if ((n & 0x00000003UL) == 0) { + n >>= 2; + i += 2; + } + i -= (n & 0x1); + return i; +} +#endif +*/ +import "C" + import ( "bytes" "encoding/base64" @@ -158,8 +214,8 @@ func (b *BitSet) Flip(i uint) *BitSet { // along with an error code (true = valid, false = no set bit found) // for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} func (b *BitSet) NextSet(i uint) (uint,bool) { - x := i >> log2WordSize - if x >= b.length { + x := int(i >> log2WordSize) + if x >= len(b.set) { return 0, false } w := b.set[x] @@ -168,9 +224,9 @@ func (b *BitSet) NextSet(i uint) (uint,bool) { return i + trailingZeroes64(w),true } x = x + 1 - for x < uint(len(b.set)) { + for x < len(b.set) { if b.set[x] != 0 { - return x * wordSize + trailingZeroes64(b.set[x]),true + return uint(x) * wordSize + trailingZeroes64(b.set[x]),true } x = x + 1 @@ -178,6 +234,7 @@ func (b *BitSet) NextSet(i uint) (uint,bool) { return 0, false } + // Clear entire BitSet func (b *BitSet) ClearAll() *BitSet { if b != nil && b.set != nil { @@ -219,35 +276,13 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { return } -// From Wikipedia: http://en.wikipedia.org/wiki/Hamming_weight -const m1 uint64 = 0x5555555555555555 //binary: 0101... -const m2 uint64 = 0x3333333333333333 //binary: 00110011.. -const m4 uint64 = 0x0f0f0f0f0f0f0f0f //binary: 4 zeros, 4 ones ... -const m8 uint64 = 0x00ff00ff00ff00ff //binary: 8 zeros, 8 ones ... -const m16 uint64 = 0x0000ffff0000ffff //binary: 16 zeros, 16 ones ... -const m32 uint64 = 0x00000000ffffffff //binary: 32 zeros, 32 ones -const hff uint64 = 0xffffffffffffffff //binary: all ones -const h01 uint64 = 0x0101010101010101 //the sum of 256 to the power of 0,1,2,3... - -// From Wikipedia: count number of set bits. -// This is algorithm popcount_2 in the article retrieved May 9, 2011 - -func popcount_2(x uint64) uint64 { - x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits - x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits - x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits - x += x >> 8 //put count of each 16 bits into their lowest 8 bits - x += x >> 16 //put count of each 32 bits into their lowest 8 bits - x += x >> 32 //put count of each 64 bits into their lowest 8 bits - return x & 0x7f -} - + // Count (number of set bits) func (b *BitSet) Count() uint { if b != nil && b.set != nil { cnt := uint64(0) for _, word := range b.set { - cnt += popcount_2(word) + cnt += uint64(C.pop(C.ulong(word))) } return uint(cnt) } @@ -256,33 +291,7 @@ func (b *BitSet) Count() uint { // computes the number of trailing zeroes on the assumption that v is non-zero func trailingZeroes64(v uint64) uint { - // NOTE: if 0 == v, then c = 63. - if v&0x1 != 0 { - return 0 - } - c := uint(1) - if (v & 0xffffffff) == 0 { - v >>= 32 - c += 32 - } - if (v & 0xffff) == 0 { - v >>= 16 - c += 16 - } - if (v & 0xff) == 0 { - v >>= 8 - c += 8 - } - if (v & 0xf) == 0 { - v >>= 4 - c += 4 - } - if (v & 0x3) == 0 { - v >>= 2 - c += 2 - } - c -= uint(v & 0x1) - return c + return uint(C.ctz(C.ulong(v))) } // Test the equvalence of two BitSets. diff --git a/bitset_test.go b/bitset_test.go index a1fda65..68d7a64 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -13,6 +13,7 @@ import ( "testing" ) + func TestEmptyBitSet(t *testing.T) { defer func() { if r := recover(); r != nil { @@ -684,6 +685,7 @@ func BenchmarkSetExpand(b *testing.B) { } } +// go test -bench=Count func BenchmarkCount(b *testing.B) { b.StopTimer() s := New(100000) @@ -695,3 +697,37 @@ func BenchmarkCount(b *testing.B) { s.Count() } } + + + +// go test -bench=Iterate +func BenchmarkIterate(b *testing.B) { + b.StopTimer() + s := New(10000) + for i := 0; i < 10000; i += 3 { + s.Set(uint(i)) + } + b.StartTimer() + for j := 0; j < b.N; j++ { + c := uint(0) + for i,e := s.NextSet(0); e; i,e = s.NextSet(i + 1) { + c++ + } + } +} + +// go test -bench=SparseIterate +func BenchmarkSparseIterate(b *testing.B) { + b.StopTimer() + s := New(100000) + for i := 0; i < 100000; i += 30 { + s.Set(uint(i)) + } + b.StartTimer() + for j := 0; j < b.N; j++ { + c := uint(0) + for i,e := s.NextSet(0); e; i,e = s.NextSet(i + 1) { + c++ + } + } +} \ No newline at end of file From 5039f7d200ead74b360d1ef36e74cfcc6efe0ec7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 01:15:31 -0500 Subject: [PATCH 2/6] Decent perf boost --- bitset.go | 105 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 35 deletions(-) diff --git a/bitset.go b/bitset.go index 1238d06..bd75f31 100644 --- a/bitset.go +++ b/bitset.go @@ -43,13 +43,16 @@ package bitset /* -#cgo CFLAGS: +#cgo CFLAGS: +// function to compute the number of set bits in a long integer #if defined(__GNUC__) +// when a GCC-like compiler is used, call the intrinsic int pop(unsigned long x) { return __builtin_popcountl(x); } #else +// otherwise use pure C int pop(unsigned long v) { v = v - ((v >> 1) & 0x5555555555555555); v = (v & 0x3333333333333333) + @@ -60,41 +63,16 @@ int pop(unsigned long v) { #endif - -#if defined(__GNUC__) -int ctz(unsigned long n) { - return __builtin_ctzl(n); +// computes the total number of set bits +unsigned int totalpop(void * v, int n) { + unsigned long * x = (unsigned long *) v; + unsigned int a = 0; + int k = 0; + for(; k < n ; ++k) a+= pop(x[k]); + return a; } -#else -int ctz(unsigned long n) { - uint32_t i = 1; - if ((n & 4294967295UL) == 0) { - n >>= 32; - i += 32; - } - if ((n & 0x0000FFFF) == 0) { - n >>= 16; - i += 16; - } - if ((n & 0x000000FFUL) == 0) { - n >>= 8; - i += 8; - } - if ((n & 0x0000000FUL) == 0) { - n >>= 4; - i += 4; - } - - if ((n & 0x00000003UL) == 0) { - n >>= 2; - i += 2; - } - i -= (n & 0x1); - return i; -} -#endif */ import "C" @@ -105,11 +83,13 @@ import ( "encoding/json" "fmt" "errors" + "unsafe" ) - +// we use the C code only if longs in C are 64-bit integers, otherwise fall back on pure Go +const useC = (unsafe.Sizeof(uint64(0)) == unsafe.Sizeof(C.ulong(0))) // Word size of a bit set const wordSize = uint(64) @@ -276,10 +256,35 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { return } +// From Wikipedia: http://en.wikipedia.org/wiki/Hamming_weight +const m1 uint64 = 0x5555555555555555 //binary: 0101... +const m2 uint64 = 0x3333333333333333 //binary: 00110011.. +const m4 uint64 = 0x0f0f0f0f0f0f0f0f //binary: 4 zeros, 4 ones ... +const m8 uint64 = 0x00ff00ff00ff00ff //binary: 8 zeros, 8 ones ... +const m16 uint64 = 0x0000ffff0000ffff //binary: 16 zeros, 16 ones ... +const m32 uint64 = 0x00000000ffffffff //binary: 32 zeros, 32 ones +const hff uint64 = 0xffffffffffffffff //binary: all ones +const h01 uint64 = 0x0101010101010101 //the sum of 256 to the power of 0,1,2,3... + +// From Wikipedia: count number of set bits. +// This is algorithm popcount_2 in the article retrieved May 9, 2011 + +func popcount_2(x uint64) uint64 { + x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits + x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits + x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits + x += x >> 8 //put count of each 16 bits into their lowest 8 bits + x += x >> 16 //put count of each 32 bits into their lowest 8 bits + x += x >> 32 //put count of each 64 bits into their lowest 8 bits + return x & 0x7f +} // Count (number of set bits) func (b *BitSet) Count() uint { if b != nil && b.set != nil { + if useC { + return uint(C.totalpop(unsafe.Pointer(&b.set[0]),C.int(len(b.set)))) + } cnt := uint64(0) for _, word := range b.set { cnt += uint64(C.pop(C.ulong(word))) @@ -289,11 +294,41 @@ func (b *BitSet) Count() uint { return 0 } + + // computes the number of trailing zeroes on the assumption that v is non-zero func trailingZeroes64(v uint64) uint { - return uint(C.ctz(C.ulong(v))) + // NOTE: if 0 == v, then c = 63. + //if v&0x1 != 0 { + // return 0 + //} + c := uint(1) + if (v & 0xffffffff) == 0 { + v >>= 32 + c += 32 + } + if (v & 0xffff) == 0 { + v >>= 16 + c += 16 + } + if (v & 0xff) == 0 { + v >>= 8 + c += 8 + } + if (v & 0xf) == 0 { + v >>= 4 + c += 4 + } + if (v & 0x3) == 0 { + v >>= 2 + c += 2 + } + c -= uint(v & 0x1) + return c } + + // Test the equvalence of two BitSets. // False if they are of different sizes, otherwise true // only if all the same bits are set From 2627f20db6af991b636ce4508df10c4a02a44cb9 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 01:21:57 -0500 Subject: [PATCH 3/6] Fixed idiotic issue --- bitset.go | 69 ++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/bitset.go b/bitset.go index bd75f31..00a88dc 100644 --- a/bitset.go +++ b/bitset.go @@ -56,7 +56,7 @@ int pop(unsigned long x) { int pop(unsigned long v) { v = v - ((v >> 1) & 0x5555555555555555); v = (v & 0x3333333333333333) + - ((v >> 2) & 0x3333333333333333); + ((v >> 2) & 0x3333333333333333); v = ((v + (v >> 4)) & 0x0F0F0F0F0F0F0F0F); return (int)((v*(0x0101010101010101))>>56); } @@ -270,13 +270,13 @@ const h01 uint64 = 0x0101010101010101 //the sum of 256 to the power of 0,1,2,3.. // This is algorithm popcount_2 in the article retrieved May 9, 2011 func popcount_2(x uint64) uint64 { - x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits - x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits - x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits - x += x >> 8 //put count of each 16 bits into their lowest 8 bits - x += x >> 16 //put count of each 32 bits into their lowest 8 bits - x += x >> 32 //put count of each 64 bits into their lowest 8 bits - return x & 0x7f + x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits + x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits + x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits + x += x >> 8 //put count of each 16 bits into their lowest 8 bits + x += x >> 16 //put count of each 32 bits into their lowest 8 bits + x += x >> 32 //put count of each 64 bits into their lowest 8 bits + return x & 0x7f } // Count (number of set bits) @@ -287,7 +287,7 @@ func (b *BitSet) Count() uint { } cnt := uint64(0) for _, word := range b.set { - cnt += uint64(C.pop(C.ulong(word))) + cnt += popcount_2(word) } return uint(cnt) } @@ -298,33 +298,30 @@ func (b *BitSet) Count() uint { // computes the number of trailing zeroes on the assumption that v is non-zero func trailingZeroes64(v uint64) uint { - // NOTE: if 0 == v, then c = 63. - //if v&0x1 != 0 { - // return 0 - //} - c := uint(1) - if (v & 0xffffffff) == 0 { - v >>= 32 - c += 32 - } - if (v & 0xffff) == 0 { - v >>= 16 - c += 16 - } - if (v & 0xff) == 0 { - v >>= 8 - c += 8 - } - if (v & 0xf) == 0 { - v >>= 4 - c += 4 - } - if (v & 0x3) == 0 { - v >>= 2 - c += 2 - } - c -= uint(v & 0x1) - return c + // NOTE: if 0 == v, then c = 63. + c := uint(1) + if (v & 0xffffffff) == 0 { + v >>= 32 + c += 32 + } + if (v & 0xffff) == 0 { + v >>= 16 + c += 16 + } + if (v & 0xff) == 0 { + v >>= 8 + c += 8 + } + if (v & 0xf) == 0 { + v >>= 4 + c += 4 + } + if (v & 0x3) == 0 { + v >>= 2 + c += 2 + } + c -= uint(v & 0x1) + return c } From 15635d3d060bcde331c4bdb6bb6fcca77d811a26 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 01:24:09 -0500 Subject: [PATCH 4/6] Undoing formatting changes --- bitset.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bitset.go b/bitset.go index 00a88dc..e7842bf 100644 --- a/bitset.go +++ b/bitset.go @@ -270,12 +270,12 @@ const h01 uint64 = 0x0101010101010101 //the sum of 256 to the power of 0,1,2,3.. // This is algorithm popcount_2 in the article retrieved May 9, 2011 func popcount_2(x uint64) uint64 { - x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits + x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits - x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits - x += x >> 8 //put count of each 16 bits into their lowest 8 bits - x += x >> 16 //put count of each 32 bits into their lowest 8 bits - x += x >> 32 //put count of each 64 bits into their lowest 8 bits + x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits + x += x >> 8 //put count of each 16 bits into their lowest 8 bits + x += x >> 16 //put count of each 32 bits into their lowest 8 bits + x += x >> 32 //put count of each 64 bits into their lowest 8 bits return x & 0x7f } From 6afa56655bd36ddc16304b290f2274119c898cfa Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 01:55:29 -0500 Subject: [PATCH 5/6] Now with cardinality functions --- bitset.go | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ bitset_test.go | 33 ++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/bitset.go b/bitset.go index e7842bf..60e5b1f 100644 --- a/bitset.go +++ b/bitset.go @@ -371,6 +371,25 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { return } +// computes the cardinality of the differnce +func (b *BitSet) DifferenceCardinality(compare *BitSet) (uint) { + panicIfNull(b) + panicIfNull(compare) + l := int(compare.wordCount()) + if l > int(b.wordCount()) { + l = int(b.wordCount()) + } + cnt := uint64(0) + for i := 0; i < l ; i++ { + cnt += popcount_2(b.set[i] &^ compare.set[i]) + } + for i := l; i < len(b.set) ; i++ { + cnt += popcount_2(b.set[i]) + } + return uint(cnt) +} + + // Difference of base set and other set // This is the BitSet equivalent of &^ (and not) func (b *BitSet) InPlaceDifference(compare *BitSet) { @@ -411,6 +430,20 @@ func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { return } + +// Computes the cardinality of the union +func (b *BitSet) IntersectionCardinality(compare *BitSet) (uint) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := uint64(0) + for i, word := range b.set { + cnt += popcount_2(word & compare.set[i]) + } + return uint(cnt) +} + + // Intersection of base set and other set // This is the BitSet equivalent of & (and) func (b *BitSet) InPlaceIntersection(compare *BitSet) { @@ -447,6 +480,21 @@ func (b *BitSet) Union(compare *BitSet) (result *BitSet) { return } +func (b *BitSet) UnionCardinality(compare *BitSet) (uint) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := uint64(0) + for i, word := range b.set { + cnt += popcount_2(word | compare.set[i]) + } + for i := len(b.set); i < len(compare.set) ; i++ { + cnt += popcount_2(compare.set[i]) + } + + return uint(cnt) +} + // Union of base set and other set // This is the BitSet equivalent of | (or) @@ -484,6 +532,23 @@ func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { return } +// computes the cardinality of the symmetric difference +func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) (uint) { + panicIfNull(b) + panicIfNull(compare) + b, compare = sortByLength(b, compare) + cnt := uint64(0) + for i, word := range b.set { + cnt += popcount_2(word ^ compare.set[i]) + } + for i := len(b.set); i < len(compare.set) ; i++ { + cnt += popcount_2(compare.set[i]) + } + + return uint(cnt) +} + + // SymmetricDifference of base set and other set // This is the BitSet equivalent of ^ (xor) func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { diff --git a/bitset_test.go b/bitset_test.go index 68d7a64..eedc8bf 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -188,7 +188,7 @@ func TestCount(t *testing.T) { v := New(tot) checkLast := true for i := uint(0); i < tot; i++ { - sz := v.Count() + sz := uint(v.Count()) if sz != i { t.Errorf("Count reported as %d, but it should be %d", sz, i) checkLast = false @@ -197,7 +197,7 @@ func TestCount(t *testing.T) { v.Set(i) } if checkLast { - sz := v.Count() + sz := uint(v.Count()) if sz != tot { t.Errorf("After all bits set, size reported as %d, but it should be %d", sz, tot) } @@ -209,7 +209,7 @@ func TestCount2(t *testing.T) { tot := uint(64*4 + 11) // just some multi unit64 number v := New(tot) for i := uint(0); i < tot; i += 3 { - sz := v.Count() + sz := uint(v.Count()) if sz != i/3 { t.Errorf("Count reported as %d, but it should be %d", sz, i) break @@ -422,6 +422,13 @@ func TestUnion(t *testing.T) { for i := uint(100); i < 200; i++ { b.Set(i) } + if a.UnionCardinality(b) != 200 { + t.Errorf("Union should have 200 bits set, but had %d", a.UnionCardinality(b)) + } + if a.UnionCardinality(b) != b.UnionCardinality(a) { + t.Errorf("Union should be symmetric") + } + c := a.Union(b) d := b.Union(a) if c.Count() != 200 { @@ -468,6 +475,12 @@ func TestIntersection(t *testing.T) { for i := uint(100); i < 200; i++ { b.Set(i) } + if a.IntersectionCardinality(b) != 50 { + t.Errorf("Intersection should have 50 bits set, but had %d", a.IntersectionCardinality(b)) + } + if a.IntersectionCardinality(b) != b.IntersectionCardinality(a) { + t.Errorf("Intersection should be symmetric") + } c := a.Intersection(b) d := b.Intersection(a) if c.Count() != 50 { @@ -515,6 +528,13 @@ func TestDifference(t *testing.T) { for i := uint(100); i < 200; i++ { b.Set(i) } + if a.DifferenceCardinality(b) != 50 { + t.Errorf("a-b Difference should have 50 bits set, but had %d", a.DifferenceCardinality(b)) + } + if b.DifferenceCardinality(a) != 150 { + t.Errorf("b-a Difference should have 150 bits set, but had %d", b.DifferenceCardinality(a)) + } + c := a.Difference(b) d := b.Difference(a) if c.Count() != 50 { @@ -564,6 +584,13 @@ func TestSymmetricDifference(t *testing.T) { for i := uint(100); i < 200; i++ { b.Set(i) } + if a.SymmetricDifferenceCardinality(b) != 150 { + t.Errorf("a^b Difference should have 150 bits set, but had %d", a.SymmetricDifferenceCardinality(b)) + } + if b.SymmetricDifferenceCardinality(a) != 150 { + t.Errorf("b^a Difference should have 150 bits set, but had %d", b.SymmetricDifferenceCardinality(a)) + } + c := a.SymmetricDifference(b) d := b.SymmetricDifference(a) if c.Count() != 150 { From 16feb3e52cf4551e97fefebc9829097522a207e6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 14 Feb 2014 02:02:18 -0500 Subject: [PATCH 6/6] Reformatting... --- bitset.go | 156 ++++++++++++++++++++++--------------------------- bitset_test.go | 30 ++++------ 2 files changed, 80 insertions(+), 106 deletions(-) diff --git a/bitset.go b/bitset.go index 60e5b1f..d4fb61b 100644 --- a/bitset.go +++ b/bitset.go @@ -43,7 +43,7 @@ package bitset /* -#cgo CFLAGS: +#cgo CFLAGS: // function to compute the number of set bits in a long integer #if defined(__GNUC__) @@ -81,21 +81,17 @@ import ( "encoding/base64" "encoding/binary" "encoding/json" - "fmt" "errors" - "unsafe" + "fmt" + "unsafe" ) - - // we use the C code only if longs in C are 64-bit integers, otherwise fall back on pure Go const useC = (unsafe.Sizeof(uint64(0)) == unsafe.Sizeof(C.ulong(0))) // Word size of a bit set const wordSize = uint(64) - - // for laster arith. const log2WordSize = uint(6) @@ -110,15 +106,15 @@ type BitSetError string // fixup b.set to be non-nil and return the field value func (b *BitSet) safeSet() []uint64 { if b.set == nil { - b.set = make([]uint64, wordsNeeded(0)) + b.set = make([]uint64, wordsNeeded(0)) } return b.set } func wordsNeeded(i uint) int { - if i > ((^uint(0)) - wordSize + 1 ) { + if i > ((^uint(0)) - wordSize + 1) { return int((^uint(0)) >> log2WordSize) - } + } return int((i + (wordSize - 1)) >> log2WordSize) } @@ -126,7 +122,7 @@ func New(length uint) *BitSet { return &BitSet{length, make([]uint64, wordsNeeded(length))} } -func Cap() uint { +func Cap() uint { return ^uint(0) } @@ -154,7 +150,7 @@ func (b *BitSet) Test(i uint) bool { if i >= b.length { return false } - return b.set[i>>log2WordSize] & (1<<(i&(wordSize-1))) != 0 + return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 } // Set bit i to 1 @@ -193,7 +189,7 @@ func (b *BitSet) Flip(i uint) *BitSet { // return the next bit set from the specified index, including possibly the current index // along with an error code (true = valid, false = no set bit found) // for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} -func (b *BitSet) NextSet(i uint) (uint,bool) { +func (b *BitSet) NextSet(i uint) (uint, bool) { x := int(i >> log2WordSize) if x >= len(b.set) { return 0, false @@ -201,12 +197,12 @@ func (b *BitSet) NextSet(i uint) (uint,bool) { w := b.set[x] w = w >> (i & (wordSize - 1)) if w != 0 { - return i + trailingZeroes64(w),true + return i + trailingZeroes64(w), true } x = x + 1 for x < len(b.set) { if b.set[x] != 0 { - return uint(x) * wordSize + trailingZeroes64(b.set[x]),true + return uint(x)*wordSize + trailingZeroes64(b.set[x]), true } x = x + 1 @@ -214,7 +210,6 @@ func (b *BitSet) NextSet(i uint) (uint,bool) { return 0, false } - // Clear entire BitSet func (b *BitSet) ClearAll() *BitSet { if b != nil && b.set != nil { @@ -233,8 +228,8 @@ func (b *BitSet) wordCount() int { // Clone this BitSet func (b *BitSet) Clone() *BitSet { c := New(b.length) - if b.set != nil {// Clone should not modify current object - copy(c.set, b.set) + if b.set != nil { // Clone should not modify current object + copy(c.set, b.set) } return c } @@ -246,8 +241,8 @@ func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return } - if b.set != nil {// Copy should not modify current object - copy(c.set, b.set) + if b.set != nil { // Copy should not modify current object + copy(c.set, b.set) } count = c.length if b.length < c.length { @@ -278,12 +273,12 @@ func popcount_2(x uint64) uint64 { x += x >> 32 //put count of each 64 bits into their lowest 8 bits return x & 0x7f } - + // Count (number of set bits) func (b *BitSet) Count() uint { if b != nil && b.set != nil { if useC { - return uint(C.totalpop(unsafe.Pointer(&b.set[0]),C.int(len(b.set)))) + return uint(C.totalpop(unsafe.Pointer(&b.set[0]), C.int(len(b.set)))) } cnt := uint64(0) for _, word := range b.set { @@ -294,8 +289,6 @@ func (b *BitSet) Count() uint { return 0 } - - // computes the number of trailing zeroes on the assumption that v is non-zero func trailingZeroes64(v uint64) uint { // NOTE: if 0 == v, then c = 63. @@ -324,8 +317,6 @@ func trailingZeroes64(v uint64) uint { return c } - - // Test the equvalence of two BitSets. // False if they are of different sizes, otherwise true // only if all the same bits are set @@ -341,7 +332,7 @@ func (b *BitSet) Equal(c *BitSet) bool { } // testing for equality shoud not transform the bitset (no call to safeSet) - for p, v := range b.set { + for p, v := range b.set { if c.set[p] != v { return false } @@ -361,51 +352,48 @@ func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } - for i := 0; i < l ; i++ { - result.set[i] = b.set[i] &^ compare.set[i] + for i := 0; i < l; i++ { + result.set[i] = b.set[i] &^ compare.set[i] } return } // computes the cardinality of the differnce -func (b *BitSet) DifferenceCardinality(compare *BitSet) (uint) { +func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } cnt := uint64(0) - for i := 0; i < l ; i++ { - cnt += popcount_2(b.set[i] &^ compare.set[i]) + for i := 0; i < l; i++ { + cnt += popcount_2(b.set[i] &^ compare.set[i]) } - for i := l; i < len(b.set) ; i++ { + for i := l; i < len(b.set); i++ { cnt += popcount_2(b.set[i]) } return uint(cnt) } - // Difference of base set and other set // This is the BitSet equivalent of &^ (and not) -func (b *BitSet) InPlaceDifference(compare *BitSet) { +func (b *BitSet) InPlaceDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } - for i := 0; i < l ; i++ { - b.set[i] &^= compare.set[i] + for i := 0; i < l; i++ { + b.set[i] &^= compare.set[i] } } - - // Convenience function: return two bitsets ordered by // increasing length. Note: neither can be nil func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { @@ -430,43 +418,39 @@ func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { return } - // Computes the cardinality of the union -func (b *BitSet) IntersectionCardinality(compare *BitSet) (uint) { +func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) cnt := uint64(0) for i, word := range b.set { - cnt += popcount_2(word & compare.set[i]) + cnt += popcount_2(word & compare.set[i]) } return uint(cnt) } - // Intersection of base set and other set // This is the BitSet equivalent of & (and) -func (b *BitSet) InPlaceIntersection(compare *BitSet) { +func (b *BitSet) InPlaceIntersection(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } - for i := 0; i < l ; i++ { + for i := 0; i < l; i++ { b.set[i] &= compare.set[i] } - for i := l; i < len(b.set) ; i++ { + for i := l; i < len(b.set); i++ { b.set[i] = 0 } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) } return } - - // Union of base set and other set // This is the BitSet equivalent of | (or) func (b *BitSet) Union(compare *BitSet) (result *BitSet) { @@ -480,7 +464,7 @@ func (b *BitSet) Union(compare *BitSet) (result *BitSet) { return } -func (b *BitSet) UnionCardinality(compare *BitSet) (uint) { +func (b *BitSet) UnionCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) @@ -488,33 +472,32 @@ func (b *BitSet) UnionCardinality(compare *BitSet) (uint) { for i, word := range b.set { cnt += popcount_2(word | compare.set[i]) } - for i := len(b.set); i < len(compare.set) ; i++ { + for i := len(b.set); i < len(compare.set); i++ { cnt += popcount_2(compare.set[i]) } return uint(cnt) } - // Union of base set and other set // This is the BitSet equivalent of | (or) -func (b *BitSet) InPlaceUnion(compare *BitSet) { +func (b *BitSet) InPlaceUnion(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) } - for i := 0; i < l ; i++ { + for i := 0; i < l; i++ { b.set[i] |= compare.set[i] } - if len(compare.set) > l { - for i := l; i < len(compare.set) ; i++ { - b.set[i] = compare.set[i] - } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } } } @@ -533,7 +516,7 @@ func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) { } // computes the cardinality of the symmetric difference -func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) (uint) { +func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) @@ -541,33 +524,32 @@ func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) (uint) { for i, word := range b.set { cnt += popcount_2(word ^ compare.set[i]) } - for i := len(b.set); i < len(compare.set) ; i++ { + for i := len(b.set); i < len(compare.set); i++ { cnt += popcount_2(compare.set[i]) } return uint(cnt) } - // SymmetricDifference of base set and other set // This is the BitSet equivalent of ^ (xor) -func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { +func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) - l := int(compare.wordCount()) + l := int(compare.wordCount()) if l > int(b.wordCount()) { - l = int(b.wordCount()) + l = int(b.wordCount()) } - if compare.length > 0 { - b.extendSetMaybe(compare.length - 1) + if compare.length > 0 { + b.extendSetMaybe(compare.length - 1) } - for i := 0; i < l ; i++ { + for i := 0; i < l; i++ { b.set[i] ^= compare.set[i] } - if len(compare.set) > l { - for i := l; i < len(compare.set) ; i++ { - b.set[i] = compare.set[i] - } + if len(compare.set) > l { + for i := l; i < len(compare.set); i++ { + b.set[i] = compare.set[i] + } } } @@ -680,9 +662,9 @@ func (b *BitSet) UnmarshalJSON(data []byte) error { return err } newset := New(uint(length)) - - if uint64(newset.length) != length { - return errors.New("Unmarshalling error: type mismatch") + + if uint64(newset.length) != length { + return errors.New("Unmarshalling error: type mismatch") } // Read remaining bytes as set diff --git a/bitset_test.go b/bitset_test.go index eedc8bf..218a19f 100644 --- a/bitset_test.go +++ b/bitset_test.go @@ -13,7 +13,6 @@ import ( "testing" ) - func TestEmptyBitSet(t *testing.T) { defer func() { if r := recover(); r != nil { @@ -52,7 +51,6 @@ func TestBitSetHuge(t *testing.T) { } } - func TestLen(t *testing.T) { v := New(1000) if v.Len() != 1000 { @@ -106,7 +104,7 @@ func TestIterate(t *testing.T) { v.Set(2) data := make([]uint, 3) c := 0 - for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) { + for i, e := v.NextSet(0); e; i, e = v.NextSet(i + 1) { data[c] = i c++ } @@ -123,7 +121,7 @@ func TestIterate(t *testing.T) { v.Set(2000) data = make([]uint, 5) c = 0 - for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) { + for i, e := v.NextSet(0); e; i, e = v.NextSet(i + 1) { data[c] = i c++ } @@ -464,7 +462,6 @@ func TestInPlaceUnion(t *testing.T) { } } - func TestIntersection(t *testing.T) { a := New(100) b := New(200) @@ -491,7 +488,6 @@ func TestIntersection(t *testing.T) { } } - func TestInplaceIntersection(t *testing.T) { a := New(100) b := New(200) @@ -517,7 +513,6 @@ func TestInplaceIntersection(t *testing.T) { } } - func TestDifference(t *testing.T) { a := New(100) b := New(200) @@ -548,7 +543,6 @@ func TestDifference(t *testing.T) { } } - func TestInPlaceDifference(t *testing.T) { a := New(100) b := New(200) @@ -725,8 +719,6 @@ func BenchmarkCount(b *testing.B) { } } - - // go test -bench=Iterate func BenchmarkIterate(b *testing.B) { b.StopTimer() @@ -736,10 +728,10 @@ func BenchmarkIterate(b *testing.B) { } b.StartTimer() for j := 0; j < b.N; j++ { - c := uint(0) - for i,e := s.NextSet(0); e; i,e = s.NextSet(i + 1) { - c++ - } + c := uint(0) + for i, e := s.NextSet(0); e; i, e = s.NextSet(i + 1) { + c++ + } } } @@ -752,9 +744,9 @@ func BenchmarkSparseIterate(b *testing.B) { } b.StartTimer() for j := 0; j < b.N; j++ { - c := uint(0) - for i,e := s.NextSet(0); e; i,e = s.NextSet(i + 1) { - c++ - } + c := uint(0) + for i, e := s.NextSet(0); e; i, e = s.NextSet(i + 1) { + c++ + } } -} \ No newline at end of file +}