1818
1919import com .google .common .math .LongMath ;
2020import com .google .common .primitives .Ints ;
21+ import com .google .common .primitives .Longs ;
2122
2223import java .math .RoundingMode ;
2324import java .util .Arrays ;
3233 * on their ordinal for BloomFilter serialization.
3334 *
3435 * @author Dimitris Andreou
36+ * @author Kurt Alfred Kluever
3537 */
3638enum BloomFilterStrategies implements BloomFilter .Strategy {
3739 /**
@@ -42,40 +44,100 @@ enum BloomFilterStrategies implements BloomFilter.Strategy {
4244 MURMUR128_MITZ_32 () {
4345 @ Override public <T > boolean put (T object , Funnel <? super T > funnel ,
4446 int numHashFunctions , BitArray bits ) {
47+ long bitSize = bits .bitSize ();
4548 long hash64 = Hashing .murmur3_128 ().hashObject (object , funnel ).asLong ();
4649 int hash1 = (int ) hash64 ;
4750 int hash2 = (int ) (hash64 >>> 32 );
51+
4852 boolean bitsChanged = false ;
4953 for (int i = 1 ; i <= numHashFunctions ; i ++) {
50- int nextHash = hash1 + i * hash2 ;
51- if (nextHash < 0 ) {
52- nextHash = ~nextHash ;
54+ int combinedHash = hash1 + (i * hash2 );
55+ // Flip all the bits if it's negative (guaranteed positive number)
56+ if (combinedHash < 0 ) {
57+ combinedHash = ~combinedHash ;
5358 }
54- bitsChanged |= bits .set (nextHash % bits . bitSize () );
59+ bitsChanged |= bits .set (combinedHash % bitSize );
5560 }
5661 return bitsChanged ;
5762 }
5863
5964 @ Override public <T > boolean mightContain (T object , Funnel <? super T > funnel ,
6065 int numHashFunctions , BitArray bits ) {
66+ long bitSize = bits .bitSize ();
6167 long hash64 = Hashing .murmur3_128 ().hashObject (object , funnel ).asLong ();
6268 int hash1 = (int ) hash64 ;
6369 int hash2 = (int ) (hash64 >>> 32 );
70+
6471 for (int i = 1 ; i <= numHashFunctions ; i ++) {
65- int nextHash = hash1 + i * hash2 ;
66- if (nextHash < 0 ) {
67- nextHash = ~nextHash ;
72+ int combinedHash = hash1 + (i * hash2 );
73+ // Flip all the bits if it's negative (guaranteed positive number)
74+ if (combinedHash < 0 ) {
75+ combinedHash = ~combinedHash ;
6876 }
69- if (!bits .get (nextHash % bits . bitSize () )) {
77+ if (!bits .get (combinedHash % bitSize )) {
7078 return false ;
7179 }
7280 }
7381 return true ;
7482 }
83+ },
84+ /**
85+ * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks
86+ * different than the implementation in MURMUR128_MITZ_32 because we're avoiding the
87+ * multiplication in the loop and doing a (much simpler) += hash2. We're also changing the
88+ * index to a positive number by AND'ing with Long.MAX_VALUE instead of flipping the bits.
89+ */
90+ MURMUR128_MITZ_64 () {
91+ @ Override
92+ public <T > boolean put (T object , Funnel <? super T > funnel ,
93+ int numHashFunctions , BitArray bits ) {
94+ long bitSize = bits .bitSize ();
95+ byte [] bytes = Hashing .murmur3_128 ().hashObject (object , funnel ).getBytesInternal ();
96+ long hash1 = lowerEight (bytes );
97+ long hash2 = upperEight (bytes );
98+
99+ boolean bitsChanged = false ;
100+ long combinedHash = hash1 + hash2 ;
101+ for (int i = 0 ; i < numHashFunctions ; i ++) {
102+ // Make the combined hash positive and indexable
103+ bitsChanged |= bits .set ((combinedHash & Long .MAX_VALUE ) % bitSize );
104+ combinedHash += hash2 ;
105+ }
106+ return bitsChanged ;
107+ }
108+
109+ @ Override
110+ public <T > boolean mightContain (T object , Funnel <? super T > funnel ,
111+ int numHashFunctions , BitArray bits ) {
112+ long bitSize = bits .bitSize ();
113+ byte [] bytes = Hashing .murmur3_128 ().hashObject (object , funnel ).getBytesInternal ();
114+ long hash1 = lowerEight (bytes );
115+ long hash2 = upperEight (bytes );
116+
117+ long combinedHash = hash1 + hash2 ;
118+ for (int i = 0 ; i < numHashFunctions ; i ++) {
119+ // Make the combined hash positive and indexable
120+ if (!bits .get ((combinedHash & Long .MAX_VALUE ) % bitSize )) {
121+ return false ;
122+ }
123+ combinedHash += hash2 ;
124+ }
125+ return true ;
126+ }
127+
128+ private /* static */ long lowerEight (byte [] bytes ) {
129+ return Longs .fromBytes (
130+ bytes [7 ], bytes [6 ], bytes [5 ], bytes [4 ], bytes [3 ], bytes [2 ], bytes [1 ], bytes [0 ]);
131+ }
132+
133+ private /* static */ long upperEight (byte [] bytes ) {
134+ return Longs .fromBytes (
135+ bytes [15 ], bytes [14 ], bytes [13 ], bytes [12 ], bytes [11 ], bytes [10 ], bytes [9 ], bytes [8 ]);
136+ }
75137 };
76138
77139 // Note: We use this instead of java.util.BitSet because we need access to the long[] data field
78- static class BitArray {
140+ static final class BitArray {
79141 final long [] data ;
80142 long bitCount ;
81143
0 commit comments