@@ -48,9 +48,6 @@ public final class BytesToBytesMap {
4848
4949 private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy .DOUBLING ;
5050
51- /** Bit mask for the lower 32 bits of a long */
52- private static final long MASK_LONG_LOWER_32_BITS = 0xFFFFFFFFL ;
53-
5451 private final MemoryManager memoryManager ;
5552
5653 /**
@@ -84,10 +81,18 @@ public final class BytesToBytesMap {
8481 * A single array to store the key and value.
8582 *
8683 * Position {@code 2 * i} in the array is used to track a pointer to the key at index {@code i},
87- * while position {@code 2 * i + 1} in the array holds the upper bits of the key's hashcode plus
88- * the relative offset from the key pointer to the value at index {@code i}.
84+ * while position {@code 2 * i + 1} in the array holds the key's full 32-bit hashcode.
8985 */
9086 private LongArray longArray ;
87+ // TODO: we're wasting 32 bits of space here; we can probably store fewer bits of the hashcode
88+ // and exploit word-alignment to use fewer bits to hold the address. This might let us store
89+ // only one long per map entry, increasing the chance that this array will fit in cache at the
90+ // expense of maybe performing more lookups if we have hash collisions. Say that we stored only
91+ // 27 bits of the hashcode and 37 bits of the address. 37 bits is enough to address 1 terabyte
92+ // of RAM given word-alignment. If we use 13 bits of this for our page table, that gives us a
93+ // maximum page size of 2^24 * 8 = ~134 megabytes per page. This change will require us to store
94+ // full base addresses in the page table for off-heap mode so that we can reconstruct the full
95+ // absolute memory addresses.
9196
9297 /**
9398 * A {@link BitSet} used to track location of the map where the key is set.
@@ -222,7 +227,7 @@ public Location lookup(
222227 return loc .with (pos , hashcode , false );
223228 } else {
224229 long stored = longArray .get (pos * 2 + 1 );
225- if ((( int ) (stored & MASK_LONG_LOWER_32_BITS ) ) == hashcode ) {
230+ if ((int ) (stored ) == hashcode ) {
226231 // Full hash code matches. Let's compare the keys for equality.
227232 loc .with (pos , hashcode , true );
228233 if (loc .getKeyLength () == keyRowLengthBytes ) {
@@ -270,14 +275,13 @@ public final class Location {
270275 private int keyLength ;
271276 private int valueLength ;
272277
273- private void updateAddressesAndSizes (long fullKeyAddress , long offsetFromKeyToValue ) {
278+ private void updateAddressesAndSizes (long fullKeyAddress ) {
274279 final Object page = memoryManager .getPage (fullKeyAddress );
275280 final long keyOffsetInPage = memoryManager .getOffsetInPage (fullKeyAddress );
276281 keyMemoryLocation .setObjAndOffset (page , keyOffsetInPage + 8 );
277- valueMemoryLocation .setObjAndOffset (page , keyOffsetInPage + 8 + offsetFromKeyToValue );
278282 keyLength = (int ) PlatformDependent .UNSAFE .getLong (page , keyOffsetInPage );
279- valueLength =
280- (int ) PlatformDependent .UNSAFE .getLong (page , keyOffsetInPage + offsetFromKeyToValue );
283+ valueMemoryLocation . setObjAndOffset ( page , keyOffsetInPage + 8 + keyLength + 8 );
284+ valueLength = (int ) PlatformDependent .UNSAFE .getLong (page , keyOffsetInPage + 8 + keyLength );
281285 }
282286
283287 Location with (int pos , int keyHashcode , boolean isDefined ) {
@@ -286,9 +290,7 @@ Location with(int pos, int keyHashcode, boolean isDefined) {
286290 this .keyHashcode = keyHashcode ;
287291 if (isDefined ) {
288292 final long fullKeyAddress = longArray .get (pos * 2 );
289- final long offsetFromKeyToValue =
290- (longArray .get (pos * 2 + 1 ) & ~MASK_LONG_LOWER_32_BITS ) >>> 32 ;
291- updateAddressesAndSizes (fullKeyAddress , offsetFromKeyToValue );
293+ updateAddressesAndSizes (fullKeyAddress );
292294 }
293295 return this ;
294296 }
@@ -399,8 +401,6 @@ public void putNewKey(
399401 pageCursor += 8 ;
400402 final long valueDataOffsetInPage = pageBaseOffset + pageCursor ;
401403 pageCursor += valueLengthBytes ;
402- final long relativeOffsetFromKeyToValue = valueSizeOffsetInPage - keySizeOffsetInPage ;
403- assert (relativeOffsetFromKeyToValue > 0 );
404404
405405 // Copy the key
406406 PlatformDependent .UNSAFE .putLong (pageBaseObject , keySizeOffsetInPage , keyLengthBytes );
@@ -414,10 +414,8 @@ public void putNewKey(
414414 final long storedKeyAddress = memoryManager .encodePageNumberAndOffset (
415415 currentDataPage , keySizeOffsetInPage );
416416 longArray .set (pos * 2 , storedKeyAddress );
417- final long storedValueOffsetAndKeyHashcode =
418- (relativeOffsetFromKeyToValue << 32 ) | (keyHashcode & MASK_LONG_LOWER_32_BITS );
419- longArray .set (pos * 2 + 1 , storedValueOffsetAndKeyHashcode );
420- updateAddressesAndSizes (storedKeyAddress , relativeOffsetFromKeyToValue );
417+ longArray .set (pos * 2 + 1 , keyHashcode );
418+ updateAddressesAndSizes (storedKeyAddress );
421419 isDefined = true ;
422420 if (size > growthThreshold ) {
423421 growAndRehash ();
@@ -518,8 +516,7 @@ private void growAndRehash() {
518516 // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
519517 for (int pos = oldBitSet .nextSetBit (0 ); pos >= 0 ; pos = oldBitSet .nextSetBit (pos + 1 )) {
520518 final long keyPointer = oldLongArray .get (pos * 2 );
521- final long valueOffsetPlusHashcode = oldLongArray .get (pos * 2 + 1 );
522- final int hashcode = (int ) (valueOffsetPlusHashcode & MASK_LONG_LOWER_32_BITS );
519+ final int hashcode = (int ) oldLongArray .get (pos * 2 + 1 );
523520 int newPos = hashcode & mask ;
524521 int step = 1 ;
525522 boolean keepGoing = true ;
@@ -530,7 +527,7 @@ private void growAndRehash() {
530527 if (!bitset .isSet (newPos )) {
531528 bitset .set (newPos );
532529 longArray .set (newPos * 2 , keyPointer );
533- longArray .set (newPos * 2 + 1 , valueOffsetPlusHashcode );
530+ longArray .set (newPos * 2 + 1 , hashcode );
534531 keepGoing = false ;
535532 } else {
536533 newPos = (newPos + step ) & mask ;
0 commit comments