Skip to content

Commit b45f070

Browse files
committed
Don't redundantly store the offset from key to value, since we can compute this from the key size.
1 parent a8e4a3f commit b45f070

File tree

1 file changed

+19
-22
lines changed

1 file changed

+19
-22
lines changed

unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,6 @@ public final class BytesToBytesMap {
4848

4949
private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy.DOUBLING;
5050

51-
/** Bit mask for the lower 32 bits of a long */
52-
private static final long MASK_LONG_LOWER_32_BITS = 0xFFFFFFFFL;
53-
5451
private final MemoryManager memoryManager;
5552

5653
/**
@@ -84,10 +81,18 @@ public final class BytesToBytesMap {
8481
* A single array to store the key and value.
8582
*
8683
* Position {@code 2 * i} in the array is used to track a pointer to the key at index {@code i},
87-
* while position {@code 2 * i + 1} in the array holds the upper bits of the key's hashcode plus
88-
* the relative offset from the key pointer to the value at index {@code i}.
84+
* while position {@code 2 * i + 1} in the array holds the key's full 32-bit hashcode.
8985
*/
9086
private LongArray longArray;
87+
// TODO: we're wasting 32 bits of space here; we can probably store fewer bits of the hashcode
88+
// and exploit word-alignment to use fewer bits to hold the address. This might let us store
89+
// only one long per map entry, increasing the chance that this array will fit in cache at the
90+
// expense of maybe performing more lookups if we have hash collisions. Say that we stored only
91+
// 27 bits of the hashcode and 37 bits of the address. 37 bits is enough to address 1 terabyte
92+
// of RAM given word-alignment. If we use 13 bits of this for our page table, that gives us a
93+
// maximum page size of 2^24 * 8 = ~134 megabytes per page. This change will require us to store
94+
// full base addresses in the page table for off-heap mode so that we can reconstruct the full
95+
// absolute memory addresses.
9196

9297
/**
9398
* A {@link BitSet} used to track the positions in the map at which a key is set.
@@ -222,7 +227,7 @@ public Location lookup(
222227
return loc.with(pos, hashcode, false);
223228
} else {
224229
long stored = longArray.get(pos * 2 + 1);
225-
if (((int) (stored & MASK_LONG_LOWER_32_BITS)) == hashcode) {
230+
if ((int) (stored) == hashcode) {
226231
// Full hash code matches. Let's compare the keys for equality.
227232
loc.with(pos, hashcode, true);
228233
if (loc.getKeyLength() == keyRowLengthBytes) {
@@ -270,14 +275,13 @@ public final class Location {
270275
private int keyLength;
271276
private int valueLength;
272277

273-
private void updateAddressesAndSizes(long fullKeyAddress, long offsetFromKeyToValue) {
278+
private void updateAddressesAndSizes(long fullKeyAddress) {
274279
final Object page = memoryManager.getPage(fullKeyAddress);
275280
final long keyOffsetInPage = memoryManager.getOffsetInPage(fullKeyAddress);
276281
keyMemoryLocation.setObjAndOffset(page, keyOffsetInPage + 8);
277-
valueMemoryLocation.setObjAndOffset(page, keyOffsetInPage + 8 + offsetFromKeyToValue);
278282
keyLength = (int) PlatformDependent.UNSAFE.getLong(page, keyOffsetInPage);
279-
valueLength =
280-
(int) PlatformDependent.UNSAFE.getLong(page, keyOffsetInPage + offsetFromKeyToValue);
283+
valueMemoryLocation.setObjAndOffset(page, keyOffsetInPage + 8 + keyLength + 8);
284+
valueLength = (int) PlatformDependent.UNSAFE.getLong(page, keyOffsetInPage + 8 + keyLength);
281285
}
282286

283287
Location with(int pos, int keyHashcode, boolean isDefined) {
@@ -286,9 +290,7 @@ Location with(int pos, int keyHashcode, boolean isDefined) {
286290
this.keyHashcode = keyHashcode;
287291
if (isDefined) {
288292
final long fullKeyAddress = longArray.get(pos * 2);
289-
final long offsetFromKeyToValue =
290-
(longArray.get(pos * 2 + 1) & ~MASK_LONG_LOWER_32_BITS) >>> 32;
291-
updateAddressesAndSizes(fullKeyAddress, offsetFromKeyToValue);
293+
updateAddressesAndSizes(fullKeyAddress);
292294
}
293295
return this;
294296
}
@@ -399,8 +401,6 @@ public void putNewKey(
399401
pageCursor += 8;
400402
final long valueDataOffsetInPage = pageBaseOffset + pageCursor;
401403
pageCursor += valueLengthBytes;
402-
final long relativeOffsetFromKeyToValue = valueSizeOffsetInPage - keySizeOffsetInPage;
403-
assert(relativeOffsetFromKeyToValue > 0);
404404

405405
// Copy the key
406406
PlatformDependent.UNSAFE.putLong(pageBaseObject, keySizeOffsetInPage, keyLengthBytes);
@@ -414,10 +414,8 @@ public void putNewKey(
414414
final long storedKeyAddress = memoryManager.encodePageNumberAndOffset(
415415
currentDataPage, keySizeOffsetInPage);
416416
longArray.set(pos * 2, storedKeyAddress);
417-
final long storedValueOffsetAndKeyHashcode =
418-
(relativeOffsetFromKeyToValue << 32) | (keyHashcode & MASK_LONG_LOWER_32_BITS);
419-
longArray.set(pos * 2 + 1, storedValueOffsetAndKeyHashcode);
420-
updateAddressesAndSizes(storedKeyAddress, relativeOffsetFromKeyToValue);
417+
longArray.set(pos * 2 + 1, keyHashcode);
418+
updateAddressesAndSizes(storedKeyAddress);
421419
isDefined = true;
422420
if (size > growthThreshold) {
423421
growAndRehash();
@@ -518,8 +516,7 @@ private void growAndRehash() {
518516
// Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
519517
for (int pos = oldBitSet.nextSetBit(0); pos >= 0; pos = oldBitSet.nextSetBit(pos + 1)) {
520518
final long keyPointer = oldLongArray.get(pos * 2);
521-
final long valueOffsetPlusHashcode = oldLongArray.get(pos * 2 + 1);
522-
final int hashcode = (int) (valueOffsetPlusHashcode & MASK_LONG_LOWER_32_BITS);
519+
final int hashcode = (int) oldLongArray.get(pos * 2 + 1);
523520
int newPos = hashcode & mask;
524521
int step = 1;
525522
boolean keepGoing = true;
@@ -530,7 +527,7 @@ private void growAndRehash() {
530527
if (!bitset.isSet(newPos)) {
531528
bitset.set(newPos);
532529
longArray.set(newPos * 2, keyPointer);
533-
longArray.set(newPos * 2 + 1, valueOffsetPlusHashcode);
530+
longArray.set(newPos * 2 + 1, hashcode);
534531
keepGoing = false;
535532
} else {
536533
newPos = (newPos + step) & mask;

0 commit comments

Comments
 (0)