chore(turbopack): Update dashmap from 5.x to 6.x (#72433)

Dashmap 6.x claims fairly large performance improvements:

> This release contains performance optimizations, most notably 10-40% gains on Apple Silicon but also 5-10% gains when measured in Intel Sapphire Rapids.

-- https://github.com/xacrimon/dashmap/releases/tag/v6.0.0

I don't think this means too much for us: while `turbo-tasks` uses dashmap pretty heavily, we spend far more time doing the actual computations than inside `turbo-tasks` itself, so any gain here is a fraction of a fraction.

Still, here's a fairly unscientific run (noisy environment, inside a VM on my MacBook) of a microbenchmark designed to hit `turbo-tasks` extremely hard (much harder than any realistic scenario would):

```
$ TURBOPACK_BENCH_STRESS=yes cargo bench -p turbo-tasks-memory -- fibonacci/200 
   Compiling turbo-tasks-memory v0.1.0 (/home/bgw.linux/next.js/turbopack/crates/turbo-tasks-memory)
    Finished `bench` profile [optimized] target(s) in 2.80s
     Running benches/mod.rs (target/release/deps/mod-7b9a423ac1e10dce)
Gnuplot not found, using plotters backend
Benchmarking turbo_tasks_memory_stress/fibonacci/200: Warming up for 3.0000 s
Warning: Unable to complete 2000 samples in 5.0s. You may wish to increase target time to 91.9s, or reduce sample count to 100.
turbo_tasks_memory_stress/fibonacci/200
                        time:   [33.008 ms 33.108 ms 33.208 ms]
                        thrpt:  [605.31 Kelem/s 607.13 Kelem/s 608.97 Kelem/s]
                 change:
                        time:   [-2.0573% -1.6309% -1.2221%] (p = 0.00 < 0.05)
                        thrpt:  [+1.2373% +1.6580% +2.1005%]
                        Performance has improved.
Found 44 outliers among 2000 measurements (2.20%)
  2 (0.10%) low mild
  38 (1.90%) high mild
  4 (0.20%) high severe
```

Any potential improvement to realistic topline numbers is going to be <1% and thus too small to measure, especially since I don't have a low-noise way of measuring on Apple Silicon (where most of the benefits supposedly are), only on x86-64.

Closes PACK-3411

bgw authored Nov 7, 2024
1 parent f668af2 commit 5f0adad
Showing 5 changed files with 107 additions and 81 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

2 changes: 1 addition & 1 deletion Cargo.toml
```diff
@@ -136,7 +136,7 @@ console = "0.15.5"
 console-subscriber = "0.1.8"
 criterion = "0.5.1"
 crossbeam-channel = "0.5.8"
-dashmap = "5.4.0"
+dashmap = "6.1.0"
 dhat = { version = "0.3.2" }
 dialoguer = "0.10.3"
 dunce = "1.0.3"
```
2 changes: 1 addition & 1 deletion turbopack/crates/turbo-tasks-backend/Cargo.toml
```diff
@@ -25,7 +25,7 @@ auto-hash-map = { workspace = true }
 byteorder = "1.5.0"
 dashmap = { workspace = true, features = ["raw-api"]}
 either = { workspace = true }
-hashbrown = { workspace = true }
+hashbrown = { workspace = true, features = ["raw"] }
 indexmap = { workspace = true }
 lmdb-rkv = "0.14.0"
 once_cell = { workspace = true }
```
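For context, these two feature flags are what the rewritten `dash_map_multi.rs` below builds on: dashmap's `raw-api` exposes the shards directly, and in dashmap 6 each shard guards a `hashbrown::raw::RawTable<(K, SharedValue<V>)>` rather than a `hashbrown::HashMap`. Here is a minimal sketch of that shard-access pattern for a single key; `peek_via_shard` is a hypothetical helper for illustration, not part of this PR:

```rust
use std::hash::{BuildHasher, Hash};

use dashmap::DashMap;

// Hypothetical helper (not from this PR): read one value by locking its shard
// directly, the same pattern `get_multiple_mut` below uses for two keys.
fn peek_via_shard<K: Hash + Eq, V: Clone>(
    map: &DashMap<K, V, impl BuildHasher + Clone>,
    key: &K,
) -> Option<V> {
    // Hash once with the map's own hasher and reuse that hash for shard selection.
    let hash = map.hasher().hash_one(key);
    let shard = map.determine_shard(hash as usize);
    let guard = map.shards()[shard].read();
    // With dashmap 6 the guarded shard is a `hashbrown::raw::RawTable`, so lookups
    // go through `find`, which yields a `Bucket` pointing into the table.
    let bucket = guard.find(hash, |entry| entry.0 == *key)?;
    // SAFETY: the bucket stays valid while the shard's read guard is held.
    let entry = unsafe { bucket.as_ref() };
    Some(entry.1.get().clone())
}
```

The rewritten `get_multiple_mut` below follows the same steps for two keys, with the extra wrinkle that it has to re-`find` the first bucket after the second lookup, because `find_or_find_insert_slot` may resize the table.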
```diff
@@ -359,7 +359,7 @@ where
     T: KeyValuePair,
     T::Key: Indexed,
 {
-    inner: RefMut<'a, K, InnerStorage<T>, BuildHasherDefault<FxHasher>>,
+    inner: RefMut<'a, K, InnerStorage<T>>,
 }
 
 impl<K, T> Deref for StorageWriteGuard<'_, K, T>
```
174 changes: 100 additions & 74 deletions turbopack/crates/turbo-tasks-backend/src/utils/dash_map_multi.rs
```diff
@@ -5,29 +5,26 @@ use std::{
 };
 
 use dashmap::{DashMap, RwLockWriteGuard, SharedValue};
-use hashbrown::{hash_map, HashMap};
+use hashbrown::raw::{Bucket, RawTable};
 
-pub enum RefMut<'a, K, V, S>
-where
-    S: BuildHasher,
-{
-    Base(dashmap::mapref::one::RefMut<'a, K, V, S>),
+type RwLockWriteTableGuard<'a, K, V> = RwLockWriteGuard<'a, RawTable<(K, SharedValue<V>)>>;
+
+pub enum RefMut<'a, K, V> {
+    Base(dashmap::mapref::one::RefMut<'a, K, V>),
     Simple {
-        _guard: RwLockWriteGuard<'a, HashMap<K, SharedValue<V>, S>>,
-        key: *const K,
-        value: *mut V,
+        _guard: RwLockWriteTableGuard<'a, K, V>,
+        bucket: Bucket<(K, SharedValue<V>)>,
     },
     Shared {
-        _guard: Arc<RwLockWriteGuard<'a, HashMap<K, SharedValue<V>, S>>>,
-        key: *const K,
-        value: *mut V,
+        _guard: Arc<RwLockWriteTableGuard<'a, K, V>>,
+        bucket: Bucket<(K, SharedValue<V>)>,
     },
 }
 
-unsafe impl<K: Eq + Hash + Sync, V: Sync, S: BuildHasher> Send for RefMut<'_, K, V, S> {}
-unsafe impl<K: Eq + Hash + Sync, V: Sync, S: BuildHasher> Sync for RefMut<'_, K, V, S> {}
+unsafe impl<K: Eq + Hash + Sync, V: Sync> Send for RefMut<'_, K, V> {}
+unsafe impl<K: Eq + Hash + Sync, V: Sync> Sync for RefMut<'_, K, V> {}
 
-impl<K: Eq + Hash, V, S: BuildHasher> RefMut<'_, K, V, S> {
+impl<K: Eq + Hash, V> RefMut<'_, K, V> {
     pub fn value(&self) -> &V {
         self.pair().1
     }
@@ -39,94 +36,123 @@ impl<K: Eq + Hash, V, S: BuildHasher> RefMut<'_, K, V, S> {
     pub fn pair(&self) -> (&K, &V) {
         match self {
             RefMut::Base(r) => r.pair(),
-            &RefMut::Simple { key, value, .. } => unsafe { (&*key, &*value) },
-            &RefMut::Shared { key, value, .. } => unsafe { (&*key, &*value) },
+            RefMut::Simple { bucket, .. } | RefMut::Shared { bucket, .. } => {
+                // SAFETY:
+                // - The bucket is still valid, as we're holding a write guard on the shard
+                // - These bucket pointers are convertable to references
+                //
+                // https://doc.rust-lang.org/std/ptr/index.html#pointer-to-reference-conversion
+                let entry = unsafe { bucket.as_ref() };
+                (&entry.0, entry.1.get())
+            }
         }
     }
 
     pub fn pair_mut(&mut self) -> (&K, &mut V) {
         match self {
             RefMut::Base(r) => r.pair_mut(),
-            &mut RefMut::Simple { key, value, .. } => unsafe { (&*key, &mut *value) },
-            &mut RefMut::Shared { key, value, .. } => unsafe { (&*key, &mut *value) },
+            RefMut::Simple { bucket, .. } | RefMut::Shared { bucket, .. } => {
+                // SAFETY: Same as above in `pair`, plus aliasing is prevented via:
+                // 1. The lifetime of `&mut self`.
+                // 2. `Simple` values come from separate shards (no aliasing possible)
+                // 3. `Shared` values are asserted in `get_multiple_mut` to have unique pointers
+                let entry = unsafe { bucket.as_mut() };
+                (&entry.0, entry.1.get_mut())
+            }
         }
     }
 }
 
-impl<K: Eq + Hash, V, S: BuildHasher> Deref for RefMut<'_, K, V, S> {
+impl<K: Eq + Hash, V> Deref for RefMut<'_, K, V> {
     type Target = V;
 
     fn deref(&self) -> &V {
         self.value()
     }
 }
 
-impl<K: Eq + Hash, V, S: BuildHasher> DerefMut for RefMut<'_, K, V, S> {
+impl<K: Eq + Hash, V> DerefMut for RefMut<'_, K, V> {
     fn deref_mut(&mut self) -> &mut V {
         self.value_mut()
     }
 }
 
-impl<'a, K, V, S> From<dashmap::mapref::one::RefMut<'a, K, V, S>> for RefMut<'a, K, V, S>
+impl<'a, K, V> From<dashmap::mapref::one::RefMut<'a, K, V>> for RefMut<'a, K, V>
 where
     K: Hash + Eq,
-    S: BuildHasher,
 {
-    fn from(r: dashmap::mapref::one::RefMut<'a, K, V, S>) -> Self {
+    fn from(r: dashmap::mapref::one::RefMut<'a, K, V>) -> Self {
         RefMut::Base(r)
     }
 }
 
-pub fn get_multiple_mut<K, V, S>(
-    map: &DashMap<K, V, S>,
+pub fn get_multiple_mut<K, V>(
+    map: &DashMap<K, V, impl BuildHasher + Clone>,
     key1: K,
     key2: K,
     insert_with: impl Fn() -> V,
-) -> (RefMut<'_, K, V, S>, RefMut<'_, K, V, S>)
+) -> (RefMut<'_, K, V>, RefMut<'_, K, V>)
 where
     K: Hash + Eq + Clone,
-    S: BuildHasher + Clone,
 {
-    let s1 = map.determine_map(&key1);
-    let s2 = map.determine_map(&key2);
+    let hasher = map.hasher();
+    let hash_entry = |entry: &(K, _)| hasher.hash_one(&entry.0);
+    let h1 = hasher.hash_one(&key1);
+    let h2 = hasher.hash_one(&key2);
+
+    // Use `determine_shard` instead of `determine_map` to avoid extra rehashing.
+    // This u64 -> usize conversion also happens internally within DashMap using `as usize`.
+    // See: `DashMap::hash_usize`
+    let s1 = map.determine_shard(h1 as usize);
+    let s2 = map.determine_shard(h2 as usize);
+
+    let eq1 = |other: &(K, _)| key1.eq(&other.0);
+    let eq2 = |other: &(K, _)| key2.eq(&other.0);
+
     let shards = map.shards();
     if s1 == s2 {
         let mut guard = shards[s1].write();
-        let e1 = guard
-            .raw_entry_mut()
-            .from_key(&key1)
-            .or_insert_with(|| (key1.clone(), SharedValue::new(insert_with())));
-        let mut key1_ptr = e1.0 as *const K;
-        let mut value1_ptr = e1.1.get_mut() as *mut V;
-        let key2_ptr;
-        let value2_ptr;
-        match guard.raw_entry_mut().from_key(&key2) {
-            hash_map::RawEntryMut::Occupied(e) => {
-                let e2 = e.into_key_value();
-                key2_ptr = e2.0 as *const K;
-                value2_ptr = e2.1.get_mut() as *mut V;
-            }
-            hash_map::RawEntryMut::Vacant(e) => {
-                let e2 = e.insert(key2.clone(), SharedValue::new(insert_with()));
-                key2_ptr = e2.0 as *const K;
-                value2_ptr = e2.1.get_mut() as *mut V;
-                // inserting a new entry might invalidate the pointers of the first entry
-                let e1 = guard.get_key_value_mut(&key1).unwrap();
-                key1_ptr = e1.0 as *const K;
-                value1_ptr = e1.1.get_mut() as *mut V;
-            }
-        }
+
+        // we need to call `find_or_find_insert_slot` to avoid overwriting existing entries, but we
+        // can't use the returned bucket until after we get `bucket2` (below)
+        let _ = guard
+            .find_or_find_insert_slot(h1, eq1, hash_entry)
+            .unwrap_or_else(|slot| unsafe {
+                // SAFETY: This slot was previously returned by `find_or_find_insert_slot`, and no
+                // mutation of the table has occured since that call.
+                guard.insert_in_slot(h1, slot, (key1.clone(), SharedValue::new(insert_with())))
+            });
+
+        let bucket2 = guard
+            .find_or_find_insert_slot(h2, eq2, hash_entry)
+            .unwrap_or_else(|slot| unsafe {
+                // SAFETY: See previous call above
+                guard.insert_in_slot(h2, slot, (key2.clone(), SharedValue::new(insert_with())))
+            });
+
+        // Getting `bucket2` might invalidate the bucket pointer of the first entry, *even if no
+        // insert happens* as `RawTable::find_or_find_insert_slot` will *sometimes* resize the
+        // table, as it unconditionally reserves space for a potential insertion.
+        let bucket1 = guard.find(h1, eq1).expect(
+            "failed to find bucket of previously inserted item, is the hash or eq implementation \
+             incorrect?",
+        );
+
+        // this assertion is needed for memory safety reasons
+        assert!(
+            !std::ptr::eq(bucket1.as_ptr(), bucket2.as_ptr()),
+            "`get_multiple_mut` was called with equal keys, which breaks mutable referencing rules"
+        );
+
         let guard = Arc::new(guard);
         (
             RefMut::Shared {
                 _guard: guard.clone(),
-                key: key1_ptr,
-                value: value1_ptr,
+                bucket: bucket1,
             },
             RefMut::Shared {
                 _guard: guard,
-                key: key2_ptr,
-                value: value2_ptr,
+                bucket: bucket2,
             },
         )
     } else {
@@ -144,28 +170,28 @@
                 }
             }
         };
-        let e1 = guard1
-            .raw_entry_mut()
-            .from_key(&key1)
-            .or_insert_with(|| (key1, SharedValue::new(insert_with())));
-        let key1 = e1.0 as *const K;
-        let value1 = e1.1.get_mut() as *mut V;
-        let e2 = guard2
-            .raw_entry_mut()
-            .from_key(&key2)
-            .or_insert_with(|| (key2, SharedValue::new(insert_with())));
-        let key2 = e2.0 as *const K;
-        let value2 = e2.1.get_mut() as *mut V;
+
+        let bucket1 = guard1
+            .find_or_find_insert_slot(h1, eq1, hash_entry)
+            .unwrap_or_else(|slot| unsafe {
+                // SAFETY: See first insert_in_slot call
+                guard1.insert_in_slot(h1, slot, (key1.clone(), SharedValue::new(insert_with())))
+            });
+        let bucket2 = guard2
+            .find_or_find_insert_slot(h2, eq2, hash_entry)
+            .unwrap_or_else(|slot| unsafe {
+                // SAFETY: See first insert_in_slot call
+                guard2.insert_in_slot(h2, slot, (key2.clone(), SharedValue::new(insert_with())))
+            });
+
         (
             RefMut::Simple {
                 _guard: guard1,
-                key: key1,
-                value: value1,
+                bucket: bucket1,
             },
             RefMut::Simple {
                 _guard: guard2,
-                key: key2,
-                value: value2,
+                bucket: bucket2,
             },
         )
     }
```
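For illustration, here's a hypothetical caller of the rewritten `get_multiple_mut`, assuming it is invoked from inside `turbo-tasks-backend` where the module lives (the import path and the `link` helper are assumptions, not taken from this PR). The two guards deref to `V`, and passing the same key twice would trip the memory-safety assert added above:

```rust
use dashmap::DashMap;

// Assumed module path, based on the file location shown in this PR.
use crate::utils::dash_map_multi::get_multiple_mut;

// Illustrative only: record a bidirectional edge by mutating two entries of the
// same map at once, inserting empty vectors for keys that aren't present yet.
fn link(edges: &DashMap<u32, Vec<u32>>, from: u32, to: u32) {
    // Works whether the two keys land in the same shard (`RefMut::Shared`) or in
    // different shards (`RefMut::Simple`); equal keys would panic on the assert.
    let (mut from_entry, mut to_entry) = get_multiple_mut(edges, from, to, Vec::new);
    from_entry.push(to);
    to_entry.push(from);
}
```

Holding the shard guards (or one shared, `Arc`-wrapped guard) inside the returned `RefMut`s is what keeps the `Bucket` pointers valid for as long as the caller uses them.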
