From e30cfa31bf264e07458a4833ac6fec77f7fc6139 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 19 May 2026 21:57:49 +0200 Subject: [PATCH 01/40] perf(trie): stack state trie overlays Return overlay stacks from the in-memory state trie overlay manager on cache misses while filling the flattened cache in the background. Teach trie and hashed post-state overlay cursors to merge multiple overlays directly so callers can avoid blocking on uncached full-range overlays. --- crates/chain-state/src/state_trie_overlay.rs | 340 +++++-- .../src/providers/state/historical.rs | 18 +- .../provider/src/providers/state/latest.rs | 4 +- .../provider/src/providers/state/overlay.rs | 88 +- crates/trie/db/src/changesets.rs | 4 +- crates/trie/db/src/proof.rs | 24 +- crates/trie/db/src/state.rs | 12 +- crates/trie/db/src/storage.rs | 5 +- crates/trie/db/tests/fuzz_in_memory_nodes.rs | 7 +- crates/trie/db/tests/post_state.rs | 45 +- crates/trie/sparse/src/parallel.rs | 2 +- .../trie/trie/src/hashed_cursor/post_state.rs | 753 ++++++++------ crates/trie/trie/src/node_iter.rs | 2 +- crates/trie/trie/src/test_utils.rs | 2 +- crates/trie/trie/src/trie_cursor/in_memory.rs | 922 ++++++++++++------ 15 files changed, 1466 insertions(+), 762 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index d7baef361fd..3027f0997ac 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -18,7 +18,10 @@ use reth_primitives_traits::{ use reth_tasks::WorkerPool; use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; use std::{fmt, sync::Arc, time::Instant}; -use tracing::{debug, trace}; +use tracing::{debug, trace, warn}; + +/// State trie overlays ordered from highest to lowest precedence. +pub type StateTrieOverlay = (Vec>, Vec>); /// Manages flattened state trie overlays for in-memory blocks. 
/// @@ -28,6 +31,7 @@ use tracing::{debug, trace}; pub struct StateTrieOverlayManager { blocks: Arc>>, overlays: Arc>>, + pending_overlays: Arc>, #[cfg(feature = "rayon")] worker_pool: Option>, metrics: StateTrieOverlayMetrics, @@ -50,6 +54,7 @@ impl Default for StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), + pending_overlays: Default::default(), #[cfg(feature = "rayon")] worker_pool: None, metrics: Default::default(), @@ -62,6 +67,7 @@ impl std::fmt::Debug for StateTrieOverlayManager { f.debug_struct("StateTrieOverlayManager") .field("blocks", &self.blocks.len()) .field("overlays", &self.overlays.len()) + .field("pending_overlays", &self.pending_overlays.len()) .finish() } } @@ -73,6 +79,7 @@ impl StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), + pending_overlays: Default::default(), worker_pool: Some(worker_pool), metrics: Default::default(), } @@ -132,32 +139,9 @@ impl StateTrieOverlayManager { return } - #[cfg(feature = "rayon")] - let Some(worker_pool) = self.worker_pool.clone() else { - return - }; - - #[cfg(not(feature = "rayon"))] - let _ = cached_parent_overlays; - - #[cfg(feature = "rayon")] - { - let parent_span = span; - for anchor_hash in cached_parent_overlays { - let manager = ::clone(self); - let parent_span = parent_span.clone(); - worker_pool.spawn(move || { - let _span = tracing::trace_span!( - target: "chain_state::state_trie_overlay", - parent: parent_span, - "precompute_state_trie_overlay", - tip_hash = %hash, - anchor_hash = %anchor_hash, - ) - .entered(); - let _ = manager.get_overlay(hash, anchor_hash); - }); - } + let _guard = span.enter(); + for anchor_hash in cached_parent_overlays { + self.spawn_overlay_cache_fill(OverlayCacheKey { anchor_hash, tip_hash: hash }); } } @@ -189,13 +173,20 @@ impl StateTrieOverlayManager { if removed_blocks > 0 { let overlays_before = self.overlays.len(); + let pending_overlays_before = self.pending_overlays.len(); let 
blocks = Arc::clone(&self.blocks); self.overlays.retain(|key, _| { key.tip_hash != key.anchor_hash && Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == Some(key.anchor_hash) }); + self.pending_overlays.retain(|key, _| { + key.tip_hash != key.anchor_hash && + Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == + Some(key.anchor_hash) + }); pruned_overlays = overlays_before.saturating_sub(self.overlays.len()); + pruned_overlays += pending_overlays_before.saturating_sub(self.pending_overlays.len()); span.record("pruned_overlays", pruned_overlays); } debug!( @@ -218,7 +209,7 @@ impl StateTrieOverlayManager { &self, parent_hash: B256, anchor_hash: B256, - ) -> Result<(Arc, Arc), StateTrieOverlayError> { + ) -> Result { debug!( target: "chain_state::state_trie_overlay", tip_hash = %parent_hash, @@ -226,7 +217,7 @@ impl StateTrieOverlayManager { "loading state trie overlay for parent" ); let input = self.get_overlay(parent_hash, anchor_hash)?; - Ok((Arc::clone(&input.nodes), Arc::clone(&input.state))) + Ok(input) } #[tracing::instrument( @@ -245,18 +236,40 @@ impl StateTrieOverlayManager { &self, tip_hash: B256, anchor_hash: B256, - ) -> Result, StateTrieOverlayError> { + ) -> Result { let key = OverlayCacheKey { anchor_hash, tip_hash }; let span = tracing::Span::current(); if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { self.metrics.overlay_cache_reuses.increment(1); span.record("cache_reused", true); - return Ok(input) + return Ok((vec![Arc::clone(&input.nodes)], vec![Arc::clone(&input.state)])) } span.record("cache_reused", false); - // Resolve the block path and any cached parent overlay before locking the child entry. 
+ let blocks = self.resolve_block_path(tip_hash, anchor_hash)?; + span.record("block_count", blocks.len()); + if blocks.is_empty() { + return Ok((Vec::new(), Vec::new())) + } + + let cached_prefix = self.largest_cached_prefix(anchor_hash, &blocks); + span.record("parent_overlay_reused", cached_prefix.is_some()); + + self.spawn_overlay_cache_fill(key); + + Ok(Self::overlay_stack_from_path(&blocks, cached_prefix)) + } + + fn resolve_block_path( + &self, + tip_hash: B256, + anchor_hash: B256, + ) -> Result>, StateTrieOverlayError> { + if tip_hash == anchor_hash { + return Ok(Vec::new()) + } + let mut hash = tip_hash; let mut blocks = Vec::new(); loop { @@ -270,55 +283,152 @@ impl StateTrieOverlayManager { } hash = parent_hash; } - span.record("block_count", blocks.len()); - let parent_input = blocks.first().and_then(|block| { - let parent_hash = block.recovered_block().parent_hash(); - (parent_hash != anchor_hash) - .then(|| { - self.overlays - .get(&OverlayCacheKey { anchor_hash, tip_hash: parent_hash }) - .map(|entry| Arc::clone(entry.value())) - }) - .flatten() - }); - span.record("parent_overlay_reused", parent_input.is_some()); - let compute_input = match parent_input { - Some(parent_input) => { - ComputeOverlayInput::ExtendCached { block: blocks.swap_remove(0), parent_input } + Ok(blocks) + } + + fn largest_cached_prefix( + &self, + anchor_hash: B256, + blocks_newest_to_oldest: &[ExecutedBlock], + ) -> Option<(usize, Arc)> { + blocks_newest_to_oldest.iter().enumerate().find_map(|(idx, block)| { + let tip_hash = block.recovered_block().hash(); + self.overlays + .get(&OverlayCacheKey { anchor_hash, tip_hash }) + .map(|entry| (idx, Arc::clone(entry.value()))) + }) + } + + fn overlay_stack_from_path( + blocks_newest_to_oldest: &[ExecutedBlock], + cached_prefix: Option<(usize, Arc)>, + ) -> StateTrieOverlay { + let individual_block_count = + cached_prefix.as_ref().map_or(blocks_newest_to_oldest.len(), |(idx, _)| *idx); + let mut trie_updates = + 
Vec::with_capacity(individual_block_count + cached_prefix.is_some() as usize); + let mut hashed_post_state = + Vec::with_capacity(individual_block_count + cached_prefix.is_some() as usize); + + for block in &blocks_newest_to_oldest[..individual_block_count] { + let trie_data = block.trie_data(); + trie_updates.push(trie_data.trie_updates); + hashed_post_state.push(trie_data.hashed_state); + } + + if let Some((_, input)) = cached_prefix { + trie_updates.push(Arc::clone(&input.nodes)); + hashed_post_state.push(Arc::clone(&input.state)); + } + + (trie_updates, hashed_post_state) + } + + fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey) { + if self.overlays.contains_key(&key) { + return + } + match self.pending_overlays.entry(key) { + Entry::Occupied(_) => return, + Entry::Vacant(entry) => { + entry.insert(()); + } + } + + let manager = ::clone(self); + let span = tracing::Span::current(); + + #[cfg(feature = "rayon")] + if let Some(worker_pool) = self.worker_pool.clone() { + worker_pool.spawn(move || { + let _span = tracing::trace_span!( + target: "chain_state::state_trie_overlay", + parent: span, + "compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key); + }); + return + } + + if let Err(error) = + std::thread::Builder::new().name("state-ovly-cache-fill".to_string()).spawn(move || { + let _span = tracing::trace_span!( + target: "chain_state::state_trie_overlay", + parent: span, + "compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key); + }) + { + self.pending_overlays.remove(&key); + warn!( + target: "chain_state::state_trie_overlay", + ?error, + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + "failed to spawn state trie overlay cache fill" + ); + } + } + + fn compute_and_cache_overlay(&self, key: OverlayCacheKey) { + let result = 
self.compute_overlay_for_key(key); + self.pending_overlays.remove(&key); + + if let Err(error) = result { + debug!( + target: "chain_state::state_trie_overlay", + ?error, + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + "failed to compute state trie overlay cache fill" + ); + } + } + + fn compute_overlay_for_key( + &self, + key: OverlayCacheKey, + ) -> Result, StateTrieOverlayError> { + if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { + self.metrics.overlay_cache_reuses.increment(1); + return Ok(input) + } + + let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; + let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); + let compute_input = match cached_prefix { + Some((idx, parent_input)) => { + ComputeOverlayInput::ExtendCached { blocks: blocks[..idx].to_vec(), parent_input } } None => ComputeOverlayInput::MergeBlocks(blocks), }; + let input = Arc::new(compute_overlay(compute_input, key.anchor_hash, &self.metrics)); + + if key.tip_hash != key.anchor_hash && + Self::anchor_for_parent_in(self.blocks.as_ref(), key.tip_hash, key.anchor_hash) != + Some(key.anchor_hash) + { + return Err(StateTrieOverlayError { + tip_hash: key.tip_hash, + anchor_hash: key.anchor_hash, + }); + } - // The vacant entry is the cache-fill gate: racing callers block instead of recomputing. 
let input = match self.overlays.entry(key) { Entry::Occupied(entry) => { self.metrics.overlay_cache_reuses.increment(1); - span.record("cache_reused", true); - return Ok(Arc::clone(entry.get())) + Arc::clone(entry.get()) } Entry::Vacant(entry) => { self.metrics.overlay_cache_fills.increment(1); - let input = { - #[cfg(feature = "rayon")] - { - if let Some(worker_pool) = &self.worker_pool { - let compute_span = span; - let metrics = self.metrics.clone(); - Arc::new(worker_pool.install_fn(move || { - let _guard = compute_span.enter(); - compute_overlay(compute_input, anchor_hash, &metrics) - })) - } else { - Arc::new(compute_overlay(compute_input, anchor_hash, &self.metrics)) - } - } - - #[cfg(not(feature = "rayon"))] - { - Arc::new(compute_overlay(compute_input, anchor_hash, &self.metrics)) - } - }; - entry.insert(Arc::clone(&input)); input } @@ -387,7 +497,7 @@ struct OverlayCacheKey { } enum ComputeOverlayInput { - ExtendCached { block: ExecutedBlock, parent_input: Arc }, + ExtendCached { blocks: Vec>, parent_input: Arc }, MergeBlocks(Vec>), } @@ -409,7 +519,7 @@ fn compute_overlay( ) -> TrieInputSorted { let started_at = Instant::now(); let block_count = match &input { - ComputeOverlayInput::ExtendCached { .. } => 1, + ComputeOverlayInput::ExtendCached { blocks, .. } => blocks.len(), ComputeOverlayInput::MergeBlocks(blocks) => blocks.len(), }; let parent_overlay = matches!(&input, ComputeOverlayInput::ExtendCached { .. 
}); @@ -417,18 +527,19 @@ fn compute_overlay( tracing::Span::current().record("parent_overlay", parent_overlay); let overlay = match input { - ComputeOverlayInput::ExtendCached { block, parent_input } => { - let trie_data = block.trie_data(); - + ComputeOverlayInput::ExtendCached { blocks, parent_input } => { trace!( target: "chain_state::state_trie_overlay", %anchor_hash, - head = %block.recovered_block().hash(), + block_count = blocks.len(), "extending cached parent state trie overlay" ); let mut overlay = parent_input.as_ref().clone(); - extend_overlay(&mut overlay, &trie_data.hashed_state, &trie_data.trie_updates); + for block in blocks.iter().rev() { + let trie_data = block.trie_data(); + extend_overlay(&mut overlay, &trie_data.hashed_state, &trie_data.trie_updates); + } overlay } ComputeOverlayInput::MergeBlocks(blocks) => merge_blocks(blocks), @@ -552,6 +663,10 @@ mod tests { .collect() } + fn state_account_count(states: &[Arc]) -> usize { + states.iter().map(|state| state.accounts.len()).sum() + } + #[test] fn errors_for_unknown_parent() { let manager = StateTrieOverlayManager::::default(); @@ -576,15 +691,42 @@ mod tests { let (_, state) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 3); + assert_eq!(state.len(), 3); + assert_eq!(state_account_count(&state), 3); let short_anchor = blocks[1].recovered_block().hash(); let (_, short) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - assert_eq!(short.accounts.len(), 1); + assert_eq!(short.len(), 1); + assert_eq!(state_account_count(&short), 1); + manager.compute_and_cache_overlay(OverlayCacheKey { + anchor_hash: short_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }); let (_, cached_short) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - assert!(Arc::ptr_eq(&short, &cached_short)); + assert_eq!(cached_short.len(), 1); + 
assert_eq!(cached_short[0].accounts.len(), 1); + } + + #[test] + fn cache_miss_returns_largest_cached_prefix_and_individual_blocks() { + let manager = StateTrieOverlayManager::default(); + let blocks = test_blocks(); + for block in &blocks { + manager.insert_block(block.clone()); + } + + let anchor_hash = blocks[0].recovered_block().parent_hash(); + let prefix_tip = blocks[1].recovered_block().hash(); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }); + + let (_, state) = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + assert_eq!(state.len(), 2); + assert_eq!(state[0].accounts.len(), 1); + assert_eq!(state[1].accounts.len(), 2); + assert_eq!(state_account_count(&state), 3); } #[test] @@ -635,7 +777,7 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let parent_hash = blocks[0].recovered_block().hash(); - manager.overlay_for_parent(parent_hash, anchor_hash).unwrap(); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); let child_hash = blocks[1].recovered_block().hash(); manager.insert_block(blocks[1].clone()); @@ -651,7 +793,32 @@ mod tests { } let (_, state) = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 2); + assert_eq!(state.len(), 1); + assert_eq!(state[0].accounts.len(), 2); + } + + #[cfg(feature = "rayon")] + #[test] + fn insert_block_respects_pending_child_overlay_fill() { + let manager = StateTrieOverlayManager::new(Arc::new(WorkerPool::new(2, "test-ovly"))); + let blocks = test_blocks(); + + manager.insert_block(blocks[0].clone()); + + let anchor_hash = blocks[0].recovered_block().parent_hash(); + let parent_hash = blocks[0].recovered_block().hash(); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); + + let child_hash = blocks[1].recovered_block().hash(); + let child_key = OverlayCacheKey { anchor_hash, tip_hash: 
child_hash }; + manager.pending_overlays.insert(child_key, ()); + + manager.insert_block(blocks[1].clone()); + thread::sleep(Duration::from_millis(100)); + + assert!(!manager.overlays.contains_key(&child_key)); + assert!(manager.pending_overlays.contains_key(&child_key)); + manager.pending_overlays.remove(&child_key); } #[test] @@ -663,7 +830,10 @@ mod tests { } let original_anchor = blocks[0].recovered_block().parent_hash(); - manager.overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor).unwrap(); + manager.compute_and_cache_overlay(OverlayCacheKey { + anchor_hash: original_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }); manager.remove_blocks([ blocks[0].recovered_block().hash(), @@ -677,6 +847,6 @@ mod tests { let (_, state) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 1); + assert_eq!(state_account_count(&state), 1); } } diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index 7999c8795da..3707e7ac2fe 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -23,11 +23,11 @@ use reth_trie::{ hashed_cursor::HashedPostStateCursorFactory, proof::{Proof, StorageProof}, trie_cursor::InMemoryTrieCursorFactory, - updates::TrieUpdates, + updates::{TrieUpdates, TrieUpdatesSorted}, witness::TrieWitness, - AccountProof, ExecutionWitnessMode, HashedPostState, HashedStorage, KeccakKeyHasher, - MultiProof, MultiProofTargets, StateRoot, StorageMultiProof, StorageRoot, TrieInput, - TrieInputSorted, + AccountProof, ExecutionWitnessMode, HashedPostState, HashedPostStateSorted, HashedStorage, + KeccakKeyHasher, MultiProof, MultiProofTargets, StateRoot, StorageMultiProof, StorageRoot, + TrieInput, TrieInputSorted, }; use reth_trie_db::{ ChangesetCache, DatabaseProof, DatabaseStateRoot, DatabaseStorageProof, 
DatabaseStorageRoot, @@ -314,7 +314,11 @@ where let Overlay { trie_updates, hashed_post_state } = overlay_builder.build_overlay(self.provider)?; - Ok(TrieInputSorted::new(trie_updates, hashed_post_state, prefix_sets)) + Ok(TrieInputSorted::new( + TrieUpdatesSorted::merge_batch(trie_updates), + HashedPostStateSorted::merge_batch(hashed_post_state), + prefix_sets, + )) } /// Set the lowest block number at which the account history is available. @@ -616,11 +620,11 @@ where let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - nodes.as_ref(), + [nodes.as_ref()], ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - state.as_ref(), + [state.as_ref()], ), ) .with_prefix_sets_mut(prefix_sets) diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 6f029d89c9d..3908a41c454 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -231,11 +231,11 @@ impl StateProofProvider let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - &nodes_sorted, + [&nodes_sorted], ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - &state_sorted, + [&state_sorted], ), ) .with_prefix_sets_mut(input.prefix_sets) diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 1d45351f0a8..c08b887759a 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -57,8 +57,8 @@ pub(crate) struct OverlayStateProviderMetrics { /// Contains all fields required to initialize an [`OverlayStateProvider`]. 
#[derive(Debug, Clone)] pub(super) struct Overlay { - pub(super) trie_updates: Arc, - pub(super) hashed_post_state: Arc, + pub(super) trie_updates: Vec>, + pub(super) hashed_post_state: Vec>, } /// Source of overlay data for [`OverlayStateProviderFactory`]. @@ -175,24 +175,19 @@ impl OverlayBuilder { fn resolve_overlays( &self, anchor_hash: BlockHash, - ) -> ProviderResult<(Arc, Arc)> { + ) -> ProviderResult<(Vec>, Vec>)> { match &self.overlay_source { Some(OverlaySource::Managed { manager, state }) => { let (trie, mut overlay_state) = if anchor_hash == self.parent_hash { - ( - Arc::new(TrieUpdatesSorted::default()), - Arc::new(HashedPostStateSorted::default()), - ) + (Vec::new(), Vec::new()) } else { manager .overlay_for_parent(self.parent_hash, anchor_hash) .map_err(ProviderError::other)? }; - if overlay_state.is_empty() { - overlay_state = Arc::clone(state); - } else if !state.is_empty() { - Arc::make_mut(&mut overlay_state).extend_ref_and_sort(state); + if !state.is_empty() { + overlay_state.insert(0, Arc::clone(state)); } Ok((trie, overlay_state)) @@ -204,12 +199,11 @@ impl OverlayBuilder { self.parent_hash )))) } - Ok((Arc::clone(trie), Arc::clone(state))) + let trie = (!trie.is_empty()).then(|| Arc::clone(trie)).into_iter().collect(); + let state = (!state.is_empty()).then(|| Arc::clone(state)).into_iter().collect(); + Ok((trie, state)) } - None => Ok(( - Arc::new(TrieUpdatesSorted::default()), - Arc::new(HashedPostStateSorted::default()), - )), + None => Ok((Vec::new(), Vec::new())), } } @@ -333,7 +327,7 @@ impl OverlayBuilder { ); // Collect trie reverts using changeset cache - let mut trie_reverts = { + let trie_reverts = { let _guard = debug_span!(target: "providers::state::overlay", "retrieving_trie_reverts") .entered(); @@ -350,7 +344,7 @@ impl OverlayBuilder { }; // Collect state reverts - let mut hashed_state_reverts = { + let hashed_state_reverts = { let _guard = debug_span!(target: "providers::state::overlay", 
"retrieving_hashed_state_reverts").entered(); let start = Instant::now(); @@ -365,24 +359,24 @@ impl OverlayBuilder { let trie_updates = if trie_reverts.is_empty() { overlay_trie - } else if !overlay_trie.is_empty() { - trie_reverts.extend_ref_and_sort(&overlay_trie); - Arc::new(trie_reverts) } else { - Arc::new(trie_reverts) + let mut trie_updates = overlay_trie; + trie_updates.push(Arc::new(trie_reverts)); + trie_updates }; let hashed_state_updates = if hashed_state_reverts.is_empty() { overlay_state - } else if !overlay_state.is_empty() { - hashed_state_reverts.extend_ref_and_sort(&overlay_state); - Arc::new(hashed_state_reverts) } else { - Arc::new(hashed_state_reverts) + let mut hashed_state_updates = overlay_state; + hashed_state_updates.push(Arc::new(hashed_state_reverts)); + hashed_state_updates }; - trie_updates_total_len = trie_updates.total_len(); - hashed_state_updates_total_len = hashed_state_updates.total_len(); + trie_updates_total_len = + trie_updates.iter().map(|updates| updates.total_len()).sum::(); + hashed_state_updates_total_len = + hashed_state_updates.iter().map(|state| state.total_len()).sum::(); debug!( target: "providers::state::overlay", @@ -398,8 +392,10 @@ impl OverlayBuilder { retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; - trie_updates_total_len = trie_updates.total_len(); - hashed_state_updates_total_len = hashed_state.total_len(); + trie_updates_total_len = + trie_updates.iter().map(|updates| updates.total_len()).sum::(); + hashed_state_updates_total_len = + hashed_state.iter().map(|state| state.total_len()).sum::(); (trie_updates, hashed_state) }; @@ -544,8 +540,8 @@ where #[derive(Debug)] pub struct OverlayStateProvider { provider: Provider, - trie_updates: Arc, - hashed_post_state: Arc, + trie_updates: Vec>, + hashed_post_state: Vec>, is_v2: bool, } @@ -555,10 +551,10 @@ where { /// Create new overlay state provider. 
The `Provider` must be cloneable, which generally means /// it should be wrapped in an `Arc`. - pub const fn new( + pub fn new( provider: Provider, - trie_updates: Arc, - hashed_post_state: Arc, + trie_updates: Vec>, + hashed_post_state: Vec>, is_v2: bool, ) -> Self { Self { provider, trie_updates, hashed_post_state, is_v2 } @@ -582,7 +578,6 @@ where fn account_trie_cursor(&self) -> Result, DatabaseError> { let tx = self.provider.tx_ref(); - let trie_updates = self.trie_updates.as_ref(); let cursor: Box = if self.is_v2 { Box::new(DatabaseAccountTrieCursor::<_, PackedKeyAdapter>::new( tx.cursor_read::()?, @@ -592,7 +587,7 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account(cursor, trie_updates)) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.iter().map(Arc::as_ref))) } fn storage_trie_cursor( @@ -600,7 +595,6 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let tx = self.provider.tx_ref(); - let trie_updates = self.trie_updates.as_ref(); let cursor: Box = if self.is_v2 { Box::new(DatabaseStorageTrieCursor::<_, PackedKeyAdapter>::new( tx.cursor_dup_read::()?, @@ -612,7 +606,11 @@ where hashed_address, )) }; - Ok(InMemoryTrieCursor::new_storage(cursor, trie_updates, hashed_address)) + Ok(InMemoryTrieCursor::new_storage( + cursor, + self.trie_updates.iter().map(Arc::as_ref), + hashed_address, + )) } } @@ -622,24 +620,27 @@ where { type AccountCursor<'a> = , - &'a Arc, + Vec<&'a HashedPostStateSorted>, > as HashedCursorFactory>::AccountCursor<'a> where Self: 'a; type StorageCursor<'a> = , - &'a Arc, + Vec<&'a HashedPostStateSorted>, > as HashedCursorFactory>::StorageCursor<'a> where Self: 'a; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); + let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); let hashed_cursor_factory = - HashedPostStateCursorFactory::new(db_hashed_cursor_factory, 
&self.hashed_post_state); + HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); hashed_cursor_factory.hashed_account_cursor() } @@ -648,8 +649,9 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); + let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); let hashed_cursor_factory = - HashedPostStateCursorFactory::new(db_hashed_cursor_factory, &self.hashed_post_state); + HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); hashed_cursor_factory.hashed_storage_cursor(hashed_address) } } diff --git a/crates/trie/db/src/changesets.rs b/crates/trie/db/src/changesets.rs index b40bd16462c..e8ab5eb31b7 100644 --- a/crates/trie/db/src/changesets.rs +++ b/crates/trie/db/src/changesets.rs @@ -156,7 +156,7 @@ where // Create an overlay cursor factory that has the trie state from after block-1 let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()); let overlay_factory = - InMemoryTrieCursorFactory::new(db_cursor_factory, &cumulative_trie_updates_prev); + InMemoryTrieCursorFactory::new(db_cursor_factory, [&cumulative_trie_updates_prev]); let changesets = compute_trie_changesets(&overlay_factory, &trie_updates).map_err(ProviderError::other)?; @@ -262,7 +262,7 @@ where // Step 4: Create an InMemoryTrieCursorFactory with the reverts // This gives us the trie state as it was after the target block was processed let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(tx); - let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &reverts); + let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, [&reverts]); // Step 5: Collect all account trie nodes that changed in the target block let account_nodes_ref = changesets.account_nodes_ref(); diff --git a/crates/trie/db/src/proof.rs b/crates/trie/db/src/proof.rs index b7db69f50eb..f44dc19cc03 
100644 --- a/crates/trie/db/src/proof.rs +++ b/crates/trie/db/src/proof.rs @@ -51,8 +51,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> let nodes_sorted = input.nodes.into_sorted(); let state_sorted = input.state.into_sorted(); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_sorted), - HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_sorted), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), + HashedPostStateCursorFactory::new( + self.hashed_cursor_factory().clone(), + [&state_sorted], + ), ) .with_prefix_sets_mut(input.prefix_sets) .account_proof(address, slots) @@ -66,8 +69,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> let nodes_sorted = input.nodes.into_sorted(); let state_sorted = input.state.into_sorted(); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_sorted), - HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_sorted), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), + HashedPostStateCursorFactory::new( + self.hashed_cursor_factory().clone(), + [&state_sorted], + ), ) .with_prefix_sets_mut(input.prefix_sets) .multiproof(targets) @@ -125,7 +131,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> ); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, ) .with_prefix_set_mut(prefix_set) @@ -147,7 +156,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> ); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted), + HashedPostStateCursorFactory::new( + 
DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, ) .with_prefix_set_mut(prefix_set) diff --git a/crates/trie/db/src/state.rs b/crates/trie/db/src/state.rs index daf42b1a8cc..0a618b30f4e 100644 --- a/crates/trie/db/src/state.rs +++ b/crates/trie/db/src/state.rs @@ -210,7 +210,7 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> let prefix_sets = post_state.construct_prefix_sets().freeze(); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), post_state), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), ) .with_prefix_sets(prefix_sets) .root() @@ -223,7 +223,7 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> let prefix_sets = post_state.construct_prefix_sets().freeze(); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), post_state), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), ) .with_prefix_sets(prefix_sets) .root_with_updates() @@ -233,11 +233,11 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - input.nodes.as_ref(), + [input.nodes.as_ref()], ), HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(tx), - input.state.as_ref(), + [input.state.as_ref()], ), ) .with_prefix_sets(input.prefix_sets.freeze()) @@ -251,11 +251,11 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - input.nodes.as_ref(), + [input.nodes.as_ref()], ), HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(tx), - input.state.as_ref(), + [input.state.as_ref()], ), ) .with_prefix_sets(input.prefix_sets.freeze()) diff 
--git a/crates/trie/db/src/storage.rs b/crates/trie/db/src/storage.rs index e82f883655d..6e3edd42eae 100644 --- a/crates/trie/db/src/storage.rs +++ b/crates/trie/db/src/storage.rs @@ -92,7 +92,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageRoot<'a, TX> HashedPostState::from_hashed_storage(keccak256(address), hashed_storage).into_sorted(); StorageRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, prefix_set, #[cfg(feature = "metrics")] diff --git a/crates/trie/db/tests/fuzz_in_memory_nodes.rs b/crates/trie/db/tests/fuzz_in_memory_nodes.rs index 229b01e3464..dd58f5f3967 100644 --- a/crates/trie/db/tests/fuzz_in_memory_nodes.rs +++ b/crates/trie/db/tests/fuzz_in_memory_nodes.rs @@ -68,8 +68,9 @@ proptest! { let (state_root, trie_updates) = DbStateRoot::<_, A>::from_tx(provider.tx_ref()) .with_prefix_sets(hashed_state.construct_prefix_sets().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( - DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), &trie_nodes.clone().into_sorted()) - ) + DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), + [&trie_nodes.clone().into_sorted()], + )) .root_with_updates() .unwrap(); @@ -126,7 +127,7 @@ proptest! 
{ .with_prefix_set(hashed_storage.construct_prefix_set().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - &trie_nodes.into_sorted(), + [&trie_nodes.into_sorted()], )) .root_with_updates() .unwrap(); diff --git a/crates/trie/db/tests/post_state.rs b/crates/trie/db/tests/post_state.rs index d3f8fe36484..a3ee272d05a 100644 --- a/crates/trie/db/tests/post_state.rs +++ b/crates/trie/db/tests/post_state.rs @@ -66,7 +66,8 @@ fn post_state_only_accounts() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -87,7 +88,7 @@ fn db_only_accounts() { let tx = db.tx().unwrap(); let factory = HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(&tx), - &sorted_post_state, + [&sorted_post_state], ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -113,7 +114,8 @@ fn account_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -143,7 +145,8 @@ fn removed_accounts_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); assert_account_cursor_order(&factory, expected); } @@ 
-170,7 +173,8 @@ fn post_state_accounts_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -202,7 +206,7 @@ fn fuzz_hashed_account_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, expected.into_iter()); } ); @@ -230,7 +234,7 @@ fn storage_is_empty() { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -251,7 +255,7 @@ fn storage_is_empty() { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -268,7 +272,7 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = 
factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -284,7 +288,7 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -301,7 +305,7 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -318,7 +322,7 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -354,7 +358,8 @@ fn storage_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, db_storage.into_iter().chain(post_state_storage).collect())); assert_storage_cursor_order(&factory, expected); @@ -394,7 +399,8 @@ fn zero_value_storage_entries_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = 
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once(( address, post_state_storage.into_iter().filter(|(_, value)| *value > U256::ZERO).collect(), @@ -431,7 +437,8 @@ fn wiped_storage_is_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, post_state_storage)); assert_storage_cursor_order(&factory, expected); } @@ -466,7 +473,8 @@ fn post_state_storages_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, storage)); assert_storage_cursor_order(&factory, expected); } @@ -513,7 +521,7 @@ fn fuzz_hashed_storage_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_storage_cursor_order(&factory, expected.into_iter()); }); } @@ -560,7 +568,8 @@ fn all_storage_slots_deleted_not_wiped_exact_keys() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = 
factory.hashed_storage_cursor(address).unwrap(); diff --git a/crates/trie/sparse/src/parallel.rs b/crates/trie/sparse/src/parallel.rs index 71f5c88e215..7539a490c7b 100644 --- a/crates/trie/sparse/src/parallel.rs +++ b/crates/trie/sparse/src/parallel.rs @@ -3732,7 +3732,7 @@ mod tests { walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - &hashed_post_state, + [&hashed_post_state], ), ); diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 3a821d68a66..b235d1f5a7d 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,28 +1,29 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::forward_cursor::ForwardInMemoryCursor; use alloy_primitives::{B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; +use std::marker::PhantomData; /// The hashed cursor factory for the post state. #[derive(Clone, Debug)] -pub struct HashedPostStateCursorFactory { +pub struct HashedPostStateCursorFactory<'overlay, CF, T> { cursor_factory: CF, post_state: T, + _marker: PhantomData<&'overlay HashedPostStateSorted>, } -impl HashedPostStateCursorFactory { +impl<'overlay, CF, T> HashedPostStateCursorFactory<'overlay, CF, T> { /// Create a new factory. 
pub const fn new(cursor_factory: CF, post_state: T) -> Self { - Self { cursor_factory, post_state } + Self { cursor_factory, post_state, _marker: PhantomData } } } -impl<'overlay, CF, T> HashedCursorFactory for HashedPostStateCursorFactory +impl<'overlay, CF, T> HashedCursorFactory for HashedPostStateCursorFactory<'overlay, CF, T> where - CF: HashedCursorFactory, - T: AsRef, + CF: HashedCursorFactory + 'overlay, + T: AsRef<[&'overlay HashedPostStateSorted]>, { type AccountCursor<'cursor> = HashedPostStateCursor<'overlay, CF::AccountCursor<'cursor>, Option> @@ -35,16 +36,19 @@ where fn hashed_account_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref())) + Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref().iter().copied())) } fn hashed_storage_cursor( &self, hashed_address: B256, ) -> Result, DatabaseError> { - let post_state = self.post_state.as_ref(); let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage(cursor, post_state, hashed_address)) + Ok(HashedPostStateCursor::new_storage( + cursor, + self.post_state.as_ref().iter().copied(), + hashed_address, + )) } } @@ -84,7 +88,7 @@ impl HashedPostStateCursorValue for U256 { } /// A cursor to iterate over state updates and corresponding database entries. -/// It will always give precedence to the data from the post state updates. +/// It will always give precedence to earlier post state overlays. #[derive(Debug)] pub struct HashedPostStateCursor<'a, C, V> where @@ -92,10 +96,12 @@ where { /// The underlying cursor. cursor: C, - /// Tracks whether the DB cursor is available, positioned, or exhausted. + /// The current DB cursor state. db_cursor_state: DbCursorState, - /// Forward-only in-memory cursor over underlying V. 
- post_state_cursor: ForwardInMemoryCursor<'a, B256, V>, + /// In-memory cursors over post state overlays. + post_state_cursor: PostStateOverlayCursor<'a, V>, + /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. + deferred_overlay_seek_start: Option, /// The last hashed key that was returned by the cursor. /// De facto, this is a current cursor position. last_key: Option, @@ -103,14 +109,12 @@ where /// Tracks whether `seek` has been called. seeked: bool, /// Reference to the full post state. - post_state: &'a HashedPostStateSorted, + post_states: Vec<&'a HashedPostStateSorted>, } #[derive(Debug)] enum DbCursorState { - NeedsPosition, - Positioned((B256, V)), - Exhausted, + Active(Option<(B256, V)>), Wiped, } @@ -119,22 +123,160 @@ impl DbCursorState { if cursor_wiped { Self::Wiped } else { - Self::NeedsPosition + Self::Active(None) } } + const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + const fn entry(&self) -> Option<&(B256, V)> { match self { - Self::Positioned(entry) => Some(entry), - Self::NeedsPosition | Self::Exhausted | Self::Wiped => None, + Self::Active(entry) => entry.as_ref(), + Self::Wiped => None, } } fn set_entry(&mut self, entry: Option<(B256, V)>) { - *self = match entry { - Some(entry) => Self::Positioned(entry), - None => Self::Exhausted, - }; + if let Self::Active(current) = self { + *current = entry; + } + } +} + +#[derive(Clone, Debug)] +struct PostStateOverlayCursor<'a, V> { + cursors: Vec>, +} + +impl<'a> PostStateOverlayCursor<'a, Option> { + fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { + Self { + cursors: post_states + .iter() + .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) + .collect(), + } + } +} + +impl<'a> PostStateOverlayCursor<'a, U256> { + fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { + let mut cursors = Vec::new(); + let mut db_wiped = false; + + for post_state in post_states 
{ + if let Some(storage) = post_state.storages.get(&hashed_address) { + cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); + if storage.is_wiped() { + db_wiped = true; + break; + } + } + } + + (Self { cursors }, db_wiped) + } +} + +impl<'a, V> PostStateOverlayCursor<'a, V> +where + V: HashedPostStateCursorValue, +{ + fn seek_from(&mut self, start: usize, key: &B256) { + for cursor in self.cursors.iter_mut().skip(start) { + cursor.seek(key); + } + } + + fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { + for (idx, cursor) in self.cursors.iter_mut().enumerate() { + if let Some((cursor_key, value)) = cursor.seek(key) && + cursor_key == key + { + return Some((idx, value.into_option())) + } + } + None + } + + fn first_after(&mut self, key: &B256) { + for cursor in &mut self.cursors { + cursor.first_after(key); + } + } + + fn reset(&mut self) { + for cursor in &mut self.cursors { + cursor.reset(); + } + } + + fn min_current_key(&self) -> Option { + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + } + + fn highest_priority_value_at(&self, key: &B256) -> Option> { + self.cursors.iter().find_map(|cursor| { + let (cursor_key, value) = cursor.current()?; + (cursor_key == key).then(|| value.into_option()) + }) + } + + fn advance_key(&mut self, key: &B256) { + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { + cursor.first_after(key); + } + } + } + + fn has_visible_value(&self) -> bool { + let mut cursor = self.clone(); + cursor.reset(); + while let Some(key) = cursor.min_current_key() { + if cursor.highest_priority_value_at(&key).flatten().is_some() { + return true + } + cursor.advance_key(&key); + } + false + } +} + +#[derive(Clone, Debug)] +struct SeekablePostStateCursor<'a, V> { + entries: &'a [(B256, V)], + idx: usize, +} + +impl<'a, V> SeekablePostStateCursor<'a, V> { + const fn new(entries: &'a [(B256, V)]) -> Self { + Self { 
entries, idx: 0 } + } + + fn current(&self) -> Option<&'a (B256, V)> { + self.entries.get(self.idx) + } + + const fn reset(&mut self) { + self.idx = 0; + } + + fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { + self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); + self.current() + } + + fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { + if self.current().is_some_and(|(entry_key, _)| entry_key > key) { + return self.current() + } + + let remaining = &self.entries[self.idx..]; + self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); + self.current() } } @@ -143,16 +285,21 @@ where C: HashedCursor, { /// Create new account cursor which combines a DB cursor and the post state. - pub fn new_account(cursor: C, post_state: &'a HashedPostStateSorted) -> Self { - let post_state_cursor = ForwardInMemoryCursor::new(&post_state.accounts); + pub fn new_account( + cursor: C, + post_states: impl IntoIterator, + ) -> Self { + let post_states = post_states.into_iter().collect::>(); + let post_state_cursor = PostStateOverlayCursor::account(&post_states); Self { cursor, - db_cursor_state: DbCursorState::NeedsPosition, + db_cursor_state: DbCursorState::new(false), post_state_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, - post_state, + post_states, } } } @@ -165,32 +312,30 @@ where /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
pub fn new_storage( cursor: C, - post_state: &'a HashedPostStateSorted, + post_states: impl IntoIterator, hashed_address: B256, ) -> Self { + let post_states = post_states.into_iter().collect::>(); let (post_state_cursor, cursor_wiped) = - Self::get_storage_overlay(post_state, hashed_address); + Self::get_storage_overlay(&post_states, hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(cursor_wiped), post_state_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, - post_state, + post_states, } } /// Returns the storage overlay for `hashed_address` and whether it was wiped. fn get_storage_overlay( - post_state: &'a HashedPostStateSorted, + post_states: &[&'a HashedPostStateSorted], hashed_address: B256, - ) -> (ForwardInMemoryCursor<'a, B256, U256>, bool) { - let post_state_storage = post_state.storages.get(&hashed_address); - let cursor_wiped = post_state_storage.is_some_and(|u| u.is_wiped()); - let storage_slots = post_state_storage.map(|u| u.storage_slots_ref()).unwrap_or(&[]); - - (ForwardInMemoryCursor::new(storage_slots), cursor_wiped) + ) -> (PostStateOverlayCursor<'a, U256>, bool) { + PostStateOverlayCursor::storage(post_states, hashed_address) } } @@ -201,38 +346,26 @@ where { /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. fn get_cursor_mut(&mut self) -> Option<&mut C> { - (!matches!(self.db_cursor_state, DbCursorState::Wiped)).then_some(&mut self.cursor) + (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) } - /// Asserts that the next entry to be returned from the cursor is not previous to the last entry - /// returned. 
fn set_last_key(&mut self, next_entry: &Option<(B256, V::NonZero)>) { - let next_key = next_entry.as_ref().map(|e| e.0); - debug_assert!( - self.last_key.is_none_or(|last| next_key.is_none_or(|next| next >= last)), - "Cannot return entry {:?} previous to the last returned entry at {:?}", - next_key, - self.last_key, - ); - self.last_key = next_key; + self.last_key = next_entry.as_ref().map(|e| e.0); } - /// Positions the DB cursor state using the underlying cursor when needed. + /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { - // Only seek if: - // 1. We have a cursor entry and need to seek forward (entry.0 < key), OR - // 2. The DB cursor needs to be positioned. - let should_seek = match &self.db_cursor_state { - DbCursorState::NeedsPosition => true, - DbCursorState::Positioned((entry_key, _)) => entry_key < &key, - DbCursorState::Exhausted | DbCursorState::Wiped => false, - }; + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + Ok(()) + } - if should_seek { - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); + /// Positions the DB cursor at the first entry after `key`. + fn cursor_first_after(&mut self, key: B256) -> Result<(), DatabaseError> { + self.cursor_seek(key)?; + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { + self.cursor_next()?; } - Ok(()) } @@ -243,53 +376,35 @@ where debug_assert!(self.seeked); } - // Exhausted DB state is stable; only advance when the DB cursor is positioned at an entry. 
- if matches!(self.db_cursor_state, DbCursorState::Positioned(_)) { - let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); - } + let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); Ok(()) } - /// Compares the current in-memory entry with the current entry of the cursor, and applies the - /// in-memory entry to the cursor entry as an overlay. - /// - /// This may consume and move forward the current entries when the overlay indicates a removed - /// node. + /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let post_state_current = - self.post_state_cursor.current().copied().map(|(k, v)| (k, v.into_option())); - let db_entry = self.db_cursor_state.entry(); - - match (post_state_current, db_entry) { - (Some((mem_key, None)), _) - if db_entry.is_none_or(|(db_key, _)| &mem_key < db_key) => - { - // If overlay has a removed value but DB cursor is exhausted or ahead of the - // in-memory cursor then move ahead in-memory, as there might be further - // non-removed overlay values. - self.post_state_cursor.first_after(&mem_key); + let mem_key = self.post_state_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); + }; + + if let Some(mem_value) = self.post_state_cursor.highest_priority_value_at(&next_key) { + if let Some(value) = mem_value { + return Ok(Some((next_key, value))) } - (Some((mem_key, None)), Some((db_key, _))) if &mem_key == db_key => { - // If overlay has a removed value which is returned from DB then move both - // cursors ahead to the next key. 
- self.post_state_cursor.first_after(&mem_key); + + self.post_state_cursor.advance_key(&next_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } - (Some((mem_key, Some(value))), _) - if db_entry.is_none_or(|(db_key, _)| &mem_key <= db_key) => - { - // If overlay returns a value prior to the DB's value, or the DB is exhausted, - // then we return the overlay's value. - return Ok(Some((mem_key, value))) - } - // All other cases: - // - mem_key > db_key - // - overlay is exhausted - // Return the db_entry. If DB is also exhausted then this returns None. - _ => return Ok(db_entry.copied()), + continue; + } + + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + return Ok(self.db_cursor_state.entry().copied()) } } } @@ -311,36 +426,27 @@ where /// The returned account key is memoized and the cursor remains positioned at that key until /// [`HashedCursor::seek`] or [`HashedCursor::next`] are called. fn seek(&mut self, key: B256) -> Result, DatabaseError> { - let post_state_entry = - self.post_state_cursor.seek(&key).copied().map(|(k, v)| (k, v.into_option())); - - if let Some((mem_key, Some(value))) = post_state_entry && - mem_key == key + #[cfg(debug_assertions)] { - #[cfg(debug_assertions)] - { - self.seeked = true; - } + self.seeked = true; + } - // An exact overlay hit is the first logical entry at or after `key`, so the DB cursor - // can stay lazy until a later operation needs it. 
- if matches!(&self.db_cursor_state, DbCursorState::Positioned((db_key, _)) if db_key < &key) - { - self.db_cursor_state = DbCursorState::NeedsPosition; + self.deferred_overlay_seek_start = None; + match self.post_state_cursor.seek_until_exact(&key) { + Some((idx, Some(value))) => { + let entry = Some((key, value)); + self.deferred_overlay_seek_start = Some(idx + 1); + self.set_last_key(&entry); + return Ok(entry) } - - let entry = Some((key, value)); - self.set_last_key(&entry); - return Ok(entry) + Some((idx, None)) => { + self.post_state_cursor.seek_from(idx + 1, &key); + } + None => {} } self.cursor_seek(key)?; - #[cfg(debug_assertions)] - { - self.seeked = true; - } - let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) @@ -363,22 +469,15 @@ where return Ok(None); }; - // If either cursor is currently pointing to the last entry which was returned then consume - // that entry so that `choose_next_entry` is looking at the subsequent one. - if let Some((key, _)) = self.post_state_cursor.current() && - key == &last_key - { - self.post_state_cursor.first_after(&last_key); - } - - if matches!(self.db_cursor_state, DbCursorState::NeedsPosition) { - self.cursor_seek(last_key)?; + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.post_state_cursor.seek_from(start, &last_key); } + self.post_state_cursor.first_after(&last_key); - if let Some((key, _)) = self.db_cursor_state.entry() && - key == &last_key - { + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { self.cursor_next()?; + } else { + self.cursor_first_after(last_key)?; } let entry = self.choose_next_entry()?; @@ -387,13 +486,12 @@ where } fn reset(&mut self) { - let Self { cursor, db_cursor_state, post_state_cursor, last_key, .. 
} = self; - - cursor.reset(); - post_state_cursor.reset(); + self.cursor.reset(); + self.post_state_cursor.reset(); - *db_cursor_state = DbCursorState::NeedsPosition; - *last_key = None; + self.db_cursor_state.set_entry(None); + self.deferred_overlay_seek_start = None; + self.last_key = None; #[cfg(debug_assertions)] { self.seeked = false; @@ -413,7 +511,7 @@ where /// [`HashedCursor::next`]. fn is_storage_empty(&mut self) -> Result { // Storage is not empty if it has non-zero slots. - if self.post_state_cursor.has_any(|(_, value)| !value.is_zero()) { + if self.post_state_cursor.has_visible_value() { return Ok(false); } @@ -426,7 +524,10 @@ where self.reset(); self.cursor.set_hashed_address(hashed_address); let (post_state_cursor, cursor_wiped) = - HashedPostStateCursor::::get_storage_overlay(self.post_state, hashed_address); + HashedPostStateCursor::::get_storage_overlay( + &self.post_states, + hashed_address, + ); self.post_state_cursor = post_state_cursor; self.db_cursor_state = DbCursorState::new(cursor_wiped); } @@ -444,7 +545,14 @@ mod tests { } fn storage_post_state(storage_slots: Vec<(B256, U256)>) -> HashedPostStateSorted { - let storage_sorted = reth_trie_common::HashedStorageSorted { storage_slots, wiped: false }; + storage_post_state_with_wipe(storage_slots, false) + } + + fn storage_post_state_with_wipe( + storage_slots: Vec<(B256, U256)>, + wiped: bool, + ) -> HashedPostStateSorted { + let storage_sorted = reth_trie_common::HashedStorageSorted { storage_slots, wiped }; let mut storages = alloy_primitives::map::B256Map::default(); storages.insert(B256::ZERO, storage_sorted); HashedPostStateSorted::new(Vec::new(), storages) @@ -461,7 +569,7 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &post_state, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, 
[&post_state], B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x02), U256::from(42)))); @@ -483,7 +591,7 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &post_state, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); let result = cursor.seek(key(0x01)).unwrap(); assert_eq!(result, Some((key(0x01), U256::from(1)))); @@ -508,69 +616,206 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &post_state, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x03), U256::from(3)))); assert!(!visited_keys.lock().is_empty(), "exact overlay deletion should consult the DB"); } + #[test] + fn test_seek_overlay_exact_hit_does_not_seek_lower_overlays_or_db() { + let db_nodes = BTreeMap::from([(key(0x06), U256::from(6))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let higher_priority = + storage_post_state(vec![(key(0x01), U256::from(1)), (key(0x09), U256::from(9))]); + let exact_hit = storage_post_state(vec![(key(0x05), U256::from(5))]); + let lower_priority = + storage_post_state(vec![(key(0x01), U256::from(10)), (key(0x07), U256::from(7))]); + let mut cursor = HashedPostStateCursor::new_storage( + mock_cursor, + [&higher_priority, &exact_hit, &lower_priority], + B256::ZERO, + ); + + let result = cursor.seek(key(0x05)).unwrap(); + assert_eq!(result, Some((key(0x05), U256::from(5)))); + 
assert_eq!(cursor.post_state_cursor.cursors[0].idx, 1); + assert_eq!(cursor.post_state_cursor.cursors[1].idx, 0); + assert_eq!( + cursor.post_state_cursor.cursors[2].idx, 0, + "lower-priority overlay should not be sought after an exact overlay hit" + ); + assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); + + let result = cursor.next().unwrap(); + assert_eq!(result, Some((key(0x06), U256::from(6)))); + assert!(!visited_keys.lock().is_empty(), "next should lazily position the DB cursor"); + } + + #[test] + fn test_seek_can_move_backwards() { + let db_nodes = BTreeMap::from([(key(0x01), U256::from(1)), (key(0x03), U256::from(3))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + + let post_state = storage_post_state(vec![(key(0x02), U256::from(2))]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + + assert_eq!(cursor.seek(key(0x03)).unwrap(), Some((key(0x03), U256::from(3)))); + assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), U256::from(1)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); + } + + #[test] + fn test_multiple_overlays_resolve_by_precedence() { + let db_nodes = BTreeMap::from([ + (key(0x01), U256::from(1)), + (key(0x02), U256::from(2)), + (key(0x04), U256::from(4)), + ]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + + let newest = storage_post_state(vec![(key(0x02), U256::ZERO), (key(0x03), U256::from(30))]); + let oldest = storage_post_state(vec![ + (key(0x01), U256::from(10)), + (key(0x02), U256::from(20)), + (key(0x03), U256::from(3)), + ]); + let mut cursor = + HashedPostStateCursor::new_storage(mock_cursor, [&newest, &oldest], B256::ZERO); + + let mut results = Vec::new(); + if let 
Some(entry) = cursor.seek(B256::ZERO).unwrap() { + results.push(entry); + while let Some(entry) = cursor.next().unwrap() { + results.push(entry); + } + } + + assert_eq!( + results, + vec![ + (key(0x01), U256::from(10)), + (key(0x03), U256::from(30)), + (key(0x04), U256::from(4)), + ] + ); + } + + #[test] + fn test_storage_wipe_overlay_hides_lower_precedence_sources() { + let db_nodes = BTreeMap::from([(key(0x04), U256::from(4))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + + let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); + let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); + let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); + let mut cursor = HashedPostStateCursor::new_storage( + mock_cursor, + [&newest, &wiping, &hidden], + B256::ZERO, + ); + + assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); + assert_eq!(cursor.next().unwrap(), None); + } + mod proptest_tests { use super::*; - use itertools::Itertools; use proptest::prelude::*; - /// Merge `db_nodes` with `post_state_nodes`, applying the post state overlay. - /// This properly handles deletions (ZERO values for U256, None for Account). 
- fn merge_with_overlay( - db_nodes: Vec<(B256, V::NonZero)>, - post_state_nodes: Vec<(B256, V)>, - ) -> Vec<(B256, V::NonZero)> - where - V: HashedPostStateCursorValue, - V::NonZero: Copy, - { - db_nodes - .into_iter() - .merge_join_by(post_state_nodes, |db_entry, mem_entry| db_entry.0.cmp(&mem_entry.0)) - .filter_map(|entry| match entry { - // Only in db: keep it - itertools::EitherOrBoth::Left((key, node)) => Some((key, node)), - // Only in post state: keep if not a deletion - itertools::EitherOrBoth::Right((key, wrapped)) => { - wrapped.into_option().map(|val| (key, val)) - } - // In both: post state takes precedence (keep if not a deletion) - itertools::EitherOrBoth::Both(_, (key, wrapped)) => { - wrapped.into_option().map(|val| (key, val)) + #[derive(Clone, Debug)] + enum CursorOp { + Next, + Seek(B256), + } + + /// Merge `db_nodes` with post-state overlays. Overlay index 0 has highest priority. + fn merge_with_overlays( + db_nodes: &[(B256, U256)], + overlays: &[Vec<(B256, U256)>], + ) -> Vec<(B256, U256)> { + let mut merged: BTreeMap = db_nodes.iter().cloned().collect(); + + for overlay in overlays.iter().rev() { + for (key, value) in overlay { + if value.is_zero() { + merged.remove(key); + } else { + merged.insert(*key, *value); } - }) - .collect() + } + } + + merged.into_iter().collect() + } + + fn reference_seek( + entries: &[(B256, U256)], + position: &mut Option, + key: B256, + ) -> Option<(B256, U256)> { + let idx = entries.partition_point(|(entry_key, _)| entry_key < &key); + if idx < entries.len() { + *position = Some(idx); + Some(entries[idx]) + } else { + *position = None; + None + } } - /// Generate a strategy for U256 values - fn u256_strategy() -> impl Strategy { - any::().prop_map(U256::from) + fn reference_next( + entries: &[(B256, U256)], + position: &mut Option, + ) -> Option<(B256, U256)> { + let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { + return None; + }; + + if next_idx < entries.len() { + *position = 
Some(next_idx); + Some(entries[next_idx]) + } else { + *position = None; + None + } + } + + fn nonzero_u256_strategy() -> impl Strategy { + any::().prop_map(|value| U256::from(value.saturating_add(1))) } /// Generate a sorted vector of (B256, U256) entries fn sorted_db_nodes_strategy() -> impl Strategy> { - prop::collection::vec((any::(), u256_strategy()), 0..20).prop_map(|entries| { - let mut result: Vec<(B256, U256)> = entries - .into_iter() - .map(|(byte, value)| (B256::repeat_byte(byte), value)) - .collect(); - result.sort_by_key(|a| a.0); - result.dedup_by(|a, b| a.0 == b.0); - result - }) + prop::collection::vec((any::(), nonzero_u256_strategy()), 0..20).prop_map( + |entries| { + let mut result: Vec<(B256, U256)> = entries + .into_iter() + .map(|(byte, value)| (B256::repeat_byte(byte), value)) + .collect(); + result.sort_by_key(|a| a.0); + result.dedup_by(|a, b| a.0 == b.0); + result + }, + ) } /// Generate a sorted vector of (B256, U256) entries (including deletions as ZERO) fn sorted_post_state_nodes_strategy() -> impl Strategy> { // Explicitly inject ZERO values to model post-state deletions. - prop::collection::vec((any::(), u256_strategy(), any::()), 0..20).prop_map( - |entries| { + prop::collection::vec((any::(), nonzero_u256_strategy(), any::()), 0..20) + .prop_map(|entries| { let mut result: Vec<(B256, U256)> = entries .into_iter() .map(|(byte, value, is_deletion)| { @@ -581,65 +826,60 @@ mod tests { result.sort_by_key(|a| a.0); result.dedup_by(|a, b| a.0 == b.0); result - }, + }) + } + + fn cursor_ops_strategy() -> impl Strategy> { + prop::collection::vec( + prop_oneof![ + Just(CursorOp::Next), + any::().prop_map(|byte| CursorOp::Seek(B256::repeat_byte(byte))), + ], + 10..500, ) } proptest! { - #![proptest_config(ProptestConfig::with_cases(1000))] - /// Tests `HashedPostStateCursor` produces identical results to a pre-merged cursor - /// across 1000 random scenarios. 
- /// - /// For random DB entries and post-state changes, creates two cursors: - /// - Control: pre-merged data (expected behavior) - /// - Test: `HashedPostStateCursor` (lazy overlay) - /// - /// Executes random sequences of `next()` and `seek()` operations, asserting - /// both cursors return identical results. - #[test] - fn proptest_hashed_post_state_cursor( + #![proptest_config(ProptestConfig::with_cases(10000))] + + /// Tests `HashedPostStateCursor` against a pre-merged reference cursor. + #[test] + fn proptest_hashed_post_state_cursor( db_nodes in sorted_db_nodes_strategy(), - post_state_nodes in sorted_post_state_nodes_strategy(), - op_choices in prop::collection::vec(any::(), 10..500), + overlays in prop::collection::vec(sorted_post_state_nodes_strategy(), 0..5), + ops in cursor_ops_strategy(), ) { reth_tracing::init_test_tracing(); use tracing::debug; - debug!("Starting proptest!"); - - // Create the expected results by merging the two sorted vectors, - // properly handling deletions (ZERO values in post_state_nodes) - let expected_combined = merge_with_overlay(db_nodes.clone(), post_state_nodes.clone()); - - // Collect all keys for operation generation - let all_keys: Vec = expected_combined.iter().map(|(k, _)| *k).collect(); + debug!( + db_keys=?db_nodes.iter().map(|(k, _)| k).collect::>(), + overlays=?overlays + .iter() + .map(|overlay| overlay.iter().map(|(k, v)| (k, !v.is_zero())).collect::>()) + .collect::>(), + num_ops=?ops.len(), + "Starting proptest!", + ); - // Create a control cursor using the combined result with a mock cursor - let control_db_map: BTreeMap = expected_combined.into_iter().collect(); - let control_db_arc = Arc::new(control_db_map); - let control_visited_keys = Arc::new(Mutex::new(Vec::new())); - let mut control_cursor = MockHashedCursor::new(control_db_arc, control_visited_keys); + let expected_combined = merge_with_overlays(&db_nodes, &overlays); + let mut reference_position = None; // Create the HashedPostStateCursor being 
tested - let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_map: BTreeMap = db_nodes.iter().cloned().collect(); let db_nodes_arc = Arc::new(db_nodes_map); let visited_keys = Arc::new(Mutex::new(Vec::new())); let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); - // Create a HashedPostStateSorted with the storage data let hashed_address = B256::ZERO; - let storage_sorted = reth_trie_common::HashedStorageSorted { - storage_slots: post_state_nodes, - wiped: false, - }; - let mut storages = alloy_primitives::map::B256Map::default(); - storages.insert(hashed_address, storage_sorted); - let post_state = HashedPostStateSorted::new(Vec::new(), storages); - - let mut test_cursor = HashedPostStateCursor::new_storage(mock_cursor, &post_state, hashed_address); + let post_states = + overlays.into_iter().map(storage_post_state).collect::>(); + let mut test_cursor = + HashedPostStateCursor::new_storage(mock_cursor, post_states.iter(), hashed_address); // Test: seek to the beginning first - let control_first = control_cursor.seek(B256::ZERO).unwrap(); + let control_first = + reference_seek(&expected_combined, &mut reference_position, B256::ZERO); let test_first = test_cursor.seek(B256::ZERO).unwrap(); debug!( control=?control_first.as_ref().map(|(k, _)| k), @@ -648,22 +888,12 @@ mod tests { ); assert_eq!(control_first, test_first, "Initial seek mismatch"); - // If both cursors returned None, nothing to test - if control_first.is_none() && test_first.is_none() { - return Ok(()); - } - - // Track the last key returned from the cursor - let mut last_returned_key = control_first.as_ref().map(|(k, _)| *k); - // Execute a sequence of random operations - for choice in op_choices { - let op_type = choice % 2; // Only 2 operation types: next and seek - - match op_type { - 0 => { - // Next operation - let control_result = control_cursor.next().unwrap(); + for op in ops { + match op { + CursorOp::Next => { + let control_result = + 
reference_next(&expected_combined, &mut reference_position); let test_result = test_cursor.next().unwrap(); debug!( control=?control_result.as_ref().map(|(k, _)| k), @@ -671,32 +901,10 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); - - last_returned_key = control_result.as_ref().map(|(k, _)| *k); - - // Stop if both cursors are exhausted - if control_result.is_none() && test_result.is_none() { - break; - } } - _ => { - // Seek operation - choose a key >= last_returned_key - if all_keys.is_empty() { - continue; - } - - let valid_keys: Vec<_> = all_keys - .iter() - .filter(|k| last_returned_key.is_none_or(|last| **k >= last)) - .collect(); - - if valid_keys.is_empty() { - continue; - } - - let key = *valid_keys[(choice as usize / 2) % valid_keys.len()]; - - let control_result = control_cursor.seek(key).unwrap(); + CursorOp::Seek(key) => { + let control_result = + reference_seek(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek(key).unwrap(); debug!( control=?control_result.as_ref().map(|(k, _)| k), @@ -705,13 +913,6 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); - - last_returned_key = control_result.as_ref().map(|(k, _)| *k); - - // Stop if both cursors are exhausted - if control_result.is_none() && test_result.is_none() { - break; - } } } } diff --git a/crates/trie/trie/src/node_iter.rs b/crates/trie/trie/src/node_iter.rs index 7d53bd4b6d4..45d26238984 100644 --- a/crates/trie/trie/src/node_iter.rs +++ b/crates/trie/trie/src/node_iter.rs @@ -354,7 +354,7 @@ mod tests { walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - &hashed_post_state, + [&hashed_post_state], ), ); diff --git a/crates/trie/trie/src/test_utils.rs b/crates/trie/trie/src/test_utils.rs index c1871a89970..8d3f2f6659f 100644 --- a/crates/trie/trie/src/test_utils.rs +++ b/crates/trie/trie/src/test_utils.rs @@ 
-126,7 +126,7 @@ impl TrieTestHarness { once((self.hashed_address(), hashed_storage.into_sorted())).collect(), ); let overlay_cursor_factory = - HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), &overlay); + HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), [&overlay]); let (root, _, updates) = StorageRoot::new_hashed( self.trie_cursor_factory.clone(), diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index d2316ac27a2..36ea3ac1764 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,29 +1,31 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; -use crate::{forward_cursor::ForwardInMemoryCursor, updates::TrieUpdatesSorted}; +use crate::updates::TrieUpdatesSorted; use alloy_primitives::B256; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; +use std::marker::PhantomData; /// The trie cursor factory for the trie updates. #[derive(Debug, Clone)] -pub struct InMemoryTrieCursorFactory { +pub struct InMemoryTrieCursorFactory<'overlay, CF, T> { /// Underlying trie cursor factory. cursor_factory: CF, /// Reference to sorted trie updates. trie_updates: T, + _marker: PhantomData<&'overlay TrieUpdatesSorted>, } -impl InMemoryTrieCursorFactory { +impl<'overlay, CF, T> InMemoryTrieCursorFactory<'overlay, CF, T> { /// Create a new trie cursor factory. 
pub const fn new(cursor_factory: CF, trie_updates: T) -> Self { - Self { cursor_factory, trie_updates } + Self { cursor_factory, trie_updates, _marker: PhantomData } } } -impl<'overlay, CF, T> TrieCursorFactory for InMemoryTrieCursorFactory +impl<'overlay, CF, T> TrieCursorFactory for InMemoryTrieCursorFactory<'overlay, CF, T> where CF: TrieCursorFactory + 'overlay, - T: AsRef, + T: AsRef<[&'overlay TrieUpdatesSorted]>, { type AccountTrieCursor<'cursor> = InMemoryTrieCursor<'overlay, CF::AccountTrieCursor<'cursor>> @@ -37,43 +39,46 @@ where fn account_trie_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.account_trie_cursor()?; - Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref())) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref().iter().copied())) } fn storage_trie_cursor( &self, hashed_address: B256, ) -> Result, DatabaseError> { - let trie_updates = self.trie_updates.as_ref(); let cursor = self.cursor_factory.storage_trie_cursor(hashed_address)?; - Ok(InMemoryTrieCursor::new_storage(cursor, trie_updates, hashed_address)) + Ok(InMemoryTrieCursor::new_storage( + cursor, + self.trie_updates.as_ref().iter().copied(), + hashed_address, + )) } } /// A cursor to iterate over trie updates and corresponding database entries. -/// It will always give precedence to the data from the trie updates. +/// It will always give precedence to earlier trie update overlays. #[derive(Debug)] pub struct InMemoryTrieCursor<'a, C> { /// The underlying cursor. cursor: C, - /// Tracks whether the DB cursor is available, positioned, or exhausted. + /// The current DB cursor state. db_cursor_state: DbCursorState, - /// Forward-only in-memory cursor over storage trie nodes. - in_memory_cursor: ForwardInMemoryCursor<'a, Nibbles, Option>, + /// In-memory cursors over trie update overlays. + in_memory_cursor: OverlayCursor<'a>, + /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. 
+ deferred_overlay_seek_start: Option, /// The key most recently returned from the Cursor. last_key: Option, #[cfg(debug_assertions)] /// Whether an initial seek was called. seeked: bool, /// Reference to the full trie updates. - trie_updates: &'a TrieUpdatesSorted, + trie_updates: Vec<&'a TrieUpdatesSorted>, } #[derive(Debug)] enum DbCursorState { - NeedsPosition, - Positioned((Nibbles, BranchNodeCompact)), - Exhausted, + Active(Option<(Nibbles, BranchNodeCompact)>), Wiped, } @@ -82,33 +87,157 @@ impl DbCursorState { if cursor_wiped { Self::Wiped } else { - Self::NeedsPosition + Self::Active(None) } } + const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + const fn entry(&self) -> Option<&(Nibbles, BranchNodeCompact)> { match self { - Self::Positioned(entry) => Some(entry), - Self::NeedsPosition | Self::Exhausted | Self::Wiped => None, + Self::Active(entry) => entry.as_ref(), + Self::Wiped => None, } } fn set_entry(&mut self, entry: Option<(Nibbles, BranchNodeCompact)>) { - *self = match entry { - Some(entry) => Self::Positioned(entry), - None => Self::Exhausted, - }; + if let Self::Active(current) = self { + *current = entry; + } + } +} + +#[derive(Debug)] +struct OverlayCursor<'a> { + cursors: Vec>, +} + +impl<'a> OverlayCursor<'a> { + fn account(trie_updates: &[&'a TrieUpdatesSorted]) -> Self { + Self { + cursors: trie_updates + .iter() + .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) + .collect(), + } + } + + fn storage(trie_updates: &[&'a TrieUpdatesSorted], hashed_address: B256) -> (Self, bool) { + let mut cursors = Vec::new(); + let mut db_wiped = false; + + for updates in trie_updates { + if let Some(storage) = updates.storage_tries_ref().get(&hashed_address) { + cursors.push(SeekableInMemoryCursor::new(storage.storage_nodes_ref())); + if storage.is_deleted() { + db_wiped = true; + break; + } + } + } + + (Self { cursors }, db_wiped) + } + + fn seek_from(&mut self, start: usize, key: &Nibbles) { + for cursor in 
self.cursors.iter_mut().skip(start) { + cursor.seek(key); + } + } + + fn seek_until_exact(&mut self, key: &Nibbles) -> Option<(usize, Option)> { + for (idx, cursor) in self.cursors.iter_mut().enumerate() { + if let Some((cursor_key, value)) = cursor.seek(key) && + cursor_key == key + { + return Some((idx, value.clone())) + } + } + None + } + + fn first_after(&mut self, key: &Nibbles) { + for cursor in &mut self.cursors { + cursor.first_after(key); + } + } + + fn reset(&mut self) { + for cursor in &mut self.cursors { + cursor.reset(); + } + } + + fn min_current_key(&self) -> Option { + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + } + + fn highest_priority_value_at(&self, key: &Nibbles) -> Option> { + self.cursors.iter().find_map(|cursor| { + let (cursor_key, value) = cursor.current()?; + (cursor_key == key).then(|| value.clone()) + }) + } + + fn advance_key(&mut self, key: &Nibbles) { + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { + cursor.first_after(key); + } + } + } +} + +#[derive(Debug)] +struct SeekableInMemoryCursor<'a> { + entries: &'a [(Nibbles, Option)], + idx: usize, +} + +impl<'a> SeekableInMemoryCursor<'a> { + const fn new(entries: &'a [(Nibbles, Option)]) -> Self { + Self { entries, idx: 0 } + } + + fn current(&self) -> Option<&'a (Nibbles, Option)> { + self.entries.get(self.idx) + } + + const fn reset(&mut self) { + self.idx = 0; + } + + fn seek(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { + self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); + self.current() + } + + fn first_after(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { + if self.current().is_some_and(|(entry_key, _)| entry_key > key) { + return self.current() + } + + let remaining = &self.entries[self.idx..]; + self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); + self.current() } } impl<'a, C: TrieCursor> 
InMemoryTrieCursor<'a, C> { /// Create new account trie cursor which combines a DB cursor and the trie updates. - pub fn new_account(cursor: C, trie_updates: &'a TrieUpdatesSorted) -> Self { - let in_memory_cursor = ForwardInMemoryCursor::new(trie_updates.account_nodes_ref()); + pub fn new_account( + cursor: C, + trie_updates: impl IntoIterator, + ) -> Self { + let trie_updates = trie_updates.into_iter().collect::>(); + let in_memory_cursor = OverlayCursor::account(&trie_updates); Self { cursor, - db_cursor_state: DbCursorState::NeedsPosition, + db_cursor_state: DbCursorState::new(false), in_memory_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -120,15 +249,16 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. pub fn new_storage( cursor: C, - trie_updates: &'a TrieUpdatesSorted, + trie_updates: impl IntoIterator, hashed_address: B256, ) -> Self { - let (in_memory_cursor, cursor_wiped) = - Self::get_storage_overlay(trie_updates, hashed_address); + let trie_updates = trie_updates.into_iter().collect::>(); + let (in_memory_cursor, db_wiped) = Self::get_storage_overlay(&trie_updates, hashed_address); Self { cursor, - db_cursor_state: DbCursorState::new(cursor_wiped), + db_cursor_state: DbCursorState::new(db_wiped), in_memory_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -138,50 +268,34 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Returns the storage overlay for `hashed_address` and whether it was deleted. 
fn get_storage_overlay( - trie_updates: &'a TrieUpdatesSorted, + trie_updates: &[&'a TrieUpdatesSorted], hashed_address: B256, - ) -> (ForwardInMemoryCursor<'a, Nibbles, Option>, bool) { - let storage_trie_updates = trie_updates.storage_tries_ref().get(&hashed_address); - let cursor_wiped = storage_trie_updates.is_some_and(|u| u.is_deleted()); - let storage_nodes = storage_trie_updates.map(|u| u.storage_nodes_ref()).unwrap_or(&[]); - - (ForwardInMemoryCursor::new(storage_nodes), cursor_wiped) + ) -> (OverlayCursor<'a>, bool) { + OverlayCursor::storage(trie_updates, hashed_address) } /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. fn get_cursor_mut(&mut self) -> Option<&mut C> { - (!matches!(self.db_cursor_state, DbCursorState::Wiped)).then_some(&mut self.cursor) + (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) } - /// Asserts that the next entry to be returned from the cursor is not previous to the last entry - /// returned. fn set_last_key(&mut self, next_entry: &Option<(Nibbles, BranchNodeCompact)>) { - let next_key = next_entry.as_ref().map(|e| e.0); - debug_assert!( - self.last_key.is_none_or(|last| next_key.is_none_or(|next| next >= last)), - "Cannot return entry {:?} previous to the last returned entry at {:?}", - next_key, - self.last_key, - ); - self.last_key = next_key; + self.last_key = next_entry.as_ref().map(|e| e.0); } - /// Positions the DB cursor state using the underlying cursor when needed. + /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { - // Only seek if: - // 1. We have a cursor entry and need to seek forward (entry.0 < key), OR - // 2. The DB cursor needs to be positioned. 
- let should_seek = match &self.db_cursor_state { - DbCursorState::NeedsPosition => true, - DbCursorState::Positioned((entry_key, _)) => entry_key < &key, - DbCursorState::Exhausted | DbCursorState::Wiped => false, - }; + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + Ok(()) + } - if should_seek { - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); + /// Positions the DB cursor at the first entry after `key`. + fn cursor_first_after(&mut self, key: Nibbles) -> Result<(), DatabaseError> { + self.cursor_seek(key)?; + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { + self.cursor_next()?; } - Ok(()) } @@ -190,56 +304,37 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { #[cfg(debug_assertions)] { debug_assert!(self.seeked); - debug_assert!(!matches!(self.db_cursor_state, DbCursorState::NeedsPosition)); } - // Exhausted and wiped states are stable; only advance if the DB cursor currently points to - // an entry. - if matches!(self.db_cursor_state, DbCursorState::Positioned(_)) { - let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); - } + let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); Ok(()) } - /// Compares the current in-memory entry with the current entry of the cursor, and applies the - /// in-memory entry to the cursor entry as an overlay. - // - /// This may consume and move forward the current entries when the overlay indicates a removed - /// node. + /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_entry = self.in_memory_cursor.current().cloned(); - let db_entry = self.db_cursor_state.entry(); - - match (mem_entry, db_entry) { - (Some((mem_key, None)), _) - if db_entry.is_none_or(|(db_key, _)| &mem_key < db_key) => - { - // If overlay has a removed node but DB cursor is exhausted or ahead of the - // in-memory cursor then move ahead in-memory, as there might be further - // non-removed overlay nodes. - self.in_memory_cursor.first_after(&mem_key); + let mem_key = self.in_memory_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); + }; + + if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { + if let Some(node) = mem_value { + return Ok(Some((next_key, node))) } - (Some((mem_key, None)), Some((db_key, _))) if &mem_key == db_key => { - // If overlay has a removed node which is returned from DB then move both - // cursors ahead to the next key. - self.in_memory_cursor.first_after(&mem_key); + + self.in_memory_cursor.advance_key(&next_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } - (Some((mem_key, Some(node))), _) - if db_entry.is_none_or(|(db_key, _)| &mem_key <= db_key) => - { - // If overlay returns a node prior to the DB's node, or the DB is exhausted, - // then we return the overlay's node. - return Ok(Some((mem_key, node))) - } - // All other cases: - // - mem_key > db_key - // - overlay is exhausted - // Return the db_entry. If DB is also exhausted then this returns None. 
- _ => return Ok(db_entry.cloned()), + continue; + } + + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + return Ok(self.db_cursor_state.entry().cloned()) } } } @@ -250,39 +345,21 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { &mut self, key: Nibbles, ) -> Result, DatabaseError> { - let mem_entry = self.in_memory_cursor.seek(&key); - - if let Some((mem_key, entry_inner)) = mem_entry && - *mem_key == key - { - #[cfg(debug_assertions)] - { - self.seeked = true; - } - - // An exact overlay hit can move the logical cursor ahead without touching the DB. If - // the DB cursor was still behind this key, force a re-seek before the next DB-backed - // operation so `next()` cannot return a stale earlier entry. - if matches!(&self.db_cursor_state, DbCursorState::Positioned((db_key, _)) if db_key < &key) - { - self.db_cursor_state = DbCursorState::NeedsPosition; - } - - let entry = entry_inner.clone().map(|node| (key, node)); - self.set_last_key(&entry); - return Ok(entry) - } - - self.cursor_seek(key)?; - #[cfg(debug_assertions)] { self.seeked = true; } - let entry = match self.db_cursor_state.entry() { - Some((db_key, node)) if db_key == &key => Some((key, node.clone())), - _ => None, + self.deferred_overlay_seek_start = None; + let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { + if mem_value.is_some() { + self.deferred_overlay_seek_start = Some(idx + 1); + } + mem_value.map(|node| (key, node)) + } else { + let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); + self.db_cursor_state.set_entry(db_entry); + self.db_cursor_state.entry().cloned() }; self.set_last_key(&entry); @@ -293,35 +370,26 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { &mut self, key: Nibbles, ) -> Result, DatabaseError> { - let mem_entry = self.in_memory_cursor.seek(&key); - - if let Some((mem_key, Some(node))) = mem_entry && - *mem_key == key + #[cfg(debug_assertions)] { - 
#[cfg(debug_assertions)] - { - self.seeked = true; - } + self.seeked = true; + } - // An exact overlay hit is the first logical entry at or after `key`, so the DB cursor - // can stay lazy until a later operation needs it. - if matches!(&self.db_cursor_state, DbCursorState::Positioned((db_key, _)) if db_key < &key) - { - self.db_cursor_state = DbCursorState::NeedsPosition; + self.deferred_overlay_seek_start = None; + match self.in_memory_cursor.seek_until_exact(&key) { + Some((idx, Some(node))) => { + let entry = Some((key, node)); + self.deferred_overlay_seek_start = Some(idx + 1); + self.set_last_key(&entry); + return Ok(entry); } - - let entry = Some((key, node.clone())); - self.set_last_key(&entry); - return Ok(entry) + Some((idx, None)) => { + self.in_memory_cursor.seek_from(idx + 1, &key); + } + None => {} } self.cursor_seek(key)?; - - #[cfg(debug_assertions)] - { - self.seeked = true; - } - let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) @@ -338,22 +406,14 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { return Ok(None); }; - // If either cursor is currently pointing to the last entry which was returned then consume - // that entry so that `choose_next_entry` is looking at the subsequent one. 
- if let Some((key, _)) = self.in_memory_cursor.current() && - key == &last_key - { - self.in_memory_cursor.first_after(&last_key); - } - - if matches!(self.db_cursor_state, DbCursorState::NeedsPosition) { - self.cursor_seek(last_key)?; + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.in_memory_cursor.seek_from(start, &last_key); } - - if let Some((key, _)) = self.db_cursor_state.entry() && - key == &last_key - { + self.in_memory_cursor.first_after(&last_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { self.cursor_next()?; + } else { + self.cursor_first_after(last_key)?; } let entry = self.choose_next_entry()?; @@ -372,7 +432,8 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.cursor.reset(); self.in_memory_cursor.reset(); - self.db_cursor_state = DbCursorState::NeedsPosition; + self.db_cursor_state.set_entry(None); + self.deferred_overlay_seek_start = None; self.last_key = None; #[cfg(debug_assertions)] { @@ -385,10 +446,10 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (in_memory_cursor, cursor_wiped) = - Self::get_storage_overlay(self.trie_updates, hashed_address); + let (in_memory_cursor, db_wiped) = + Self::get_storage_overlay(&self.trie_updates, hashed_address); self.in_memory_cursor = in_memory_cursor; - self.db_cursor_state = DbCursorState::new(cursor_wiped); + self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ -414,7 +475,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(test_case.in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let mut results = Vec::new(); @@ -598,7 +659,7 @@ mod tests { let mock_cursor = 
MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek_exact(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -641,7 +702,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -664,6 +725,82 @@ mod tests { assert!(!visited_keys.lock().is_empty(), "next should lazily position the DB cursor"); } + #[test] + fn test_seek_overlay_exact_hit_does_not_seek_lower_overlays_or_db() { + let db_nodes = vec![( + Nibbles::from_nibbles([0x6]), + BranchNodeCompact::new(0b0110, 0b0110, 0, vec![], None), + )]; + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let higher_priority = TrieUpdatesSorted::new( + vec![ + ( + Nibbles::from_nibbles([0x1]), + Some(BranchNodeCompact::new(0b0001, 0b0001, 0, vec![], None)), + ), + ( + Nibbles::from_nibbles([0x9]), + Some(BranchNodeCompact::new(0b1001, 0b1001, 0, vec![], None)), + ), + ], + Default::default(), + ); + let exact_hit = TrieUpdatesSorted::new( + vec![( + Nibbles::from_nibbles([0x5]), + Some(BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None)), + )], + Default::default(), + ); + let lower_priority = TrieUpdatesSorted::new( + vec![ + ( + Nibbles::from_nibbles([0x1]), + Some(BranchNodeCompact::new(0b0001, 0b0001, 0, 
vec![], None)), + ), + ( + Nibbles::from_nibbles([0x7]), + Some(BranchNodeCompact::new(0b0111, 0b0111, 0, vec![], None)), + ), + ], + Default::default(), + ); + let mut cursor = InMemoryTrieCursor::new_account( + mock_cursor, + [&higher_priority, &exact_hit, &lower_priority], + ); + + let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x5]), + BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) + )) + ); + assert_eq!(cursor.in_memory_cursor.cursors[0].idx, 1); + assert_eq!(cursor.in_memory_cursor.cursors[1].idx, 0); + assert_eq!( + cursor.in_memory_cursor.cursors[2].idx, 0, + "lower-priority overlay should not be sought after an exact overlay hit" + ); + assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); + + let result = cursor.next().unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x6]), + BranchNodeCompact::new(0b0110, 0b0110, 0, vec![], None) + )) + ); + assert!(!visited_keys.lock().is_empty(), "next should lazily position the DB cursor"); + } + #[test] fn test_seek_overlay_exact_hit_repositions_stale_db_on_next() { let db_nodes = vec![ @@ -682,7 +819,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(); assert_eq!( @@ -790,7 +927,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); assert_eq!(cursor.current().unwrap(), 
None); @@ -841,7 +978,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); // Seek to beginning should return None (all nodes are deleted) tracing::debug!("seeking to 0x"); @@ -879,33 +1016,255 @@ mod tests { assert_eq!(result, None, "Expected None from next() but got {:?}", result); } + #[test] + fn test_seek_can_move_backwards() { + let db_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None)), + (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None)), + ]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); + + let trie_updates = TrieUpdatesSorted::new( + vec![( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(2, 2, 0, vec![], None)), + )], + Default::default(), + ); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), + Some((Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None))) + ); + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), + Some((Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None))) + ); + assert_eq!( + cursor.next().unwrap(), + Some((Nibbles::from_nibbles([0x2]), BranchNodeCompact::new(2, 2, 0, vec![], None))) + ); + } + + #[test] + fn test_multiple_overlays_resolve_by_precedence() { + let db_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None)), + (Nibbles::from_nibbles([0x2]), BranchNodeCompact::new(2, 2, 0, vec![], None)), + (Nibbles::from_nibbles([0x4]), 
BranchNodeCompact::new(4, 4, 0, vec![], None)), + ]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); + + let newest = TrieUpdatesSorted::new( + vec![ + (Nibbles::from_nibbles([0x2]), None), + ( + Nibbles::from_nibbles([0x3]), + Some(BranchNodeCompact::new(30, 30, 0, vec![], None)), + ), + ], + Default::default(), + ); + let oldest = TrieUpdatesSorted::new( + vec![ + ( + Nibbles::from_nibbles([0x1]), + Some(BranchNodeCompact::new(10, 10, 0, vec![], None)), + ), + ( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(20, 20, 0, vec![], None)), + ), + (Nibbles::from_nibbles([0x3]), Some(BranchNodeCompact::new(3, 3, 0, vec![], None))), + ], + Default::default(), + ); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&newest, &oldest]); + + let mut results = Vec::new(); + if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { + results.push(entry); + while let Some(entry) = cursor.next().unwrap() { + results.push(entry); + } + } + + assert_eq!( + results, + vec![ + (Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(10, 10, 0, vec![], None)), + (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(30, 30, 0, vec![], None)), + (Nibbles::from_nibbles([0x4]), BranchNodeCompact::new(4, 4, 0, vec![], None)), + ] + ); + } + + #[test] + fn test_storage_deletion_overlay_hides_lower_precedence_sources() { + use crate::updates::StorageTrieUpdatesSorted; + use alloy_primitives::map::B256Map; + + let hashed_address = B256::with_last_byte(1); + let mut db_storage = B256Map::default(); + db_storage.insert( + hashed_address, + BTreeMap::from([( + Nibbles::from_nibbles([0x4]), + BranchNodeCompact::new(4, 4, 0, vec![], None), + )]), + ); + let mut visited_storage_keys = B256Map::default(); + visited_storage_keys.insert(hashed_address, Default::default()); + let mock_cursor = MockTrieCursor::new_storage( + Arc::new(db_storage), + 
Arc::new(visited_storage_keys), + hashed_address, + ) + .unwrap(); + + let mut newest_storage = B256Map::default(); + newest_storage.insert( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(2, 2, 0, vec![], None)), + )], + }, + ); + let newest = TrieUpdatesSorted::new(vec![], newest_storage); + + let mut deleting_storage = B256Map::default(); + deleting_storage.insert( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: true, + storage_nodes: vec![( + Nibbles::from_nibbles([0x1]), + Some(BranchNodeCompact::new(1, 1, 0, vec![], None)), + )], + }, + ); + let deleting = TrieUpdatesSorted::new(vec![], deleting_storage); + + let mut hidden_storage = B256Map::default(); + hidden_storage.insert( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![( + Nibbles::from_nibbles([0x3]), + Some(BranchNodeCompact::new(3, 3, 0, vec![], None)), + )], + }, + ); + let hidden = TrieUpdatesSorted::new(vec![], hidden_storage); + + let mut cursor = InMemoryTrieCursor::new_storage( + mock_cursor, + [&newest, &deleting, &hidden], + hashed_address, + ); + + assert_eq!( + cursor.seek(Nibbles::default()).unwrap(), + Some((Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None))) + ); + assert_eq!( + cursor.next().unwrap(), + Some((Nibbles::from_nibbles([0x2]), BranchNodeCompact::new(2, 2, 0, vec![], None))) + ); + assert_eq!(cursor.next().unwrap(), None); + } + mod proptest_tests { use super::*; - use itertools::Itertools; use proptest::prelude::*; - /// Merge `db_nodes` with `in_memory_nodes`, applying the in-memory overlay. - /// This properly handles deletions (None values in `in_memory_nodes`). 
- fn merge_with_overlay( - db_nodes: Vec<(Nibbles, BranchNodeCompact)>, - in_memory_nodes: Vec<(Nibbles, Option)>, + #[derive(Clone, Debug)] + enum CursorOp { + Next, + Seek(Nibbles), + SeekExact(Nibbles), + } + + /// Merge `db_nodes` with in-memory overlays. Overlay index 0 has highest priority. + fn merge_with_overlays( + db_nodes: &[(Nibbles, BranchNodeCompact)], + overlays: &[Vec<(Nibbles, Option)>], ) -> Vec<(Nibbles, BranchNodeCompact)> { - db_nodes - .into_iter() - .merge_join_by(in_memory_nodes, |db_entry, mem_entry| db_entry.0.cmp(&mem_entry.0)) - .filter_map(|entry| match entry { - // Only in db: keep it - itertools::EitherOrBoth::Left((key, node)) => Some((key, node)), - // Only in memory: keep if not a deletion - itertools::EitherOrBoth::Right((key, node_opt)) => { - node_opt.map(|node| (key, node)) - } - // In both: memory takes precedence (keep if not a deletion) - itertools::EitherOrBoth::Both(_, (key, node_opt)) => { - node_opt.map(|node| (key, node)) + let mut merged: BTreeMap = + db_nodes.iter().cloned().collect(); + + for overlay in overlays.iter().rev() { + for (key, node) in overlay { + match node { + Some(node) => { + merged.insert(*key, node.clone()); + } + None => { + merged.remove(key); + } } - }) - .collect() + } + } + + merged.into_iter().collect() + } + + fn reference_seek( + entries: &[(Nibbles, BranchNodeCompact)], + position: &mut Option, + key: Nibbles, + ) -> Option<(Nibbles, BranchNodeCompact)> { + let idx = entries.partition_point(|(entry_key, _)| entry_key < &key); + if idx < entries.len() { + *position = Some(idx); + Some(entries[idx].clone()) + } else { + *position = None; + None + } + } + + fn reference_seek_exact( + entries: &[(Nibbles, BranchNodeCompact)], + position: &mut Option, + key: Nibbles, + ) -> Option<(Nibbles, BranchNodeCompact)> { + match entries.binary_search_by_key(&key, |(entry_key, _)| *entry_key) { + Ok(idx) => { + *position = Some(idx); + Some(entries[idx].clone()) + } + Err(_) => { + *position = None; + 
None + } + } + } + + fn reference_next( + entries: &[(Nibbles, BranchNodeCompact)], + position: &mut Option, + ) -> Option<(Nibbles, BranchNodeCompact)> { + let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { + return None; + }; + + if next_idx < entries.len() { + *position = Some(next_idx); + Some(entries[next_idx].clone()) + } else { + *position = None; + None + } } /// Generate a strategy for a `BranchNodeCompact` with simplified parameters. @@ -926,90 +1285,90 @@ mod tests { }) } - /// Generate a sorted vector of (Nibbles, `BranchNodeCompact`) entries + fn nibbles_strategy() -> impl Strategy { + prop::collection::vec(0u8..16, 0..4).prop_map(Nibbles::from_nibbles_unchecked) + } + + /// Generate a sorted vector of (Nibbles, `BranchNodeCompact`) entries. fn sorted_db_nodes_strategy() -> impl Strategy> { - prop::collection::vec( - (prop::collection::vec(any::(), 0..2), branch_node_strategy()), - 0..20, + prop::collection::vec((nibbles_strategy(), branch_node_strategy()), 0..20).prop_map( + |entries| { + let mut result: Vec<(Nibbles, BranchNodeCompact)> = + entries.into_iter().collect(); + result.sort_by_key(|a| a.0); + result.dedup_by(|a, b| a.0 == b.0); + result + }, ) - .prop_map(|entries| { - // Convert Vec to Nibbles and sort - let mut result: Vec<(Nibbles, BranchNodeCompact)> = entries - .into_iter() - .map(|(bytes, node)| (Nibbles::from_nibbles_unchecked(bytes), node)) - .collect(); - result.sort_by_key(|a| a.0); - result.dedup_by(|a, b| a.0 == b.0); - result - }) } - /// Generate a sorted vector of (Nibbles, Option) entries + /// Generate a sorted vector of (Nibbles, Option) entries. 
fn sorted_in_memory_nodes_strategy( ) -> impl Strategy)>> { prop::collection::vec( - ( - prop::collection::vec(any::(), 0..2), - prop::option::of(branch_node_strategy()), - ), + (nibbles_strategy(), prop::option::of(branch_node_strategy())), 0..20, ) .prop_map(|entries| { - // Convert Vec to Nibbles and sort - let mut result: Vec<(Nibbles, Option)> = entries - .into_iter() - .map(|(bytes, node)| (Nibbles::from_nibbles_unchecked(bytes), node)) - .collect(); + let mut result: Vec<(Nibbles, Option)> = + entries.into_iter().collect(); result.sort_by_key(|a| a.0); result.dedup_by(|a, b| a.0 == b.0); result }) } + fn cursor_ops_strategy() -> impl Strategy> { + prop::collection::vec( + prop_oneof![ + Just(CursorOp::Next), + nibbles_strategy().prop_map(CursorOp::Seek), + nibbles_strategy().prop_map(CursorOp::SeekExact), + ], + 10..500, + ) + } + proptest! { #![proptest_config(ProptestConfig::with_cases(10000))] #[test] fn proptest_in_memory_trie_cursor( db_nodes in sorted_db_nodes_strategy(), - in_memory_nodes in sorted_in_memory_nodes_strategy(), - op_choices in prop::collection::vec(any::(), 10..500), + overlays in prop::collection::vec(sorted_in_memory_nodes_strategy(), 0..5), + ops in cursor_ops_strategy(), ) { reth_tracing::init_test_tracing(); use tracing::debug; debug!( db_paths=?db_nodes.iter().map(|(k, _)| k).collect::>(), - in_mem_nodes=?in_memory_nodes.iter().map(|(k, v)| (k, v.is_some())).collect::>(), - num_op_choices=?op_choices.len(), + overlays=?overlays + .iter() + .map(|overlay| overlay.iter().map(|(k, v)| (k, v.is_some())).collect::>()) + .collect::>(), + num_ops=?ops.len(), "Starting proptest!", ); - // Create the expected results by merging the two sorted vectors, - // properly handling deletions (None values in in_memory_nodes) - let expected_combined = merge_with_overlay(db_nodes.clone(), in_memory_nodes.clone()); - - // Collect all keys for operation generation - let all_keys: Vec = expected_combined.iter().map(|(k, _)| *k).collect(); - - // Create 
a control cursor using the combined result with a mock cursor - let control_db_map: BTreeMap = - expected_combined.into_iter().collect(); - let control_db_arc = Arc::new(control_db_map); - let control_visited_keys = Arc::new(Mutex::new(Vec::new())); - let mut control_cursor = MockTrieCursor::new(control_db_arc, control_visited_keys); + let expected_combined = merge_with_overlays(&db_nodes, &overlays); + let mut reference_position = None; // Create the InMemoryTrieCursor being tested let db_nodes_map: BTreeMap = - db_nodes.into_iter().collect(); + db_nodes.iter().cloned().collect(); let db_nodes_arc = Arc::new(db_nodes_map); let visited_keys = Arc::new(Mutex::new(Vec::new())); let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); - let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut test_cursor = InMemoryTrieCursor::new_account(mock_cursor, &trie_updates); + let trie_updates = overlays + .into_iter() + .map(|in_memory_nodes| TrieUpdatesSorted::new(in_memory_nodes, Default::default())) + .collect::>(); + let mut test_cursor = InMemoryTrieCursor::new_account(mock_cursor, trie_updates.iter()); // Test: seek to the beginning first - let control_first = control_cursor.seek(Nibbles::default()).unwrap(); + let control_first = + reference_seek(&expected_combined, &mut reference_position, Nibbles::default()); let test_first = test_cursor.seek(Nibbles::default()).unwrap(); debug!( control=?control_first.as_ref().map(|(k, _)| k), @@ -1018,22 +1377,12 @@ mod tests { ); assert_eq!(control_first, test_first, "Initial seek mismatch"); - // If both cursors returned None, nothing to test - if control_first.is_none() && test_first.is_none() { - return Ok(()); - } - - // Track the last key returned from the cursor - let mut last_returned_key = control_first.as_ref().map(|(k, _)| *k); - // Execute a sequence of random operations - for choice in op_choices { - let op_type = choice % 3; - - match op_type { - 0 => { - // Next operation - 
let control_result = control_cursor.next().unwrap(); + for op in ops { + match op { + CursorOp::Next => { + let control_result = + reference_next(&expected_combined, &mut reference_position); let test_result = test_cursor.next().unwrap(); debug!( control=?control_result.as_ref().map(|(k, _)| k), @@ -1041,32 +1390,10 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); - - last_returned_key = control_result.as_ref().map(|(k, _)| *k); - - // Stop if both cursors are exhausted - if control_result.is_none() && test_result.is_none() { - break; - } } - 1 => { - // Seek operation - choose a key >= last_returned_key - if all_keys.is_empty() { - continue; - } - - let valid_keys: Vec<_> = all_keys - .iter() - .filter(|k| last_returned_key.is_none_or(|last| **k >= last)) - .collect(); - - if valid_keys.is_empty() { - continue; - } - - let key = *valid_keys[choice as usize % valid_keys.len()]; - - let control_result = control_cursor.seek(key).unwrap(); + CursorOp::Seek(key) => { + let control_result = + reference_seek(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek(key).unwrap(); debug!( control=?control_result.as_ref().map(|(k, _)| k), @@ -1075,32 +1402,10 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); - - last_returned_key = control_result.as_ref().map(|(k, _)| *k); - - // Stop if both cursors are exhausted - if control_result.is_none() && test_result.is_none() { - break; - } } - _ => { - // SeekExact operation - choose a key >= last_returned_key - if all_keys.is_empty() { - continue; - } - - let valid_keys: Vec<_> = all_keys - .iter() - .filter(|k| last_returned_key.is_none_or(|last| **k >= last)) - .collect(); - - if valid_keys.is_empty() { - continue; - } - - let key = *valid_keys[choice as usize % valid_keys.len()]; - - let control_result = control_cursor.seek_exact(key).unwrap(); + CursorOp::SeekExact(key) => { 
+ let control_result = + reference_seek_exact(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek_exact(key).unwrap(); debug!( control=?control_result.as_ref().map(|(k, _)| k), @@ -1109,9 +1414,6 @@ mod tests { "SeekExact returned", ); assert_eq!(control_result, test_result, "SeekExact operation mismatch for key {:?}", key); - - // seek_exact updates the last_key internally but only if it found something - last_returned_key = control_result.as_ref().map(|(k, _)| *k); } } } From 65547ef4985e4ece32f443735e13afecdd8aa5ee Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 21 May 2026 19:01:26 +0200 Subject: [PATCH 02/40] refactor(chain-state): simplify state trie overlay stacks --- crates/chain-state/src/state_trie_overlay.rs | 371 +++++++++--------- .../src/providers/state/historical.rs | 9 +- .../provider/src/providers/state/overlay.rs | 127 +++--- 3 files changed, 254 insertions(+), 253 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 3027f0997ac..4f4e703a20e 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -16,22 +16,20 @@ use reth_primitives_traits::{ }; #[cfg(feature = "rayon")] use reth_tasks::WorkerPool; -use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; -use std::{fmt, sync::Arc, time::Instant}; -use tracing::{debug, trace, warn}; - -/// State trie overlays ordered from highest to lowest precedence. -pub type StateTrieOverlay = (Vec>, Vec>); +use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted}; +#[cfg(any(test, feature = "rayon"))] +use std::time::Instant; +use std::{fmt, sync::Arc}; +use tracing::debug; /// Manages flattened state trie overlays for in-memory blocks. /// /// The manager owns the in-memory block graph and a cache of flattened state trie overlays keyed by -/// `(anchor_hash, tip_hash)`. +/// `(anchor_hash, tip_hash)`. 
Cache entries can also mark in-flight background computations. #[derive(Clone)] pub struct StateTrieOverlayManager { blocks: Arc>>, - overlays: Arc>>, - pending_overlays: Arc>, + overlays: Arc>, #[cfg(feature = "rayon")] worker_pool: Option>, metrics: StateTrieOverlayMetrics, @@ -54,7 +52,6 @@ impl Default for StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), - pending_overlays: Default::default(), #[cfg(feature = "rayon")] worker_pool: None, metrics: Default::default(), @@ -67,7 +64,6 @@ impl std::fmt::Debug for StateTrieOverlayManager { f.debug_struct("StateTrieOverlayManager") .field("blocks", &self.blocks.len()) .field("overlays", &self.overlays.len()) - .field("pending_overlays", &self.pending_overlays.len()) .finish() } } @@ -79,7 +75,6 @@ impl StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), - pending_overlays: Default::default(), worker_pool: Some(worker_pool), metrics: Default::default(), } @@ -125,7 +120,7 @@ impl StateTrieOverlayManager { .iter() .filter_map(|entry| { let key = *entry.key(); - (key.tip_hash == parent_hash).then_some(key.anchor_hash) + (key.tip_hash == parent_hash && entry.value().is_ready()).then_some(key.anchor_hash) }) .collect::>(); @@ -173,20 +168,13 @@ impl StateTrieOverlayManager { if removed_blocks > 0 { let overlays_before = self.overlays.len(); - let pending_overlays_before = self.pending_overlays.len(); let blocks = Arc::clone(&self.blocks); self.overlays.retain(|key, _| { key.tip_hash != key.anchor_hash && Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == Some(key.anchor_hash) }); - self.pending_overlays.retain(|key, _| { - key.tip_hash != key.anchor_hash && - Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == - Some(key.anchor_hash) - }); pruned_overlays = overlays_before.saturating_sub(self.overlays.len()); - pruned_overlays += 
pending_overlays_before.saturating_sub(self.pending_overlays.len()); span.record("pruned_overlays", pruned_overlays); } debug!( @@ -216,8 +204,8 @@ impl StateTrieOverlayManager { %anchor_hash, "loading state trie overlay for parent" ); - let input = self.get_overlay(parent_hash, anchor_hash)?; - Ok(input) + let overlay = self.get_overlay(parent_hash, anchor_hash)?; + Ok(overlay) } #[tracing::instrument( @@ -240,17 +228,17 @@ impl StateTrieOverlayManager { let key = OverlayCacheKey { anchor_hash, tip_hash }; let span = tracing::Span::current(); - if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { + if let Some(overlay) = self.ready_overlay(key) { self.metrics.overlay_cache_reuses.increment(1); span.record("cache_reused", true); - return Ok((vec![Arc::clone(&input.nodes)], vec![Arc::clone(&input.state)])) + return Ok(overlay) } span.record("cache_reused", false); let blocks = self.resolve_block_path(tip_hash, anchor_hash)?; span.record("block_count", blocks.len()); if blocks.is_empty() { - return Ok((Vec::new(), Vec::new())) + return Ok(StateTrieOverlay::default()) } let cached_prefix = self.largest_cached_prefix(anchor_hash, &blocks); @@ -290,18 +278,17 @@ impl StateTrieOverlayManager { &self, anchor_hash: B256, blocks_newest_to_oldest: &[ExecutedBlock], - ) -> Option<(usize, Arc)> { + ) -> Option<(usize, StateTrieOverlay)> { blocks_newest_to_oldest.iter().enumerate().find_map(|(idx, block)| { let tip_hash = block.recovered_block().hash(); - self.overlays - .get(&OverlayCacheKey { anchor_hash, tip_hash }) - .map(|entry| (idx, Arc::clone(entry.value()))) + self.ready_overlay(OverlayCacheKey { anchor_hash, tip_hash }) + .map(|overlay| (idx, overlay)) }) } fn overlay_stack_from_path( blocks_newest_to_oldest: &[ExecutedBlock], - cached_prefix: Option<(usize, Arc)>, + cached_prefix: Option<(usize, StateTrieOverlay)>, ) -> StateTrieOverlay { let individual_block_count = cached_prefix.as_ref().map_or(blocks_newest_to_oldest.len(), |(idx, 
_)| *idx); @@ -316,32 +303,36 @@ impl StateTrieOverlayManager { hashed_post_state.push(trie_data.hashed_state); } - if let Some((_, input)) = cached_prefix { - trie_updates.push(Arc::clone(&input.nodes)); - hashed_post_state.push(Arc::clone(&input.state)); + if let Some((_, cached_overlay)) = cached_prefix { + trie_updates.extend(cached_overlay.trie_updates); + hashed_post_state.extend(cached_overlay.hashed_post_state); } - (trie_updates, hashed_post_state) + StateTrieOverlay::new(trie_updates, hashed_post_state) } fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey) { - if self.overlays.contains_key(&key) { - return + #[cfg(not(feature = "rayon"))] + { + let _ = key; } - match self.pending_overlays.entry(key) { - Entry::Occupied(_) => return, - Entry::Vacant(entry) => { - entry.insert(()); + + #[cfg(feature = "rayon")] + { + let Some(worker_pool) = self.worker_pool.clone() else { return }; + + match self.overlays.entry(key) { + Entry::Occupied(_) => return, + Entry::Vacant(entry) => { + entry.insert(OverlayCacheEntry::Pending); + } } - } - let manager = ::clone(self); - let span = tracing::Span::current(); + let manager = ::clone(self); + let span = tracing::Span::current(); - #[cfg(feature = "rayon")] - if let Some(worker_pool) = self.worker_pool.clone() { worker_pool.spawn(move || { - let _span = tracing::trace_span!( + let _span = tracing::debug_span!( target: "chain_state::state_trie_overlay", parent: span, "compute_state_trie_overlay_cache_fill", @@ -351,38 +342,15 @@ impl StateTrieOverlayManager { .entered(); manager.compute_and_cache_overlay(key); }); - return - } - - if let Err(error) = - std::thread::Builder::new().name("state-ovly-cache-fill".to_string()).spawn(move || { - let _span = tracing::trace_span!( - target: "chain_state::state_trie_overlay", - parent: span, - "compute_state_trie_overlay_cache_fill", - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - ) - .entered(); - manager.compute_and_cache_overlay(key); - }) - { - 
self.pending_overlays.remove(&key); - warn!( - target: "chain_state::state_trie_overlay", - ?error, - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - "failed to spawn state trie overlay cache fill" - ); } } + #[cfg(any(test, feature = "rayon"))] fn compute_and_cache_overlay(&self, key: OverlayCacheKey) { let result = self.compute_overlay_for_key(key); - self.pending_overlays.remove(&key); if let Err(error) = result { + self.remove_pending_overlay(key); debug!( target: "chain_state::state_trie_overlay", ?error, @@ -393,24 +361,18 @@ impl StateTrieOverlayManager { } } + #[cfg(any(test, feature = "rayon"))] fn compute_overlay_for_key( &self, key: OverlayCacheKey, - ) -> Result, StateTrieOverlayError> { - if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { - self.metrics.overlay_cache_reuses.increment(1); - return Ok(input) - } - + ) -> Result { let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); - let compute_input = match cached_prefix { - Some((idx, parent_input)) => { - ComputeOverlayInput::ExtendCached { blocks: blocks[..idx].to_vec(), parent_input } - } - None => ComputeOverlayInput::MergeBlocks(blocks), + let (blocks, parent_overlay) = match cached_prefix { + Some((idx, parent_overlay)) => (blocks[..idx].to_vec(), parent_overlay), + None => (blocks, StateTrieOverlay::default()), }; - let input = Arc::new(compute_overlay(compute_input, key.anchor_hash, &self.metrics)); + let overlay = compute_overlay(blocks, parent_overlay, key.anchor_hash, &self.metrics); if key.tip_hash != key.anchor_hash && Self::anchor_for_parent_in(self.blocks.as_ref(), key.tip_hash, key.anchor_hash) != @@ -422,19 +384,35 @@ impl StateTrieOverlayManager { }); } - let input = match self.overlays.entry(key) { - Entry::Occupied(entry) => { - self.metrics.overlay_cache_reuses.increment(1); - Arc::clone(entry.get()) - } + let overlay = match 
self.overlays.entry(key) { + Entry::Occupied(mut entry) => match entry.get() { + OverlayCacheEntry::Ready(overlay) => { + self.metrics.overlay_cache_reuses.increment(1); + overlay.clone() + } + OverlayCacheEntry::Pending => { + self.metrics.overlay_cache_fills.increment(1); + entry.insert(OverlayCacheEntry::Ready(overlay.clone())); + overlay + } + }, Entry::Vacant(entry) => { self.metrics.overlay_cache_fills.increment(1); - entry.insert(Arc::clone(&input)); - input + entry.insert(OverlayCacheEntry::Ready(overlay.clone())); + overlay } }; - Ok(input) + Ok(overlay) + } + + fn ready_overlay(&self, key: OverlayCacheKey) -> Option { + self.overlays.get(&key).and_then(|entry| entry.value().ready()) + } + + #[cfg(any(test, feature = "rayon"))] + fn remove_pending_overlay(&self, key: OverlayCacheKey) { + self.overlays.remove_if(&key, |_, entry| matches!(entry, OverlayCacheEntry::Pending)); } /// Returns `preferred_anchor` if it is on the parent chain, otherwise the first missing parent. @@ -469,6 +447,25 @@ impl StateTrieOverlayManager { } } +/// State trie overlays ordered from highest to lowest precedence. +#[derive(Clone, Debug, Default)] +pub struct StateTrieOverlay { + /// Trie updates overlays. + pub trie_updates: Vec>, + /// Hashed post state overlays. + pub hashed_post_state: Vec>, +} + +impl StateTrieOverlay { + /// Create a new state trie overlay. + pub const fn new( + trie_updates: Vec>, + hashed_post_state: Vec>, + ) -> Self { + Self { trie_updates, hashed_post_state } + } +} + /// Error returned when a state trie overlay cannot be built from the manager's current block set. #[derive(Debug)] pub struct StateTrieOverlayError { @@ -496,9 +493,27 @@ struct OverlayCacheKey { tip_hash: B256, } -enum ComputeOverlayInput { - ExtendCached { blocks: Vec>, parent_input: Arc }, - MergeBlocks(Vec>), +#[cfg_attr(not(any(test, feature = "rayon")), allow(dead_code))] +enum OverlayCacheEntry { + /// An in-flight background cache fill. 
+ /// + /// Read paths treat this as a cache miss so they can still return a lazy overlay stack. + Pending, + /// A flattened overlay ready for reuse. + Ready(StateTrieOverlay), +} + +impl OverlayCacheEntry { + const fn is_ready(&self) -> bool { + matches!(self, Self::Ready(_)) + } + + fn ready(&self) -> Option { + match self { + Self::Pending => None, + Self::Ready(overlay) => Some(overlay.clone()), + } + } } #[tracing::instrument( @@ -512,38 +527,21 @@ enum ComputeOverlayInput { elapsed_us = tracing::field::Empty, ) )] +#[cfg(any(test, feature = "rayon"))] fn compute_overlay( - input: ComputeOverlayInput, + blocks: Vec>, + parent_overlay: StateTrieOverlay, anchor_hash: B256, metrics: &StateTrieOverlayMetrics, -) -> TrieInputSorted { +) -> StateTrieOverlay { let started_at = Instant::now(); - let block_count = match &input { - ComputeOverlayInput::ExtendCached { blocks, .. } => blocks.len(), - ComputeOverlayInput::MergeBlocks(blocks) => blocks.len(), - }; - let parent_overlay = matches!(&input, ComputeOverlayInput::ExtendCached { .. 
}); + let block_count = blocks.len(); + let parent_overlay_reused = + !parent_overlay.trie_updates.is_empty() || !parent_overlay.hashed_post_state.is_empty(); tracing::Span::current().record("block_count", block_count); - tracing::Span::current().record("parent_overlay", parent_overlay); - - let overlay = match input { - ComputeOverlayInput::ExtendCached { blocks, parent_input } => { - trace!( - target: "chain_state::state_trie_overlay", - %anchor_hash, - block_count = blocks.len(), - "extending cached parent state trie overlay" - ); + tracing::Span::current().record("parent_overlay", parent_overlay_reused); - let mut overlay = parent_input.as_ref().clone(); - for block in blocks.iter().rev() { - let trie_data = block.trie_data(); - extend_overlay(&mut overlay, &trie_data.hashed_state, &trie_data.trie_updates); - } - overlay - } - ComputeOverlayInput::MergeBlocks(blocks) => merge_blocks(blocks), - }; + let overlay = flatten_overlay(blocks, parent_overlay); let elapsed = started_at.elapsed(); metrics.overlay_computation_duration_seconds.record(elapsed.as_secs_f64()); @@ -552,7 +550,7 @@ fn compute_overlay( target: "chain_state::state_trie_overlay", %anchor_hash, block_count, - parent_overlay, + parent_overlay = parent_overlay_reused, ?elapsed, "computed state trie overlay" ); @@ -560,64 +558,45 @@ fn compute_overlay( overlay } -fn merge_blocks(blocks: Vec>) -> TrieInputSorted { +#[cfg(any(test, feature = "rayon"))] +fn flatten_overlay( + blocks: Vec>, + parent_overlay: StateTrieOverlay, +) -> StateTrieOverlay { let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); + let StateTrieOverlay { trie_updates: parent_trie_updates, hashed_post_state } = parent_overlay; #[cfg(feature = "rayon")] - let (nodes, state) = rayon::join( + let (trie_updates, hashed_post_state) = rayon::join( || { TrieUpdatesSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.trie_updates)), + trie_data + .iter() + .map(|data| Arc::clone(&data.trie_updates)) + 
.chain(parent_trie_updates), ) }, || { HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), + trie_data + .iter() + .map(|data| Arc::clone(&data.hashed_state)) + .chain(hashed_post_state), ) }, ); #[cfg(not(feature = "rayon"))] - let (nodes, state) = ( - TrieUpdatesSorted::merge_batch(trie_data.iter().map(|data| Arc::clone(&data.trie_updates))), + let (trie_updates, hashed_post_state) = ( + TrieUpdatesSorted::merge_batch( + trie_data.iter().map(|data| Arc::clone(&data.trie_updates)).chain(parent_trie_updates), + ), HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), + trie_data.iter().map(|data| Arc::clone(&data.hashed_state)).chain(hashed_post_state), ), ); - TrieInputSorted::new(nodes, state, Default::default()) -} - -fn extend_overlay( - overlay: &mut TrieInputSorted, - hashed_state: &HashedPostStateSorted, - trie_updates: &TrieUpdatesSorted, -) { - #[cfg(feature = "rayon")] - { - rayon::join( - || { - if !hashed_state.is_empty() { - Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); - } - }, - || { - if !trie_updates.is_empty() { - Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); - } - }, - ); - } - - #[cfg(not(feature = "rayon"))] - { - if !hashed_state.is_empty() { - Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); - } - if !trie_updates.is_empty() { - Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); - } - } + StateTrieOverlay::new(vec![trie_updates], vec![hashed_post_state]) } #[cfg(test)] @@ -689,22 +668,28 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); - let (_, state) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = manager + .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) + .unwrap() + .hashed_post_state; assert_eq!(state.len(), 3); assert_eq!(state_account_count(&state), 3); let 
short_anchor = blocks[1].recovered_block().hash(); - let (_, short) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let short = manager + .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) + .unwrap() + .hashed_post_state; assert_eq!(short.len(), 1); assert_eq!(state_account_count(&short), 1); manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash: short_anchor, tip_hash: blocks[2].recovered_block().hash(), }); - let (_, cached_short) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let cached_short = manager + .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) + .unwrap() + .hashed_post_state; assert_eq!(cached_short.len(), 1); assert_eq!(cached_short[0].accounts.len(), 1); } @@ -721,14 +706,41 @@ mod tests { let prefix_tip = blocks[1].recovered_block().hash(); manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }); - let (_, state) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = manager + .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) + .unwrap() + .hashed_post_state; assert_eq!(state.len(), 2); assert_eq!(state[0].accounts.len(), 1); assert_eq!(state[1].accounts.len(), 2); assert_eq!(state_account_count(&state), 3); } + #[test] + fn pending_overlay_entries_are_ignored_by_read_path() { + let manager = StateTrieOverlayManager::default(); + let blocks = test_blocks(); + for block in &blocks { + manager.insert_block(block.clone()); + } + + let anchor_hash = blocks[0].recovered_block().parent_hash(); + let prefix_tip = blocks[1].recovered_block().hash(); + let prefix_key = OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }; + manager.overlays.insert(prefix_key, OverlayCacheEntry::Pending); + + let state = manager + .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) + .unwrap() + .hashed_post_state; + 
assert_eq!(state.len(), 3); + assert_eq!(state_account_count(&state), 3); + assert!(matches!( + manager.overlays.get(&prefix_key).as_deref(), + Some(OverlayCacheEntry::Pending) + )); + } + #[test] fn returns_anchor_for_in_memory_parent() { let manager = StateTrieOverlayManager::default(); @@ -784,7 +796,7 @@ mod tests { let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; let deadline = Instant::now() + Duration::from_secs(5); - while !manager.overlays.contains_key(&child_key) { + while manager.ready_overlay(child_key).is_none() { assert!( Instant::now() < deadline, "timed out waiting for optimistically prepared child overlay" @@ -792,7 +804,7 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - let (_, state) = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); + let state = manager.overlay_for_parent(child_hash, anchor_hash).unwrap().hashed_post_state; assert_eq!(state.len(), 1); assert_eq!(state[0].accounts.len(), 2); } @@ -811,14 +823,17 @@ mod tests { let child_hash = blocks[1].recovered_block().hash(); let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; - manager.pending_overlays.insert(child_key, ()); + manager.overlays.insert(child_key, OverlayCacheEntry::Pending); manager.insert_block(blocks[1].clone()); thread::sleep(Duration::from_millis(100)); - assert!(!manager.overlays.contains_key(&child_key)); - assert!(manager.pending_overlays.contains_key(&child_key)); - manager.pending_overlays.remove(&child_key); + assert!(manager.ready_overlay(child_key).is_none()); + assert!(matches!( + manager.overlays.get(&child_key).as_deref(), + Some(OverlayCacheEntry::Pending) + )); + manager.overlays.remove(&child_key); } #[test] @@ -845,8 +860,10 @@ mod tests { .overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor) .is_err()); - let (_, state) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = manager + 
.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) + .unwrap() + .hashed_post_state; assert_eq!(state_account_count(&state), 1); } } diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index 3707e7ac2fe..ef480292584 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -1,4 +1,4 @@ -use super::overlay::{Overlay, OverlayBuilder, OverlaySource}; +use super::overlay::{OverlayBuilder, OverlaySource}; use crate::{ AccountReader, BlockHashReader, ChangeSetReader, EitherReader, HashedPostStateProvider, ProviderError, RocksDBProviderFactory, StateProvider, StateRootProvider, @@ -311,12 +311,11 @@ where let TrieInputSorted { nodes, state, prefix_sets } = input; let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) .with_overlay_source(Some(OverlaySource::Immediate { trie: nodes, state })); - let Overlay { trie_updates, hashed_post_state } = - overlay_builder.build_overlay(self.provider)?; + let overlay = overlay_builder.build_overlay(self.provider)?; Ok(TrieInputSorted::new( - TrieUpdatesSorted::merge_batch(trie_updates), - HashedPostStateSorted::merge_batch(hashed_post_state), + TrieUpdatesSorted::merge_batch(overlay.trie_updates), + HashedPostStateSorted::merge_batch(overlay.hashed_post_state), prefix_sets, )) } diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index c08b887759a..490144fdc86 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -1,7 +1,7 @@ use alloy_eips::BlockNumHash; use alloy_primitives::{BlockHash, BlockNumber, B256}; use metrics::{Counter, Histogram}; -use reth_chain_state::{EthPrimitives, StateTrieOverlayManager}; +use reth_chain_state::{EthPrimitives, StateTrieOverlay, 
StateTrieOverlayManager}; use reth_db_api::{tables, transaction::DbTx, DatabaseError}; use reth_errors::{ProviderError, ProviderResult}; use reth_metrics::Metrics; @@ -50,17 +50,10 @@ pub(crate) struct OverlayStateProviderMetrics { hashed_state_size: Histogram, /// Overall duration of the [`OverlayStateProviderFactory::database_provider_ro`] call database_provider_ro_duration: Histogram, - /// Number of cache misses when fetching [`Overlay`]s from the overlay cache. + /// Number of cache misses when fetching [`StateTrieOverlay`]s from the overlay cache. overlay_cache_misses: Counter, } -/// Contains all fields required to initialize an [`OverlayStateProvider`]. -#[derive(Debug, Clone)] -pub(super) struct Overlay { - pub(super) trie_updates: Vec>, - pub(super) hashed_post_state: Vec>, -} - /// Source of overlay data for [`OverlayStateProviderFactory`]. #[derive(Debug, Clone)] pub(super) enum OverlaySource { @@ -172,14 +165,11 @@ impl OverlayBuilder { } /// Resolves the effective overlay (trie updates, hashed state). 
- fn resolve_overlays( - &self, - anchor_hash: BlockHash, - ) -> ProviderResult<(Vec>, Vec>)> { + fn resolve_overlays(&self, anchor_hash: BlockHash) -> ProviderResult { match &self.overlay_source { Some(OverlaySource::Managed { manager, state }) => { - let (trie, mut overlay_state) = if anchor_hash == self.parent_hash { - (Vec::new(), Vec::new()) + let mut overlay = if anchor_hash == self.parent_hash { + StateTrieOverlay::default() } else { manager .overlay_for_parent(self.parent_hash, anchor_hash) @@ -187,10 +177,10 @@ impl OverlayBuilder { }; if !state.is_empty() { - overlay_state.insert(0, Arc::clone(state)); + overlay.hashed_post_state.insert(0, Arc::clone(state)); } - Ok((trie, overlay_state)) + Ok(overlay) } Some(OverlaySource::Immediate { trie, state }) => { if anchor_hash != self.parent_hash { @@ -201,9 +191,9 @@ impl OverlayBuilder { } let trie = (!trie.is_empty()).then(|| Arc::clone(trie)).into_iter().collect(); let state = (!state.is_empty()).then(|| Arc::clone(state)).into_iter().collect(); - Ok((trie, state)) + Ok(StateTrieOverlay::new(trie, state)) } - None => Ok((Vec::new(), Vec::new())), + None => Ok(StateTrieOverlay::default()), } } @@ -272,7 +262,7 @@ impl OverlayBuilder { Ok(Some(anchor_number + 1..=db_tip_block.number)) } - /// Calculates a new [`Overlay`] given a transaction and the current db tip. + /// Calculates a new [`StateTrieOverlay`] given a transaction and the current db tip. #[instrument( level = "debug", target = "providers::state::overlay", @@ -283,7 +273,7 @@ impl OverlayBuilder { &self, provider: &Provider, db_tip_block: BlockNumHash, - ) -> ProviderResult + ) -> ProviderResult where Provider: ChangeSetReader + StorageChangeSetReader @@ -317,7 +307,7 @@ impl OverlayBuilder { }; // Collect any reverts which are required to bring the DB view back to the anchor hash. 
- let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = + let overlay = if let Some(revert_blocks) = self.reverts_required(provider, db_tip_block, anchor_hash)? { debug!( @@ -355,28 +345,20 @@ impl OverlayBuilder { // Resolve overlays and extend reverts with them. // If reverts are empty, use overlays directly to avoid cloning. - let (overlay_trie, overlay_state) = self.resolve_overlays(anchor_hash)?; - - let trie_updates = if trie_reverts.is_empty() { - overlay_trie - } else { - let mut trie_updates = overlay_trie; - trie_updates.push(Arc::new(trie_reverts)); - trie_updates - }; + let mut overlay = self.resolve_overlays(anchor_hash)?; - let hashed_state_updates = if hashed_state_reverts.is_empty() { - overlay_state - } else { - let mut hashed_state_updates = overlay_state; - hashed_state_updates.push(Arc::new(hashed_state_reverts)); - hashed_state_updates - }; + if !trie_reverts.is_empty() { + overlay.trie_updates.push(Arc::new(trie_reverts)); + } + + if !hashed_state_reverts.is_empty() { + overlay.hashed_post_state.push(Arc::new(hashed_state_reverts)); + } trie_updates_total_len = - trie_updates.iter().map(|updates| updates.total_len()).sum::(); + overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); hashed_state_updates_total_len = - hashed_state_updates.iter().map(|state| state.total_len()).sum::(); + overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); debug!( target: "providers::state::overlay", @@ -385,19 +367,19 @@ impl OverlayBuilder { "Reverted to anchor block", ); - (trie_updates, hashed_state_updates) + overlay } else { // If no reverts are needed then the db tip is the anchor hash. Use overlays directly. 
- let (trie_updates, hashed_state) = self.resolve_overlays(db_tip_block.hash)?; + let overlay = self.resolve_overlays(db_tip_block.hash)?; retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; trie_updates_total_len = - trie_updates.iter().map(|updates| updates.total_len()).sum::(); + overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); hashed_state_updates_total_len = - hashed_state.iter().map(|state| state.total_len()).sum::(); + overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); - (trie_updates, hashed_state) + overlay }; // Record metrics @@ -410,12 +392,15 @@ impl OverlayBuilder { self.metrics.trie_updates_size.record(trie_updates_total_len as f64); self.metrics.hashed_state_size.record(hashed_state_updates_total_len as f64); - Ok(Overlay { trie_updates, hashed_post_state }) + Ok(overlay) } /// Builds the effective overlay for the given provider. #[instrument(level = "debug", target = "providers::state::overlay", skip_all)] - pub(super) fn build_overlay(&self, provider: &Provider) -> ProviderResult + pub(super) fn build_overlay( + &self, + provider: &Provider, + ) -> ProviderResult where Provider: StageCheckpointReader + PruneCheckpointReader @@ -440,9 +425,10 @@ pub struct OverlayStateProviderFactory { factory: F, /// Overlay builder containing the configuration and overlay calculation logic. overlay_builder: OverlayBuilder, - /// A cache which maps `db_tip -> Overlay`. If the db tip changes during usage of the factory - /// then a new entry will get added to this, but in most cases only one entry is present. - overlay_cache: Arc>, + /// A cache which maps `db_tip -> StateTrieOverlay`. If the db tip changes during usage of the + /// factory then a new entry will get added to this, but in most cases only one entry is + /// present. 
+ overlay_cache: Arc>, } impl OverlayStateProviderFactory { @@ -468,10 +454,10 @@ impl OverlayStateProviderFactory { self } - /// Fetches an [`Overlay`] from the cache based on the current db tip block. If there is no - /// cached value then this calculates the [`Overlay`] and populates the cache. + /// Fetches a [`StateTrieOverlay`] from the cache based on the current db tip block. If there is + /// no cached value then this calculates the [`StateTrieOverlay`] and populates the cache. #[instrument(level = "debug", target = "providers::state::overlay", skip_all)] - fn get_overlay(&self, provider: &Provider) -> ProviderResult + fn get_overlay(&self, provider: &Provider) -> ProviderResult where Provider: StageCheckpointReader + PruneCheckpointReader @@ -524,11 +510,11 @@ where res }; - let Overlay { trie_updates, hashed_post_state } = self.get_overlay(&provider)?; + let overlay = self.get_overlay(&provider)?; let is_v2 = provider.cached_storage_settings().is_v2(); self.overlay_builder.metrics.database_provider_ro_duration.record(overall_start.elapsed()); - Ok(OverlayStateProvider::new(provider, trie_updates, hashed_post_state, is_v2)) + Ok(OverlayStateProvider::new(provider, overlay, is_v2)) } } @@ -540,8 +526,7 @@ where #[derive(Debug)] pub struct OverlayStateProvider { provider: Provider, - trie_updates: Vec>, - hashed_post_state: Vec>, + overlay: StateTrieOverlay, is_v2: bool, } @@ -551,13 +536,8 @@ where { /// Create new overlay state provider. The `Provider` must be cloneable, which generally means /// it should be wrapped in an `Arc`. 
- pub fn new( - provider: Provider, - trie_updates: Vec>, - hashed_post_state: Vec>, - is_v2: bool, - ) -> Self { - Self { provider, trie_updates, hashed_post_state, is_v2 } + pub const fn new(provider: Provider, overlay: StateTrieOverlay, is_v2: bool) -> Self { + Self { provider, overlay, is_v2 } } } @@ -587,7 +567,10 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.iter().map(Arc::as_ref))) + Ok(InMemoryTrieCursor::new_account( + cursor, + self.overlay.trie_updates.iter().map(Arc::as_ref), + )) } fn storage_trie_cursor( @@ -608,7 +591,7 @@ where }; Ok(InMemoryTrieCursor::new_storage( cursor, - self.trie_updates.iter().map(Arc::as_ref), + self.overlay.trie_updates.iter().map(Arc::as_ref), hashed_address, )) } @@ -638,7 +621,8 @@ where fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); + let hashed_post_state = + self.overlay.hashed_post_state.iter().map(Arc::as_ref).collect::>(); let hashed_cursor_factory = HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); hashed_cursor_factory.hashed_account_cursor() @@ -649,7 +633,8 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); + let hashed_post_state = + self.overlay.hashed_post_state.iter().map(Arc::as_ref).collect::>(); let hashed_cursor_factory = HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); hashed_cursor_factory.hashed_storage_cursor(hashed_address) @@ -668,9 +653,9 @@ mod tests { let builder = OverlayBuilder::::new(parent_hash, ChangesetCache::default()) .with_state_trie_overlay_manager(StateTrieOverlayManager::default()); - let (trie, state) 
= builder.resolve_overlays(parent_hash).unwrap(); - assert!(trie.is_empty()); - assert!(state.is_empty()); + let overlay = builder.resolve_overlays(parent_hash).unwrap(); + assert!(overlay.trie_updates.is_empty()); + assert!(overlay.hashed_post_state.is_empty()); } #[test] From 7f636fa82a59cf948169024eed7973cd1ff6dc8a Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 May 2026 16:08:42 +0200 Subject: [PATCH 03/40] perf(trie): index state overlay storage cursors --- crates/chain-state/src/state_trie_overlay.rs | 106 +++-- .../provider/src/providers/state/overlay.rs | 49 +-- .../trie/trie/src/hashed_cursor/post_state.rs | 365 ++++++++++++++-- crates/trie/trie/src/trie_cursor/in_memory.rs | 404 ++++++++++++++++-- 4 files changed, 806 insertions(+), 118 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 4f4e703a20e..348ecc4156f 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -16,7 +16,10 @@ use reth_primitives_traits::{ }; #[cfg(feature = "rayon")] use reth_tasks::WorkerPool; -use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted}; +use reth_trie::{ + hashed_cursor::HashedPostStateOverlay, trie_cursor::TrieUpdatesOverlay, + updates::TrieUpdatesSorted, HashedPostStateSorted, +}; #[cfg(any(test, feature = "rayon"))] use std::time::Instant; use std::{fmt, sync::Arc}; @@ -136,7 +139,7 @@ impl StateTrieOverlayManager { let _guard = span.enter(); for anchor_hash in cached_parent_overlays { - self.spawn_overlay_cache_fill(OverlayCacheKey { anchor_hash, tip_hash: hash }); + self.spawn_overlay_cache_fill(OverlayCacheKey { anchor_hash, tip_hash: hash }, None); } } @@ -244,9 +247,10 @@ impl StateTrieOverlayManager { let cached_prefix = self.largest_cached_prefix(anchor_hash, &blocks); span.record("parent_overlay_reused", cached_prefix.is_some()); - self.spawn_overlay_cache_fill(key); + let overlay = 
Self::overlay_stack_from_path(&blocks, cached_prefix.as_ref()); + self.spawn_overlay_cache_fill(key, Some(ResolvedOverlayPath { blocks, cached_prefix })); - Ok(Self::overlay_stack_from_path(&blocks, cached_prefix)) + Ok(overlay) } fn resolve_block_path( @@ -288,10 +292,10 @@ impl StateTrieOverlayManager { fn overlay_stack_from_path( blocks_newest_to_oldest: &[ExecutedBlock], - cached_prefix: Option<(usize, StateTrieOverlay)>, + cached_prefix: Option<&(usize, StateTrieOverlay)>, ) -> StateTrieOverlay { let individual_block_count = - cached_prefix.as_ref().map_or(blocks_newest_to_oldest.len(), |(idx, _)| *idx); + cached_prefix.map_or(blocks_newest_to_oldest.len(), |(idx, _)| *idx); let mut trie_updates = Vec::with_capacity(individual_block_count + cached_prefix.is_some() as usize); let mut hashed_post_state = @@ -304,17 +308,18 @@ impl StateTrieOverlayManager { } if let Some((_, cached_overlay)) = cached_prefix { - trie_updates.extend(cached_overlay.trie_updates); - hashed_post_state.extend(cached_overlay.hashed_post_state); + trie_updates.extend(cached_overlay.trie_updates.iter().cloned()); + hashed_post_state.extend(cached_overlay.hashed_post_state.iter().cloned()); } StateTrieOverlay::new(trie_updates, hashed_post_state) } - fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey) { + fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey, path: Option>) { #[cfg(not(feature = "rayon"))] { let _ = key; + let _ = path; } #[cfg(feature = "rayon")] @@ -340,14 +345,18 @@ impl StateTrieOverlayManager { anchor_hash = %key.anchor_hash, ) .entered(); - manager.compute_and_cache_overlay(key); + manager.compute_and_cache_overlay(key, path); }); } } #[cfg(any(test, feature = "rayon"))] - fn compute_and_cache_overlay(&self, key: OverlayCacheKey) { - let result = self.compute_overlay_for_key(key); + fn compute_and_cache_overlay( + &self, + key: OverlayCacheKey, + path: Option>, + ) { + let result = self.compute_overlay_for_key(key, path); if let Err(error) = result { 
self.remove_pending_overlay(key); @@ -365,12 +374,23 @@ impl StateTrieOverlayManager { fn compute_overlay_for_key( &self, key: OverlayCacheKey, + path: Option>, ) -> Result { - let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; - let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); - let (blocks, parent_overlay) = match cached_prefix { - Some((idx, parent_overlay)) => (blocks[..idx].to_vec(), parent_overlay), - None => (blocks, StateTrieOverlay::default()), + let path = match path { + Some(path) => path, + None => { + let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; + let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); + ResolvedOverlayPath { blocks, cached_prefix } + } + }; + let (blocks, parent_overlay) = match path.cached_prefix { + Some((idx, parent_overlay)) => { + let mut blocks = path.blocks; + blocks.truncate(idx); + (blocks, parent_overlay) + } + None => (path.blocks, StateTrieOverlay::default()), }; let overlay = compute_overlay(blocks, parent_overlay, key.anchor_hash, &self.metrics); @@ -451,18 +471,21 @@ impl StateTrieOverlayManager { #[derive(Clone, Debug, Default)] pub struct StateTrieOverlay { /// Trie updates overlays. - pub trie_updates: Vec>, + pub trie_updates: TrieUpdatesOverlay, /// Hashed post state overlays. - pub hashed_post_state: Vec>, + pub hashed_post_state: HashedPostStateOverlay, } impl StateTrieOverlay { /// Create a new state trie overlay. 
- pub const fn new( + pub fn new( trie_updates: Vec>, hashed_post_state: Vec>, ) -> Self { - Self { trie_updates, hashed_post_state } + Self { + trie_updates: TrieUpdatesOverlay::new(trie_updates), + hashed_post_state: HashedPostStateOverlay::new(hashed_post_state), + } } } @@ -493,6 +516,12 @@ struct OverlayCacheKey { tip_hash: B256, } +#[cfg_attr(not(any(test, feature = "rayon")), allow(dead_code))] +struct ResolvedOverlayPath { + blocks: Vec>, + cached_prefix: Option<(usize, StateTrieOverlay)>, +} + #[cfg_attr(not(any(test, feature = "rayon")), allow(dead_code))] enum OverlayCacheEntry { /// An in-flight background cache fill. @@ -682,10 +711,13 @@ mod tests { .hashed_post_state; assert_eq!(short.len(), 1); assert_eq!(state_account_count(&short), 1); - manager.compute_and_cache_overlay(OverlayCacheKey { - anchor_hash: short_anchor, - tip_hash: blocks[2].recovered_block().hash(), - }); + manager.compute_and_cache_overlay( + OverlayCacheKey { + anchor_hash: short_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }, + None, + ); let cached_short = manager .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) .unwrap() @@ -704,7 +736,8 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let prefix_tip = blocks[1].recovered_block().hash(); - manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }); + manager + .compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }, None); let state = manager .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) @@ -789,7 +822,10 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let parent_hash = blocks[0].recovered_block().hash(); - manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); + manager.compute_and_cache_overlay( + OverlayCacheKey { anchor_hash, tip_hash: parent_hash }, + None, + ); let child_hash = blocks[1].recovered_block().hash(); 
manager.insert_block(blocks[1].clone()); @@ -819,7 +855,10 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let parent_hash = blocks[0].recovered_block().hash(); - manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); + manager.compute_and_cache_overlay( + OverlayCacheKey { anchor_hash, tip_hash: parent_hash }, + None, + ); let child_hash = blocks[1].recovered_block().hash(); let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; @@ -845,10 +884,13 @@ mod tests { } let original_anchor = blocks[0].recovered_block().parent_hash(); - manager.compute_and_cache_overlay(OverlayCacheKey { - anchor_hash: original_anchor, - tip_hash: blocks[2].recovered_block().hash(), - }); + manager.compute_and_cache_overlay( + OverlayCacheKey { + anchor_hash: original_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }, + None, + ); manager.remove_blocks([ blocks[0].recovered_block().hash(), diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 490144fdc86..af6badd667b 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -1,5 +1,5 @@ use alloy_eips::BlockNumHash; -use alloy_primitives::{BlockHash, BlockNumber, B256}; +use alloy_primitives::{BlockHash, BlockNumber, B256, U256}; use metrics::{Counter, Histogram}; use reth_chain_state::{EthPrimitives, StateTrieOverlay, StateTrieOverlayManager}; use reth_db_api::{tables, transaction::DbTx, DatabaseError}; @@ -7,7 +7,7 @@ use reth_errors::{ProviderError, ProviderResult}; use reth_metrics::Metrics; use reth_primitives_traits::{ dashmap::{self, DashMap}, - NodePrimitives, + Account, NodePrimitives, }; use reth_prune_types::PruneSegment; use reth_stages_types::StageId; @@ -17,7 +17,7 @@ use reth_storage_api::{ StorageChangeSetReader, StorageSettingsCache, }; use reth_trie::{ - 
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory}, + hashed_cursor::{HashedCursorFactory, HashedPostStateCursor}, trie_cursor::{InMemoryTrieCursor, TrieCursor, TrieCursorFactory, TrieStorageCursor}, updates::TrieUpdatesSorted, HashedPostStateSorted, @@ -567,10 +567,7 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account( - cursor, - self.overlay.trie_updates.iter().map(Arc::as_ref), - )) + Ok(InMemoryTrieCursor::new_account_from_overlay(cursor, &self.overlay.trie_updates)) } fn storage_trie_cursor( @@ -589,9 +586,9 @@ where hashed_address, )) }; - Ok(InMemoryTrieCursor::new_storage( + Ok(InMemoryTrieCursor::new_storage_from_overlay( cursor, - self.overlay.trie_updates.iter().map(Arc::as_ref), + &self.overlay.trie_updates, hashed_address, )) } @@ -602,30 +599,27 @@ where Provider: DBProvider, { type AccountCursor<'a> - = , - Vec<&'a HashedPostStateSorted>, - > as HashedCursorFactory>::AccountCursor<'a> + as HashedCursorFactory>::AccountCursor<'a>, + Option, + > where Self: 'a; type StorageCursor<'a> - = , - Vec<&'a HashedPostStateSorted>, - > as HashedCursorFactory>::StorageCursor<'a> + as HashedCursorFactory>::StorageCursor<'a>, + U256, + > where Self: 'a; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let hashed_post_state = - self.overlay.hashed_post_state.iter().map(Arc::as_ref).collect::>(); - let hashed_cursor_factory = - HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); - hashed_cursor_factory.hashed_account_cursor() + let cursor = db_hashed_cursor_factory.hashed_account_cursor()?; + Ok(HashedPostStateCursor::new_account_from_overlay(cursor, &self.overlay.hashed_post_state)) } fn hashed_storage_cursor( @@ -633,11 +627,12 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let hashed_post_state 
= - self.overlay.hashed_post_state.iter().map(Arc::as_ref).collect::>(); - let hashed_cursor_factory = - HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); - hashed_cursor_factory.hashed_storage_cursor(hashed_address) + let cursor = db_hashed_cursor_factory.hashed_storage_cursor(hashed_address)?; + Ok(HashedPostStateCursor::new_storage_from_overlay( + cursor, + &self.overlay.hashed_post_state, + hashed_address, + )) } } diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index b235d1f5a7d..4fc6c58e86f 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,9 +1,13 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use alloy_primitives::{B256, U256}; +use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; -use std::marker::PhantomData; +use std::{ + marker::PhantomData, + ops::{Deref, Index}, + sync::Arc, +}; /// The hashed cursor factory for the post state. #[derive(Clone, Debug)] @@ -87,6 +91,174 @@ impl HashedPostStateCursorValue for U256 { } } +/// Hashed post-state overlays ordered from highest to lowest precedence. +#[derive(Clone, Debug, Default)] +pub struct HashedPostStateOverlay { + states: Vec>, + storage_index: Arc>, +} + +impl HashedPostStateOverlay { + /// Create a new indexed hashed post-state overlay stack. + pub fn new(states: Vec>) -> Self { + let storage_index = Arc::new(build_hashed_storage_index(&states)); + Self { states, storage_index } + } + + /// Returns `true` if there are no hashed post-state overlays. + pub const fn is_empty(&self) -> bool { + self.states.is_empty() + } + + /// Returns the number of hashed post-state overlays. 
+ pub const fn len(&self) -> usize { + self.states.len() + } + + /// Returns an iterator over hashed post-state overlays. + pub fn iter(&self) -> impl Iterator> { + self.states.iter() + } + + /// Push a hashed post-state overlay at the end of the precedence stack. + pub fn push(&mut self, state: Arc) { + self.states.push(state); + self.rebuild_storage_index(); + } + + /// Insert a hashed post-state overlay at `index`. + pub fn insert(&mut self, index: usize, state: Arc) { + self.states.insert(index, state); + self.rebuild_storage_index(); + } + + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { + let Some(index) = self.storage_index.get(&hashed_address) else { + return (PostStateOverlayCursor::default(), false); + }; + + ( + PostStateOverlayCursor { + cursors: index + .indices + .iter() + .filter_map(|idx| self.states[*idx].storages.get(&hashed_address)) + .map(|storage| SeekablePostStateCursor::new(storage.storage_slots_ref())) + .collect(), + }, + index.db_wiped, + ) + } + + fn rebuild_storage_index(&mut self) { + self.storage_index = Arc::new(build_hashed_storage_index(&self.states)); + } +} + +impl From>> for HashedPostStateOverlay { + fn from(states: Vec>) -> Self { + Self::new(states) + } +} + +impl IntoIterator for HashedPostStateOverlay { + type IntoIter = std::vec::IntoIter; + type Item = Arc; + + fn into_iter(self) -> Self::IntoIter { + self.states.into_iter() + } +} + +impl Index for HashedPostStateOverlay { + type Output = Arc; + + fn index(&self, index: usize) -> &Self::Output { + &self.states[index] + } +} + +impl Deref for HashedPostStateOverlay { + type Target = [Arc]; + + fn deref(&self) -> &Self::Target { + &self.states + } +} + +#[derive(Clone, Debug)] +struct StorageOverlayIndex { + indices: Arc<[usize]>, + db_wiped: bool, +} + +#[derive(Default)] +struct StorageOverlayIndexBuilder { + indices: Vec, + db_wiped: bool, +} + +fn build_hashed_storage_index( + states: &[Arc], +) -> B256Map { + let mut 
index: B256Map = B256Map::default(); + + for (idx, state) in states.iter().enumerate() { + for (hashed_address, storage) in &state.storages { + let entry = index.entry(*hashed_address).or_default(); + if entry.db_wiped { + continue; + } + + entry.indices.push(idx); + if storage.is_wiped() { + entry.db_wiped = true; + } + } + } + + index + .into_iter() + .map(|(hashed_address, entry)| { + ( + hashed_address, + StorageOverlayIndex { indices: entry.indices.into(), db_wiped: entry.db_wiped }, + ) + }) + .collect() +} + +#[derive(Clone, Debug)] +enum HashedPostStateSource<'a> { + Refs(Vec<&'a HashedPostStateSorted>), + Indexed(&'a HashedPostStateOverlay), +} + +impl<'a> HashedPostStateSource<'a> { + fn from_refs(post_states: impl IntoIterator) -> Self { + Self::Refs(post_states.into_iter().collect()) + } + + fn account_overlay(&self) -> PostStateOverlayCursor<'a, Option> { + match self { + Self::Refs(post_states) => PostStateOverlayCursor::account(post_states), + Self::Indexed(post_states) => PostStateOverlayCursor { + cursors: post_states + .iter() + .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) + .collect(), + }, + } + } + + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'a, U256>, bool) { + match self { + Self::Refs(post_states) => PostStateOverlayCursor::storage(post_states, hashed_address), + Self::Indexed(post_states) => post_states.storage_overlay(hashed_address), + } + } +} + /// A cursor to iterate over state updates and corresponding database entries. /// It will always give precedence to earlier post state overlays. #[derive(Debug)] @@ -108,13 +280,14 @@ where #[cfg(debug_assertions)] /// Tracks whether `seek` has been called. seeked: bool, - /// Reference to the full post state. - post_states: Vec<&'a HashedPostStateSorted>, + /// Source of post-state overlays. 
+ post_states: HashedPostStateSource<'a>, } #[derive(Debug)] enum DbCursorState { - Active(Option<(B256, V)>), + Unpositioned, + Positioned((B256, V)), Wiped, } @@ -123,7 +296,7 @@ impl DbCursorState { if cursor_wiped { Self::Wiped } else { - Self::Active(None) + Self::Unpositioned } } @@ -133,19 +306,19 @@ impl DbCursorState { const fn entry(&self) -> Option<&(B256, V)> { match self { - Self::Active(entry) => entry.as_ref(), - Self::Wiped => None, + Self::Positioned(entry) => Some(entry), + Self::Unpositioned | Self::Wiped => None, } } fn set_entry(&mut self, entry: Option<(B256, V)>) { - if let Self::Active(current) = self { - *current = entry; + if !self.is_wiped() { + *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); } } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] struct PostStateOverlayCursor<'a, V> { cursors: Vec>, } @@ -289,8 +462,29 @@ where cursor: C, post_states: impl IntoIterator, ) -> Self { - let post_states = post_states.into_iter().collect::>(); - let post_state_cursor = PostStateOverlayCursor::account(&post_states); + let post_states = HashedPostStateSource::from_refs(post_states); + let post_state_cursor = post_states.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, + } + } +} + +impl<'a, C> HashedPostStateCursor<'a, C, Option> +where + C: HashedCursor, +{ + /// Create new account cursor from an indexed hashed post-state overlay. 
+ pub fn new_account_from_overlay(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { + let post_states = HashedPostStateSource::Indexed(post_states); + let post_state_cursor = post_states.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), @@ -315,9 +509,8 @@ where post_states: impl IntoIterator, hashed_address: B256, ) -> Self { - let post_states = post_states.into_iter().collect::>(); - let (post_state_cursor, cursor_wiped) = - Self::get_storage_overlay(&post_states, hashed_address); + let post_states = HashedPostStateSource::from_refs(post_states); + let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(cursor_wiped), @@ -330,12 +523,24 @@ where } } - /// Returns the storage overlay for `hashed_address` and whether it was wiped. - fn get_storage_overlay( - post_states: &[&'a HashedPostStateSorted], + /// Create new storage cursor from an indexed hashed post-state overlay. 
+ pub fn new_storage_from_overlay( + cursor: C, + post_states: &'a HashedPostStateOverlay, hashed_address: B256, - ) -> (PostStateOverlayCursor<'a, U256>, bool) { - PostStateOverlayCursor::storage(post_states, hashed_address) + ) -> Self { + let post_states = HashedPostStateSource::Indexed(post_states); + let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); + Self { + cursor, + db_cursor_state: DbCursorState::new(cursor_wiped), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, + } } } @@ -523,11 +728,7 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (post_state_cursor, cursor_wiped) = - HashedPostStateCursor::::get_storage_overlay( - &self.post_states, - hashed_address, - ); + let (post_state_cursor, cursor_wiped) = self.post_states.storage_overlay(hashed_address); self.post_state_cursor = post_state_cursor; self.db_cursor_state = DbCursorState::new(cursor_wiped); } @@ -544,17 +745,36 @@ mod tests { B256::repeat_byte(byte) } + fn account(nonce: u64) -> Account { + Account { nonce, balance: U256::from(nonce), bytecode_hash: None } + } + fn storage_post_state(storage_slots: Vec<(B256, U256)>) -> HashedPostStateSorted { - storage_post_state_with_wipe(storage_slots, false) + storage_post_state_for_address(B256::ZERO, storage_slots) } fn storage_post_state_with_wipe( storage_slots: Vec<(B256, U256)>, wiped: bool, + ) -> HashedPostStateSorted { + storage_post_state_with_wipe_for_address(B256::ZERO, storage_slots, wiped) + } + + fn storage_post_state_for_address( + hashed_address: B256, + storage_slots: Vec<(B256, U256)>, + ) -> HashedPostStateSorted { + storage_post_state_with_wipe_for_address(hashed_address, storage_slots, false) + } + + fn storage_post_state_with_wipe_for_address( + hashed_address: B256, + storage_slots: Vec<(B256, U256)>, + wiped: bool, ) -> 
HashedPostStateSorted { let storage_sorted = reth_trie_common::HashedStorageSorted { storage_slots, wiped }; let mut storages = alloy_primitives::map::B256Map::default(); - storages.insert(B256::ZERO, storage_sorted); + storages.insert(hashed_address, storage_sorted); HashedPostStateSorted::new(Vec::new(), storages) } @@ -709,6 +929,35 @@ mod tests { ); } + #[test] + fn test_indexed_account_overlay_resolves_by_precedence() { + let db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x03), account(3))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + + let newest = HashedPostStateSorted::new( + vec![(key(0x01), None), (key(0x02), Some(account(20)))], + Default::default(), + ); + let oldest = HashedPostStateSorted::new( + vec![(key(0x01), Some(account(10))), (key(0x03), Some(account(30)))], + Default::default(), + ); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = HashedPostStateCursor::new_account_from_overlay(mock_cursor, &overlay); + + let mut results = Vec::new(); + if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { + results.push(entry); + while let Some(entry) = cursor.next().unwrap() { + results.push(entry); + } + } + + assert_eq!(results, vec![(key(0x02), account(20)), (key(0x03), account(30))]); + } + #[test] fn test_storage_wipe_overlay_hides_lower_precedence_sources() { let db_nodes = BTreeMap::from([(key(0x04), U256::from(4))]); @@ -730,6 +979,58 @@ mod tests { assert_eq!(cursor.next().unwrap(), None); } + #[test] + fn test_indexed_storage_wipe_overlay_hides_lower_precedence_sources() { + let db_nodes = BTreeMap::from([(key(0x04), U256::from(4))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + + let newest = storage_post_state(vec![(key(0x02), 
U256::from(2))]); + let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); + let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut cursor = + HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, B256::ZERO); + + assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); + assert_eq!(cursor.next().unwrap(), None); + } + + #[test] + fn test_indexed_storage_overlay_switches_hashed_address() { + let first_address = B256::with_last_byte(1); + let second_address = B256::with_last_byte(2); + let mut db_storage = B256Map::default(); + db_storage.insert(first_address, BTreeMap::from([(key(0x04), U256::from(4))])); + db_storage.insert(second_address, BTreeMap::from([(key(0x05), U256::from(5))])); + let visited_keys = + Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); + let mock_cursor = + MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, first_address) + .unwrap(); + + let first_overlay = + storage_post_state_for_address(first_address, vec![(key(0x01), U256::from(1))]); + let second_overlay = + storage_post_state_for_address(second_address, vec![(key(0x02), U256::from(2))]); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); + let mut cursor = + HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + + assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x04), U256::from(4)))); + + cursor.set_hashed_address(second_address); + + assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x02), U256::from(2)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x05), U256::from(5)))); + 
assert_eq!(cursor.next().unwrap(), None); + } + mod proptest_tests { use super::*; use proptest::prelude::*; @@ -745,7 +1046,7 @@ mod tests { db_nodes: &[(B256, U256)], overlays: &[Vec<(B256, U256)>], ) -> Vec<(B256, U256)> { - let mut merged: BTreeMap = db_nodes.iter().cloned().collect(); + let mut merged: BTreeMap = db_nodes.iter().copied().collect(); for overlay in overlays.iter().rev() { for (key, value) in overlay { @@ -779,9 +1080,7 @@ mod tests { entries: &[(B256, U256)], position: &mut Option, ) -> Option<(B256, U256)> { - let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { - return None; - }; + let next_idx = position.and_then(|idx| idx.checked_add(1))?; if next_idx < entries.len() { *position = Some(next_idx); @@ -866,7 +1165,7 @@ mod tests { let mut reference_position = None; // Create the HashedPostStateCursor being tested - let db_nodes_map: BTreeMap = db_nodes.iter().cloned().collect(); + let db_nodes_map: BTreeMap = db_nodes.iter().copied().collect(); let db_nodes_arc = Arc::new(db_nodes_map); let visited_keys = Arc::new(Mutex::new(Vec::new())); let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 36ea3ac1764..6f8c2761ad1 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,9 +1,13 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; use crate::updates::TrieUpdatesSorted; -use alloy_primitives::B256; +use alloy_primitives::{map::B256Map, B256}; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; -use std::marker::PhantomData; +use std::{ + marker::PhantomData, + ops::{Deref, Index}, + sync::Arc, +}; /// The trie cursor factory for the trie updates. #[derive(Debug, Clone)] @@ -55,6 +59,166 @@ where } } +/// Trie updates overlays ordered from highest to lowest precedence. 
+#[derive(Clone, Debug, Default)] +pub struct TrieUpdatesOverlay { + updates: Vec>, + storage_index: Arc>, +} + +impl TrieUpdatesOverlay { + /// Create a new indexed trie updates overlay stack. + pub fn new(updates: Vec>) -> Self { + let storage_index = Arc::new(build_trie_storage_index(&updates)); + Self { updates, storage_index } + } + + /// Returns `true` if there are no trie update overlays. + pub const fn is_empty(&self) -> bool { + self.updates.is_empty() + } + + /// Returns the number of trie update overlays. + pub const fn len(&self) -> usize { + self.updates.len() + } + + /// Returns an iterator over trie update overlays. + pub fn iter(&self) -> impl Iterator> { + self.updates.iter() + } + + /// Push a trie update overlay at the end of the precedence stack. + pub fn push(&mut self, update: Arc) { + self.updates.push(update); + self.rebuild_storage_index(); + } + + fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { + let Some(index) = self.storage_index.get(&hashed_address) else { + return (OverlayCursor::default(), false); + }; + + ( + OverlayCursor { + cursors: index + .indices + .iter() + .filter_map(|idx| self.updates[*idx].storage_tries_ref().get(&hashed_address)) + .map(|storage| SeekableInMemoryCursor::new(storage.storage_nodes_ref())) + .collect(), + }, + index.db_wiped, + ) + } + + fn rebuild_storage_index(&mut self) { + self.storage_index = Arc::new(build_trie_storage_index(&self.updates)); + } +} + +impl From>> for TrieUpdatesOverlay { + fn from(updates: Vec>) -> Self { + Self::new(updates) + } +} + +impl IntoIterator for TrieUpdatesOverlay { + type IntoIter = std::vec::IntoIter; + type Item = Arc; + + fn into_iter(self) -> Self::IntoIter { + self.updates.into_iter() + } +} + +impl Index for TrieUpdatesOverlay { + type Output = Arc; + + fn index(&self, index: usize) -> &Self::Output { + &self.updates[index] + } +} + +impl Deref for TrieUpdatesOverlay { + type Target = [Arc]; + + fn deref(&self) -> &Self::Target { + 
&self.updates + } +} + +#[derive(Clone, Debug)] +struct StorageOverlayIndex { + indices: Arc<[usize]>, + db_wiped: bool, +} + +#[derive(Default)] +struct StorageOverlayIndexBuilder { + indices: Vec, + db_wiped: bool, +} + +fn build_trie_storage_index(updates: &[Arc]) -> B256Map { + let mut index: B256Map = B256Map::default(); + + for (idx, updates) in updates.iter().enumerate() { + for (hashed_address, storage) in updates.storage_tries_ref() { + let entry = index.entry(*hashed_address).or_default(); + if entry.db_wiped { + continue; + } + + entry.indices.push(idx); + if storage.is_deleted() { + entry.db_wiped = true; + } + } + } + + index + .into_iter() + .map(|(hashed_address, entry)| { + ( + hashed_address, + StorageOverlayIndex { indices: entry.indices.into(), db_wiped: entry.db_wiped }, + ) + }) + .collect() +} + +#[derive(Clone, Debug)] +enum TrieUpdatesSource<'a> { + Refs(Vec<&'a TrieUpdatesSorted>), + Indexed(&'a TrieUpdatesOverlay), +} + +impl<'a> TrieUpdatesSource<'a> { + fn from_refs(trie_updates: impl IntoIterator) -> Self { + Self::Refs(trie_updates.into_iter().collect()) + } + + fn account_overlay(&self) -> OverlayCursor<'a> { + match self { + Self::Refs(trie_updates) => OverlayCursor::account(trie_updates), + Self::Indexed(trie_updates) => OverlayCursor { + cursors: trie_updates + .iter() + .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) + .collect(), + }, + } + } + + fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'a>, bool) { + match self { + Self::Refs(trie_updates) => OverlayCursor::storage(trie_updates, hashed_address), + Self::Indexed(trie_updates) => trie_updates.storage_overlay(hashed_address), + } + } +} + /// A cursor to iterate over trie updates and corresponding database entries. /// It will always give precedence to earlier trie update overlays. #[derive(Debug)] @@ -72,13 +236,14 @@ pub struct InMemoryTrieCursor<'a, C> { #[cfg(debug_assertions)] /// Whether an initial seek was called. 
seeked: bool, - /// Reference to the full trie updates. - trie_updates: Vec<&'a TrieUpdatesSorted>, + /// Source of trie update overlays. + trie_updates: TrieUpdatesSource<'a>, } #[derive(Debug)] enum DbCursorState { - Active(Option<(Nibbles, BranchNodeCompact)>), + Unpositioned, + Positioned((Nibbles, BranchNodeCompact)), Wiped, } @@ -87,7 +252,7 @@ impl DbCursorState { if cursor_wiped { Self::Wiped } else { - Self::Active(None) + Self::Unpositioned } } @@ -97,19 +262,19 @@ impl DbCursorState { const fn entry(&self) -> Option<&(Nibbles, BranchNodeCompact)> { match self { - Self::Active(entry) => entry.as_ref(), - Self::Wiped => None, + Self::Positioned(entry) => Some(entry), + Self::Unpositioned | Self::Wiped => None, } } fn set_entry(&mut self, entry: Option<(Nibbles, BranchNodeCompact)>) { - if let Self::Active(current) = self { - *current = entry; + if !self.is_wiped() { + *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); } } } -#[derive(Debug)] +#[derive(Debug, Default)] struct OverlayCursor<'a> { cursors: Vec>, } @@ -231,8 +396,24 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { cursor: C, trie_updates: impl IntoIterator, ) -> Self { - let trie_updates = trie_updates.into_iter().collect::>(); - let in_memory_cursor = OverlayCursor::account(&trie_updates); + let trie_updates = TrieUpdatesSource::from_refs(trie_updates); + let in_memory_cursor = trie_updates.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } + } + + /// Create new account trie cursor from an indexed trie updates overlay. 
+ pub fn new_account_from_overlay(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { + let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + let in_memory_cursor = trie_updates.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), @@ -252,8 +433,8 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { trie_updates: impl IntoIterator, hashed_address: B256, ) -> Self { - let trie_updates = trie_updates.into_iter().collect::>(); - let (in_memory_cursor, db_wiped) = Self::get_storage_overlay(&trie_updates, hashed_address); + let trie_updates = TrieUpdatesSource::from_refs(trie_updates); + let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(db_wiped), @@ -266,12 +447,24 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } } - /// Returns the storage overlay for `hashed_address` and whether it was deleted. - fn get_storage_overlay( - trie_updates: &[&'a TrieUpdatesSorted], + /// Create new storage trie cursor from an indexed trie updates overlay. + pub fn new_storage_from_overlay( + cursor: C, + trie_updates: &'a TrieUpdatesOverlay, hashed_address: B256, - ) -> (OverlayCursor<'a>, bool) { - OverlayCursor::storage(trie_updates, hashed_address) + ) -> Self { + let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); + Self { + cursor, + db_cursor_state: DbCursorState::new(db_wiped), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } } /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. 
@@ -446,8 +639,7 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (in_memory_cursor, db_wiped) = - Self::get_storage_overlay(&self.trie_updates, hashed_address); + let (in_memory_cursor, db_wiped) = self.trie_updates.storage_overlay(hashed_address); self.in_memory_cursor = in_memory_cursor; self.db_cursor_state = DbCursorState::new(db_wiped); } @@ -457,6 +649,7 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { mod tests { use super::*; use crate::trie_cursor::mock::MockTrieCursor; + use alloy_primitives::map::B256Map; use parking_lot::Mutex; use std::{collections::BTreeMap, sync::Arc}; @@ -498,6 +691,37 @@ mod tests { ); } + fn branch_node(id: u16) -> BranchNodeCompact { + BranchNodeCompact::new(id, id, 0, vec![], None) + } + + fn storage_trie_updates( + hashed_address: B256, + is_deleted: bool, + storage_nodes: Vec<(Nibbles, Option)>, + ) -> TrieUpdatesSorted { + let mut storage_tries = B256Map::default(); + storage_tries.insert( + hashed_address, + crate::updates::StorageTrieUpdatesSorted { is_deleted, storage_nodes }, + ); + TrieUpdatesSorted::new(vec![], storage_tries) + } + + fn mock_storage_cursor( + hashed_address: B256, + storage_tries: B256Map>, + ) -> MockTrieCursor { + let visited_storage_keys = + storage_tries.keys().map(|key| (*key, Default::default())).collect(); + MockTrieCursor::new_storage( + Arc::new(storage_tries), + Arc::new(visited_storage_keys), + hashed_address, + ) + .unwrap() + } + #[test] fn test_empty_db_and_memory() { let test_case = InMemoryTrieCursorTestCase { @@ -1104,10 +1328,56 @@ mod tests { ); } + #[test] + fn test_indexed_account_overlay_resolves_by_precedence() { + let db_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), branch_node(1)), + (Nibbles::from_nibbles([0x2]), branch_node(2)), + (Nibbles::from_nibbles([0x4]), branch_node(4)), + ]); + let db_nodes_arc = Arc::new(db_nodes); 
+ let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); + + let newest = TrieUpdatesSorted::new( + vec![ + (Nibbles::from_nibbles([0x2]), None), + (Nibbles::from_nibbles([0x3]), Some(branch_node(30))), + ], + Default::default(), + ); + let oldest = TrieUpdatesSorted::new( + vec![ + (Nibbles::from_nibbles([0x1]), Some(branch_node(10))), + (Nibbles::from_nibbles([0x2]), Some(branch_node(20))), + (Nibbles::from_nibbles([0x3]), Some(branch_node(3))), + ], + Default::default(), + ); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = InMemoryTrieCursor::new_account_from_overlay(mock_cursor, &overlay); + + let mut results = Vec::new(); + if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { + results.push(entry); + while let Some(entry) = cursor.next().unwrap() { + results.push(entry); + } + } + + assert_eq!( + results, + vec![ + (Nibbles::from_nibbles([0x1]), branch_node(10)), + (Nibbles::from_nibbles([0x3]), branch_node(30)), + (Nibbles::from_nibbles([0x4]), branch_node(4)), + ] + ); + } + #[test] fn test_storage_deletion_overlay_hides_lower_precedence_sources() { use crate::updates::StorageTrieUpdatesSorted; - use alloy_primitives::map::B256Map; let hashed_address = B256::with_last_byte(1); let mut db_storage = B256Map::default(); @@ -1183,6 +1453,90 @@ mod tests { assert_eq!(cursor.next().unwrap(), None); } + #[test] + fn test_indexed_storage_deletion_overlay_hides_lower_precedence_sources() { + let hashed_address = B256::with_last_byte(1); + let mut db_storage = B256Map::default(); + db_storage.insert( + hashed_address, + BTreeMap::from([(Nibbles::from_nibbles([0x4]), branch_node(4))]), + ); + let mock_cursor = mock_storage_cursor(hashed_address, db_storage); + + let newest = storage_trie_updates( + hashed_address, + false, + vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], + ); + let deleting = storage_trie_updates( + 
hashed_address, + true, + vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], + ); + let hidden = storage_trie_updates( + hashed_address, + false, + vec![(Nibbles::from_nibbles([0x3]), Some(branch_node(3)))], + ); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut cursor = + InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, hashed_address); + + assert_eq!( + cursor.seek(Nibbles::default()).unwrap(), + Some((Nibbles::from_nibbles([0x1]), branch_node(1))) + ); + assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x2]), branch_node(2)))); + assert_eq!(cursor.next().unwrap(), None); + } + + #[test] + fn test_indexed_storage_overlay_switches_hashed_address() { + let first_address = B256::with_last_byte(1); + let second_address = B256::with_last_byte(2); + let mut db_storage = B256Map::default(); + db_storage.insert( + first_address, + BTreeMap::from([(Nibbles::from_nibbles([0x4]), branch_node(4))]), + ); + db_storage.insert( + second_address, + BTreeMap::from([(Nibbles::from_nibbles([0x5]), branch_node(5))]), + ); + let mock_cursor = mock_storage_cursor(first_address, db_storage); + + let first_overlay = storage_trie_updates( + first_address, + false, + vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], + ); + let second_overlay = storage_trie_updates( + second_address, + false, + vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], + ); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); + let mut cursor = + InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + + assert_eq!( + cursor.seek(Nibbles::default()).unwrap(), + Some((Nibbles::from_nibbles([0x1]), branch_node(1))) + ); + assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x4]), branch_node(4)))); + + cursor.set_hashed_address(second_address); + + assert_eq!( + cursor.seek(Nibbles::default()).unwrap(), + 
Some((Nibbles::from_nibbles([0x2]), branch_node(2))) + ); + assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x5]), branch_node(5)))); + assert_eq!(cursor.next().unwrap(), None); + } + mod proptest_tests { use super::*; use proptest::prelude::*; @@ -1254,9 +1608,7 @@ mod tests { entries: &[(Nibbles, BranchNodeCompact)], position: &mut Option, ) -> Option<(Nibbles, BranchNodeCompact)> { - let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { - return None; - }; + let next_idx = position.and_then(|idx| idx.checked_add(1))?; if next_idx < entries.len() { *position = Some(next_idx); From 7bb9d50bf79d5db3b1f14c952840aa18ed0ae33f Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 May 2026 17:41:48 +0200 Subject: [PATCH 04/40] refactor(trie): share storage overlay indexing --- .../trie/trie/src/hashed_cursor/post_state.rs | 972 +++++++++--------- crates/trie/trie/src/lib.rs | 2 + crates/trie/trie/src/storage_overlay_index.rs | 95 ++ crates/trie/trie/src/trie_cursor/in_memory.rs | 615 ++++++----- 4 files changed, 853 insertions(+), 831 deletions(-) create mode 100644 crates/trie/trie/src/storage_overlay_index.rs diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 4fc6c58e86f..125544f3914 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,5 +1,8 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use alloy_primitives::{map::B256Map, B256, U256}; +use crate::storage_overlay_index::{ + StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, +}; +use alloy_primitives::{B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; @@ -91,174 +94,6 @@ impl HashedPostStateCursorValue for U256 { } } -/// Hashed post-state overlays ordered from highest to lowest precedence. 
-#[derive(Clone, Debug, Default)] -pub struct HashedPostStateOverlay { - states: Vec>, - storage_index: Arc>, -} - -impl HashedPostStateOverlay { - /// Create a new indexed hashed post-state overlay stack. - pub fn new(states: Vec>) -> Self { - let storage_index = Arc::new(build_hashed_storage_index(&states)); - Self { states, storage_index } - } - - /// Returns `true` if there are no hashed post-state overlays. - pub const fn is_empty(&self) -> bool { - self.states.is_empty() - } - - /// Returns the number of hashed post-state overlays. - pub const fn len(&self) -> usize { - self.states.len() - } - - /// Returns an iterator over hashed post-state overlays. - pub fn iter(&self) -> impl Iterator> { - self.states.iter() - } - - /// Push a hashed post-state overlay at the end of the precedence stack. - pub fn push(&mut self, state: Arc) { - self.states.push(state); - self.rebuild_storage_index(); - } - - /// Insert a hashed post-state overlay at `index`. - pub fn insert(&mut self, index: usize, state: Arc) { - self.states.insert(index, state); - self.rebuild_storage_index(); - } - - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let Some(index) = self.storage_index.get(&hashed_address) else { - return (PostStateOverlayCursor::default(), false); - }; - - ( - PostStateOverlayCursor { - cursors: index - .indices - .iter() - .filter_map(|idx| self.states[*idx].storages.get(&hashed_address)) - .map(|storage| SeekablePostStateCursor::new(storage.storage_slots_ref())) - .collect(), - }, - index.db_wiped, - ) - } - - fn rebuild_storage_index(&mut self) { - self.storage_index = Arc::new(build_hashed_storage_index(&self.states)); - } -} - -impl From>> for HashedPostStateOverlay { - fn from(states: Vec>) -> Self { - Self::new(states) - } -} - -impl IntoIterator for HashedPostStateOverlay { - type IntoIter = std::vec::IntoIter; - type Item = Arc; - - fn into_iter(self) -> Self::IntoIter { - self.states.into_iter() - } -} - -impl 
Index for HashedPostStateOverlay { - type Output = Arc; - - fn index(&self, index: usize) -> &Self::Output { - &self.states[index] - } -} - -impl Deref for HashedPostStateOverlay { - type Target = [Arc]; - - fn deref(&self) -> &Self::Target { - &self.states - } -} - -#[derive(Clone, Debug)] -struct StorageOverlayIndex { - indices: Arc<[usize]>, - db_wiped: bool, -} - -#[derive(Default)] -struct StorageOverlayIndexBuilder { - indices: Vec, - db_wiped: bool, -} - -fn build_hashed_storage_index( - states: &[Arc], -) -> B256Map { - let mut index: B256Map = B256Map::default(); - - for (idx, state) in states.iter().enumerate() { - for (hashed_address, storage) in &state.storages { - let entry = index.entry(*hashed_address).or_default(); - if entry.db_wiped { - continue; - } - - entry.indices.push(idx); - if storage.is_wiped() { - entry.db_wiped = true; - } - } - } - - index - .into_iter() - .map(|(hashed_address, entry)| { - ( - hashed_address, - StorageOverlayIndex { indices: entry.indices.into(), db_wiped: entry.db_wiped }, - ) - }) - .collect() -} - -#[derive(Clone, Debug)] -enum HashedPostStateSource<'a> { - Refs(Vec<&'a HashedPostStateSorted>), - Indexed(&'a HashedPostStateOverlay), -} - -impl<'a> HashedPostStateSource<'a> { - fn from_refs(post_states: impl IntoIterator) -> Self { - Self::Refs(post_states.into_iter().collect()) - } - - fn account_overlay(&self) -> PostStateOverlayCursor<'a, Option> { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::account(post_states), - Self::Indexed(post_states) => PostStateOverlayCursor { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), - }, - } - } - - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'a, U256>, bool) { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::storage(post_states, hashed_address), - Self::Indexed(post_states) => post_states.storage_overlay(hashed_address), - } - 
} -} - /// A cursor to iterate over state updates and corresponding database entries. /// It will always give precedence to earlier post state overlays. #[derive(Debug)] @@ -284,453 +119,580 @@ where post_states: HashedPostStateSource<'a>, } -#[derive(Debug)] -enum DbCursorState { - Unpositioned, - Positioned((B256, V)), - Wiped, -} - -impl DbCursorState { - const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned +impl<'a, C> HashedPostStateCursor<'a, C, Option> +where + C: HashedCursor, +{ + /// Create new account cursor which combines a DB cursor and the post state. + pub fn new_account( + cursor: C, + post_states: impl IntoIterator, + ) -> Self { + let post_states = HashedPostStateSource::from_refs(post_states); + let post_state_cursor = post_states.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, } } +} - const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - const fn entry(&self) -> Option<&(B256, V)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, +impl<'a, C> HashedPostStateCursor<'a, C, Option> +where + C: HashedCursor, +{ + /// Create new account cursor from an indexed hashed post-state overlay. 
+ pub fn new_account_from_overlay(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { + let post_states = HashedPostStateSource::Indexed(post_states); + let post_state_cursor = post_states.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, } } +} - fn set_entry(&mut self, entry: Option<(B256, V)>) { - if !self.is_wiped() { - *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); +impl<'a, C> HashedPostStateCursor<'a, C, U256> +where + C: HashedStorageCursor, +{ + /// Create new storage cursor with full post state reference. + /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. + pub fn new_storage( + cursor: C, + post_states: impl IntoIterator, + hashed_address: B256, + ) -> Self { + let post_states = HashedPostStateSource::from_refs(post_states); + let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); + Self { + cursor, + db_cursor_state: DbCursorState::new(cursor_wiped), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, } } -} - -#[derive(Clone, Debug, Default)] -struct PostStateOverlayCursor<'a, V> { - cursors: Vec>, -} -impl<'a> PostStateOverlayCursor<'a, Option> { - fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { + /// Create new storage cursor from an indexed hashed post-state overlay. 
+ pub fn new_storage_from_overlay( + cursor: C, + post_states: &'a HashedPostStateOverlay, + hashed_address: B256, + ) -> Self { + let post_states = HashedPostStateSource::Indexed(post_states); + let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); Self { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), + cursor, + db_cursor_state: DbCursorState::new(cursor_wiped), + post_state_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + post_states, } } } -impl<'a> PostStateOverlayCursor<'a, U256> { - fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { - let mut cursors = Vec::new(); - let mut db_wiped = false; - - for post_state in post_states { - if let Some(storage) = post_state.storages.get(&hashed_address) { - cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); - if storage.is_wiped() { - db_wiped = true; - break; +impl<'a, C, V> HashedPostStateCursor<'a, C, V> +where + C: HashedCursor, + V: HashedPostStateCursorValue, +{ + /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. + fn get_cursor_mut(&mut self) -> Option<&mut C> { + (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) + } + + fn set_last_key(&mut self, next_entry: &Option<(B256, V::NonZero)>) { + self.last_key = next_entry.as_ref().map(|e| e.0); + } + + /// Positions the DB cursor state using the underlying cursor. + fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + Ok(()) + } + + /// Positions the DB cursor at the first entry after `key`. 
+ fn cursor_first_after(&mut self, key: B256) -> Result<(), DatabaseError> { + self.cursor_seek(key)?; + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { + self.cursor_next()?; + } + Ok(()) + } + + /// Advances the DB cursor state to the subsequent entry using the underlying cursor. + fn cursor_next(&mut self) -> Result<(), DatabaseError> { + #[cfg(debug_assertions)] + { + debug_assert!(self.seeked); + } + + let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + + Ok(()) + } + + /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. + fn choose_next_entry(&mut self) -> Result, DatabaseError> { + loop { + let mem_key = self.post_state_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); + }; + + if let Some(mem_value) = self.post_state_cursor.highest_priority_value_at(&next_key) { + if let Some(value) = mem_value { + return Ok(Some((next_key, value))) + } + + self.post_state_cursor.advance_key(&next_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + self.cursor_next()?; } + continue; } - } - (Self { cursors }, db_wiped) + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + return Ok(self.db_cursor_state.entry().copied()) + } + } } } -impl<'a, V> PostStateOverlayCursor<'a, V> +impl HashedCursor for HashedPostStateCursor<'_, C, V> where + C: HashedCursor, V: HashedPostStateCursorValue, { - fn seek_from(&mut self, start: usize, key: &B256) { - for cursor in self.cursors.iter_mut().skip(start) { - cursor.seek(key); + type Value = V::NonZero; + + /// Seek the next entry for a given hashed key. + /// + /// If the post state contains the exact match for the key, return it. 
+ /// Otherwise, retrieve the next entries that are greater than or equal to the key from the + /// database and the post state. The two entries are compared and the lowest is returned. + /// + /// The returned account key is memoized and the cursor remains positioned at that key until + /// [`HashedCursor::seek`] or [`HashedCursor::next`] are called. + fn seek(&mut self, key: B256) -> Result, DatabaseError> { + #[cfg(debug_assertions)] + { + self.seeked = true; } - } - fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { - for (idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((cursor_key, value)) = cursor.seek(key) && - cursor_key == key - { - return Some((idx, value.into_option())) + self.deferred_overlay_seek_start = None; + match self.post_state_cursor.seek_until_exact(&key) { + Some((idx, Some(value))) => { + let entry = Some((key, value)); + self.deferred_overlay_seek_start = Some(idx + 1); + self.set_last_key(&entry); + return Ok(entry) } + Some((idx, None)) => { + self.post_state_cursor.seek_from(idx + 1, &key); + } + None => {} } - None + + self.cursor_seek(key)?; + + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); + Ok(entry) } - fn first_after(&mut self, key: &B256) { - for cursor in &mut self.cursors { - cursor.first_after(key); + /// Retrieve the next entry from the cursor. + /// + /// If the cursor is positioned at the entry, return the entry with next greater key. + /// Returns [None] if the previous memoized or the next greater entries are missing. + /// + /// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been called. + fn next(&mut self) -> Result, DatabaseError> { + #[cfg(debug_assertions)] + { + debug_assert!(self.seeked, "Cursor must be seek'd before next is called"); } - } - fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); + // A `last_key` of `None` indicates that the cursor is exhausted. 
+ let Some(last_key) = self.last_key else { + return Ok(None); + }; + + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.post_state_cursor.seek_from(start, &last_key); } - } + self.post_state_cursor.first_after(&last_key); - fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() - } + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { + self.cursor_next()?; + } else { + self.cursor_first_after(last_key)?; + } - fn highest_priority_value_at(&self, key: &B256) -> Option> { - self.cursors.iter().find_map(|cursor| { - let (cursor_key, value) = cursor.current()?; - (cursor_key == key).then(|| value.into_option()) - }) + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); + Ok(entry) } - fn advance_key(&mut self, key: &B256) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { - cursor.first_after(key); - } + fn reset(&mut self) { + self.cursor.reset(); + self.post_state_cursor.reset(); + + self.db_cursor_state.set_entry(None); + self.deferred_overlay_seek_start = None; + self.last_key = None; + #[cfg(debug_assertions)] + { + self.seeked = false; } } +} - fn has_visible_value(&self) -> bool { - let mut cursor = self.clone(); - cursor.reset(); - while let Some(key) = cursor.min_current_key() { - if cursor.highest_priority_value_at(&key).flatten().is_some() { - return true - } - cursor.advance_key(&key); +/// The cursor to iterate over post state hashed values and corresponding database entries. +/// It will always give precedence to the data from the post state. +impl HashedStorageCursor for HashedPostStateCursor<'_, C, U256> +where + C: HashedStorageCursor, +{ + /// Returns `true` if the account has no storage entries. + /// + /// This function should be called before attempting to call [`HashedCursor::seek`] or + /// [`HashedCursor::next`]. 
+ fn is_storage_empty(&mut self) -> Result { + // Storage is not empty if it has non-zero slots. + if self.post_state_cursor.has_visible_value() { + return Ok(false); } - false + + // If no non-zero slots in post state, check the database. + // Returns true if cursor is wiped. + self.get_cursor_mut().map_or(Ok(true), |c| c.is_storage_empty()) + } + + fn set_hashed_address(&mut self, hashed_address: B256) { + self.reset(); + self.cursor.set_hashed_address(hashed_address); + let (post_state_cursor, cursor_wiped) = self.post_states.storage_overlay(hashed_address); + self.post_state_cursor = post_state_cursor; + self.db_cursor_state = DbCursorState::new(cursor_wiped); } } -#[derive(Clone, Debug)] -struct SeekablePostStateCursor<'a, V> { - entries: &'a [(B256, V)], - idx: usize, +/// Hashed post-state overlays ordered from highest to lowest precedence. +#[derive(Clone, Debug, Default)] +pub struct HashedPostStateOverlay { + states: Vec>, + storage_index: Arc, } -impl<'a, V> SeekablePostStateCursor<'a, V> { - const fn new(entries: &'a [(B256, V)]) -> Self { - Self { entries, idx: 0 } +impl HashedPostStateOverlay { + /// Create a new indexed hashed post-state overlay stack. + pub fn new(states: Vec>) -> Self { + let storage_index = Arc::new(StorageOverlayIndexEntry::new(&states)); + Self { states, storage_index } } - fn current(&self) -> Option<&'a (B256, V)> { - self.entries.get(self.idx) + /// Returns `true` if there are no hashed post-state overlays. + pub const fn is_empty(&self) -> bool { + self.states.is_empty() } - const fn reset(&mut self) { - self.idx = 0; + /// Returns the number of hashed post-state overlays. + pub const fn len(&self) -> usize { + self.states.len() } - fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { - self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); - self.current() + /// Returns an iterator over hashed post-state overlays. 
+ pub fn iter(&self) -> impl Iterator> { + self.states.iter() } - fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { - if self.current().is_some_and(|(entry_key, _)| entry_key > key) { - return self.current() + /// Push a hashed post-state overlay at the end of the precedence stack. + pub fn push(&mut self, state: Arc) { + Arc::make_mut(&mut self.storage_index).append(self.states.len(), state.as_ref()); + self.states.push(state); + } + + /// Insert a hashed post-state overlay at `index`. + pub fn insert(&mut self, index: usize, state: Arc) { + if index == 0 { + Arc::make_mut(&mut self.storage_index).prepend(state.as_ref()); + self.states.insert(index, state); + } else { + self.states.insert(index, state); + self.storage_index = Arc::new(StorageOverlayIndexEntry::new(&self.states)); } + } - let remaining = &self.entries[self.idx..]; - self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); - self.current() + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { + let Some(index) = self.storage_index.get(&hashed_address) else { + return (PostStateOverlayCursor::default(), false); + }; + + ( + PostStateOverlayCursor { + cursors: index + .indices + .iter() + .filter_map(|idx| self.states[*idx].storages.get(&hashed_address)) + .map(|storage| SeekablePostStateCursor::new(storage.storage_slots_ref())) + .collect(), + }, + index.db_wiped, + ) } } -impl<'a, C> HashedPostStateCursor<'a, C, Option> -where - C: HashedCursor, -{ - /// Create new account cursor which combines a DB cursor and the post state. 
- pub fn new_account( - cursor: C, - post_states: impl IntoIterator, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let post_state_cursor = post_states.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } +impl From>> for HashedPostStateOverlay { + fn from(states: Vec>) -> Self { + Self::new(states) } } -impl<'a, C> HashedPostStateCursor<'a, C, Option> -where - C: HashedCursor, -{ - /// Create new account cursor from an indexed hashed post-state overlay. - pub fn new_account_from_overlay(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); - let post_state_cursor = post_states.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } +impl IntoIterator for HashedPostStateOverlay { + type IntoIter = std::vec::IntoIter; + type Item = Arc; + + fn into_iter(self) -> Self::IntoIter { + self.states.into_iter() } } -impl<'a, C> HashedPostStateCursor<'a, C, U256> -where - C: HashedStorageCursor, -{ - /// Create new storage cursor with full post state reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
- pub fn new_storage( - cursor: C, - post_states: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(cursor_wiped), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, +impl Index for HashedPostStateOverlay { + type Output = Arc; + + fn index(&self, index: usize) -> &Self::Output { + &self.states[index] + } +} + +impl Deref for HashedPostStateOverlay { + type Target = [Arc]; + + fn deref(&self) -> &Self::Target { + &self.states + } +} + +#[derive(Clone, Debug)] +enum HashedPostStateSource<'a> { + Refs(Vec<&'a HashedPostStateSorted>), + Indexed(&'a HashedPostStateOverlay), +} + +impl<'a> HashedPostStateSource<'a> { + fn from_refs(post_states: impl IntoIterator) -> Self { + Self::Refs(post_states.into_iter().collect()) + } + + fn account_overlay(&self) -> PostStateOverlayCursor<'a, Option> { + match self { + Self::Refs(post_states) => PostStateOverlayCursor::account(post_states), + Self::Indexed(post_states) => PostStateOverlayCursor { + cursors: post_states + .iter() + .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) + .collect(), + }, } } - /// Create new storage cursor from an indexed hashed post-state overlay. 
- pub fn new_storage_from_overlay( - cursor: C, - post_states: &'a HashedPostStateOverlay, - hashed_address: B256, - ) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); - let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(cursor_wiped), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'a, U256>, bool) { + match self { + Self::Refs(post_states) => PostStateOverlayCursor::storage(post_states, hashed_address), + Self::Indexed(post_states) => post_states.storage_overlay(hashed_address), } } } -impl<'a, C, V> HashedPostStateCursor<'a, C, V> -where - C: HashedCursor, - V: HashedPostStateCursorValue, -{ - /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. - fn get_cursor_mut(&mut self) -> Option<&mut C> { - (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) - } +#[derive(Debug)] +enum DbCursorState { + Unpositioned, + Positioned((B256, V)), + Wiped, +} - fn set_last_key(&mut self, next_entry: &Option<(B256, V::NonZero)>) { - self.last_key = next_entry.as_ref().map(|e| e.0); +impl DbCursorState { + const fn new(cursor_wiped: bool) -> Self { + if cursor_wiped { + Self::Wiped + } else { + Self::Unpositioned + } } - /// Positions the DB cursor state using the underlying cursor. - fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); - Ok(()) + const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) } - /// Positions the DB cursor at the first entry after `key`. 
- fn cursor_first_after(&mut self, key: B256) -> Result<(), DatabaseError> { - self.cursor_seek(key)?; - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { - self.cursor_next()?; + const fn entry(&self) -> Option<&(B256, V)> { + match self { + Self::Positioned(entry) => Some(entry), + Self::Unpositioned | Self::Wiped => None, } - Ok(()) } - /// Advances the DB cursor state to the subsequent entry using the underlying cursor. - fn cursor_next(&mut self) -> Result<(), DatabaseError> { - #[cfg(debug_assertions)] - { - debug_assert!(self.seeked); + fn set_entry(&mut self, entry: Option<(B256, V)>) { + if !self.is_wiped() { + *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); } + } +} - let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); +#[derive(Clone, Debug, Default)] +struct PostStateOverlayCursor<'a, V> { + cursors: Vec>, +} - Ok(()) +impl<'a> PostStateOverlayCursor<'a, Option> { + fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { + Self { + cursors: post_states + .iter() + .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) + .collect(), + } } +} - /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
- fn choose_next_entry(&mut self) -> Result, DatabaseError> { - loop { - let mem_key = self.post_state_cursor.min_current_key(); - let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); - let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { - return Ok(None); - }; - - if let Some(mem_value) = self.post_state_cursor.highest_priority_value_at(&next_key) { - if let Some(value) = mem_value { - return Ok(Some((next_key, value))) - } +impl<'a> PostStateOverlayCursor<'a, U256> { + fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { + let mut cursors = Vec::new(); + let mut db_wiped = false; - self.post_state_cursor.advance_key(&next_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { - self.cursor_next()?; + for post_state in post_states { + if let Some(storage) = post_state.storages.get(&hashed_address) { + cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); + if storage.is_wiped() { + db_wiped = true; + break; } - continue; - } - - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { - return Ok(self.db_cursor_state.entry().copied()) } } + + (Self { cursors }, db_wiped) } } -impl HashedCursor for HashedPostStateCursor<'_, C, V> +impl<'a, V> PostStateOverlayCursor<'a, V> where - C: HashedCursor, V: HashedPostStateCursorValue, { - type Value = V::NonZero; - - /// Seek the next entry for a given hashed key. - /// - /// If the post state contains the exact match for the key, return it. - /// Otherwise, retrieve the next entries that are greater than or equal to the key from the - /// database and the post state. The two entries are compared and the lowest is returned. - /// - /// The returned account key is memoized and the cursor remains positioned at that key until - /// [`HashedCursor::seek`] or [`HashedCursor::next`] are called. 
- fn seek(&mut self, key: B256) -> Result, DatabaseError> { - #[cfg(debug_assertions)] - { - self.seeked = true; + fn seek_from(&mut self, start: usize, key: &B256) { + for cursor in self.cursors.iter_mut().skip(start) { + cursor.seek(key); } + } - self.deferred_overlay_seek_start = None; - match self.post_state_cursor.seek_until_exact(&key) { - Some((idx, Some(value))) => { - let entry = Some((key, value)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry) - } - Some((idx, None)) => { - self.post_state_cursor.seek_from(idx + 1, &key); + fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { + for (idx, cursor) in self.cursors.iter_mut().enumerate() { + if let Some((cursor_key, value)) = cursor.seek(key) && + cursor_key == key + { + return Some((idx, value.into_option())) } - None => {} } - - self.cursor_seek(key)?; - - let entry = self.choose_next_entry()?; - self.set_last_key(&entry); - Ok(entry) + None } - /// Retrieve the next entry from the cursor. - /// - /// If the cursor is positioned at the entry, return the entry with next greater key. - /// Returns [None] if the previous memoized or the next greater entries are missing. - /// - /// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been called. - fn next(&mut self) -> Result, DatabaseError> { - #[cfg(debug_assertions)] - { - debug_assert!(self.seeked, "Cursor must be seek'd before next is called"); + fn first_after(&mut self, key: &B256) { + for cursor in &mut self.cursors { + cursor.first_after(key); } + } - // A `last_key` of `None` indicates that the cursor is exhausted. 
- let Some(last_key) = self.last_key else { - return Ok(None); - }; - - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.post_state_cursor.seek_from(start, &last_key); + fn reset(&mut self) { + for cursor in &mut self.cursors { + cursor.reset(); } - self.post_state_cursor.first_after(&last_key); + } - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; - } + fn min_current_key(&self) -> Option { + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + } - let entry = self.choose_next_entry()?; - self.set_last_key(&entry); - Ok(entry) + fn highest_priority_value_at(&self, key: &B256) -> Option> { + self.cursors.iter().find_map(|cursor| { + let (cursor_key, value) = cursor.current()?; + (cursor_key == key).then(|| value.into_option()) + }) } - fn reset(&mut self) { - self.cursor.reset(); - self.post_state_cursor.reset(); + fn advance_key(&mut self, key: &B256) { + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { + cursor.first_after(key); + } + } + } - self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; - self.last_key = None; - #[cfg(debug_assertions)] - { - self.seeked = false; + fn has_visible_value(&self) -> bool { + let mut cursor = self.clone(); + cursor.reset(); + while let Some(key) = cursor.min_current_key() { + if cursor.highest_priority_value_at(&key).flatten().is_some() { + return true + } + cursor.advance_key(&key); } + false } } -/// The cursor to iterate over post state hashed values and corresponding database entries. -/// It will always give precedence to the data from the post state. -impl HashedStorageCursor for HashedPostStateCursor<'_, C, U256> -where - C: HashedStorageCursor, -{ - /// Returns `true` if the account has no storage entries. 
- /// - /// This function should be called before attempting to call [`HashedCursor::seek`] or - /// [`HashedCursor::next`]. - fn is_storage_empty(&mut self) -> Result { - // Storage is not empty if it has non-zero slots. - if self.post_state_cursor.has_visible_value() { - return Ok(false); - } +#[derive(Clone, Debug)] +struct SeekablePostStateCursor<'a, V> { + entries: &'a [(B256, V)], + idx: usize, +} - // If no non-zero slots in post state, check the database. - // Returns true if cursor is wiped. - self.get_cursor_mut().map_or(Ok(true), |c| c.is_storage_empty()) +impl<'a, V> SeekablePostStateCursor<'a, V> { + const fn new(entries: &'a [(B256, V)]) -> Self { + Self { entries, idx: 0 } } - fn set_hashed_address(&mut self, hashed_address: B256) { - self.reset(); - self.cursor.set_hashed_address(hashed_address); - let (post_state_cursor, cursor_wiped) = self.post_states.storage_overlay(hashed_address); - self.post_state_cursor = post_state_cursor; - self.db_cursor_state = DbCursorState::new(cursor_wiped); + fn current(&self) -> Option<&'a (B256, V)> { + self.entries.get(self.idx) + } + + const fn reset(&mut self) { + self.idx = 0; + } + + fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { + self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); + self.current() + } + + fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { + if self.current().is_some_and(|(entry_key, _)| entry_key > key) { + return self.current() + } + + let remaining = &self.entries[self.idx..]; + self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); + self.current() } } @@ -738,6 +700,7 @@ where mod tests { use super::*; use crate::hashed_cursor::mock::MockHashedCursor; + use alloy_primitives::map::B256Map; use parking_lot::Mutex; use std::{collections::BTreeMap, sync::Arc}; @@ -944,7 +907,8 @@ mod tests { vec![(key(0x01), Some(account(10))), (key(0x03), Some(account(30)))], Default::default(), ); - let overlay = 
HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(oldest)]); + overlay.insert(0, Arc::new(newest)); let mut cursor = HashedPostStateCursor::new_account_from_overlay(mock_cursor, &overlay); let mut results = Vec::new(); @@ -989,8 +953,8 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let overlay = - HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping)]); + overlay.push(Arc::new(hidden)); let mut cursor = HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, B256::ZERO); diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index 90c54fbce28..e506843a6f4 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -17,6 +17,8 @@ /// The implementation of forward-only in-memory cursor. pub mod forward_cursor; +mod storage_overlay_index; + /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor; diff --git a/crates/trie/trie/src/storage_overlay_index.rs b/crates/trie/trie/src/storage_overlay_index.rs new file mode 100644 index 00000000000..88e6cca89cd --- /dev/null +++ b/crates/trie/trie/src/storage_overlay_index.rs @@ -0,0 +1,95 @@ +use alloy_primitives::{map::B256Map, B256}; +use reth_trie_common::{updates::TrieUpdatesSorted, HashedPostStateSorted}; +use std::sync::Arc; + +/// Source of per-account storage overlays for [`StorageOverlayIndex`]. +pub(crate) trait StorageOverlayIndexSource { + /// Returns every hashed address touched by this overlay and whether that storage overlay wipes + /// lower-priority database or overlay contents for the address. 
+ fn storage_overlay_index_entries(&self) -> impl Iterator + '_; +} + +impl StorageOverlayIndexSource for TrieUpdatesSorted { + fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { + self.storage_tries_ref() + .iter() + .map(|(hashed_address, storage)| (*hashed_address, storage.is_deleted())) + } +} + +impl StorageOverlayIndexSource for HashedPostStateSorted { + fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { + self.storages.iter().map(|(hashed_address, storage)| (*hashed_address, storage.is_wiped())) + } +} + +/// Precomputed lookup from hashed address to the overlay layers that contain storage for it. +pub(crate) type StorageOverlayIndex = B256Map; + +/// Incremental updates for a [`StorageOverlayIndex`]. +pub(crate) trait StorageOverlayIndexMut { + /// Adds a lower-priority overlay to this storage overlay index. + fn append(&mut self, overlay_index: usize, overlay: &T); + + /// Adds a highest-priority overlay to this storage overlay index. + fn prepend(&mut self, overlay: &T); +} + +/// Index entry for one hashed address in a [`StorageOverlayIndex`]. +#[derive(Clone, Debug, Default)] +pub(crate) struct StorageOverlayIndexEntry { + /// Overlay indices that should be searched for a hashed address, ordered by precedence. + pub(crate) indices: Arc>, + /// Whether an overlay at one of [`Self::indices`] wipes lower-priority database contents. + pub(crate) db_wiped: bool, +} + +impl StorageOverlayIndexEntry { + /// Builds a storage overlay index for the full overlay stack. 
+ pub(crate) fn new(overlays: &[Arc]) -> StorageOverlayIndex { + let mut index = StorageOverlayIndex::default(); + + for (idx, overlay) in overlays.iter().enumerate() { + index.append(idx, overlay.as_ref()); + } + + index + } +} + +impl StorageOverlayIndexMut for StorageOverlayIndex { + fn append(&mut self, overlay_index: usize, overlay: &T) { + for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { + let entry = self.entry(hashed_address).or_default(); + if entry.db_wiped { + continue; + } + + Arc::make_mut(&mut entry.indices).push(overlay_index); + if wipes_db { + entry.db_wiped = true; + } + } + } + + fn prepend(&mut self, overlay: &T) { + for entry in self.values_mut() { + for idx in Arc::make_mut(&mut entry.indices) { + *idx += 1; + } + } + + for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { + let entry = self.entry(hashed_address).or_default(); + let indices = Arc::make_mut(&mut entry.indices); + + if wipes_db { + indices.clear(); + indices.push(0); + entry.db_wiped = true; + } else { + indices.insert(0, 0); + } + } + } +} diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 6f8c2761ad1..6f39b47e8b4 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,6 +1,11 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; -use crate::updates::TrieUpdatesSorted; -use alloy_primitives::{map::B256Map, B256}; +use crate::{ + storage_overlay_index::{ + StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, + }, + updates::TrieUpdatesSorted, +}; +use alloy_primitives::B256; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; use std::{ @@ -59,17 +64,293 @@ where } } +/// A cursor to iterate over trie updates and corresponding database entries. +/// It will always give precedence to earlier trie update overlays. 
+#[derive(Debug)] +pub struct InMemoryTrieCursor<'a, C> { + /// The underlying cursor. + cursor: C, + /// The current DB cursor state. + db_cursor_state: DbCursorState, + /// In-memory cursors over trie update overlays. + in_memory_cursor: OverlayCursor<'a>, + /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. + deferred_overlay_seek_start: Option, + /// The key most recently returned from the Cursor. + last_key: Option, + #[cfg(debug_assertions)] + /// Whether an initial seek was called. + seeked: bool, + /// Source of trie update overlays. + trie_updates: TrieUpdatesSource<'a>, +} + +impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { + /// Create new account trie cursor which combines a DB cursor and the trie updates. + pub fn new_account( + cursor: C, + trie_updates: impl IntoIterator, + ) -> Self { + let trie_updates = TrieUpdatesSource::from_refs(trie_updates); + let in_memory_cursor = trie_updates.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } + } + + /// Create new account trie cursor from an indexed trie updates overlay. + pub fn new_account_from_overlay(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { + let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + let in_memory_cursor = trie_updates.account_overlay(); + Self { + cursor, + db_cursor_state: DbCursorState::new(false), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } + } + + /// Create new storage trie cursor with full trie updates reference. + /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
+ pub fn new_storage( + cursor: C, + trie_updates: impl IntoIterator, + hashed_address: B256, + ) -> Self { + let trie_updates = TrieUpdatesSource::from_refs(trie_updates); + let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); + Self { + cursor, + db_cursor_state: DbCursorState::new(db_wiped), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } + } + + /// Create new storage trie cursor from an indexed trie updates overlay. + pub fn new_storage_from_overlay( + cursor: C, + trie_updates: &'a TrieUpdatesOverlay, + hashed_address: B256, + ) -> Self { + let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); + Self { + cursor, + db_cursor_state: DbCursorState::new(db_wiped), + in_memory_cursor, + deferred_overlay_seek_start: None, + last_key: None, + #[cfg(debug_assertions)] + seeked: false, + trie_updates, + } + } + + /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. + fn get_cursor_mut(&mut self) -> Option<&mut C> { + (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) + } + + fn set_last_key(&mut self, next_entry: &Option<(Nibbles, BranchNodeCompact)>) { + self.last_key = next_entry.as_ref().map(|e| e.0); + } + + /// Positions the DB cursor state using the underlying cursor. + fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + Ok(()) + } + + /// Positions the DB cursor at the first entry after `key`. 
+ fn cursor_first_after(&mut self, key: Nibbles) -> Result<(), DatabaseError> { + self.cursor_seek(key)?; + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { + self.cursor_next()?; + } + Ok(()) + } + + /// Advances the DB cursor state to the subsequent entry using the underlying cursor. + fn cursor_next(&mut self) -> Result<(), DatabaseError> { + #[cfg(debug_assertions)] + { + debug_assert!(self.seeked); + } + + let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); + self.db_cursor_state.set_entry(entry); + + Ok(()) + } + + /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. + fn choose_next_entry(&mut self) -> Result, DatabaseError> { + loop { + let mem_key = self.in_memory_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); + }; + + if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { + if let Some(node) = mem_value { + return Ok(Some((next_key, node))) + } + + self.in_memory_cursor.advance_key(&next_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + self.cursor_next()?; + } + continue; + } + + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { + return Ok(self.db_cursor_state.entry().cloned()) + } + } + } +} + +impl TrieCursor for InMemoryTrieCursor<'_, C> { + fn seek_exact( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + #[cfg(debug_assertions)] + { + self.seeked = true; + } + + self.deferred_overlay_seek_start = None; + let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { + if mem_value.is_some() { + self.deferred_overlay_seek_start = Some(idx + 1); + } + mem_value.map(|node| (key, node)) + } else { + let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); + 
self.db_cursor_state.set_entry(db_entry); + self.db_cursor_state.entry().cloned() + }; + + self.set_last_key(&entry); + Ok(entry) + } + + fn seek( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + #[cfg(debug_assertions)] + { + self.seeked = true; + } + + self.deferred_overlay_seek_start = None; + match self.in_memory_cursor.seek_until_exact(&key) { + Some((idx, Some(node))) => { + let entry = Some((key, node)); + self.deferred_overlay_seek_start = Some(idx + 1); + self.set_last_key(&entry); + return Ok(entry); + } + Some((idx, None)) => { + self.in_memory_cursor.seek_from(idx + 1, &key); + } + None => {} + } + + self.cursor_seek(key)?; + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); + Ok(entry) + } + + fn next(&mut self) -> Result, DatabaseError> { + #[cfg(debug_assertions)] + { + debug_assert!(self.seeked, "Cursor must be seek'd before next is called"); + } + + // A `last_key` of `None` indicates that the cursor is exhausted. + let Some(last_key) = self.last_key else { + return Ok(None); + }; + + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.in_memory_cursor.seek_from(start, &last_key); + } + self.in_memory_cursor.first_after(&last_key); + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { + self.cursor_next()?; + } else { + self.cursor_first_after(last_key)?; + } + + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); + Ok(entry) + } + + fn current(&mut self) -> Result, DatabaseError> { + match &self.last_key { + Some(key) => Ok(Some(*key)), + None => Ok(self.get_cursor_mut().map(|c| c.current()).transpose()?.flatten()), + } + } + + fn reset(&mut self) { + self.cursor.reset(); + self.in_memory_cursor.reset(); + + self.db_cursor_state.set_entry(None); + self.deferred_overlay_seek_start = None; + self.last_key = None; + #[cfg(debug_assertions)] + { + self.seeked = false; + } + } +} + +impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { + fn 
set_hashed_address(&mut self, hashed_address: B256) { + self.reset(); + self.cursor.set_hashed_address(hashed_address); + let (in_memory_cursor, db_wiped) = self.trie_updates.storage_overlay(hashed_address); + self.in_memory_cursor = in_memory_cursor; + self.db_cursor_state = DbCursorState::new(db_wiped); + } +} + /// Trie updates overlays ordered from highest to lowest precedence. #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { updates: Vec>, - storage_index: Arc>, + storage_index: Arc, } impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { - let storage_index = Arc::new(build_trie_storage_index(&updates)); + let storage_index = Arc::new(StorageOverlayIndexEntry::new(&updates)); Self { updates, storage_index } } @@ -90,8 +371,8 @@ impl TrieUpdatesOverlay { /// Push a trie update overlay at the end of the precedence stack. pub fn push(&mut self, update: Arc) { + Arc::make_mut(&mut self.storage_index).append(self.updates.len(), update.as_ref()); self.updates.push(update); - self.rebuild_storage_index(); } fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { @@ -111,10 +392,6 @@ impl TrieUpdatesOverlay { index.db_wiped, ) } - - fn rebuild_storage_index(&mut self) { - self.storage_index = Arc::new(build_trie_storage_index(&self.updates)); - } } impl From>> for TrieUpdatesOverlay { @@ -148,46 +425,6 @@ impl Deref for TrieUpdatesOverlay { } } -#[derive(Clone, Debug)] -struct StorageOverlayIndex { - indices: Arc<[usize]>, - db_wiped: bool, -} - -#[derive(Default)] -struct StorageOverlayIndexBuilder { - indices: Vec, - db_wiped: bool, -} - -fn build_trie_storage_index(updates: &[Arc]) -> B256Map { - let mut index: B256Map = B256Map::default(); - - for (idx, updates) in updates.iter().enumerate() { - for (hashed_address, storage) in updates.storage_tries_ref() { - let entry = index.entry(*hashed_address).or_default(); - if entry.db_wiped { - continue; - } - - 
entry.indices.push(idx); - if storage.is_deleted() { - entry.db_wiped = true; - } - } - } - - index - .into_iter() - .map(|(hashed_address, entry)| { - ( - hashed_address, - StorageOverlayIndex { indices: entry.indices.into(), db_wiped: entry.db_wiped }, - ) - }) - .collect() -} - #[derive(Clone, Debug)] enum TrieUpdatesSource<'a> { Refs(Vec<&'a TrieUpdatesSorted>), @@ -219,27 +456,6 @@ impl<'a> TrieUpdatesSource<'a> { } } -/// A cursor to iterate over trie updates and corresponding database entries. -/// It will always give precedence to earlier trie update overlays. -#[derive(Debug)] -pub struct InMemoryTrieCursor<'a, C> { - /// The underlying cursor. - cursor: C, - /// The current DB cursor state. - db_cursor_state: DbCursorState, - /// In-memory cursors over trie update overlays. - in_memory_cursor: OverlayCursor<'a>, - /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. - deferred_overlay_seek_start: Option, - /// The key most recently returned from the Cursor. - last_key: Option, - #[cfg(debug_assertions)] - /// Whether an initial seek was called. - seeked: bool, - /// Source of trie update overlays. - trie_updates: TrieUpdatesSource<'a>, -} - #[derive(Debug)] enum DbCursorState { Unpositioned, @@ -390,261 +606,6 @@ impl<'a> SeekableInMemoryCursor<'a> { } } -impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { - /// Create new account trie cursor which combines a DB cursor and the trie updates. - pub fn new_account( - cursor: C, - trie_updates: impl IntoIterator, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let in_memory_cursor = trie_updates.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Create new account trie cursor from an indexed trie updates overlay. 
- pub fn new_account_from_overlay(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); - let in_memory_cursor = trie_updates.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Create new storage trie cursor with full trie updates reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. - pub fn new_storage( - cursor: C, - trie_updates: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(db_wiped), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Create new storage trie cursor from an indexed trie updates overlay. - pub fn new_storage_from_overlay( - cursor: C, - trie_updates: &'a TrieUpdatesOverlay, - hashed_address: B256, - ) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); - let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(db_wiped), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. 
- fn get_cursor_mut(&mut self) -> Option<&mut C> { - (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) - } - - fn set_last_key(&mut self, next_entry: &Option<(Nibbles, BranchNodeCompact)>) { - self.last_key = next_entry.as_ref().map(|e| e.0); - } - - /// Positions the DB cursor state using the underlying cursor. - fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); - Ok(()) - } - - /// Positions the DB cursor at the first entry after `key`. - fn cursor_first_after(&mut self, key: Nibbles) -> Result<(), DatabaseError> { - self.cursor_seek(key)?; - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &key) { - self.cursor_next()?; - } - Ok(()) - } - - /// Advances the DB cursor state to the subsequent entry using the underlying cursor. - fn cursor_next(&mut self) -> Result<(), DatabaseError> { - #[cfg(debug_assertions)] - { - debug_assert!(self.seeked); - } - - let entry = self.get_cursor_mut().map(|c| c.next()).transpose()?.flatten(); - self.db_cursor_state.set_entry(entry); - - Ok(()) - } - - /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
- fn choose_next_entry(&mut self) -> Result, DatabaseError> { - loop { - let mem_key = self.in_memory_cursor.min_current_key(); - let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); - let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { - return Ok(None); - }; - - if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { - if let Some(node) = mem_value { - return Ok(Some((next_key, node))) - } - - self.in_memory_cursor.advance_key(&next_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { - self.cursor_next()?; - } - continue; - } - - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { - return Ok(self.db_cursor_state.entry().cloned()) - } - } - } -} - -impl TrieCursor for InMemoryTrieCursor<'_, C> { - fn seek_exact( - &mut self, - key: Nibbles, - ) -> Result, DatabaseError> { - #[cfg(debug_assertions)] - { - self.seeked = true; - } - - self.deferred_overlay_seek_start = None; - let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { - if mem_value.is_some() { - self.deferred_overlay_seek_start = Some(idx + 1); - } - mem_value.map(|node| (key, node)) - } else { - let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); - self.db_cursor_state.set_entry(db_entry); - self.db_cursor_state.entry().cloned() - }; - - self.set_last_key(&entry); - Ok(entry) - } - - fn seek( - &mut self, - key: Nibbles, - ) -> Result, DatabaseError> { - #[cfg(debug_assertions)] - { - self.seeked = true; - } - - self.deferred_overlay_seek_start = None; - match self.in_memory_cursor.seek_until_exact(&key) { - Some((idx, Some(node))) => { - let entry = Some((key, node)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry); - } - Some((idx, None)) => { - self.in_memory_cursor.seek_from(idx + 1, &key); - } - None => {} - } - - self.cursor_seek(key)?; - let entry = 
self.choose_next_entry()?; - self.set_last_key(&entry); - Ok(entry) - } - - fn next(&mut self) -> Result, DatabaseError> { - #[cfg(debug_assertions)] - { - debug_assert!(self.seeked, "Cursor must be seek'd before next is called"); - } - - // A `last_key` of `None` indicates that the cursor is exhausted. - let Some(last_key) = self.last_key else { - return Ok(None); - }; - - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.in_memory_cursor.seek_from(start, &last_key); - } - self.in_memory_cursor.first_after(&last_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; - } - - let entry = self.choose_next_entry()?; - self.set_last_key(&entry); - Ok(entry) - } - - fn current(&mut self) -> Result, DatabaseError> { - match &self.last_key { - Some(key) => Ok(Some(*key)), - None => Ok(self.get_cursor_mut().map(|c| c.current()).transpose()?.flatten()), - } - } - - fn reset(&mut self) { - self.cursor.reset(); - self.in_memory_cursor.reset(); - - self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; - self.last_key = None; - #[cfg(debug_assertions)] - { - self.seeked = false; - } - } -} - -impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { - fn set_hashed_address(&mut self, hashed_address: B256) { - self.reset(); - self.cursor.set_hashed_address(hashed_address); - let (in_memory_cursor, db_wiped) = self.trie_updates.storage_overlay(hashed_address); - self.in_memory_cursor = in_memory_cursor; - self.db_cursor_state = DbCursorState::new(db_wiped); - } -} - #[cfg(test)] mod tests { use super::*; @@ -1478,8 +1439,8 @@ mod tests { false, vec![(Nibbles::from_nibbles([0x3]), Some(branch_node(3)))], ); - let overlay = - TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting)]); + overlay.push(Arc::new(hidden)); 
let mut cursor = InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, hashed_address); From 825b42629a2b411263738c062edc773eb09ed72d Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 May 2026 14:51:09 +0200 Subject: [PATCH 05/40] perf(trie): reuse overlay cursor positions --- crates/chain-state/src/state_trie_overlay.rs | 125 ++-- .../src/providers/state/historical.rs | 15 +- .../provider/src/providers/state/latest.rs | 14 +- .../provider/src/providers/state/overlay.rs | 30 +- crates/trie/db/src/changesets.rs | 9 +- crates/trie/db/src/proof.rs | 39 +- crates/trie/db/src/state.rs | 32 +- crates/trie/db/src/storage.rs | 10 +- crates/trie/db/tests/fuzz_in_memory_nodes.rs | 11 +- crates/trie/db/tests/post_state.rs | 107 ++- crates/trie/sparse/src/parallel.rs | 6 +- crates/trie/trie/src/forward_cursor.rs | 187 ------ .../trie/trie/src/hashed_cursor/post_state.rs | 632 ++++++++---------- crates/trie/trie/src/lib.rs | 5 +- crates/trie/trie/src/node_iter.rs | 7 +- crates/trie/trie/src/overlay_cursor.rs | 223 ++++++ crates/trie/trie/src/storage_overlay_index.rs | 95 --- crates/trie/trie/src/test_utils.rs | 6 +- crates/trie/trie/src/trie_cursor/in_memory.rs | 575 ++++++---------- 19 files changed, 941 insertions(+), 1187 deletions(-) delete mode 100644 crates/trie/trie/src/forward_cursor.rs create mode 100644 crates/trie/trie/src/overlay_cursor.rs delete mode 100644 crates/trie/trie/src/storage_overlay_index.rs diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 348ecc4156f..a552e3068fb 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -1,8 +1,8 @@ -//! Flattened state trie overlays for in-memory blocks. +//! State trie overlay stacks for in-memory blocks. //! //! Payload validation needs a view of the state trie as of an in-memory parent block even when that //! parent has not been persisted yet. 
[`StateTrieOverlayManager`] tracks those in-memory blocks and -//! builds reusable flattened state trie overlays on demand. +//! builds reusable state trie overlays on demand. use crate::{EthPrimitives, ExecutedBlock}; use alloy_primitives::B256; @@ -25,9 +25,9 @@ use std::time::Instant; use std::{fmt, sync::Arc}; use tracing::debug; -/// Manages flattened state trie overlays for in-memory blocks. +/// Manages state trie overlays for in-memory blocks. /// -/// The manager owns the in-memory block graph and a cache of flattened state trie overlays keyed by +/// The manager owns the in-memory block graph and a cache of state trie overlays keyed by /// `(anchor_hash, tip_hash)`. Cache entries can also mark in-flight background computations. #[derive(Clone)] pub struct StateTrieOverlayManager { @@ -308,8 +308,8 @@ impl StateTrieOverlayManager { } if let Some((_, cached_overlay)) = cached_prefix { - trie_updates.extend(cached_overlay.trie_updates.iter().cloned()); - hashed_post_state.extend(cached_overlay.hashed_post_state.iter().cloned()); + trie_updates.extend(cached_overlay.trie_update_layers.iter().cloned()); + hashed_post_state.extend(cached_overlay.hashed_post_state_layers.iter().cloned()); } StateTrieOverlay::new(trie_updates, hashed_post_state) @@ -474,6 +474,8 @@ pub struct StateTrieOverlay { pub trie_updates: TrieUpdatesOverlay, /// Hashed post state overlays. 
pub hashed_post_state: HashedPostStateOverlay, + trie_update_layers: Vec>, + hashed_post_state_layers: Vec>, } impl StateTrieOverlay { @@ -483,10 +485,50 @@ impl StateTrieOverlay { hashed_post_state: Vec>, ) -> Self { Self { - trie_updates: TrieUpdatesOverlay::new(trie_updates), - hashed_post_state: HashedPostStateOverlay::new(hashed_post_state), + trie_updates: TrieUpdatesOverlay::new(trie_updates.clone()), + hashed_post_state: HashedPostStateOverlay::new(hashed_post_state.clone()), + trie_update_layers: trie_updates, + hashed_post_state_layers: hashed_post_state, } } + + /// Returns `true` if this overlay has no layers. + pub const fn is_empty(&self) -> bool { + self.trie_update_layers.is_empty() && self.hashed_post_state_layers.is_empty() + } + + /// Add a trie updates layer at the end of the precedence stack. + pub fn push_trie_updates(&mut self, trie_updates: Arc) { + self.trie_update_layers.push(trie_updates); + self.trie_updates = TrieUpdatesOverlay::new(self.trie_update_layers.clone()); + } + + /// Add a hashed post-state layer at the end of the precedence stack. + pub fn push_hashed_post_state(&mut self, hashed_post_state: Arc) { + self.hashed_post_state_layers.push(hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + } + + /// Add a hashed post-state layer at the beginning of the precedence stack. + pub fn prepend_hashed_post_state(&mut self, hashed_post_state: Arc) { + self.hashed_post_state_layers.insert(0, hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + } + + /// Total number of trie update entries across all layers. + pub fn trie_updates_total_len(&self) -> usize { + self.trie_update_layers.iter().map(|updates| updates.total_len()).sum() + } + + /// Total number of hashed post-state entries across all layers. 
+ pub fn hashed_post_state_total_len(&self) -> usize { + self.hashed_post_state_layers.iter().map(|state| state.total_len()).sum() + } + + /// Consume the overlay into its original layer stacks. + pub fn into_layers(self) -> (Vec>, Vec>) { + (self.trie_update_layers, self.hashed_post_state_layers) + } } /// Error returned when a state trie overlay cannot be built from the manager's current block set. @@ -565,8 +607,7 @@ fn compute_overlay( ) -> StateTrieOverlay { let started_at = Instant::now(); let block_count = blocks.len(); - let parent_overlay_reused = - !parent_overlay.trie_updates.is_empty() || !parent_overlay.hashed_post_state.is_empty(); + let parent_overlay_reused = !parent_overlay.is_empty(); tracing::Span::current().record("block_count", block_count); tracing::Span::current().record("parent_overlay", parent_overlay_reused); @@ -593,7 +634,7 @@ fn flatten_overlay( parent_overlay: StateTrieOverlay, ) -> StateTrieOverlay { let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); - let StateTrieOverlay { trie_updates: parent_trie_updates, hashed_post_state } = parent_overlay; + let (parent_trie_updates, parent_hashed_post_state) = parent_overlay.into_layers(); #[cfg(feature = "rayon")] let (trie_updates, hashed_post_state) = rayon::join( @@ -610,7 +651,7 @@ fn flatten_overlay( trie_data .iter() .map(|data| Arc::clone(&data.hashed_state)) - .chain(hashed_post_state), + .chain(parent_hashed_post_state), ) }, ); @@ -621,7 +662,10 @@ fn flatten_overlay( trie_data.iter().map(|data| Arc::clone(&data.trie_updates)).chain(parent_trie_updates), ), HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)).chain(hashed_post_state), + trie_data + .iter() + .map(|data| Arc::clone(&data.hashed_state)) + .chain(parent_hashed_post_state), ), ); @@ -697,20 +741,18 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), 
anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 3); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); let short_anchor = blocks[1].recovered_block().hash(); - let short = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) - .unwrap() - .hashed_post_state; + let short_overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let short = &short_overlay.hashed_post_state_layers; assert_eq!(short.len(), 1); - assert_eq!(state_account_count(&short), 1); + assert_eq!(state_account_count(short), 1); manager.compute_and_cache_overlay( OverlayCacheKey { anchor_hash: short_anchor, @@ -718,10 +760,9 @@ mod tests { }, None, ); - let cached_short = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) - .unwrap() - .hashed_post_state; + let cached_short_overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let cached_short = &cached_short_overlay.hashed_post_state_layers; assert_eq!(cached_short.len(), 1); assert_eq!(cached_short[0].accounts.len(), 1); } @@ -739,14 +780,13 @@ mod tests { manager .compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }, None); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 2); assert_eq!(state[0].accounts.len(), 1); assert_eq!(state[1].accounts.len(), 2); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); } #[test] @@ -762,12 +802,11 @@ mod tests { let prefix_key = 
OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }; manager.overlays.insert(prefix_key, OverlayCacheEntry::Pending); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 3); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); assert!(matches!( manager.overlays.get(&prefix_key).as_deref(), Some(OverlayCacheEntry::Pending) @@ -840,7 +879,8 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - let state = manager.overlay_for_parent(child_hash, anchor_hash).unwrap().hashed_post_state; + let overlay = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 1); assert_eq!(state[0].accounts.len(), 2); } @@ -902,10 +942,9 @@ mod tests { .overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor) .is_err()); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; - assert_eq!(state_account_count(&state), 1); + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; + assert_eq!(state_account_count(state), 1); } } diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index ef480292584..fd2f4de85e5 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -20,9 +20,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, 
HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::{TrieUpdates, TrieUpdatesSorted}, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedPostStateSorted, HashedStorage, @@ -312,10 +312,11 @@ where let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) .with_overlay_source(Some(OverlaySource::Immediate { trie: nodes, state })); let overlay = overlay_builder.build_overlay(self.provider)?; + let (trie_updates, hashed_post_state) = overlay.into_layers(); Ok(TrieInputSorted::new( - TrieUpdatesSorted::merge_batch(overlay.trie_updates), - HashedPostStateSorted::merge_batch(overlay.hashed_post_state), + TrieUpdatesSorted::merge_batch(trie_updates), + HashedPostStateSorted::merge_batch(hashed_post_state), prefix_sets, )) } @@ -616,14 +617,16 @@ where reth_trie_db::with_adapter!(self.provider, |A| { let TrieInputSorted { nodes, state, prefix_sets } = self.build_overlay(TrieInputSorted::from_unsorted(input))?; + let nodes_overlay = TrieUpdatesOverlay::new(vec![nodes]); + let state_overlay = HashedPostStateOverlay::new(vec![state]); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - [nodes.as_ref()], + &nodes_overlay, ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - [state.as_ref()], + &state_overlay, ), ) .with_prefix_sets_mut(prefix_sets) diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 3908a41c454..9ae013e83ea 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -9,9 +9,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ - 
hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::TrieUpdates, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedStorage, KeccakKeyHasher, @@ -19,6 +19,7 @@ use reth_trie::{ TrieInputSorted, }; use reth_trie_db::{DatabaseProof, DatabaseStateRoot, DatabaseStorageProof, DatabaseStorageRoot}; +use std::sync::Arc; type DbStateRoot<'a, TX, A> = StateRoot< reth_trie_db::DatabaseTrieCursorFactory<&'a TX, A>, @@ -226,16 +227,17 @@ impl StateProofProvider mode: ExecutionWitnessMode, ) -> ProviderResult> { reth_trie_db::with_adapter!(self.0, |A| { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = + HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - [&nodes_sorted], + &nodes_overlay, ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - [&state_sorted], + &state_overlay, ), ) .with_prefix_sets_mut(input.prefix_sets) diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index af6badd667b..95422f361ee 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -177,7 +177,7 @@ impl OverlayBuilder { }; if !state.is_empty() { - overlay.hashed_post_state.insert(0, Arc::clone(state)); + overlay.prepend_hashed_post_state(Arc::clone(state)); } Ok(overlay) @@ -348,17 +348,15 @@ impl OverlayBuilder { let mut overlay = self.resolve_overlays(anchor_hash)?; if 
!trie_reverts.is_empty() { - overlay.trie_updates.push(Arc::new(trie_reverts)); + overlay.push_trie_updates(Arc::new(trie_reverts)); } if !hashed_state_reverts.is_empty() { - overlay.hashed_post_state.push(Arc::new(hashed_state_reverts)); + overlay.push_hashed_post_state(Arc::new(hashed_state_reverts)); } - trie_updates_total_len = - overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); - hashed_state_updates_total_len = - overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); + trie_updates_total_len = overlay.trie_updates_total_len(); + hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); debug!( target: "providers::state::overlay", @@ -374,10 +372,8 @@ impl OverlayBuilder { retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; - trie_updates_total_len = - overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); - hashed_state_updates_total_len = - overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); + trie_updates_total_len = overlay.trie_updates_total_len(); + hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); overlay }; @@ -567,7 +563,7 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account_from_overlay(cursor, &self.overlay.trie_updates)) + Ok(InMemoryTrieCursor::new_account(cursor, &self.overlay.trie_updates)) } fn storage_trie_cursor( @@ -586,11 +582,7 @@ where hashed_address, )) }; - Ok(InMemoryTrieCursor::new_storage_from_overlay( - cursor, - &self.overlay.trie_updates, - hashed_address, - )) + Ok(InMemoryTrieCursor::new_storage(cursor, &self.overlay.trie_updates, hashed_address)) } } @@ -619,7 +611,7 @@ where fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); let cursor = db_hashed_cursor_factory.hashed_account_cursor()?; - 
Ok(HashedPostStateCursor::new_account_from_overlay(cursor, &self.overlay.hashed_post_state)) + Ok(HashedPostStateCursor::new_account(cursor, &self.overlay.hashed_post_state)) } fn hashed_storage_cursor( @@ -628,7 +620,7 @@ where ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); let cursor = db_hashed_cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage_from_overlay( + Ok(HashedPostStateCursor::new_storage( cursor, &self.overlay.hashed_post_state, hashed_address, diff --git a/crates/trie/db/src/changesets.rs b/crates/trie/db/src/changesets.rs index e8ab5eb31b7..75cd2304212 100644 --- a/crates/trie/db/src/changesets.rs +++ b/crates/trie/db/src/changesets.rs @@ -20,7 +20,7 @@ use reth_storage_api::{ use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ changesets::compute_trie_changesets, - trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory}, + trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory, TrieUpdatesOverlay}, TrieInputSorted, }; use reth_trie_common::updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}; @@ -155,8 +155,8 @@ where // Step 5: Compute changesets using cumulative trie updates for block-1 as overlay // Create an overlay cursor factory that has the trie state from after block-1 let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()); - let overlay_factory = - InMemoryTrieCursorFactory::new(db_cursor_factory, [&cumulative_trie_updates_prev]); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(cumulative_trie_updates_prev)]); + let overlay_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); let changesets = compute_trie_changesets(&overlay_factory, &trie_updates).map_err(ProviderError::other)?; @@ -262,7 +262,8 @@ where // Step 4: Create an InMemoryTrieCursorFactory with the reverts // This gives us the trie state 
as it was after the target block was processed let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(tx); - let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, [&reverts]); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(reverts)]); + let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); // Step 5: Collect all account trie nodes that changed in the target block let account_nodes_ref = changesets.account_nodes_ref(); diff --git a/crates/trie/db/src/proof.rs b/crates/trie/db/src/proof.rs index f44dc19cc03..5e07a57aebc 100644 --- a/crates/trie/db/src/proof.rs +++ b/crates/trie/db/src/proof.rs @@ -3,12 +3,13 @@ use alloy_primitives::{keccak256, map::HashMap, Address, B256}; use reth_db_api::transaction::DbTx; use reth_execution_errors::StateProofError; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, AccountProof, HashedPostStateSorted, HashedStorage, MultiProof, MultiProofTargets, StorageMultiProof, TrieInput, }; +use std::sync::Arc; /// Extends [`Proof`] with operations specific for working with a database transaction. 
pub trait DatabaseProof<'a> { @@ -48,14 +49,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> address: Address, slots: &[B256], ) -> Result { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), - HashedPostStateCursorFactory::new( - self.hashed_cursor_factory().clone(), - [&state_sorted], - ), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), + HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), ) .with_prefix_sets_mut(input.prefix_sets) .account_proof(address, slots) @@ -66,14 +64,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> input: TrieInput, targets: MultiProofTargets, ) -> Result { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), - HashedPostStateCursorFactory::new( - self.hashed_cursor_factory().clone(), - [&state_sorted], - ), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), + HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), ) .with_prefix_sets_mut(input.prefix_sets) .multiproof(targets) @@ -129,12 +124,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); + let state_overlay = 
HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, ) .with_prefix_set_mut(prefix_set) @@ -154,12 +147,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, ) .with_prefix_set_mut(prefix_set) diff --git a/crates/trie/db/src/state.rs b/crates/trie/db/src/state.rs index 0a618b30f4e..4763cc16302 100644 --- a/crates/trie/db/src/state.rs +++ b/crates/trie/db/src/state.rs @@ -10,13 +10,15 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::ProviderError; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, trie_cursor::InMemoryTrieCursorFactory, - updates::TrieUpdates, HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, - TrieInputSorted, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + updates::TrieUpdates, + HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, TrieInputSorted, }; use std::{ collections::HashSet, ops::{Bound, RangeBounds, RangeInclusive}, + sync::Arc, }; use tracing::{debug, instrument}; @@ -208,9 +210,10 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result { let prefix_sets = post_state.construct_prefix_sets().freeze(); 
+ let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(prefix_sets) .root() @@ -221,24 +224,24 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { let prefix_sets = post_state.construct_prefix_sets().freeze(); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(prefix_sets) .root_with_updates() } fn overlay_root_from_nodes(tx: &'a TX, input: TrieInputSorted) -> Result { + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - [input.nodes.as_ref()], - ), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [input.state.as_ref()], + &nodes_overlay, ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root() @@ -248,15 +251,14 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> tx: &'a TX, input: TrieInputSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( 
InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - [input.nodes.as_ref()], - ), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [input.state.as_ref()], + &nodes_overlay, ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root_with_updates() diff --git a/crates/trie/db/src/storage.rs b/crates/trie/db/src/storage.rs index 6e3edd42eae..2247036f9e5 100644 --- a/crates/trie/db/src/storage.rs +++ b/crates/trie/db/src/storage.rs @@ -5,8 +5,10 @@ use reth_execution_errors::StorageRootError; use reth_storage_api::{BlockNumReader, StorageChangeSetReader}; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, HashedPostState, HashedStorage, StorageRoot, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + HashedPostState, HashedStorage, StorageRoot, }; +use std::sync::Arc; #[cfg(feature = "metrics")] use reth_trie::metrics::TrieRootMetrics; @@ -90,12 +92,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageRoot<'a, TX> let prefix_set = hashed_storage.construct_prefix_set().freeze(); let state_sorted = HashedPostState::from_hashed_storage(keccak256(address), hashed_storage).into_sorted(); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, prefix_set, #[cfg(feature = "metrics")] diff --git a/crates/trie/db/tests/fuzz_in_memory_nodes.rs b/crates/trie/db/tests/fuzz_in_memory_nodes.rs index dd58f5f3967..04099cb9b5f 100644 --- a/crates/trie/db/tests/fuzz_in_memory_nodes.rs +++ b/crates/trie/db/tests/fuzz_in_memory_nodes.rs @@ -12,14 +12,14 @@ use 
reth_provider::test_utils::create_test_provider_factory; use reth_storage_api::StorageSettingsCache; use reth_trie::{ test_utils::{state_root_prehashed, storage_root_prehashed}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::TrieUpdates, HashedPostState, HashedStorage, StateRoot, StorageRoot, }; use reth_trie_db::{ DatabaseHashedCursorFactory, DatabaseStateRoot, DatabaseStorageRoot, DatabaseTrieCursorFactory, }; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; type DbStateRoot<'a, TX, A> = StateRoot, DatabaseHashedCursorFactory<&'a TX>>; @@ -65,11 +65,13 @@ proptest! { } // Compute root with in-memory trie nodes overlay + let trie_overlay = + TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.clone().into_sorted())]); let (state_root, trie_updates) = DbStateRoot::<_, A>::from_tx(provider.tx_ref()) .with_prefix_sets(hashed_state.construct_prefix_sets().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - [&trie_nodes.clone().into_sorted()], + trie_overlay, )) .root_with_updates() .unwrap(); @@ -122,12 +124,13 @@ proptest! 
{ // Compute root with in-memory trie nodes overlay let mut trie_nodes = TrieUpdates::default(); trie_nodes.insert_storage_updates(hashed_address, storage_trie_nodes.clone()); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.into_sorted())]); let (storage_root, _, trie_updates) = DbStorageRoot::<_, A>::from_tx_hashed(provider.tx_ref(), hashed_address) .with_prefix_set(hashed_storage.construct_prefix_set().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - [&trie_nodes.into_sorted()], + trie_overlay, )) .root_with_updates() .unwrap(); diff --git a/crates/trie/db/tests/post_state.rs b/crates/trie/db/tests/post_state.rs index a3ee272d05a..9187b0386c9 100644 --- a/crates/trie/db/tests/post_state.rs +++ b/crates/trie/db/tests/post_state.rs @@ -8,12 +8,17 @@ use reth_db_api::{database::Database, transaction::DbTxMut}; use reth_primitives_traits::{Account, StorageEntry}; use reth_trie::{ hashed_cursor::{ - HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedStorageCursor, + HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedPostStateOverlay, + HashedStorageCursor, }, - HashedPostState, HashedStorage, + HashedPostState, HashedPostStateSorted, HashedStorage, }; use reth_trie_db::DatabaseHashedCursorFactory; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; + +fn post_state_overlay(sorted: &HashedPostStateSorted) -> HashedPostStateOverlay { + HashedPostStateOverlay::new(vec![Arc::new(sorted.clone())]) +} fn assert_account_cursor_order( factory: &impl HashedCursorFactory, @@ -66,8 +71,10 @@ fn post_state_only_accounts() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + 
post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -88,7 +95,7 @@ fn db_only_accounts() { let tx = db.tx().unwrap(); let factory = HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(&tx), - [&sorted_post_state], + post_state_overlay(&sorted_post_state), ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -114,8 +121,10 @@ fn account_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -145,8 +154,10 @@ fn removed_accounts_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); assert_account_cursor_order(&factory, expected); } @@ -173,8 +184,10 @@ fn post_state_accounts_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -206,7 +219,7 @@ fn fuzz_hashed_account_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = 
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); assert_account_cursor_order(&factory, expected.into_iter()); } ); @@ -233,8 +246,10 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -254,8 +269,10 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -271,8 +288,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -287,8 +306,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); 
assert!(cursor.is_storage_empty().unwrap()); } @@ -304,8 +325,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -321,8 +344,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -358,8 +383,10 @@ fn storage_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, db_storage.into_iter().chain(post_state_storage).collect())); assert_storage_cursor_order(&factory, expected); @@ -399,8 +426,10 @@ fn zero_value_storage_entries_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once(( address, post_state_storage.into_iter().filter(|(_, value)| *value > U256::ZERO).collect(), @@ -437,8 
+466,10 @@ fn wiped_storage_is_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, post_state_storage)); assert_storage_cursor_order(&factory, expected); } @@ -473,8 +504,10 @@ fn post_state_storages_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, storage)); assert_storage_cursor_order(&factory, expected); } @@ -521,7 +554,7 @@ fn fuzz_hashed_storage_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); assert_storage_cursor_order(&factory, expected.into_iter()); }); } @@ -568,8 +601,10 @@ fn all_storage_slots_deleted_not_wiped_exact_keys() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); diff --git a/crates/trie/sparse/src/parallel.rs b/crates/trie/sparse/src/parallel.rs index 7539a490c7b..d4ccd56e2e1 100644 --- a/crates/trie/sparse/src/parallel.rs +++ b/crates/trie/sparse/src/parallel.rs 
@@ -3426,7 +3426,7 @@ mod tests { test_utils::create_test_provider_factory, StorageSettingsCache, TrieWriter, }; use reth_trie::{ - hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor, HashedPostStateOverlay}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopAccountTrieCursor, TrieCursor, TrieCursorFactory}, walker::TrieWalker, @@ -3728,11 +3728,13 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); + let hashed_post_state = + HashedPostStateOverlay::new(vec![alloc::sync::Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - [&hashed_post_state], + &hashed_post_state, ), ); diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs deleted file mode 100644 index eafdfbb8ed5..00000000000 --- a/crates/trie/trie/src/forward_cursor.rs +++ /dev/null @@ -1,187 +0,0 @@ -/// The implementation of forward-only in memory cursor over the entries. -/// -/// The cursor operates under the assumption that the supplied collection is pre-sorted. -#[derive(Debug)] -pub struct ForwardInMemoryCursor<'a, K, V> { - /// The reference to the pre-sorted collection of entries. - entries: &'a [(K, V)], - /// Current index in the collection. - idx: usize, -} - -impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { - /// Create new forward cursor positioned at the beginning of the collection. - /// - /// The cursor expects all of the entries to have been sorted in advance. - #[inline] - pub const fn new(entries: &'a [(K, V)]) -> Self { - Self { entries, idx: 0 } - } - - /// Returns `true` if the cursor is empty, regardless of its position. - #[inline] - pub const fn is_empty(&self) -> bool { - self.entries.is_empty() - } - - /// Returns `true` if any entry satisfies the predicate. 
- #[inline] - pub fn has_any(&self, predicate: F) -> bool - where - F: Fn(&(K, V)) -> bool, - { - self.entries.iter().any(predicate) - } - - /// Returns the current entry pointed to be the cursor, or `None` if no entries are left. - #[inline] - pub fn current(&self) -> Option<&(K, V)> { - self.entries.get(self.idx) - } - - /// Resets the cursor to the beginning of the collection. - #[inline] - pub const fn reset(&mut self) { - self.idx = 0; - } - - #[inline] - fn next(&mut self) -> Option<&(K, V)> { - let entry = self.entries.get(self.idx)?; - self.idx += 1; - Some(entry) - } -} - -/// Threshold for remaining entries above which binary search is used instead of linear scan. -/// For small slices, linear scan has better cache locality and lower overhead. -const BINARY_SEARCH_THRESHOLD: usize = 64; - -impl ForwardInMemoryCursor<'_, K, V> { - /// Returns the first entry from the current cursor position that's greater or equal to the - /// provided key. This method advances the cursor forward. - pub fn seek(&mut self, key: &K) -> Option<&(K, V)> { - self.advance_while(|k| k < key) - } - - /// Returns the first entry from the current cursor position that's greater than the provided - /// key. This method advances the cursor forward. - pub fn first_after(&mut self, key: &K) -> Option<&(K, V)> { - self.advance_while(|k| k <= key) - } - - /// Advances the cursor forward while `predicate` returns `true` or until the collection is - /// exhausted. - /// - /// Uses binary search for large remaining slices (>= 64 entries), linear scan for small ones. - /// - /// Returns the first entry for which `predicate` returns `false` or `None`. The cursor will - /// point to the returned entry. 
- fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<&(K, V)> { - let remaining = self.entries.len().saturating_sub(self.idx); - if remaining >= BINARY_SEARCH_THRESHOLD { - let slice = &self.entries[self.idx..]; - let pos = slice.partition_point(|(k, _)| predicate(k)); - self.idx += pos; - } else { - while self.current().is_some_and(|(k, _)| predicate(k)) { - self.next(); - } - } - self.current() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cursor_small() { - let mut cursor = ForwardInMemoryCursor::new(&[(1, ()), (2, ()), (3, ()), (4, ()), (5, ())]); - assert_eq!(cursor.current(), Some(&(1, ()))); - - assert_eq!(cursor.seek(&0), Some(&(1, ()))); - assert_eq!(cursor.current(), Some(&(1, ()))); - - assert_eq!(cursor.seek(&3), Some(&(3, ()))); - assert_eq!(cursor.current(), Some(&(3, ()))); - - assert_eq!(cursor.seek(&3), Some(&(3, ()))); - assert_eq!(cursor.current(), Some(&(3, ()))); - - assert_eq!(cursor.seek(&4), Some(&(4, ()))); - assert_eq!(cursor.current(), Some(&(4, ()))); - - assert_eq!(cursor.seek(&6), None); - assert_eq!(cursor.current(), None); - } - - #[test] - fn test_cursor_large_binary_search() { - // Create a large enough collection to trigger binary search - let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); - let mut cursor = ForwardInMemoryCursor::new(&entries); - - // Seek to beginning - assert_eq!(cursor.seek(&0), Some(&(0, ()))); - assert_eq!(cursor.idx, 0); - - // Seek to middle (should use binary search) - assert_eq!(cursor.seek(&100), Some(&(100, ()))); - assert_eq!(cursor.idx, 50); - - // Seek to non-existent key (should find next greater) - assert_eq!(cursor.seek(&101), Some(&(102, ()))); - assert_eq!(cursor.idx, 51); - - // Seek to end - assert_eq!(cursor.seek(&398), Some(&(398, ()))); - assert_eq!(cursor.idx, 199); - - // Seek past end - assert_eq!(cursor.seek(&1000), None); - } - - #[test] - fn test_first_after_large() { - let entries: Vec<(i32, ())> = (0..200).map(|i| 
(i * 2, ())).collect(); - let mut cursor = ForwardInMemoryCursor::new(&entries); - - // first_after should find strictly greater - assert_eq!(cursor.first_after(&0), Some(&(2, ()))); - assert_eq!(cursor.idx, 1); - - // Reset and test from beginning - cursor.reset(); - assert_eq!(cursor.first_after(&99), Some(&(100, ()))); - - // first_after on exact match - cursor.reset(); - assert_eq!(cursor.first_after(&100), Some(&(102, ()))); - } - - #[test] - fn test_cursor_consistency() { - // Verify binary search and linear scan produce same results - let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 3, ())).collect(); - - for search_key in [0, 1, 3, 50, 150, 299, 300, 597, 598, 599, 1000] { - // Test with fresh cursor (binary search path) - let mut cursor1 = ForwardInMemoryCursor::new(&entries); - let result1 = cursor1.seek(&search_key); - - // Manually advance to trigger linear path by getting close first - let mut cursor2 = ForwardInMemoryCursor::new(&entries); - if search_key > 100 { - cursor2.seek(&(search_key - 50)); - } - let result2 = cursor2.seek(&search_key); - - assert_eq!( - result1, result2, - "Mismatch for key {search_key}: binary={result1:?}, linear={result2:?}" - ); - } - } -} diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 125544f3914..046160b8aba 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,16 +1,10 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::storage_overlay_index::{ - StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, -}; -use alloy_primitives::{B256, U256}; +use crate::overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}; +use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; -use std::{ - marker::PhantomData, 
- ops::{Deref, Index}, - sync::Arc, -}; +use std::{marker::PhantomData, sync::Arc}; /// The hashed cursor factory for the post state. #[derive(Clone, Debug)] @@ -30,20 +24,20 @@ impl<'overlay, CF, T> HashedPostStateCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> HashedCursorFactory for HashedPostStateCursorFactory<'overlay, CF, T> where CF: HashedCursorFactory + 'overlay, - T: AsRef<[&'overlay HashedPostStateSorted]>, + T: AsRef, { type AccountCursor<'cursor> - = HashedPostStateCursor<'overlay, CF::AccountCursor<'cursor>, Option> + = HashedPostStateCursor<'cursor, CF::AccountCursor<'cursor>, Option> where Self: 'cursor; type StorageCursor<'cursor> - = HashedPostStateCursor<'overlay, CF::StorageCursor<'cursor>, U256> + = HashedPostStateCursor<'cursor, CF::StorageCursor<'cursor>, U256> where Self: 'cursor; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref().iter().copied())) + Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref())) } fn hashed_storage_cursor( @@ -51,11 +45,7 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage( - cursor, - self.post_state.as_ref().iter().copied(), - hashed_address, - )) + Ok(HashedPostStateCursor::new_storage(cursor, self.post_state.as_ref(), hashed_address)) } } @@ -104,11 +94,9 @@ where /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over post state overlays. post_state_cursor: PostStateOverlayCursor<'a, V>, - /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. - deferred_overlay_seek_start: Option, /// The last hashed key that was returned by the cursor. /// De facto, this is a current cursor position. 
last_key: Option, @@ -116,31 +104,7 @@ where /// Tracks whether `seek` has been called. seeked: bool, /// Source of post-state overlays. - post_states: HashedPostStateSource<'a>, -} - -impl<'a, C> HashedPostStateCursor<'a, C, Option> -where - C: HashedCursor, -{ - /// Create new account cursor which combines a DB cursor and the post state. - pub fn new_account( - cursor: C, - post_states: impl IntoIterator, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let post_state_cursor = post_states.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } - } + post_states: &'a HashedPostStateOverlay, } impl<'a, C> HashedPostStateCursor<'a, C, Option> @@ -148,14 +112,12 @@ where C: HashedCursor, { /// Create new account cursor from an indexed hashed post-state overlay. - pub fn new_account_from_overlay(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); + pub fn new_account(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { let post_state_cursor = post_states.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), post_state_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -168,40 +130,17 @@ impl<'a, C> HashedPostStateCursor<'a, C, U256> where C: HashedStorageCursor, { - /// Create new storage cursor with full post state reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
- pub fn new_storage( - cursor: C, - post_states: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(cursor_wiped), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } - } - /// Create new storage cursor from an indexed hashed post-state overlay. - pub fn new_storage_from_overlay( + pub fn new_storage( cursor: C, post_states: &'a HashedPostStateOverlay, hashed_address: B256, ) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(cursor_wiped), post_state_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -226,6 +165,11 @@ where /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { + if self.db_cursor_state.is_positioned_at(&key) { + self.db_cursor_state.validate_position(); + return Ok(()) + } + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -254,9 +198,13 @@ where } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
- fn choose_next_entry(&mut self) -> Result, DatabaseError> { + fn choose_next_entry( + &mut self, + mut overlay_bound: B256, + mut overlay_bound_inclusive: bool, + ) -> Result, DatabaseError> { loop { - let mem_key = self.post_state_cursor.min_current_key(); + let mem_key = self.post_state_cursor.next_key(&overlay_bound, overlay_bound_inclusive); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); @@ -267,7 +215,8 @@ where return Ok(Some((next_key, value))) } - self.post_state_cursor.advance_key(&next_key); + overlay_bound = next_key; + overlay_bound_inclusive = false; if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -302,23 +251,16 @@ where self.seeked = true; } - self.deferred_overlay_seek_start = None; - match self.post_state_cursor.seek_until_exact(&key) { - Some((idx, Some(value))) => { - let entry = Some((key, value)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry) - } - Some((idx, None)) => { - self.post_state_cursor.seek_from(idx + 1, &key); - } - None => {} + if let Some(Some(value)) = self.post_state_cursor.seek_exact(&key) { + self.db_cursor_state.invalidate_position(); + let entry = Some((key, value)); + self.set_last_key(&entry); + return Ok(entry) } self.cursor_seek(key)?; - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(key, true)?; self.set_last_key(&entry); Ok(entry) } @@ -340,28 +282,22 @@ where return Ok(None); }; - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.post_state_cursor.seek_from(start, &last_key); + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { + Some(db_key) if db_key == last_key => self.cursor_next()?, + Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + _ => self.cursor_first_after(last_key)?, } - 
self.post_state_cursor.first_after(&last_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; - } - - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(last_key, false)?; self.set_last_key(&entry); Ok(entry) } fn reset(&mut self) { self.cursor.reset(); - self.post_state_cursor.reset(); self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; + self.post_state_cursor.reset(); self.last_key = None; #[cfg(debug_assertions)] { @@ -394,8 +330,9 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (post_state_cursor, cursor_wiped) = self.post_states.storage_overlay(hashed_address); - self.post_state_cursor = post_state_cursor; + let (layers, cursor_wiped, has_visible_value) = + self.post_states.storage_overlay_layers(hashed_address); + self.post_state_cursor.retarget(layers, has_visible_value); self.db_cursor_state = DbCursorState::new(cursor_wiped); } } @@ -403,196 +340,120 @@ where /// Hashed post-state overlays ordered from highest to lowest precedence. #[derive(Clone, Debug, Default)] pub struct HashedPostStateOverlay { - states: Vec>, - storage_index: Arc, + account_overlay: Arc>>>, + storage_overlays: Arc>, } impl HashedPostStateOverlay { /// Create a new indexed hashed post-state overlay stack. pub fn new(states: Vec>) -> Self { - let storage_index = Arc::new(StorageOverlayIndexEntry::new(&states)); - Self { states, storage_index } + let account_overlay = Self::build_account_overlay(&states); + let storage_overlays = Self::build_storage_overlays(&states); + Self { account_overlay, storage_overlays } } - /// Returns `true` if there are no hashed post-state overlays. - pub const fn is_empty(&self) -> bool { - self.states.is_empty() + /// Returns `true` if the overlay does not contain any hashed post-state updates. 
+ pub fn is_empty(&self) -> bool { + self.account_overlay.is_empty() && self.storage_overlays.is_empty() } - /// Returns the number of hashed post-state overlays. - pub const fn len(&self) -> usize { - self.states.len() + fn build_account_overlay( + states: &[Arc], + ) -> Arc>>> { + Arc::new( + states + .iter() + .filter(|state| !state.accounts.is_empty()) + .map(|state| { + PostStateOverlayLayer::new(Arc::clone(state), state.accounts.as_slice()) + }) + .collect(), + ) } - /// Returns an iterator over hashed post-state overlays. - pub fn iter(&self) -> impl Iterator> { - self.states.iter() - } + fn build_storage_overlays( + states: &[Arc], + ) -> Arc> { + let mut overlays: B256Map = B256Map::default(); - /// Push a hashed post-state overlay at the end of the precedence stack. - pub fn push(&mut self, state: Arc) { - Arc::make_mut(&mut self.storage_index).append(self.states.len(), state.as_ref()); - self.states.push(state); - } + for state in states { + for (hashed_address, storage) in &state.storages { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } - /// Insert a hashed post-state overlay at `index`. 
- pub fn insert(&mut self, index: usize, state: Arc) { - if index == 0 { - Arc::make_mut(&mut self.storage_index).prepend(state.as_ref()); - self.states.insert(index, state); - } else { - self.states.insert(index, state); - self.storage_index = Arc::new(StorageOverlayIndexEntry::new(&self.states)); - } - } + if !storage.storage_slots_ref().is_empty() { + overlay.layers.push(PostStateOverlayLayer::new( + Arc::clone(state), + storage.storage_slots_ref(), + )); + } - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let Some(index) = self.storage_index.get(&hashed_address) else { - return (PostStateOverlayCursor::default(), false); - }; + if storage.is_wiped() { + overlay.db_wiped = true; + } + } + } - ( - PostStateOverlayCursor { - cursors: index - .indices - .iter() - .filter_map(|idx| self.states[*idx].storages.get(&hashed_address)) - .map(|storage| SeekablePostStateCursor::new(storage.storage_slots_ref())) - .collect(), - }, - index.db_wiped, - ) - } -} + for overlay in overlays.values_mut() { + overlay.has_visible_value = has_visible_storage_value(&overlay.layers); + } -impl From>> for HashedPostStateOverlay { - fn from(states: Vec>) -> Self { - Self::new(states) + Arc::new(overlays) } -} -impl IntoIterator for HashedPostStateOverlay { - type IntoIter = std::vec::IntoIter; - type Item = Arc; - - fn into_iter(self) -> Self::IntoIter { - self.states.into_iter() + fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { + PostStateOverlayCursor::new(self.account_overlay.as_slice(), false) } -} -impl Index for HashedPostStateOverlay { - type Output = Arc; - - fn index(&self, index: usize) -> &Self::Output { - &self.states[index] + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { + let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); + (PostStateOverlayCursor::new(layers, has_visible_value), db_wiped) } -} -impl Deref for 
HashedPostStateOverlay { - type Target = [Arc]; + fn storage_overlay_layers( + &self, + hashed_address: B256, + ) -> (&[PostStateOverlayLayer], bool, bool) { + let Some(overlay) = self.storage_overlays.get(&hashed_address) else { + return (&[], false, false); + }; - fn deref(&self) -> &Self::Target { - &self.states + (overlay.layers.as_slice(), overlay.db_wiped, overlay.has_visible_value) } } -#[derive(Clone, Debug)] -enum HashedPostStateSource<'a> { - Refs(Vec<&'a HashedPostStateSorted>), - Indexed(&'a HashedPostStateOverlay), -} - -impl<'a> HashedPostStateSource<'a> { - fn from_refs(post_states: impl IntoIterator) -> Self { - Self::Refs(post_states.into_iter().collect()) - } - - fn account_overlay(&self) -> PostStateOverlayCursor<'a, Option> { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::account(post_states), - Self::Indexed(post_states) => PostStateOverlayCursor { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), - }, - } - } - - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'a, U256>, bool) { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::storage(post_states, hashed_address), - Self::Indexed(post_states) => post_states.storage_overlay(hashed_address), - } +impl AsRef for HashedPostStateOverlay { + fn as_ref(&self) -> &Self { + self } } #[derive(Debug)] -enum DbCursorState { - Unpositioned, - Positioned((B256, V)), - Wiped, -} - -impl DbCursorState { - const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned - } - } - - const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - const fn entry(&self) -> Option<&(B256, V)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, - } - } - - fn set_entry(&mut self, entry: Option<(B256, V)>) { - if !self.is_wiped() { - *self = 
entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); - } - } -} - -#[derive(Clone, Debug, Default)] struct PostStateOverlayCursor<'a, V> { - cursors: Vec>, + cursor: PositionedOverlayCursor<'a, HashedPostStateSorted, B256, V>, + has_visible_value: bool, } -impl<'a> PostStateOverlayCursor<'a, Option> { - fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { - Self { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), - } +impl Default for PostStateOverlayCursor<'_, V> { + fn default() -> Self { + Self::new(&[], false) } } -impl<'a> PostStateOverlayCursor<'a, U256> { - fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { - let mut cursors = Vec::new(); - let mut db_wiped = false; +impl<'a, V> PostStateOverlayCursor<'a, V> { + fn new(layers: &'a [PostStateOverlayLayer], has_visible_value: bool) -> Self { + Self { cursor: PositionedOverlayCursor::new(layers), has_visible_value } + } - for post_state in post_states { - if let Some(storage) = post_state.storages.get(&hashed_address) { - cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); - if storage.is_wiped() { - db_wiped = true; - break; - } - } - } + fn reset(&mut self) { + self.cursor.reset(); + } - (Self { cursors }, db_wiped) + fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { + self.cursor.retarget(layers); + self.has_visible_value = has_visible_value; } } @@ -600,100 +461,48 @@ impl<'a, V> PostStateOverlayCursor<'a, V> where V: HashedPostStateCursorValue, { - fn seek_from(&mut self, start: usize, key: &B256) { - for cursor in self.cursors.iter_mut().skip(start) { - cursor.seek(key); - } - } - - fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { - for (idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((cursor_key, value)) = cursor.seek(key) && - cursor_key == key - { - return Some((idx, 
value.into_option())) - } - } - None + fn seek_exact(&mut self, key: &B256) -> Option> { + self.cursor.seek_exact(key).map(|value| (*value).into_option()) } - fn first_after(&mut self, key: &B256) { - for cursor in &mut self.cursors { - cursor.first_after(key); - } - } - - fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); - } - } - - fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + fn next_key(&mut self, key: &B256, inclusive: bool) -> Option { + self.cursor.next_key(key, inclusive) } fn highest_priority_value_at(&self, key: &B256) -> Option> { - self.cursors.iter().find_map(|cursor| { - let (cursor_key, value) = cursor.current()?; - (cursor_key == key).then(|| value.into_option()) - }) - } - - fn advance_key(&mut self, key: &B256) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { - cursor.first_after(key); - } - } + self.cursor.highest_priority_value_at(key).map(|value| (*value).into_option()) } - fn has_visible_value(&self) -> bool { - let mut cursor = self.clone(); - cursor.reset(); - while let Some(key) = cursor.min_current_key() { - if cursor.highest_priority_value_at(&key).flatten().is_some() { - return true - } - cursor.advance_key(&key); - } - false + const fn has_visible_value(&self) -> bool { + self.has_visible_value } } -#[derive(Clone, Debug)] -struct SeekablePostStateCursor<'a, V> { - entries: &'a [(B256, V)], - idx: usize, +#[derive(Clone, Debug, Default)] +struct HashedStorageOverlay { + layers: Vec>, + db_wiped: bool, + has_visible_value: bool, } -impl<'a, V> SeekablePostStateCursor<'a, V> { - const fn new(entries: &'a [(B256, V)]) -> Self { - Self { entries, idx: 0 } - } - - fn current(&self) -> Option<&'a (B256, V)> { - self.entries.get(self.idx) - } - - const fn reset(&mut self) { - self.idx = 0; - } - - fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { - self.idx = 
self.entries.partition_point(|(entry_key, _)| entry_key < key); - self.current() - } - - fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { - if self.current().is_some_and(|(entry_key, _)| entry_key > key) { - return self.current() +type PostStateOverlayLayer = OverlayLayer; + +fn has_visible_storage_value(layers: &[PostStateOverlayLayer]) -> bool { + for (layer_idx, layer) in layers.iter().enumerate() { + for (key, value) in layer.entries() { + if !value.is_zero() && + !layers[..layer_idx].iter().any(|higher_layer| { + higher_layer + .entries() + .binary_search_by_key(key, |(entry_key, _)| *entry_key) + .is_ok() + }) + { + return true + } } - - let remaining = &self.entries[self.idx..]; - self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); - self.current() } + false } #[cfg(test)] @@ -741,6 +550,14 @@ mod tests { HashedPostStateSorted::new(Vec::new(), storages) } + fn storage_cursor<'a>( + cursor: MockHashedCursor, + overlay: &'a HashedPostStateOverlay, + hashed_address: B256, + ) -> HashedPostStateCursor<'a, MockHashedCursor, U256> { + HashedPostStateCursor::new_storage(cursor, overlay, hashed_address) + } + #[test] fn test_seek_overlay_exact_hit_does_not_touch_db_until_next() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -752,7 +569,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x02), U256::from(42)))); @@ -774,7 +592,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = 
HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x01)).unwrap(); assert_eq!(result, Some((key(0x01), U256::from(1)))); @@ -788,6 +607,33 @@ mod tests { assert_eq!(result, Some((key(0x03), U256::from(3)))); } + #[test] + fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { + let db_nodes = vec![(key(0x03), U256::from(3)), (key(0x05), U256::from(5))]; + let post_state_nodes = vec![(key(0x02), U256::from(2))]; + + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let post_state = storage_post_state(post_state_nodes); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + + let result = cursor.seek(key(0x05)).unwrap(); + assert_eq!(result, Some((key(0x05), U256::from(5)))); + assert_eq!(visited_keys.lock().len(), 1); + + let result = cursor.seek(key(0x02)).unwrap(); + assert_eq!(result, Some((key(0x02), U256::from(2)))); + assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); + + let result = cursor.next().unwrap(); + assert_eq!(result, Some((key(0x03), U256::from(3)))); + assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); + } + #[test] fn test_seek_overlay_exact_deletion_still_seeks_db() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -799,7 +645,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], 
B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x03), U256::from(3)))); @@ -818,20 +665,15 @@ mod tests { let exact_hit = storage_post_state(vec![(key(0x05), U256::from(5))]); let lower_priority = storage_post_state(vec![(key(0x01), U256::from(10)), (key(0x07), U256::from(7))]); - let mut cursor = HashedPostStateCursor::new_storage( - mock_cursor, - [&higher_priority, &exact_hit, &lower_priority], - B256::ZERO, - ); + let overlay = HashedPostStateOverlay::new(vec![ + Arc::new(higher_priority), + Arc::new(exact_hit), + Arc::new(lower_priority), + ]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x05)).unwrap(); assert_eq!(result, Some((key(0x05), U256::from(5)))); - assert_eq!(cursor.post_state_cursor.cursors[0].idx, 1); - assert_eq!(cursor.post_state_cursor.cursors[1].idx, 0); - assert_eq!( - cursor.post_state_cursor.cursors[2].idx, 0, - "lower-priority overlay should not be sought after an exact overlay hit" - ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -847,13 +689,34 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); let post_state = storage_post_state(vec![(key(0x02), U256::from(2))]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(key(0x03)).unwrap(), Some((key(0x03), U256::from(3)))); assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); } + #[test] + fn test_seek_reuses_exact_db_position() { + let 
db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x02), account(2))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = HashedPostStateOverlay::default(); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); + + assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), account(1)))); + assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!(cursor.next().unwrap(), Some((key(0x02), account(2)))); + assert_eq!(visited_keys.lock().len(), 2); + + assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x02), account(2)))); + assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ @@ -871,8 +734,8 @@ mod tests { (key(0x02), U256::from(20)), (key(0x03), U256::from(3)), ]); - let mut cursor = - HashedPostStateCursor::new_storage(mock_cursor, [&newest, &oldest], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let mut results = Vec::new(); if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { @@ -907,9 +770,8 @@ mod tests { vec![(key(0x01), Some(account(10))), (key(0x03), Some(account(30)))], Default::default(), ); - let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(oldest)]); - overlay.insert(0, Arc::new(newest)); - let mut cursor = HashedPostStateCursor::new_account_from_overlay(mock_cursor, &overlay); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { @@ -932,11 +794,9 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), 
U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let mut cursor = HashedPostStateCursor::new_storage( - mock_cursor, - [&newest, &wiping, &hidden], - B256::ZERO, - ); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); @@ -953,10 +813,9 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping)]); - overlay.push(Arc::new(hidden)); - let mut cursor = - HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, B256::ZERO); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); @@ -982,8 +841,7 @@ mod tests { storage_post_state_for_address(second_address, vec![(key(0x02), U256::from(2))]); let overlay = HashedPostStateOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = - HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, first_address); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x04), 
U256::from(4)))); @@ -995,6 +853,37 @@ mod tests { assert_eq!(cursor.next().unwrap(), None); } + #[test] + fn test_storage_empty_respects_layer_precedence() { + let mut db_storage = B256Map::default(); + db_storage.insert(B256::ZERO, BTreeMap::new()); + let visited_keys = + Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); + let mock_cursor = + MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); + + let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); + let hidden = storage_post_state(vec![(key(0x01), U256::from(1))]); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(hidden)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); + + assert!(cursor.is_storage_empty().unwrap()); + + let mut db_storage = B256Map::default(); + db_storage.insert(B256::ZERO, BTreeMap::new()); + let visited_keys = + Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); + let mock_cursor = + MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); + + let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); + let visible = storage_post_state(vec![(key(0x02), U256::from(2))]); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(visible)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); + + assert!(!cursor.is_storage_empty().unwrap()); + } + mod proptest_tests { use super::*; use proptest::prelude::*; @@ -1135,10 +1024,13 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); let hashed_address = B256::ZERO; - let post_states = - overlays.into_iter().map(storage_post_state).collect::>(); - let mut test_cursor = - HashedPostStateCursor::new_storage(mock_cursor, post_states.iter(), hashed_address); + let post_states = overlays + .into_iter() + .map(storage_post_state) + .map(Arc::new) + .collect::>(); + 
let overlay = HashedPostStateOverlay::new(post_states); + let mut test_cursor = storage_cursor(mock_cursor, &overlay, hashed_address); // Test: seek to the beginning first let control_first = diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index e506843a6f4..a91048036c5 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -14,10 +14,7 @@ )] #![cfg_attr(docsrs, feature(doc_cfg))] -/// The implementation of forward-only in-memory cursor. -pub mod forward_cursor; - -mod storage_overlay_index; +mod overlay_cursor; /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor; diff --git a/crates/trie/trie/src/node_iter.rs b/crates/trie/trie/src/node_iter.rs index 45d26238984..facedbb4dce 100644 --- a/crates/trie/trie/src/node_iter.rs +++ b/crates/trie/trie/src/node_iter.rs @@ -310,7 +310,7 @@ mod tests { use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, noop::NoopHashedCursor, HashedCursorFactory, - HashedPostStateCursor, + HashedPostStateCursor, HashedPostStateOverlay, }, mock::{KeyVisit, KeyVisitType}, trie_cursor::{ @@ -331,7 +331,7 @@ mod tests { prefix_set::PrefixSetMut, updates::TrieUpdates, BranchNode, HashedPostState, LeafNode, RlpNode, }; - use std::collections::BTreeMap; + use std::{collections::BTreeMap, sync::Arc}; /// Calculate the branch node stored in the database by feeding the provided state to the hash /// builder and taking the trie updates. 
@@ -349,12 +349,13 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); + let hashed_post_state = HashedPostStateOverlay::new(vec![Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - [&hashed_post_state], + &hashed_post_state, ), ); diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs new file mode 100644 index 00000000000..7796e1abbbc --- /dev/null +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -0,0 +1,223 @@ +use std::{fmt, slice, sync::Arc}; + +const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; + +#[derive(Debug)] +pub(crate) enum DbCursorState { + Unpositioned, + Positioned { entry: (K, V), position_valid: bool }, + Wiped, +} + +impl DbCursorState { + pub(crate) const fn new(cursor_wiped: bool) -> Self { + if cursor_wiped { + Self::Wiped + } else { + Self::Unpositioned + } + } + + pub(crate) const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + + pub(crate) const fn entry(&self) -> Option<&(K, V)> { + match self { + Self::Positioned { entry, .. } => Some(entry), + Self::Unpositioned | Self::Wiped => None, + } + } + + pub(crate) const fn position_valid(&self) -> bool { + matches!(self, Self::Positioned { position_valid: true, .. }) + } + + pub(crate) fn set_entry(&mut self, entry: Option<(K, V)>) { + if !self.is_wiped() { + *self = entry + .map(|entry| Self::Positioned { entry, position_valid: true }) + .unwrap_or(Self::Unpositioned); + } + } + + pub(crate) const fn validate_position(&mut self) { + if let Self::Positioned { position_valid, .. } = self { + *position_valid = true; + } + } + + pub(crate) const fn invalidate_position(&mut self) { + if let Self::Positioned { position_valid, .. 
} = self { + *position_valid = false; + } + } +} + +impl DbCursorState { + pub(crate) fn is_positioned_at(&self, key: &K) -> bool { + matches!(self, Self::Positioned { entry: (db_key, _), .. } if db_key == key) + } +} + +#[derive(Debug)] +pub(crate) struct PositionedOverlayCursor<'a, O, K, V> { + layers: &'a [OverlayLayer], + positions: Vec, +} + +impl Default for PositionedOverlayCursor<'_, O, K, V> { + fn default() -> Self { + Self::new(&[]) + } +} + +impl<'a, O, K, V> PositionedOverlayCursor<'a, O, K, V> { + pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { + Self { layers, positions: vec![0; layers.len()] } + } + + pub(crate) fn reset(&mut self) { + self.positions.fill(0); + } + + pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + self.layers = layers; + self.positions.clear(); + self.positions.resize(layers.len(), 0); + } +} + +impl PositionedOverlayCursor<'_, O, K, V> +where + K: Ord, +{ + pub(crate) fn seek_exact(&mut self, key: &K) -> Option<&V> { + let Self { layers, positions } = self; + + layers.iter().enumerate().find_map(|(layer_idx, layer)| { + let entries = layer.entries(); + let idx = seek_overlay_entries( + entries, + positions.get_mut(layer_idx), + key, + OverlaySeekMode::Inclusive, + )?; + (&entries[idx].0 == key).then_some(&entries[idx].1) + }) + } + + pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { + self.layers.iter().enumerate().find_map(|(layer_idx, layer)| { + let entries = layer.entries(); + if let Some(position) = self.positions.get(layer_idx) { + entries + .get(*position) + .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) + } else { + let idx = entries.binary_search_by(|(entry_key, _)| entry_key.cmp(key)).ok()?; + Some(&entries[idx].1) + } + }) + } +} + +impl PositionedOverlayCursor<'_, O, K, V> +where + K: Copy + Ord, +{ + pub(crate) fn next_key(&mut self, key: &K, inclusive: bool) -> Option { + let mode = if inclusive { OverlaySeekMode::Inclusive } else { 
OverlaySeekMode::Exclusive }; + let Self { layers, positions } = self; + + layers + .iter() + .enumerate() + .filter_map(|(layer_idx, layer)| { + let entries = layer.entries(); + let idx = seek_overlay_entries(entries, positions.get_mut(layer_idx), key, mode)?; + Some(entries[idx].0) + }) + .min() + } +} + +#[derive(Clone, Copy)] +enum OverlaySeekMode { + Inclusive, + Exclusive, +} + +impl OverlaySeekMode { + fn skips(self, entry_key: &K, bound: &K) -> bool { + match self { + Self::Inclusive => entry_key < bound, + Self::Exclusive => entry_key <= bound, + } + } +} + +fn seek_overlay_entries( + entries: &[(K, V)], + mut position: Option<&mut usize>, + key: &K, + mode: OverlaySeekMode, +) -> Option +where + K: Ord, +{ + let mut start = + position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); + if start > 0 && !mode.skips(&entries[start - 1].0, key) { + start = 0; + } + + let remaining = &entries[start..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|(entry_key, _)| mode.skips(entry_key, key)) + } else { + let mut advance = 0; + while advance < remaining.len() && mode.skips(&remaining[advance].0, key) { + advance += 1; + } + advance + }; + + let idx = start + advance; + if let Some(position) = position.as_mut() { + **position = idx; + } + (idx < entries.len()).then_some(idx) +} + +#[derive(Clone)] +pub(crate) struct OverlayLayer { + _owner: Arc, + entries_ptr: *const (K, V), + entries_len: usize, +} + +impl OverlayLayer { + pub(crate) const fn new(owner: Arc, entries: &[(K, V)]) -> Self { + Self { _owner: owner, entries_ptr: entries.as_ptr(), entries_len: entries.len() } + } + + pub(crate) const fn entries(&self) -> &[(K, V)] { + // SAFETY: `entries_ptr` and `entries_len` are captured from a slice inside `_owner`. + // The `Arc` keeps that allocation alive, and the overlay owners are never mutated through + // this layer. 
+ unsafe { slice::from_raw_parts(self.entries_ptr, self.entries_len) } + } +} + +// SAFETY: the raw pointer only targets immutable data owned by `_owner`, and `_owner` is retained +// for at least as long as the pointer is used. +unsafe impl Send for OverlayLayer {} +// SAFETY: see the `Send` impl; shared access only exposes immutable slices. +unsafe impl Sync for OverlayLayer {} + +impl fmt::Debug for OverlayLayer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("OverlayLayer").field("entries_len", &self.entries_len).finish() + } +} diff --git a/crates/trie/trie/src/storage_overlay_index.rs b/crates/trie/trie/src/storage_overlay_index.rs deleted file mode 100644 index 88e6cca89cd..00000000000 --- a/crates/trie/trie/src/storage_overlay_index.rs +++ /dev/null @@ -1,95 +0,0 @@ -use alloy_primitives::{map::B256Map, B256}; -use reth_trie_common::{updates::TrieUpdatesSorted, HashedPostStateSorted}; -use std::sync::Arc; - -/// Source of per-account storage overlays for [`StorageOverlayIndex`]. -pub(crate) trait StorageOverlayIndexSource { - /// Returns every hashed address touched by this overlay and whether that storage overlay wipes - /// lower-priority database or overlay contents for the address. - fn storage_overlay_index_entries(&self) -> impl Iterator + '_; -} - -impl StorageOverlayIndexSource for TrieUpdatesSorted { - fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { - self.storage_tries_ref() - .iter() - .map(|(hashed_address, storage)| (*hashed_address, storage.is_deleted())) - } -} - -impl StorageOverlayIndexSource for HashedPostStateSorted { - fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { - self.storages.iter().map(|(hashed_address, storage)| (*hashed_address, storage.is_wiped())) - } -} - -/// Precomputed lookup from hashed address to the overlay layers that contain storage for it. -pub(crate) type StorageOverlayIndex = B256Map; - -/// Incremental updates for a [`StorageOverlayIndex`]. 
-pub(crate) trait StorageOverlayIndexMut { - /// Adds a lower-priority overlay to this storage overlay index. - fn append(&mut self, overlay_index: usize, overlay: &T); - - /// Adds a highest-priority overlay to this storage overlay index. - fn prepend(&mut self, overlay: &T); -} - -/// Index entry for one hashed address in a [`StorageOverlayIndex`]. -#[derive(Clone, Debug, Default)] -pub(crate) struct StorageOverlayIndexEntry { - /// Overlay indices that should be searched for a hashed address, ordered by precedence. - pub(crate) indices: Arc>, - /// Whether an overlay at one of [`Self::indices`] wipes lower-priority database contents. - pub(crate) db_wiped: bool, -} - -impl StorageOverlayIndexEntry { - /// Builds a storage overlay index for the full overlay stack. - pub(crate) fn new(overlays: &[Arc]) -> StorageOverlayIndex { - let mut index = StorageOverlayIndex::default(); - - for (idx, overlay) in overlays.iter().enumerate() { - index.append(idx, overlay.as_ref()); - } - - index - } -} - -impl StorageOverlayIndexMut for StorageOverlayIndex { - fn append(&mut self, overlay_index: usize, overlay: &T) { - for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { - let entry = self.entry(hashed_address).or_default(); - if entry.db_wiped { - continue; - } - - Arc::make_mut(&mut entry.indices).push(overlay_index); - if wipes_db { - entry.db_wiped = true; - } - } - } - - fn prepend(&mut self, overlay: &T) { - for entry in self.values_mut() { - for idx in Arc::make_mut(&mut entry.indices) { - *idx += 1; - } - } - - for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { - let entry = self.entry(hashed_address).or_default(); - let indices = Arc::make_mut(&mut entry.indices); - - if wipes_db { - indices.clear(); - indices.push(0); - entry.db_wiped = true; - } else { - indices.insert(0, 0); - } - } - } -} diff --git a/crates/trie/trie/src/test_utils.rs b/crates/trie/trie/src/test_utils.rs index 8d3f2f6659f..966d90ac3d2 100644 --- 
a/crates/trie/trie/src/test_utils.rs +++ b/crates/trie/trie/src/test_utils.rs @@ -55,6 +55,7 @@ pub fn storage_root_prehashed>(storage: I) use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, HashedCursorFactory, HashedPostStateCursorFactory, + HashedPostStateOverlay, }, proof_v2::StorageProofCalculator, trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, @@ -65,7 +66,7 @@ use reth_trie_common::{ prefix_set::PrefixSetMut, updates::StorageTrieUpdates, BranchNodeCompact, HashedPostStateSorted, HashedStorage, Nibbles, ProofTrieNodeV2, ProofV2Target, }; -use std::{collections::BTreeMap, iter::once}; +use std::{collections::BTreeMap, iter::once, sync::Arc}; /// General-purpose test harness for storage trie tests. /// @@ -125,8 +126,9 @@ impl TrieTestHarness { Vec::new(), once((self.hashed_address(), hashed_storage.into_sorted())).collect(), ); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(overlay)]); let overlay_cursor_factory = - HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), [&overlay]); + HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), &overlay); let (root, _, updates) = StorageRoot::new_hashed( self.trie_cursor_factory.clone(), diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 6f39b47e8b4..40fd74b1a58 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,18 +1,12 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; use crate::{ - storage_overlay_index::{ - StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, - }, + overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}, updates::TrieUpdatesSorted, }; -use alloy_primitives::B256; +use alloy_primitives::{map::B256Map, B256}; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; -use std::{ - marker::PhantomData, - ops::{Deref, 
Index}, - sync::Arc, -}; +use std::{marker::PhantomData, sync::Arc}; /// The trie cursor factory for the trie updates. #[derive(Debug, Clone)] @@ -34,21 +28,21 @@ impl<'overlay, CF, T> InMemoryTrieCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> TrieCursorFactory for InMemoryTrieCursorFactory<'overlay, CF, T> where CF: TrieCursorFactory + 'overlay, - T: AsRef<[&'overlay TrieUpdatesSorted]>, + T: AsRef, { type AccountTrieCursor<'cursor> - = InMemoryTrieCursor<'overlay, CF::AccountTrieCursor<'cursor>> + = InMemoryTrieCursor<'cursor, CF::AccountTrieCursor<'cursor>> where Self: 'cursor; type StorageTrieCursor<'cursor> - = InMemoryTrieCursor<'overlay, CF::StorageTrieCursor<'cursor>> + = InMemoryTrieCursor<'cursor, CF::StorageTrieCursor<'cursor>> where Self: 'cursor; fn account_trie_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.account_trie_cursor()?; - Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref().iter().copied())) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref())) } fn storage_trie_cursor( @@ -56,11 +50,7 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.storage_trie_cursor(hashed_address)?; - Ok(InMemoryTrieCursor::new_storage( - cursor, - self.trie_updates.as_ref().iter().copied(), - hashed_address, - )) + Ok(InMemoryTrieCursor::new_storage(cursor, self.trie_updates.as_ref(), hashed_address)) } } @@ -71,70 +61,26 @@ pub struct InMemoryTrieCursor<'a, C> { /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over trie update overlays. in_memory_cursor: OverlayCursor<'a>, - /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. - deferred_overlay_seek_start: Option, /// The key most recently returned from the Cursor. last_key: Option, #[cfg(debug_assertions)] /// Whether an initial seek was called. 
seeked: bool, /// Source of trie update overlays. - trie_updates: TrieUpdatesSource<'a>, + trie_updates: &'a TrieUpdatesOverlay, } impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { - /// Create new account trie cursor which combines a DB cursor and the trie updates. - pub fn new_account( - cursor: C, - trie_updates: impl IntoIterator, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let in_memory_cursor = trie_updates.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - /// Create new account trie cursor from an indexed trie updates overlay. - pub fn new_account_from_overlay(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + pub fn new_account(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { let in_memory_cursor = trie_updates.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Create new storage trie cursor with full trie updates reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
- pub fn new_storage( - cursor: C, - trie_updates: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(db_wiped), - in_memory_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -143,18 +89,16 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } /// Create new storage trie cursor from an indexed trie updates overlay. - pub fn new_storage_from_overlay( + pub fn new_storage( cursor: C, trie_updates: &'a TrieUpdatesOverlay, hashed_address: B256, ) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(db_wiped), in_memory_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -173,6 +117,11 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { + if self.db_cursor_state.is_positioned_at(&key) { + self.db_cursor_state.validate_position(); + return Ok(()) + } + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -201,20 +150,27 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
- fn choose_next_entry(&mut self) -> Result, DatabaseError> { + fn choose_next_entry( + &mut self, + mut overlay_bound: Nibbles, + mut overlay_bound_inclusive: bool, + ) -> Result, DatabaseError> { loop { - let mem_key = self.in_memory_cursor.min_current_key(); + let mem_key = self.in_memory_cursor.next_key(&overlay_bound, overlay_bound_inclusive); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); }; - if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { + if let Some(mem_value) = + self.in_memory_cursor.highest_priority_value_at(&next_key).cloned() + { if let Some(node) = mem_value { return Ok(Some((next_key, node))) } - self.in_memory_cursor.advance_key(&next_key); + overlay_bound = next_key; + overlay_bound_inclusive = false; if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -238,11 +194,8 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - self.deferred_overlay_seek_start = None; - let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { - if mem_value.is_some() { - self.deferred_overlay_seek_start = Some(idx + 1); - } + let entry = if let Some(mem_value) = self.in_memory_cursor.seek_exact(&key).cloned() { + self.db_cursor_state.invalidate_position(); mem_value.map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); @@ -263,22 +216,15 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - self.deferred_overlay_seek_start = None; - match self.in_memory_cursor.seek_until_exact(&key) { - Some((idx, Some(node))) => { - let entry = Some((key, node)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry); - } - Some((idx, None)) => { - self.in_memory_cursor.seek_from(idx + 1, &key); - } - None => 
{} + if let Some(Some(node)) = self.in_memory_cursor.seek_exact(&key).cloned() { + self.db_cursor_state.invalidate_position(); + let entry = Some((key, node)); + self.set_last_key(&entry); + return Ok(entry); } self.cursor_seek(key)?; - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(key, true)?; self.set_last_key(&entry); Ok(entry) } @@ -294,17 +240,13 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { return Ok(None); }; - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.in_memory_cursor.seek_from(start, &last_key); - } - self.in_memory_cursor.first_after(&last_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { + Some(db_key) if db_key == last_key => self.cursor_next()?, + Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + _ => self.cursor_first_after(last_key)?, } - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(last_key, false)?; self.set_last_key(&entry); Ok(entry) } @@ -318,10 +260,9 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { fn reset(&mut self) { self.cursor.reset(); - self.in_memory_cursor.reset(); self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; + self.in_memory_cursor.reset(); self.last_key = None; #[cfg(debug_assertions)] { @@ -334,8 +275,8 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (in_memory_cursor, db_wiped) = self.trie_updates.storage_overlay(hashed_address); - self.in_memory_cursor = in_memory_cursor; + let (layers, db_wiped) = self.trie_updates.storage_overlay_layers(hashed_address); + self.in_memory_cursor.retarget(layers); self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ 
-343,268 +284,94 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { /// Trie updates overlays ordered from highest to lowest precedence. #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { - updates: Vec>, - storage_index: Arc, + account_overlay: Arc>, + storage_overlays: Arc>, } impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { - let storage_index = Arc::new(StorageOverlayIndexEntry::new(&updates)); - Self { updates, storage_index } - } - - /// Returns `true` if there are no trie update overlays. - pub const fn is_empty(&self) -> bool { - self.updates.is_empty() + let account_overlay = Self::build_account_overlay(&updates); + let storage_overlays = Self::build_storage_overlays(&updates); + Self { account_overlay, storage_overlays } } - /// Returns the number of trie update overlays. - pub const fn len(&self) -> usize { - self.updates.len() + /// Returns `true` if the overlay does not contain any trie updates. + pub fn is_empty(&self) -> bool { + self.account_overlay.is_empty() && self.storage_overlays.is_empty() } - /// Returns an iterator over trie update overlays. - pub fn iter(&self) -> impl Iterator> { - self.updates.iter() - } - - /// Push a trie update overlay at the end of the precedence stack. 
- pub fn push(&mut self, update: Arc) { - Arc::make_mut(&mut self.storage_index).append(self.updates.len(), update.as_ref()); - self.updates.push(update); - } - - fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - let Some(index) = self.storage_index.get(&hashed_address) else { - return (OverlayCursor::default(), false); - }; - - ( - OverlayCursor { - cursors: index - .indices - .iter() - .filter_map(|idx| self.updates[*idx].storage_tries_ref().get(&hashed_address)) - .map(|storage| SeekableInMemoryCursor::new(storage.storage_nodes_ref())) - .collect(), - }, - index.db_wiped, + fn build_account_overlay(updates: &[Arc]) -> Arc> { + Arc::new( + updates + .iter() + .filter(|update| !update.account_nodes_ref().is_empty()) + .map(|update| TrieOverlayLayer::new(Arc::clone(update), update.account_nodes_ref())) + .collect(), ) } -} - -impl From>> for TrieUpdatesOverlay { - fn from(updates: Vec>) -> Self { - Self::new(updates) - } -} - -impl IntoIterator for TrieUpdatesOverlay { - type IntoIter = std::vec::IntoIter; - type Item = Arc; - - fn into_iter(self) -> Self::IntoIter { - self.updates.into_iter() - } -} - -impl Index for TrieUpdatesOverlay { - type Output = Arc; - - fn index(&self, index: usize) -> &Self::Output { - &self.updates[index] - } -} - -impl Deref for TrieUpdatesOverlay { - type Target = [Arc]; - - fn deref(&self) -> &Self::Target { - &self.updates - } -} - -#[derive(Clone, Debug)] -enum TrieUpdatesSource<'a> { - Refs(Vec<&'a TrieUpdatesSorted>), - Indexed(&'a TrieUpdatesOverlay), -} -impl<'a> TrieUpdatesSource<'a> { - fn from_refs(trie_updates: impl IntoIterator) -> Self { - Self::Refs(trie_updates.into_iter().collect()) - } + fn build_storage_overlays( + updates: &[Arc], + ) -> Arc> { + let mut overlays: B256Map = B256Map::default(); - fn account_overlay(&self) -> OverlayCursor<'a> { - match self { - Self::Refs(trie_updates) => OverlayCursor::account(trie_updates), - Self::Indexed(trie_updates) => OverlayCursor { - cursors: 
trie_updates - .iter() - .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) - .collect(), - }, - } - } - - fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'a>, bool) { - match self { - Self::Refs(trie_updates) => OverlayCursor::storage(trie_updates, hashed_address), - Self::Indexed(trie_updates) => trie_updates.storage_overlay(hashed_address), - } - } -} - -#[derive(Debug)] -enum DbCursorState { - Unpositioned, - Positioned((Nibbles, BranchNodeCompact)), - Wiped, -} - -impl DbCursorState { - const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned - } - } - - const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - const fn entry(&self) -> Option<&(Nibbles, BranchNodeCompact)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, - } - } - - fn set_entry(&mut self, entry: Option<(Nibbles, BranchNodeCompact)>) { - if !self.is_wiped() { - *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); - } - } -} - -#[derive(Debug, Default)] -struct OverlayCursor<'a> { - cursors: Vec>, -} - -impl<'a> OverlayCursor<'a> { - fn account(trie_updates: &[&'a TrieUpdatesSorted]) -> Self { - Self { - cursors: trie_updates - .iter() - .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) - .collect(), - } - } + for update in updates { + for (hashed_address, storage) in update.storage_tries_ref() { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } - fn storage(trie_updates: &[&'a TrieUpdatesSorted], hashed_address: B256) -> (Self, bool) { - let mut cursors = Vec::new(); - let mut db_wiped = false; + if !storage.storage_nodes_ref().is_empty() { + overlay.layers.push(TrieOverlayLayer::new( + Arc::clone(update), + storage.storage_nodes_ref(), + )); + } - for updates in trie_updates { - if let Some(storage) = 
updates.storage_tries_ref().get(&hashed_address) { - cursors.push(SeekableInMemoryCursor::new(storage.storage_nodes_ref())); if storage.is_deleted() { - db_wiped = true; - break; + overlay.db_wiped = true; } } } - (Self { cursors }, db_wiped) + Arc::new(overlays) } - fn seek_from(&mut self, start: usize, key: &Nibbles) { - for cursor in self.cursors.iter_mut().skip(start) { - cursor.seek(key); - } - } - - fn seek_until_exact(&mut self, key: &Nibbles) -> Option<(usize, Option)> { - for (idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((cursor_key, value)) = cursor.seek(key) && - cursor_key == key - { - return Some((idx, value.clone())) - } - } - None - } - - fn first_after(&mut self, key: &Nibbles) { - for cursor in &mut self.cursors { - cursor.first_after(key); - } + fn account_overlay(&self) -> OverlayCursor<'_> { + OverlayCursor::new(self.account_overlay.as_slice()) } - fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); - } + fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { + let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); + (OverlayCursor::new(layers), db_wiped) } - fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() - } + fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { + let Some(overlay) = self.storage_overlays.get(&hashed_address) else { + return (&[], false); + }; - fn highest_priority_value_at(&self, key: &Nibbles) -> Option> { - self.cursors.iter().find_map(|cursor| { - let (cursor_key, value) = cursor.current()?; - (cursor_key == key).then(|| value.clone()) - }) + (overlay.layers.as_slice(), overlay.db_wiped) } +} - fn advance_key(&mut self, key: &Nibbles) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { - cursor.first_after(key); - } - } +impl AsRef for TrieUpdatesOverlay { + fn 
as_ref(&self) -> &Self { + self } } -#[derive(Debug)] -struct SeekableInMemoryCursor<'a> { - entries: &'a [(Nibbles, Option)], - idx: usize, +#[derive(Clone, Debug, Default)] +struct TrieStorageOverlay { + layers: Vec, + db_wiped: bool, } -impl<'a> SeekableInMemoryCursor<'a> { - const fn new(entries: &'a [(Nibbles, Option)]) -> Self { - Self { entries, idx: 0 } - } - - fn current(&self) -> Option<&'a (Nibbles, Option)> { - self.entries.get(self.idx) - } - - const fn reset(&mut self) { - self.idx = 0; - } - - fn seek(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { - self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); - self.current() - } - - fn first_after(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { - if self.current().is_some_and(|(entry_key, _)| entry_key > key) { - return self.current() - } - - let remaining = &self.entries[self.idx..]; - self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); - self.current() - } -} +type OverlayCursor<'a> = + PositionedOverlayCursor<'a, TrieUpdatesSorted, Nibbles, Option>; +type TrieOverlayLayer = OverlayLayer>; #[cfg(test)] mod tests { @@ -629,7 +396,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(test_case.in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); @@ -844,7 +612,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = 
InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek_exact(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -887,7 +656,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -954,10 +724,12 @@ mod tests { ], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account( - mock_cursor, - [&higher_priority, &exact_hit, &lower_priority], - ); + let overlay = TrieUpdatesOverlay::new(vec![ + Arc::new(higher_priority), + Arc::new(exact_hit), + Arc::new(lower_priority), + ]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); assert_eq!( @@ -967,12 +739,6 @@ mod tests { BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) )) ); - assert_eq!(cursor.in_memory_cursor.cursors[0].idx, 1); - assert_eq!(cursor.in_memory_cursor.cursors[1].idx, 0); - assert_eq!( - cursor.in_memory_cursor.cursors[2].idx, 0, - "lower-priority overlay should not be sought after an exact overlay hit" - ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -1004,7 +770,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let 
result = cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(); assert_eq!( @@ -1036,6 +803,58 @@ mod tests { ); } + #[test] + fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { + let db_nodes = vec![ + (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None)), + (Nibbles::from_nibbles([0x5]), BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None)), + ]; + + let in_memory_nodes = vec![( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None)), + )]; + + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x5]), + BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 1); + + let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x2]), + BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); + + let result = cursor.next().unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x3]), + BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); + } + #[test] fn test_multiple_consecutive_deletes() { let db_nodes: Vec<(Nibbles, BranchNodeCompact)> = (1..=10) @@ -1112,7 +931,8 @@ mod tests { let mock_cursor = 
MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); assert_eq!(cursor.current().unwrap(), None); @@ -1163,7 +983,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); // Seek to beginning should return None (all nodes are deleted) tracing::debug!("seeking to 0x"); @@ -1218,7 +1039,8 @@ mod tests { )], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); assert_eq!( cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), @@ -1234,6 +1056,35 @@ mod tests { ); } + #[test] + fn test_seek_reuses_exact_db_position() { + let db_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), branch_node(1)), + (Nibbles::from_nibbles([0x2]), branch_node(2)), + ]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = TrieUpdatesOverlay::default(); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), + Some((Nibbles::from_nibbles([0x1]), branch_node(1))) + ); + assert_eq!(visited_keys.lock().len(), 1); + + 
assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x2]), branch_node(2)))); + assert_eq!(visited_keys.lock().len(), 2); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), + Some((Nibbles::from_nibbles([0x2]), branch_node(2))) + ); + assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ @@ -1269,7 +1120,8 @@ mod tests { ], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&newest, &oldest]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { @@ -1316,7 +1168,7 @@ mod tests { Default::default(), ); let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = InMemoryTrieCursor::new_account_from_overlay(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { @@ -1397,11 +1249,9 @@ mod tests { ); let hidden = TrieUpdatesSorted::new(vec![], hidden_storage); - let mut cursor = InMemoryTrieCursor::new_storage( - mock_cursor, - [&newest, &deleting, &hidden], - hashed_address, - ); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1439,10 +1289,9 @@ mod tests { false, vec![(Nibbles::from_nibbles([0x3]), Some(branch_node(3)))], ); - let mut overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting)]); - overlay.push(Arc::new(hidden)); - let mut cursor = - 
InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, hashed_address); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1479,8 +1328,7 @@ mod tests { ); let overlay = TrieUpdatesOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = - InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, first_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1676,8 +1524,11 @@ mod tests { let trie_updates = overlays .into_iter() .map(|in_memory_nodes| TrieUpdatesSorted::new(in_memory_nodes, Default::default())) + .map(Arc::new) .collect::>(); - let mut test_cursor = InMemoryTrieCursor::new_account(mock_cursor, trie_updates.iter()); + let overlay = TrieUpdatesOverlay::new(trie_updates); + let mut test_cursor = + InMemoryTrieCursor::new_account(mock_cursor, &overlay); // Test: seek to the beginning first let control_first = From 3574b5430a438bc7331086050b58684f826078b4 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 27 May 2026 14:52:58 +0200 Subject: [PATCH 06/40] perf(trie): reuse overlay cursor positions --- crates/chain-state/src/state_trie_overlay.rs | 125 ++-- .../src/providers/state/historical.rs | 15 +- .../provider/src/providers/state/latest.rs | 14 +- .../provider/src/providers/state/overlay.rs | 30 +- crates/trie/db/src/changesets.rs | 9 +- crates/trie/db/src/proof.rs | 39 +- crates/trie/db/src/state.rs | 32 +- crates/trie/db/src/storage.rs | 10 +- crates/trie/db/tests/fuzz_in_memory_nodes.rs | 11 +- crates/trie/db/tests/post_state.rs | 107 ++- crates/trie/sparse/src/parallel.rs | 6 +- crates/trie/trie/src/forward_cursor.rs | 187 ------ .../trie/trie/src/hashed_cursor/post_state.rs | 632 
++++++++---------- crates/trie/trie/src/lib.rs | 5 +- crates/trie/trie/src/node_iter.rs | 7 +- crates/trie/trie/src/overlay_cursor.rs | 223 ++++++ crates/trie/trie/src/storage_overlay_index.rs | 95 --- crates/trie/trie/src/test_utils.rs | 6 +- crates/trie/trie/src/trie_cursor/in_memory.rs | 575 ++++++---------- 19 files changed, 941 insertions(+), 1187 deletions(-) delete mode 100644 crates/trie/trie/src/forward_cursor.rs create mode 100644 crates/trie/trie/src/overlay_cursor.rs delete mode 100644 crates/trie/trie/src/storage_overlay_index.rs diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 348ecc4156f..a552e3068fb 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -1,8 +1,8 @@ -//! Flattened state trie overlays for in-memory blocks. +//! State trie overlay stacks for in-memory blocks. //! //! Payload validation needs a view of the state trie as of an in-memory parent block even when that //! parent has not been persisted yet. [`StateTrieOverlayManager`] tracks those in-memory blocks and -//! builds reusable flattened state trie overlays on demand. +//! builds reusable state trie overlays on demand. use crate::{EthPrimitives, ExecutedBlock}; use alloy_primitives::B256; @@ -25,9 +25,9 @@ use std::time::Instant; use std::{fmt, sync::Arc}; use tracing::debug; -/// Manages flattened state trie overlays for in-memory blocks. +/// Manages state trie overlays for in-memory blocks. /// -/// The manager owns the in-memory block graph and a cache of flattened state trie overlays keyed by +/// The manager owns the in-memory block graph and a cache of state trie overlays keyed by /// `(anchor_hash, tip_hash)`. Cache entries can also mark in-flight background computations. 
#[derive(Clone)] pub struct StateTrieOverlayManager { @@ -308,8 +308,8 @@ impl StateTrieOverlayManager { } if let Some((_, cached_overlay)) = cached_prefix { - trie_updates.extend(cached_overlay.trie_updates.iter().cloned()); - hashed_post_state.extend(cached_overlay.hashed_post_state.iter().cloned()); + trie_updates.extend(cached_overlay.trie_update_layers.iter().cloned()); + hashed_post_state.extend(cached_overlay.hashed_post_state_layers.iter().cloned()); } StateTrieOverlay::new(trie_updates, hashed_post_state) @@ -474,6 +474,8 @@ pub struct StateTrieOverlay { pub trie_updates: TrieUpdatesOverlay, /// Hashed post state overlays. pub hashed_post_state: HashedPostStateOverlay, + trie_update_layers: Vec>, + hashed_post_state_layers: Vec>, } impl StateTrieOverlay { @@ -483,10 +485,50 @@ impl StateTrieOverlay { hashed_post_state: Vec>, ) -> Self { Self { - trie_updates: TrieUpdatesOverlay::new(trie_updates), - hashed_post_state: HashedPostStateOverlay::new(hashed_post_state), + trie_updates: TrieUpdatesOverlay::new(trie_updates.clone()), + hashed_post_state: HashedPostStateOverlay::new(hashed_post_state.clone()), + trie_update_layers: trie_updates, + hashed_post_state_layers: hashed_post_state, } } + + /// Returns `true` if this overlay has no layers. + pub const fn is_empty(&self) -> bool { + self.trie_update_layers.is_empty() && self.hashed_post_state_layers.is_empty() + } + + /// Add a trie updates layer at the end of the precedence stack. + pub fn push_trie_updates(&mut self, trie_updates: Arc) { + self.trie_update_layers.push(trie_updates); + self.trie_updates = TrieUpdatesOverlay::new(self.trie_update_layers.clone()); + } + + /// Add a hashed post-state layer at the end of the precedence stack. 
+ pub fn push_hashed_post_state(&mut self, hashed_post_state: Arc) { + self.hashed_post_state_layers.push(hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + } + + /// Add a hashed post-state layer at the beginning of the precedence stack. + pub fn prepend_hashed_post_state(&mut self, hashed_post_state: Arc) { + self.hashed_post_state_layers.insert(0, hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + } + + /// Total number of trie update entries across all layers. + pub fn trie_updates_total_len(&self) -> usize { + self.trie_update_layers.iter().map(|updates| updates.total_len()).sum() + } + + /// Total number of hashed post-state entries across all layers. + pub fn hashed_post_state_total_len(&self) -> usize { + self.hashed_post_state_layers.iter().map(|state| state.total_len()).sum() + } + + /// Consume the overlay into its original layer stacks. + pub fn into_layers(self) -> (Vec>, Vec>) { + (self.trie_update_layers, self.hashed_post_state_layers) + } } /// Error returned when a state trie overlay cannot be built from the manager's current block set. 
@@ -565,8 +607,7 @@ fn compute_overlay( ) -> StateTrieOverlay { let started_at = Instant::now(); let block_count = blocks.len(); - let parent_overlay_reused = - !parent_overlay.trie_updates.is_empty() || !parent_overlay.hashed_post_state.is_empty(); + let parent_overlay_reused = !parent_overlay.is_empty(); tracing::Span::current().record("block_count", block_count); tracing::Span::current().record("parent_overlay", parent_overlay_reused); @@ -593,7 +634,7 @@ fn flatten_overlay( parent_overlay: StateTrieOverlay, ) -> StateTrieOverlay { let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); - let StateTrieOverlay { trie_updates: parent_trie_updates, hashed_post_state } = parent_overlay; + let (parent_trie_updates, parent_hashed_post_state) = parent_overlay.into_layers(); #[cfg(feature = "rayon")] let (trie_updates, hashed_post_state) = rayon::join( @@ -610,7 +651,7 @@ fn flatten_overlay( trie_data .iter() .map(|data| Arc::clone(&data.hashed_state)) - .chain(hashed_post_state), + .chain(parent_hashed_post_state), ) }, ); @@ -621,7 +662,10 @@ fn flatten_overlay( trie_data.iter().map(|data| Arc::clone(&data.trie_updates)).chain(parent_trie_updates), ), HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)).chain(hashed_post_state), + trie_data + .iter() + .map(|data| Arc::clone(&data.hashed_state)) + .chain(parent_hashed_post_state), ), ); @@ -697,20 +741,18 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 3); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); let short_anchor = blocks[1].recovered_block().hash(); - let short = manager - 
.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) - .unwrap() - .hashed_post_state; + let short_overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let short = &short_overlay.hashed_post_state_layers; assert_eq!(short.len(), 1); - assert_eq!(state_account_count(&short), 1); + assert_eq!(state_account_count(short), 1); manager.compute_and_cache_overlay( OverlayCacheKey { anchor_hash: short_anchor, @@ -718,10 +760,9 @@ mod tests { }, None, ); - let cached_short = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor) - .unwrap() - .hashed_post_state; + let cached_short_overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); + let cached_short = &cached_short_overlay.hashed_post_state_layers; assert_eq!(cached_short.len(), 1); assert_eq!(cached_short[0].accounts.len(), 1); } @@ -739,14 +780,13 @@ mod tests { manager .compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }, None); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 2); assert_eq!(state[0].accounts.len(), 1); assert_eq!(state[1].accounts.len(), 2); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); } #[test] @@ -762,12 +802,11 @@ mod tests { let prefix_key = OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }; manager.overlays.insert(prefix_key, OverlayCacheEntry::Pending); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; 
assert_eq!(state.len(), 3); - assert_eq!(state_account_count(&state), 3); + assert_eq!(state_account_count(state), 3); assert!(matches!( manager.overlays.get(&prefix_key).as_deref(), Some(OverlayCacheEntry::Pending) @@ -840,7 +879,8 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - let state = manager.overlay_for_parent(child_hash, anchor_hash).unwrap().hashed_post_state; + let overlay = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 1); assert_eq!(state[0].accounts.len(), 2); } @@ -902,10 +942,9 @@ mod tests { .overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor) .is_err()); - let state = manager - .overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash) - .unwrap() - .hashed_post_state; - assert_eq!(state_account_count(&state), 1); + let overlay = + manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); + let state = &overlay.hashed_post_state_layers; + assert_eq!(state_account_count(state), 1); } } diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index ef480292584..fd2f4de85e5 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -20,9 +20,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::{TrieUpdates, TrieUpdatesSorted}, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedPostStateSorted, HashedStorage, @@ -312,10 +312,11 @@ where let overlay_builder = OverlayBuilder::::new(anchor_hash, 
self.changeset_cache.clone()) .with_overlay_source(Some(OverlaySource::Immediate { trie: nodes, state })); let overlay = overlay_builder.build_overlay(self.provider)?; + let (trie_updates, hashed_post_state) = overlay.into_layers(); Ok(TrieInputSorted::new( - TrieUpdatesSorted::merge_batch(overlay.trie_updates), - HashedPostStateSorted::merge_batch(overlay.hashed_post_state), + TrieUpdatesSorted::merge_batch(trie_updates), + HashedPostStateSorted::merge_batch(hashed_post_state), prefix_sets, )) } @@ -616,14 +617,16 @@ where reth_trie_db::with_adapter!(self.provider, |A| { let TrieInputSorted { nodes, state, prefix_sets } = self.build_overlay(TrieInputSorted::from_unsorted(input))?; + let nodes_overlay = TrieUpdatesOverlay::new(vec![nodes]); + let state_overlay = HashedPostStateOverlay::new(vec![state]); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - [nodes.as_ref()], + &nodes_overlay, ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - [state.as_ref()], + &state_overlay, ), ) .with_prefix_sets_mut(prefix_sets) diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 3908a41c454..9ae013e83ea 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -9,9 +9,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::TrieUpdates, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedStorage, KeccakKeyHasher, @@ -19,6 +19,7 @@ use reth_trie::{ 
TrieInputSorted, }; use reth_trie_db::{DatabaseProof, DatabaseStateRoot, DatabaseStorageProof, DatabaseStorageRoot}; +use std::sync::Arc; type DbStateRoot<'a, TX, A> = StateRoot< reth_trie_db::DatabaseTrieCursorFactory<&'a TX, A>, @@ -226,16 +227,17 @@ impl StateProofProvider mode: ExecutionWitnessMode, ) -> ProviderResult> { reth_trie_db::with_adapter!(self.0, |A| { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = + HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - [&nodes_sorted], + &nodes_overlay, ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - [&state_sorted], + &state_overlay, ), ) .with_prefix_sets_mut(input.prefix_sets) diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index af6badd667b..95422f361ee 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -177,7 +177,7 @@ impl OverlayBuilder { }; if !state.is_empty() { - overlay.hashed_post_state.insert(0, Arc::clone(state)); + overlay.prepend_hashed_post_state(Arc::clone(state)); } Ok(overlay) @@ -348,17 +348,15 @@ impl OverlayBuilder { let mut overlay = self.resolve_overlays(anchor_hash)?; if !trie_reverts.is_empty() { - overlay.trie_updates.push(Arc::new(trie_reverts)); + overlay.push_trie_updates(Arc::new(trie_reverts)); } if !hashed_state_reverts.is_empty() { - overlay.hashed_post_state.push(Arc::new(hashed_state_reverts)); + overlay.push_hashed_post_state(Arc::new(hashed_state_reverts)); } - trie_updates_total_len = - overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); - 
hashed_state_updates_total_len = - overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); + trie_updates_total_len = overlay.trie_updates_total_len(); + hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); debug!( target: "providers::state::overlay", @@ -374,10 +372,8 @@ impl OverlayBuilder { retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; - trie_updates_total_len = - overlay.trie_updates.iter().map(|updates| updates.total_len()).sum::(); - hashed_state_updates_total_len = - overlay.hashed_post_state.iter().map(|state| state.total_len()).sum::(); + trie_updates_total_len = overlay.trie_updates_total_len(); + hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); overlay }; @@ -567,7 +563,7 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account_from_overlay(cursor, &self.overlay.trie_updates)) + Ok(InMemoryTrieCursor::new_account(cursor, &self.overlay.trie_updates)) } fn storage_trie_cursor( @@ -586,11 +582,7 @@ where hashed_address, )) }; - Ok(InMemoryTrieCursor::new_storage_from_overlay( - cursor, - &self.overlay.trie_updates, - hashed_address, - )) + Ok(InMemoryTrieCursor::new_storage(cursor, &self.overlay.trie_updates, hashed_address)) } } @@ -619,7 +611,7 @@ where fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); let cursor = db_hashed_cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account_from_overlay(cursor, &self.overlay.hashed_post_state)) + Ok(HashedPostStateCursor::new_account(cursor, &self.overlay.hashed_post_state)) } fn hashed_storage_cursor( @@ -628,7 +620,7 @@ where ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); let cursor = db_hashed_cursor_factory.hashed_storage_cursor(hashed_address)?; - 
Ok(HashedPostStateCursor::new_storage_from_overlay( + Ok(HashedPostStateCursor::new_storage( cursor, &self.overlay.hashed_post_state, hashed_address, diff --git a/crates/trie/db/src/changesets.rs b/crates/trie/db/src/changesets.rs index e8ab5eb31b7..75cd2304212 100644 --- a/crates/trie/db/src/changesets.rs +++ b/crates/trie/db/src/changesets.rs @@ -20,7 +20,7 @@ use reth_storage_api::{ use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ changesets::compute_trie_changesets, - trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory}, + trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory, TrieUpdatesOverlay}, TrieInputSorted, }; use reth_trie_common::updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}; @@ -155,8 +155,8 @@ where // Step 5: Compute changesets using cumulative trie updates for block-1 as overlay // Create an overlay cursor factory that has the trie state from after block-1 let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()); - let overlay_factory = - InMemoryTrieCursorFactory::new(db_cursor_factory, [&cumulative_trie_updates_prev]); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(cumulative_trie_updates_prev)]); + let overlay_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); let changesets = compute_trie_changesets(&overlay_factory, &trie_updates).map_err(ProviderError::other)?; @@ -262,7 +262,8 @@ where // Step 4: Create an InMemoryTrieCursorFactory with the reverts // This gives us the trie state as it was after the target block was processed let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(tx); - let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, [&reverts]); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(reverts)]); + let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); // Step 5: Collect all account trie nodes that changed in the target 
block let account_nodes_ref = changesets.account_nodes_ref(); diff --git a/crates/trie/db/src/proof.rs b/crates/trie/db/src/proof.rs index f44dc19cc03..5e07a57aebc 100644 --- a/crates/trie/db/src/proof.rs +++ b/crates/trie/db/src/proof.rs @@ -3,12 +3,13 @@ use alloy_primitives::{keccak256, map::HashMap, Address, B256}; use reth_db_api::transaction::DbTx; use reth_execution_errors::StateProofError; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, proof::{Proof, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, AccountProof, HashedPostStateSorted, HashedStorage, MultiProof, MultiProofTargets, StorageMultiProof, TrieInput, }; +use std::sync::Arc; /// Extends [`Proof`] with operations specific for working with a database transaction. pub trait DatabaseProof<'a> { @@ -48,14 +49,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> address: Address, slots: &[B256], ) -> Result { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), - HashedPostStateCursorFactory::new( - self.hashed_cursor_factory().clone(), - [&state_sorted], - ), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), + HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), ) .with_prefix_sets_mut(input.prefix_sets) .account_proof(address, slots) @@ -66,14 +64,11 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> input: TrieInput, targets: MultiProofTargets, ) -> Result { - let nodes_sorted = input.nodes.into_sorted(); - let state_sorted = 
input.state.into_sorted(); + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), - HashedPostStateCursorFactory::new( - self.hashed_cursor_factory().clone(), - [&state_sorted], - ), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), + HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), ) .with_prefix_sets_mut(input.prefix_sets) .multiproof(targets) @@ -129,12 +124,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, ) .with_prefix_set_mut(prefix_set) @@ -154,12 +147,10 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, ) .with_prefix_set_mut(prefix_set) diff --git a/crates/trie/db/src/state.rs b/crates/trie/db/src/state.rs index 0a618b30f4e..4763cc16302 100644 --- a/crates/trie/db/src/state.rs +++ b/crates/trie/db/src/state.rs @@ -10,13 +10,15 @@ use reth_storage_api::{ }; use 
reth_storage_errors::provider::ProviderError; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, trie_cursor::InMemoryTrieCursorFactory, - updates::TrieUpdates, HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, - TrieInputSorted, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + updates::TrieUpdates, + HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, TrieInputSorted, }; use std::{ collections::HashSet, ops::{Bound, RangeBounds, RangeInclusive}, + sync::Arc, }; use tracing::{debug, instrument}; @@ -208,9 +210,10 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result { let prefix_sets = post_state.construct_prefix_sets().freeze(); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(prefix_sets) .root() @@ -221,24 +224,24 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { let prefix_sets = post_state.construct_prefix_sets().freeze(); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(prefix_sets) .root_with_updates() } fn overlay_root_from_nodes(tx: &'a TX, input: TrieInputSorted) -> Result { + let nodes_overlay = 
TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - [input.nodes.as_ref()], - ), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [input.state.as_ref()], + &nodes_overlay, ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root() @@ -248,15 +251,14 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> tx: &'a TX, input: TrieInputSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { + let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - [input.nodes.as_ref()], - ), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [input.state.as_ref()], + &nodes_overlay, ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root_with_updates() diff --git a/crates/trie/db/src/storage.rs b/crates/trie/db/src/storage.rs index 6e3edd42eae..2247036f9e5 100644 --- a/crates/trie/db/src/storage.rs +++ b/crates/trie/db/src/storage.rs @@ -5,8 +5,10 @@ use reth_execution_errors::StorageRootError; use reth_storage_api::{BlockNumReader, StorageChangeSetReader}; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, HashedPostState, HashedStorage, StorageRoot, + hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + HashedPostState, HashedStorage, StorageRoot, }; +use std::sync::Arc; #[cfg(feature = "metrics")] use reth_trie::metrics::TrieRootMetrics; @@ -90,12 +92,10 @@ impl<'a, 
TX: DbTx, A: TrieTableAdapter> DatabaseStorageRoot<'a, TX> let prefix_set = hashed_storage.construct_prefix_set().freeze(); let state_sorted = HashedPostState::from_hashed_storage(keccak256(address), hashed_storage).into_sorted(); + let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(tx), - [&state_sorted], - ), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), address, prefix_set, #[cfg(feature = "metrics")] diff --git a/crates/trie/db/tests/fuzz_in_memory_nodes.rs b/crates/trie/db/tests/fuzz_in_memory_nodes.rs index dd58f5f3967..04099cb9b5f 100644 --- a/crates/trie/db/tests/fuzz_in_memory_nodes.rs +++ b/crates/trie/db/tests/fuzz_in_memory_nodes.rs @@ -12,14 +12,14 @@ use reth_provider::test_utils::create_test_provider_factory; use reth_storage_api::StorageSettingsCache; use reth_trie::{ test_utils::{state_root_prehashed, storage_root_prehashed}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, updates::TrieUpdates, HashedPostState, HashedStorage, StateRoot, StorageRoot, }; use reth_trie_db::{ DatabaseHashedCursorFactory, DatabaseStateRoot, DatabaseStorageRoot, DatabaseTrieCursorFactory, }; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; type DbStateRoot<'a, TX, A> = StateRoot, DatabaseHashedCursorFactory<&'a TX>>; @@ -65,11 +65,13 @@ proptest! 
{ } // Compute root with in-memory trie nodes overlay + let trie_overlay = + TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.clone().into_sorted())]); let (state_root, trie_updates) = DbStateRoot::<_, A>::from_tx(provider.tx_ref()) .with_prefix_sets(hashed_state.construct_prefix_sets().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - [&trie_nodes.clone().into_sorted()], + trie_overlay, )) .root_with_updates() .unwrap(); @@ -122,12 +124,13 @@ proptest! { // Compute root with in-memory trie nodes overlay let mut trie_nodes = TrieUpdates::default(); trie_nodes.insert_storage_updates(hashed_address, storage_trie_nodes.clone()); + let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.into_sorted())]); let (storage_root, _, trie_updates) = DbStorageRoot::<_, A>::from_tx_hashed(provider.tx_ref(), hashed_address) .with_prefix_set(hashed_storage.construct_prefix_set().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - [&trie_nodes.into_sorted()], + trie_overlay, )) .root_with_updates() .unwrap(); diff --git a/crates/trie/db/tests/post_state.rs b/crates/trie/db/tests/post_state.rs index a3ee272d05a..9187b0386c9 100644 --- a/crates/trie/db/tests/post_state.rs +++ b/crates/trie/db/tests/post_state.rs @@ -8,12 +8,17 @@ use reth_db_api::{database::Database, transaction::DbTxMut}; use reth_primitives_traits::{Account, StorageEntry}; use reth_trie::{ hashed_cursor::{ - HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedStorageCursor, + HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedPostStateOverlay, + HashedStorageCursor, }, - HashedPostState, HashedStorage, + HashedPostState, HashedPostStateSorted, HashedStorage, }; use reth_trie_db::DatabaseHashedCursorFactory; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; + +fn 
post_state_overlay(sorted: &HashedPostStateSorted) -> HashedPostStateOverlay { + HashedPostStateOverlay::new(vec![Arc::new(sorted.clone())]) +} fn assert_account_cursor_order( factory: &impl HashedCursorFactory, @@ -66,8 +71,10 @@ fn post_state_only_accounts() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -88,7 +95,7 @@ fn db_only_accounts() { let tx = db.tx().unwrap(); let factory = HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(&tx), - [&sorted_post_state], + post_state_overlay(&sorted_post_state), ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -114,8 +121,10 @@ fn account_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -145,8 +154,10 @@ fn removed_accounts_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); assert_account_cursor_order(&factory, expected); } @@ -173,8 +184,10 @@ fn post_state_accounts_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - 
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -206,7 +219,7 @@ fn fuzz_hashed_account_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); assert_account_cursor_order(&factory, expected.into_iter()); } ); @@ -233,8 +246,10 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -254,8 +269,10 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -271,8 +288,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor 
= factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -287,8 +306,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -304,8 +325,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -321,8 +344,10 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -358,8 +383,10 @@ fn storage_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, db_storage.into_iter().chain(post_state_storage).collect())); assert_storage_cursor_order(&factory, expected); 
@@ -399,8 +426,10 @@ fn zero_value_storage_entries_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once(( address, post_state_storage.into_iter().filter(|(_, value)| *value > U256::ZERO).collect(), @@ -437,8 +466,10 @@ fn wiped_storage_is_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, post_state_storage)); assert_storage_cursor_order(&factory, expected); } @@ -473,8 +504,10 @@ fn post_state_storages_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let expected = std::iter::once((address, storage)); assert_storage_cursor_order(&factory, expected); } @@ -521,7 +554,7 @@ fn fuzz_hashed_storage_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); assert_storage_cursor_order(&factory, expected.into_iter()); }); } @@ -568,8 +601,10 @@ fn all_storage_slots_deleted_not_wiped_exact_keys() { let sorted = hashed_post_state.into_sorted(); let tx = 
db.tx().unwrap(); - let factory = - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); + let factory = HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(&tx), + post_state_overlay(&sorted), + ); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); diff --git a/crates/trie/sparse/src/parallel.rs b/crates/trie/sparse/src/parallel.rs index 7539a490c7b..d4ccd56e2e1 100644 --- a/crates/trie/sparse/src/parallel.rs +++ b/crates/trie/sparse/src/parallel.rs @@ -3426,7 +3426,7 @@ mod tests { test_utils::create_test_provider_factory, StorageSettingsCache, TrieWriter, }; use reth_trie::{ - hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor, HashedPostStateOverlay}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopAccountTrieCursor, TrieCursor, TrieCursorFactory}, walker::TrieWalker, @@ -3728,11 +3728,13 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); + let hashed_post_state = + HashedPostStateOverlay::new(vec![alloc::sync::Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - [&hashed_post_state], + &hashed_post_state, ), ); diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs deleted file mode 100644 index eafdfbb8ed5..00000000000 --- a/crates/trie/trie/src/forward_cursor.rs +++ /dev/null @@ -1,187 +0,0 @@ -/// The implementation of forward-only in memory cursor over the entries. -/// -/// The cursor operates under the assumption that the supplied collection is pre-sorted. -#[derive(Debug)] -pub struct ForwardInMemoryCursor<'a, K, V> { - /// The reference to the pre-sorted collection of entries. - entries: &'a [(K, V)], - /// Current index in the collection. 
- idx: usize, -} - -impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { - /// Create new forward cursor positioned at the beginning of the collection. - /// - /// The cursor expects all of the entries to have been sorted in advance. - #[inline] - pub const fn new(entries: &'a [(K, V)]) -> Self { - Self { entries, idx: 0 } - } - - /// Returns `true` if the cursor is empty, regardless of its position. - #[inline] - pub const fn is_empty(&self) -> bool { - self.entries.is_empty() - } - - /// Returns `true` if any entry satisfies the predicate. - #[inline] - pub fn has_any(&self, predicate: F) -> bool - where - F: Fn(&(K, V)) -> bool, - { - self.entries.iter().any(predicate) - } - - /// Returns the current entry pointed to be the cursor, or `None` if no entries are left. - #[inline] - pub fn current(&self) -> Option<&(K, V)> { - self.entries.get(self.idx) - } - - /// Resets the cursor to the beginning of the collection. - #[inline] - pub const fn reset(&mut self) { - self.idx = 0; - } - - #[inline] - fn next(&mut self) -> Option<&(K, V)> { - let entry = self.entries.get(self.idx)?; - self.idx += 1; - Some(entry) - } -} - -/// Threshold for remaining entries above which binary search is used instead of linear scan. -/// For small slices, linear scan has better cache locality and lower overhead. -const BINARY_SEARCH_THRESHOLD: usize = 64; - -impl ForwardInMemoryCursor<'_, K, V> { - /// Returns the first entry from the current cursor position that's greater or equal to the - /// provided key. This method advances the cursor forward. - pub fn seek(&mut self, key: &K) -> Option<&(K, V)> { - self.advance_while(|k| k < key) - } - - /// Returns the first entry from the current cursor position that's greater than the provided - /// key. This method advances the cursor forward. 
- pub fn first_after(&mut self, key: &K) -> Option<&(K, V)> { - self.advance_while(|k| k <= key) - } - - /// Advances the cursor forward while `predicate` returns `true` or until the collection is - /// exhausted. - /// - /// Uses binary search for large remaining slices (>= 64 entries), linear scan for small ones. - /// - /// Returns the first entry for which `predicate` returns `false` or `None`. The cursor will - /// point to the returned entry. - fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<&(K, V)> { - let remaining = self.entries.len().saturating_sub(self.idx); - if remaining >= BINARY_SEARCH_THRESHOLD { - let slice = &self.entries[self.idx..]; - let pos = slice.partition_point(|(k, _)| predicate(k)); - self.idx += pos; - } else { - while self.current().is_some_and(|(k, _)| predicate(k)) { - self.next(); - } - } - self.current() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cursor_small() { - let mut cursor = ForwardInMemoryCursor::new(&[(1, ()), (2, ()), (3, ()), (4, ()), (5, ())]); - assert_eq!(cursor.current(), Some(&(1, ()))); - - assert_eq!(cursor.seek(&0), Some(&(1, ()))); - assert_eq!(cursor.current(), Some(&(1, ()))); - - assert_eq!(cursor.seek(&3), Some(&(3, ()))); - assert_eq!(cursor.current(), Some(&(3, ()))); - - assert_eq!(cursor.seek(&3), Some(&(3, ()))); - assert_eq!(cursor.current(), Some(&(3, ()))); - - assert_eq!(cursor.seek(&4), Some(&(4, ()))); - assert_eq!(cursor.current(), Some(&(4, ()))); - - assert_eq!(cursor.seek(&6), None); - assert_eq!(cursor.current(), None); - } - - #[test] - fn test_cursor_large_binary_search() { - // Create a large enough collection to trigger binary search - let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); - let mut cursor = ForwardInMemoryCursor::new(&entries); - - // Seek to beginning - assert_eq!(cursor.seek(&0), Some(&(0, ()))); - assert_eq!(cursor.idx, 0); - - // Seek to middle (should use binary search) - 
assert_eq!(cursor.seek(&100), Some(&(100, ()))); - assert_eq!(cursor.idx, 50); - - // Seek to non-existent key (should find next greater) - assert_eq!(cursor.seek(&101), Some(&(102, ()))); - assert_eq!(cursor.idx, 51); - - // Seek to end - assert_eq!(cursor.seek(&398), Some(&(398, ()))); - assert_eq!(cursor.idx, 199); - - // Seek past end - assert_eq!(cursor.seek(&1000), None); - } - - #[test] - fn test_first_after_large() { - let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); - let mut cursor = ForwardInMemoryCursor::new(&entries); - - // first_after should find strictly greater - assert_eq!(cursor.first_after(&0), Some(&(2, ()))); - assert_eq!(cursor.idx, 1); - - // Reset and test from beginning - cursor.reset(); - assert_eq!(cursor.first_after(&99), Some(&(100, ()))); - - // first_after on exact match - cursor.reset(); - assert_eq!(cursor.first_after(&100), Some(&(102, ()))); - } - - #[test] - fn test_cursor_consistency() { - // Verify binary search and linear scan produce same results - let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 3, ())).collect(); - - for search_key in [0, 1, 3, 50, 150, 299, 300, 597, 598, 599, 1000] { - // Test with fresh cursor (binary search path) - let mut cursor1 = ForwardInMemoryCursor::new(&entries); - let result1 = cursor1.seek(&search_key); - - // Manually advance to trigger linear path by getting close first - let mut cursor2 = ForwardInMemoryCursor::new(&entries); - if search_key > 100 { - cursor2.seek(&(search_key - 50)); - } - let result2 = cursor2.seek(&search_key); - - assert_eq!( - result1, result2, - "Mismatch for key {search_key}: binary={result1:?}, linear={result2:?}" - ); - } - } -} diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 125544f3914..046160b8aba 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,16 +1,10 @@ use super::{HashedCursor, 
HashedCursorFactory, HashedStorageCursor}; -use crate::storage_overlay_index::{ - StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, -}; -use alloy_primitives::{B256, U256}; +use crate::overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}; +use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; -use std::{ - marker::PhantomData, - ops::{Deref, Index}, - sync::Arc, -}; +use std::{marker::PhantomData, sync::Arc}; /// The hashed cursor factory for the post state. #[derive(Clone, Debug)] @@ -30,20 +24,20 @@ impl<'overlay, CF, T> HashedPostStateCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> HashedCursorFactory for HashedPostStateCursorFactory<'overlay, CF, T> where CF: HashedCursorFactory + 'overlay, - T: AsRef<[&'overlay HashedPostStateSorted]>, + T: AsRef, { type AccountCursor<'cursor> - = HashedPostStateCursor<'overlay, CF::AccountCursor<'cursor>, Option> + = HashedPostStateCursor<'cursor, CF::AccountCursor<'cursor>, Option> where Self: 'cursor; type StorageCursor<'cursor> - = HashedPostStateCursor<'overlay, CF::StorageCursor<'cursor>, U256> + = HashedPostStateCursor<'cursor, CF::StorageCursor<'cursor>, U256> where Self: 'cursor; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref().iter().copied())) + Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref())) } fn hashed_storage_cursor( @@ -51,11 +45,7 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage( - cursor, - self.post_state.as_ref().iter().copied(), - hashed_address, - )) + Ok(HashedPostStateCursor::new_storage(cursor, self.post_state.as_ref(), hashed_address)) } } @@ -104,11 
+94,9 @@ where /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over post state overlays. post_state_cursor: PostStateOverlayCursor<'a, V>, - /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. - deferred_overlay_seek_start: Option, /// The last hashed key that was returned by the cursor. /// De facto, this is a current cursor position. last_key: Option, @@ -116,31 +104,7 @@ where /// Tracks whether `seek` has been called. seeked: bool, /// Source of post-state overlays. - post_states: HashedPostStateSource<'a>, -} - -impl<'a, C> HashedPostStateCursor<'a, C, Option> -where - C: HashedCursor, -{ - /// Create new account cursor which combines a DB cursor and the post state. - pub fn new_account( - cursor: C, - post_states: impl IntoIterator, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let post_state_cursor = post_states.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } - } + post_states: &'a HashedPostStateOverlay, } impl<'a, C> HashedPostStateCursor<'a, C, Option> @@ -148,14 +112,12 @@ where C: HashedCursor, { /// Create new account cursor from an indexed hashed post-state overlay. 
- pub fn new_account_from_overlay(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); + pub fn new_account(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { let post_state_cursor = post_states.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), post_state_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -168,40 +130,17 @@ impl<'a, C> HashedPostStateCursor<'a, C, U256> where C: HashedStorageCursor, { - /// Create new storage cursor with full post state reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. - pub fn new_storage( - cursor: C, - post_states: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let post_states = HashedPostStateSource::from_refs(post_states); - let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(cursor_wiped), - post_state_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - post_states, - } - } - /// Create new storage cursor from an indexed hashed post-state overlay. - pub fn new_storage_from_overlay( + pub fn new_storage( cursor: C, post_states: &'a HashedPostStateOverlay, hashed_address: B256, ) -> Self { - let post_states = HashedPostStateSource::Indexed(post_states); let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(cursor_wiped), post_state_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -226,6 +165,11 @@ where /// Positions the DB cursor state using the underlying cursor. 
fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { + if self.db_cursor_state.is_positioned_at(&key) { + self.db_cursor_state.validate_position(); + return Ok(()) + } + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -254,9 +198,13 @@ where } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. - fn choose_next_entry(&mut self) -> Result, DatabaseError> { + fn choose_next_entry( + &mut self, + mut overlay_bound: B256, + mut overlay_bound_inclusive: bool, + ) -> Result, DatabaseError> { loop { - let mem_key = self.post_state_cursor.min_current_key(); + let mem_key = self.post_state_cursor.next_key(&overlay_bound, overlay_bound_inclusive); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); @@ -267,7 +215,8 @@ where return Ok(Some((next_key, value))) } - self.post_state_cursor.advance_key(&next_key); + overlay_bound = next_key; + overlay_bound_inclusive = false; if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -302,23 +251,16 @@ where self.seeked = true; } - self.deferred_overlay_seek_start = None; - match self.post_state_cursor.seek_until_exact(&key) { - Some((idx, Some(value))) => { - let entry = Some((key, value)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry) - } - Some((idx, None)) => { - self.post_state_cursor.seek_from(idx + 1, &key); - } - None => {} + if let Some(Some(value)) = self.post_state_cursor.seek_exact(&key) { + self.db_cursor_state.invalidate_position(); + let entry = Some((key, value)); + self.set_last_key(&entry); + return Ok(entry) } self.cursor_seek(key)?; - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(key, true)?; self.set_last_key(&entry); Ok(entry) } @@ -340,28 +282,22 @@ where 
return Ok(None); }; - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.post_state_cursor.seek_from(start, &last_key); + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { + Some(db_key) if db_key == last_key => self.cursor_next()?, + Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + _ => self.cursor_first_after(last_key)?, } - self.post_state_cursor.first_after(&last_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; - } - - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(last_key, false)?; self.set_last_key(&entry); Ok(entry) } fn reset(&mut self) { self.cursor.reset(); - self.post_state_cursor.reset(); self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; + self.post_state_cursor.reset(); self.last_key = None; #[cfg(debug_assertions)] { @@ -394,8 +330,9 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (post_state_cursor, cursor_wiped) = self.post_states.storage_overlay(hashed_address); - self.post_state_cursor = post_state_cursor; + let (layers, cursor_wiped, has_visible_value) = + self.post_states.storage_overlay_layers(hashed_address); + self.post_state_cursor.retarget(layers, has_visible_value); self.db_cursor_state = DbCursorState::new(cursor_wiped); } } @@ -403,196 +340,120 @@ where /// Hashed post-state overlays ordered from highest to lowest precedence. #[derive(Clone, Debug, Default)] pub struct HashedPostStateOverlay { - states: Vec>, - storage_index: Arc, + account_overlay: Arc>>>, + storage_overlays: Arc>, } impl HashedPostStateOverlay { /// Create a new indexed hashed post-state overlay stack. 
pub fn new(states: Vec>) -> Self { - let storage_index = Arc::new(StorageOverlayIndexEntry::new(&states)); - Self { states, storage_index } + let account_overlay = Self::build_account_overlay(&states); + let storage_overlays = Self::build_storage_overlays(&states); + Self { account_overlay, storage_overlays } } - /// Returns `true` if there are no hashed post-state overlays. - pub const fn is_empty(&self) -> bool { - self.states.is_empty() + /// Returns `true` if the overlay does not contain any hashed post-state updates. + pub fn is_empty(&self) -> bool { + self.account_overlay.is_empty() && self.storage_overlays.is_empty() } - /// Returns the number of hashed post-state overlays. - pub const fn len(&self) -> usize { - self.states.len() + fn build_account_overlay( + states: &[Arc], + ) -> Arc>>> { + Arc::new( + states + .iter() + .filter(|state| !state.accounts.is_empty()) + .map(|state| { + PostStateOverlayLayer::new(Arc::clone(state), state.accounts.as_slice()) + }) + .collect(), + ) } - /// Returns an iterator over hashed post-state overlays. - pub fn iter(&self) -> impl Iterator> { - self.states.iter() - } + fn build_storage_overlays( + states: &[Arc], + ) -> Arc> { + let mut overlays: B256Map = B256Map::default(); - /// Push a hashed post-state overlay at the end of the precedence stack. - pub fn push(&mut self, state: Arc) { - Arc::make_mut(&mut self.storage_index).append(self.states.len(), state.as_ref()); - self.states.push(state); - } + for state in states { + for (hashed_address, storage) in &state.storages { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } - /// Insert a hashed post-state overlay at `index`. 
- pub fn insert(&mut self, index: usize, state: Arc) { - if index == 0 { - Arc::make_mut(&mut self.storage_index).prepend(state.as_ref()); - self.states.insert(index, state); - } else { - self.states.insert(index, state); - self.storage_index = Arc::new(StorageOverlayIndexEntry::new(&self.states)); - } - } + if !storage.storage_slots_ref().is_empty() { + overlay.layers.push(PostStateOverlayLayer::new( + Arc::clone(state), + storage.storage_slots_ref(), + )); + } - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let Some(index) = self.storage_index.get(&hashed_address) else { - return (PostStateOverlayCursor::default(), false); - }; + if storage.is_wiped() { + overlay.db_wiped = true; + } + } + } - ( - PostStateOverlayCursor { - cursors: index - .indices - .iter() - .filter_map(|idx| self.states[*idx].storages.get(&hashed_address)) - .map(|storage| SeekablePostStateCursor::new(storage.storage_slots_ref())) - .collect(), - }, - index.db_wiped, - ) - } -} + for overlay in overlays.values_mut() { + overlay.has_visible_value = has_visible_storage_value(&overlay.layers); + } -impl From>> for HashedPostStateOverlay { - fn from(states: Vec>) -> Self { - Self::new(states) + Arc::new(overlays) } -} -impl IntoIterator for HashedPostStateOverlay { - type IntoIter = std::vec::IntoIter; - type Item = Arc; - - fn into_iter(self) -> Self::IntoIter { - self.states.into_iter() + fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { + PostStateOverlayCursor::new(self.account_overlay.as_slice(), false) } -} -impl Index for HashedPostStateOverlay { - type Output = Arc; - - fn index(&self, index: usize) -> &Self::Output { - &self.states[index] + fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { + let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); + (PostStateOverlayCursor::new(layers, has_visible_value), db_wiped) } -} -impl Deref for 
HashedPostStateOverlay { - type Target = [Arc]; + fn storage_overlay_layers( + &self, + hashed_address: B256, + ) -> (&[PostStateOverlayLayer], bool, bool) { + let Some(overlay) = self.storage_overlays.get(&hashed_address) else { + return (&[], false, false); + }; - fn deref(&self) -> &Self::Target { - &self.states + (overlay.layers.as_slice(), overlay.db_wiped, overlay.has_visible_value) } } -#[derive(Clone, Debug)] -enum HashedPostStateSource<'a> { - Refs(Vec<&'a HashedPostStateSorted>), - Indexed(&'a HashedPostStateOverlay), -} - -impl<'a> HashedPostStateSource<'a> { - fn from_refs(post_states: impl IntoIterator) -> Self { - Self::Refs(post_states.into_iter().collect()) - } - - fn account_overlay(&self) -> PostStateOverlayCursor<'a, Option> { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::account(post_states), - Self::Indexed(post_states) => PostStateOverlayCursor { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), - }, - } - } - - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'a, U256>, bool) { - match self { - Self::Refs(post_states) => PostStateOverlayCursor::storage(post_states, hashed_address), - Self::Indexed(post_states) => post_states.storage_overlay(hashed_address), - } +impl AsRef for HashedPostStateOverlay { + fn as_ref(&self) -> &Self { + self } } #[derive(Debug)] -enum DbCursorState { - Unpositioned, - Positioned((B256, V)), - Wiped, -} - -impl DbCursorState { - const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned - } - } - - const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - const fn entry(&self) -> Option<&(B256, V)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, - } - } - - fn set_entry(&mut self, entry: Option<(B256, V)>) { - if !self.is_wiped() { - *self = 
entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); - } - } -} - -#[derive(Clone, Debug, Default)] struct PostStateOverlayCursor<'a, V> { - cursors: Vec>, + cursor: PositionedOverlayCursor<'a, HashedPostStateSorted, B256, V>, + has_visible_value: bool, } -impl<'a> PostStateOverlayCursor<'a, Option> { - fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { - Self { - cursors: post_states - .iter() - .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) - .collect(), - } +impl Default for PostStateOverlayCursor<'_, V> { + fn default() -> Self { + Self::new(&[], false) } } -impl<'a> PostStateOverlayCursor<'a, U256> { - fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { - let mut cursors = Vec::new(); - let mut db_wiped = false; +impl<'a, V> PostStateOverlayCursor<'a, V> { + fn new(layers: &'a [PostStateOverlayLayer], has_visible_value: bool) -> Self { + Self { cursor: PositionedOverlayCursor::new(layers), has_visible_value } + } - for post_state in post_states { - if let Some(storage) = post_state.storages.get(&hashed_address) { - cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); - if storage.is_wiped() { - db_wiped = true; - break; - } - } - } + fn reset(&mut self) { + self.cursor.reset(); + } - (Self { cursors }, db_wiped) + fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { + self.cursor.retarget(layers); + self.has_visible_value = has_visible_value; } } @@ -600,100 +461,48 @@ impl<'a, V> PostStateOverlayCursor<'a, V> where V: HashedPostStateCursorValue, { - fn seek_from(&mut self, start: usize, key: &B256) { - for cursor in self.cursors.iter_mut().skip(start) { - cursor.seek(key); - } - } - - fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { - for (idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((cursor_key, value)) = cursor.seek(key) && - cursor_key == key - { - return Some((idx, 
value.into_option())) - } - } - None + fn seek_exact(&mut self, key: &B256) -> Option> { + self.cursor.seek_exact(key).map(|value| (*value).into_option()) } - fn first_after(&mut self, key: &B256) { - for cursor in &mut self.cursors { - cursor.first_after(key); - } - } - - fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); - } - } - - fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + fn next_key(&mut self, key: &B256, inclusive: bool) -> Option { + self.cursor.next_key(key, inclusive) } fn highest_priority_value_at(&self, key: &B256) -> Option> { - self.cursors.iter().find_map(|cursor| { - let (cursor_key, value) = cursor.current()?; - (cursor_key == key).then(|| value.into_option()) - }) - } - - fn advance_key(&mut self, key: &B256) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { - cursor.first_after(key); - } - } + self.cursor.highest_priority_value_at(key).map(|value| (*value).into_option()) } - fn has_visible_value(&self) -> bool { - let mut cursor = self.clone(); - cursor.reset(); - while let Some(key) = cursor.min_current_key() { - if cursor.highest_priority_value_at(&key).flatten().is_some() { - return true - } - cursor.advance_key(&key); - } - false + const fn has_visible_value(&self) -> bool { + self.has_visible_value } } -#[derive(Clone, Debug)] -struct SeekablePostStateCursor<'a, V> { - entries: &'a [(B256, V)], - idx: usize, +#[derive(Clone, Debug, Default)] +struct HashedStorageOverlay { + layers: Vec>, + db_wiped: bool, + has_visible_value: bool, } -impl<'a, V> SeekablePostStateCursor<'a, V> { - const fn new(entries: &'a [(B256, V)]) -> Self { - Self { entries, idx: 0 } - } - - fn current(&self) -> Option<&'a (B256, V)> { - self.entries.get(self.idx) - } - - const fn reset(&mut self) { - self.idx = 0; - } - - fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { - self.idx = 
self.entries.partition_point(|(entry_key, _)| entry_key < key); - self.current() - } - - fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { - if self.current().is_some_and(|(entry_key, _)| entry_key > key) { - return self.current() +type PostStateOverlayLayer = OverlayLayer; + +fn has_visible_storage_value(layers: &[PostStateOverlayLayer]) -> bool { + for (layer_idx, layer) in layers.iter().enumerate() { + for (key, value) in layer.entries() { + if !value.is_zero() && + !layers[..layer_idx].iter().any(|higher_layer| { + higher_layer + .entries() + .binary_search_by_key(key, |(entry_key, _)| *entry_key) + .is_ok() + }) + { + return true + } } - - let remaining = &self.entries[self.idx..]; - self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); - self.current() } + false } #[cfg(test)] @@ -741,6 +550,14 @@ mod tests { HashedPostStateSorted::new(Vec::new(), storages) } + fn storage_cursor<'a>( + cursor: MockHashedCursor, + overlay: &'a HashedPostStateOverlay, + hashed_address: B256, + ) -> HashedPostStateCursor<'a, MockHashedCursor, U256> { + HashedPostStateCursor::new_storage(cursor, overlay, hashed_address) + } + #[test] fn test_seek_overlay_exact_hit_does_not_touch_db_until_next() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -752,7 +569,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x02), U256::from(42)))); @@ -774,7 +592,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = 
HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x01)).unwrap(); assert_eq!(result, Some((key(0x01), U256::from(1)))); @@ -788,6 +607,33 @@ mod tests { assert_eq!(result, Some((key(0x03), U256::from(3)))); } + #[test] + fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { + let db_nodes = vec![(key(0x03), U256::from(3)), (key(0x05), U256::from(5))]; + let post_state_nodes = vec![(key(0x02), U256::from(2))]; + + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let post_state = storage_post_state(post_state_nodes); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + + let result = cursor.seek(key(0x05)).unwrap(); + assert_eq!(result, Some((key(0x05), U256::from(5)))); + assert_eq!(visited_keys.lock().len(), 1); + + let result = cursor.seek(key(0x02)).unwrap(); + assert_eq!(result, Some((key(0x02), U256::from(2)))); + assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); + + let result = cursor.next().unwrap(); + assert_eq!(result, Some((key(0x03), U256::from(3)))); + assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); + } + #[test] fn test_seek_overlay_exact_deletion_still_seeks_db() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -799,7 +645,8 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], 
B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x03), U256::from(3)))); @@ -818,20 +665,15 @@ mod tests { let exact_hit = storage_post_state(vec![(key(0x05), U256::from(5))]); let lower_priority = storage_post_state(vec![(key(0x01), U256::from(10)), (key(0x07), U256::from(7))]); - let mut cursor = HashedPostStateCursor::new_storage( - mock_cursor, - [&higher_priority, &exact_hit, &lower_priority], - B256::ZERO, - ); + let overlay = HashedPostStateOverlay::new(vec![ + Arc::new(higher_priority), + Arc::new(exact_hit), + Arc::new(lower_priority), + ]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let result = cursor.seek(key(0x05)).unwrap(); assert_eq!(result, Some((key(0x05), U256::from(5)))); - assert_eq!(cursor.post_state_cursor.cursors[0].idx, 1); - assert_eq!(cursor.post_state_cursor.cursors[1].idx, 0); - assert_eq!( - cursor.post_state_cursor.cursors[2].idx, 0, - "lower-priority overlay should not be sought after an exact overlay hit" - ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -847,13 +689,34 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); let post_state = storage_post_state(vec![(key(0x02), U256::from(2))]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(key(0x03)).unwrap(), Some((key(0x03), U256::from(3)))); assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); } + #[test] + fn test_seek_reuses_exact_db_position() { + let 
db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x02), account(2))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = HashedPostStateOverlay::default(); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); + + assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), account(1)))); + assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!(cursor.next().unwrap(), Some((key(0x02), account(2)))); + assert_eq!(visited_keys.lock().len(), 2); + + assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x02), account(2)))); + assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ @@ -871,8 +734,8 @@ mod tests { (key(0x02), U256::from(20)), (key(0x03), U256::from(3)), ]); - let mut cursor = - HashedPostStateCursor::new_storage(mock_cursor, [&newest, &oldest], B256::ZERO); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); let mut results = Vec::new(); if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { @@ -907,9 +770,8 @@ mod tests { vec![(key(0x01), Some(account(10))), (key(0x03), Some(account(30)))], Default::default(), ); - let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(oldest)]); - overlay.insert(0, Arc::new(newest)); - let mut cursor = HashedPostStateCursor::new_account_from_overlay(mock_cursor, &overlay); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { @@ -932,11 +794,9 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), 
U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let mut cursor = HashedPostStateCursor::new_storage( - mock_cursor, - [&newest, &wiping, &hidden], - B256::ZERO, - ); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); @@ -953,10 +813,9 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let mut overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping)]); - overlay.push(Arc::new(hidden)); - let mut cursor = - HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, B256::ZERO); + let overlay = + HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); @@ -982,8 +841,7 @@ mod tests { storage_post_state_for_address(second_address, vec![(key(0x02), U256::from(2))]); let overlay = HashedPostStateOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = - HashedPostStateCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, first_address); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x04), 
U256::from(4)))); @@ -995,6 +853,37 @@ mod tests { assert_eq!(cursor.next().unwrap(), None); } + #[test] + fn test_storage_empty_respects_layer_precedence() { + let mut db_storage = B256Map::default(); + db_storage.insert(B256::ZERO, BTreeMap::new()); + let visited_keys = + Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); + let mock_cursor = + MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); + + let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); + let hidden = storage_post_state(vec![(key(0x01), U256::from(1))]); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(hidden)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); + + assert!(cursor.is_storage_empty().unwrap()); + + let mut db_storage = B256Map::default(); + db_storage.insert(B256::ZERO, BTreeMap::new()); + let visited_keys = + Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); + let mock_cursor = + MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); + + let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); + let visible = storage_post_state(vec![(key(0x02), U256::from(2))]); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(visible)]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); + + assert!(!cursor.is_storage_empty().unwrap()); + } + mod proptest_tests { use super::*; use proptest::prelude::*; @@ -1135,10 +1024,13 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); let hashed_address = B256::ZERO; - let post_states = - overlays.into_iter().map(storage_post_state).collect::>(); - let mut test_cursor = - HashedPostStateCursor::new_storage(mock_cursor, post_states.iter(), hashed_address); + let post_states = overlays + .into_iter() + .map(storage_post_state) + .map(Arc::new) + .collect::>(); + 
let overlay = HashedPostStateOverlay::new(post_states); + let mut test_cursor = storage_cursor(mock_cursor, &overlay, hashed_address); // Test: seek to the beginning first let control_first = diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index e506843a6f4..a91048036c5 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -14,10 +14,7 @@ )] #![cfg_attr(docsrs, feature(doc_cfg))] -/// The implementation of forward-only in-memory cursor. -pub mod forward_cursor; - -mod storage_overlay_index; +mod overlay_cursor; /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor; diff --git a/crates/trie/trie/src/node_iter.rs b/crates/trie/trie/src/node_iter.rs index 45d26238984..facedbb4dce 100644 --- a/crates/trie/trie/src/node_iter.rs +++ b/crates/trie/trie/src/node_iter.rs @@ -310,7 +310,7 @@ mod tests { use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, noop::NoopHashedCursor, HashedCursorFactory, - HashedPostStateCursor, + HashedPostStateCursor, HashedPostStateOverlay, }, mock::{KeyVisit, KeyVisitType}, trie_cursor::{ @@ -331,7 +331,7 @@ mod tests { prefix_set::PrefixSetMut, updates::TrieUpdates, BranchNode, HashedPostState, LeafNode, RlpNode, }; - use std::collections::BTreeMap; + use std::{collections::BTreeMap, sync::Arc}; /// Calculate the branch node stored in the database by feeding the provided state to the hash /// builder and taking the trie updates. 
@@ -349,12 +349,13 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); + let hashed_post_state = HashedPostStateOverlay::new(vec![Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - [&hashed_post_state], + &hashed_post_state, ), ); diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs new file mode 100644 index 00000000000..7796e1abbbc --- /dev/null +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -0,0 +1,223 @@ +use std::{fmt, slice, sync::Arc}; + +const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; + +#[derive(Debug)] +pub(crate) enum DbCursorState { + Unpositioned, + Positioned { entry: (K, V), position_valid: bool }, + Wiped, +} + +impl DbCursorState { + pub(crate) const fn new(cursor_wiped: bool) -> Self { + if cursor_wiped { + Self::Wiped + } else { + Self::Unpositioned + } + } + + pub(crate) const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + + pub(crate) const fn entry(&self) -> Option<&(K, V)> { + match self { + Self::Positioned { entry, .. } => Some(entry), + Self::Unpositioned | Self::Wiped => None, + } + } + + pub(crate) const fn position_valid(&self) -> bool { + matches!(self, Self::Positioned { position_valid: true, .. }) + } + + pub(crate) fn set_entry(&mut self, entry: Option<(K, V)>) { + if !self.is_wiped() { + *self = entry + .map(|entry| Self::Positioned { entry, position_valid: true }) + .unwrap_or(Self::Unpositioned); + } + } + + pub(crate) const fn validate_position(&mut self) { + if let Self::Positioned { position_valid, .. } = self { + *position_valid = true; + } + } + + pub(crate) const fn invalidate_position(&mut self) { + if let Self::Positioned { position_valid, .. 
} = self { + *position_valid = false; + } + } +} + +impl DbCursorState { + pub(crate) fn is_positioned_at(&self, key: &K) -> bool { + matches!(self, Self::Positioned { entry: (db_key, _), .. } if db_key == key) + } +} + +#[derive(Debug)] +pub(crate) struct PositionedOverlayCursor<'a, O, K, V> { + layers: &'a [OverlayLayer], + positions: Vec, +} + +impl Default for PositionedOverlayCursor<'_, O, K, V> { + fn default() -> Self { + Self::new(&[]) + } +} + +impl<'a, O, K, V> PositionedOverlayCursor<'a, O, K, V> { + pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { + Self { layers, positions: vec![0; layers.len()] } + } + + pub(crate) fn reset(&mut self) { + self.positions.fill(0); + } + + pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + self.layers = layers; + self.positions.clear(); + self.positions.resize(layers.len(), 0); + } +} + +impl PositionedOverlayCursor<'_, O, K, V> +where + K: Ord, +{ + pub(crate) fn seek_exact(&mut self, key: &K) -> Option<&V> { + let Self { layers, positions } = self; + + layers.iter().enumerate().find_map(|(layer_idx, layer)| { + let entries = layer.entries(); + let idx = seek_overlay_entries( + entries, + positions.get_mut(layer_idx), + key, + OverlaySeekMode::Inclusive, + )?; + (&entries[idx].0 == key).then_some(&entries[idx].1) + }) + } + + pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { + self.layers.iter().enumerate().find_map(|(layer_idx, layer)| { + let entries = layer.entries(); + if let Some(position) = self.positions.get(layer_idx) { + entries + .get(*position) + .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) + } else { + let idx = entries.binary_search_by(|(entry_key, _)| entry_key.cmp(key)).ok()?; + Some(&entries[idx].1) + } + }) + } +} + +impl PositionedOverlayCursor<'_, O, K, V> +where + K: Copy + Ord, +{ + pub(crate) fn next_key(&mut self, key: &K, inclusive: bool) -> Option { + let mode = if inclusive { OverlaySeekMode::Inclusive } else { 
OverlaySeekMode::Exclusive }; + let Self { layers, positions } = self; + + layers + .iter() + .enumerate() + .filter_map(|(layer_idx, layer)| { + let entries = layer.entries(); + let idx = seek_overlay_entries(entries, positions.get_mut(layer_idx), key, mode)?; + Some(entries[idx].0) + }) + .min() + } +} + +#[derive(Clone, Copy)] +enum OverlaySeekMode { + Inclusive, + Exclusive, +} + +impl OverlaySeekMode { + fn skips(self, entry_key: &K, bound: &K) -> bool { + match self { + Self::Inclusive => entry_key < bound, + Self::Exclusive => entry_key <= bound, + } + } +} + +fn seek_overlay_entries( + entries: &[(K, V)], + mut position: Option<&mut usize>, + key: &K, + mode: OverlaySeekMode, +) -> Option +where + K: Ord, +{ + let mut start = + position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); + if start > 0 && !mode.skips(&entries[start - 1].0, key) { + start = 0; + } + + let remaining = &entries[start..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|(entry_key, _)| mode.skips(entry_key, key)) + } else { + let mut advance = 0; + while advance < remaining.len() && mode.skips(&remaining[advance].0, key) { + advance += 1; + } + advance + }; + + let idx = start + advance; + if let Some(position) = position.as_mut() { + **position = idx; + } + (idx < entries.len()).then_some(idx) +} + +#[derive(Clone)] +pub(crate) struct OverlayLayer { + _owner: Arc, + entries_ptr: *const (K, V), + entries_len: usize, +} + +impl OverlayLayer { + pub(crate) const fn new(owner: Arc, entries: &[(K, V)]) -> Self { + Self { _owner: owner, entries_ptr: entries.as_ptr(), entries_len: entries.len() } + } + + pub(crate) const fn entries(&self) -> &[(K, V)] { + // SAFETY: `entries_ptr` and `entries_len` are captured from a slice inside `_owner`. + // The `Arc` keeps that allocation alive, and the overlay owners are never mutated through + // this layer. 
+ unsafe { slice::from_raw_parts(self.entries_ptr, self.entries_len) } + } +} + +// SAFETY: the raw pointer only targets immutable data owned by `_owner`, and `_owner` is retained +// for at least as long as the pointer is used. +unsafe impl Send for OverlayLayer {} +// SAFETY: see the `Send` impl; shared access only exposes immutable slices. +unsafe impl Sync for OverlayLayer {} + +impl fmt::Debug for OverlayLayer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("OverlayLayer").field("entries_len", &self.entries_len).finish() + } +} diff --git a/crates/trie/trie/src/storage_overlay_index.rs b/crates/trie/trie/src/storage_overlay_index.rs deleted file mode 100644 index 88e6cca89cd..00000000000 --- a/crates/trie/trie/src/storage_overlay_index.rs +++ /dev/null @@ -1,95 +0,0 @@ -use alloy_primitives::{map::B256Map, B256}; -use reth_trie_common::{updates::TrieUpdatesSorted, HashedPostStateSorted}; -use std::sync::Arc; - -/// Source of per-account storage overlays for [`StorageOverlayIndex`]. -pub(crate) trait StorageOverlayIndexSource { - /// Returns every hashed address touched by this overlay and whether that storage overlay wipes - /// lower-priority database or overlay contents for the address. - fn storage_overlay_index_entries(&self) -> impl Iterator + '_; -} - -impl StorageOverlayIndexSource for TrieUpdatesSorted { - fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { - self.storage_tries_ref() - .iter() - .map(|(hashed_address, storage)| (*hashed_address, storage.is_deleted())) - } -} - -impl StorageOverlayIndexSource for HashedPostStateSorted { - fn storage_overlay_index_entries(&self) -> impl Iterator + '_ { - self.storages.iter().map(|(hashed_address, storage)| (*hashed_address, storage.is_wiped())) - } -} - -/// Precomputed lookup from hashed address to the overlay layers that contain storage for it. -pub(crate) type StorageOverlayIndex = B256Map; - -/// Incremental updates for a [`StorageOverlayIndex`]. 
-pub(crate) trait StorageOverlayIndexMut { - /// Adds a lower-priority overlay to this storage overlay index. - fn append(&mut self, overlay_index: usize, overlay: &T); - - /// Adds a highest-priority overlay to this storage overlay index. - fn prepend(&mut self, overlay: &T); -} - -/// Index entry for one hashed address in a [`StorageOverlayIndex`]. -#[derive(Clone, Debug, Default)] -pub(crate) struct StorageOverlayIndexEntry { - /// Overlay indices that should be searched for a hashed address, ordered by precedence. - pub(crate) indices: Arc>, - /// Whether an overlay at one of [`Self::indices`] wipes lower-priority database contents. - pub(crate) db_wiped: bool, -} - -impl StorageOverlayIndexEntry { - /// Builds a storage overlay index for the full overlay stack. - pub(crate) fn new(overlays: &[Arc]) -> StorageOverlayIndex { - let mut index = StorageOverlayIndex::default(); - - for (idx, overlay) in overlays.iter().enumerate() { - index.append(idx, overlay.as_ref()); - } - - index - } -} - -impl StorageOverlayIndexMut for StorageOverlayIndex { - fn append(&mut self, overlay_index: usize, overlay: &T) { - for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { - let entry = self.entry(hashed_address).or_default(); - if entry.db_wiped { - continue; - } - - Arc::make_mut(&mut entry.indices).push(overlay_index); - if wipes_db { - entry.db_wiped = true; - } - } - } - - fn prepend(&mut self, overlay: &T) { - for entry in self.values_mut() { - for idx in Arc::make_mut(&mut entry.indices) { - *idx += 1; - } - } - - for (hashed_address, wipes_db) in overlay.storage_overlay_index_entries() { - let entry = self.entry(hashed_address).or_default(); - let indices = Arc::make_mut(&mut entry.indices); - - if wipes_db { - indices.clear(); - indices.push(0); - entry.db_wiped = true; - } else { - indices.insert(0, 0); - } - } - } -} diff --git a/crates/trie/trie/src/test_utils.rs b/crates/trie/trie/src/test_utils.rs index 8d3f2f6659f..966d90ac3d2 100644 --- 
a/crates/trie/trie/src/test_utils.rs +++ b/crates/trie/trie/src/test_utils.rs @@ -55,6 +55,7 @@ pub fn storage_root_prehashed>(storage: I) use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, HashedCursorFactory, HashedPostStateCursorFactory, + HashedPostStateOverlay, }, proof_v2::StorageProofCalculator, trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, @@ -65,7 +66,7 @@ use reth_trie_common::{ prefix_set::PrefixSetMut, updates::StorageTrieUpdates, BranchNodeCompact, HashedPostStateSorted, HashedStorage, Nibbles, ProofTrieNodeV2, ProofV2Target, }; -use std::{collections::BTreeMap, iter::once}; +use std::{collections::BTreeMap, iter::once, sync::Arc}; /// General-purpose test harness for storage trie tests. /// @@ -125,8 +126,9 @@ impl TrieTestHarness { Vec::new(), once((self.hashed_address(), hashed_storage.into_sorted())).collect(), ); + let overlay = HashedPostStateOverlay::new(vec![Arc::new(overlay)]); let overlay_cursor_factory = - HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), [&overlay]); + HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), &overlay); let (root, _, updates) = StorageRoot::new_hashed( self.trie_cursor_factory.clone(), diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 6f39b47e8b4..40fd74b1a58 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,18 +1,12 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; use crate::{ - storage_overlay_index::{ - StorageOverlayIndex, StorageOverlayIndexEntry, StorageOverlayIndexMut, - }, + overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}, updates::TrieUpdatesSorted, }; -use alloy_primitives::B256; +use alloy_primitives::{map::B256Map, B256}; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; -use std::{ - marker::PhantomData, - ops::{Deref, 
Index}, - sync::Arc, -}; +use std::{marker::PhantomData, sync::Arc}; /// The trie cursor factory for the trie updates. #[derive(Debug, Clone)] @@ -34,21 +28,21 @@ impl<'overlay, CF, T> InMemoryTrieCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> TrieCursorFactory for InMemoryTrieCursorFactory<'overlay, CF, T> where CF: TrieCursorFactory + 'overlay, - T: AsRef<[&'overlay TrieUpdatesSorted]>, + T: AsRef, { type AccountTrieCursor<'cursor> - = InMemoryTrieCursor<'overlay, CF::AccountTrieCursor<'cursor>> + = InMemoryTrieCursor<'cursor, CF::AccountTrieCursor<'cursor>> where Self: 'cursor; type StorageTrieCursor<'cursor> - = InMemoryTrieCursor<'overlay, CF::StorageTrieCursor<'cursor>> + = InMemoryTrieCursor<'cursor, CF::StorageTrieCursor<'cursor>> where Self: 'cursor; fn account_trie_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.account_trie_cursor()?; - Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref().iter().copied())) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref())) } fn storage_trie_cursor( @@ -56,11 +50,7 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.storage_trie_cursor(hashed_address)?; - Ok(InMemoryTrieCursor::new_storage( - cursor, - self.trie_updates.as_ref().iter().copied(), - hashed_address, - )) + Ok(InMemoryTrieCursor::new_storage(cursor, self.trie_updates.as_ref(), hashed_address)) } } @@ -71,70 +61,26 @@ pub struct InMemoryTrieCursor<'a, C> { /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over trie update overlays. in_memory_cursor: OverlayCursor<'a>, - /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. - deferred_overlay_seek_start: Option, /// The key most recently returned from the Cursor. last_key: Option, #[cfg(debug_assertions)] /// Whether an initial seek was called. 
seeked: bool, /// Source of trie update overlays. - trie_updates: TrieUpdatesSource<'a>, + trie_updates: &'a TrieUpdatesOverlay, } impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { - /// Create new account trie cursor which combines a DB cursor and the trie updates. - pub fn new_account( - cursor: C, - trie_updates: impl IntoIterator, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let in_memory_cursor = trie_updates.account_overlay(); - Self { - cursor, - db_cursor_state: DbCursorState::new(false), - in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - /// Create new account trie cursor from an indexed trie updates overlay. - pub fn new_account_from_overlay(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); + pub fn new_account(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { let in_memory_cursor = trie_updates.account_overlay(); Self { cursor, db_cursor_state: DbCursorState::new(false), in_memory_cursor, - deferred_overlay_seek_start: None, - last_key: None, - #[cfg(debug_assertions)] - seeked: false, - trie_updates, - } - } - - /// Create new storage trie cursor with full trie updates reference. - /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. 
- pub fn new_storage( - cursor: C, - trie_updates: impl IntoIterator, - hashed_address: B256, - ) -> Self { - let trie_updates = TrieUpdatesSource::from_refs(trie_updates); - let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); - Self { - cursor, - db_cursor_state: DbCursorState::new(db_wiped), - in_memory_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -143,18 +89,16 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } /// Create new storage trie cursor from an indexed trie updates overlay. - pub fn new_storage_from_overlay( + pub fn new_storage( cursor: C, trie_updates: &'a TrieUpdatesOverlay, hashed_address: B256, ) -> Self { - let trie_updates = TrieUpdatesSource::Indexed(trie_updates); let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(db_wiped), in_memory_cursor, - deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -173,6 +117,11 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { + if self.db_cursor_state.is_positioned_at(&key) { + self.db_cursor_state.validate_position(); + return Ok(()) + } + let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -201,20 +150,27 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
- fn choose_next_entry(&mut self) -> Result, DatabaseError> { + fn choose_next_entry( + &mut self, + mut overlay_bound: Nibbles, + mut overlay_bound_inclusive: bool, + ) -> Result, DatabaseError> { loop { - let mem_key = self.in_memory_cursor.min_current_key(); + let mem_key = self.in_memory_cursor.next_key(&overlay_bound, overlay_bound_inclusive); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); }; - if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { + if let Some(mem_value) = + self.in_memory_cursor.highest_priority_value_at(&next_key).cloned() + { if let Some(node) = mem_value { return Ok(Some((next_key, node))) } - self.in_memory_cursor.advance_key(&next_key); + overlay_bound = next_key; + overlay_bound_inclusive = false; if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -238,11 +194,8 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - self.deferred_overlay_seek_start = None; - let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { - if mem_value.is_some() { - self.deferred_overlay_seek_start = Some(idx + 1); - } + let entry = if let Some(mem_value) = self.in_memory_cursor.seek_exact(&key).cloned() { + self.db_cursor_state.invalidate_position(); mem_value.map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); @@ -263,22 +216,15 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - self.deferred_overlay_seek_start = None; - match self.in_memory_cursor.seek_until_exact(&key) { - Some((idx, Some(node))) => { - let entry = Some((key, node)); - self.deferred_overlay_seek_start = Some(idx + 1); - self.set_last_key(&entry); - return Ok(entry); - } - Some((idx, None)) => { - self.in_memory_cursor.seek_from(idx + 1, &key); - } - None => 
{} + if let Some(Some(node)) = self.in_memory_cursor.seek_exact(&key).cloned() { + self.db_cursor_state.invalidate_position(); + let entry = Some((key, node)); + self.set_last_key(&entry); + return Ok(entry); } self.cursor_seek(key)?; - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(key, true)?; self.set_last_key(&entry); Ok(entry) } @@ -294,17 +240,13 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { return Ok(None); }; - if let Some(start) = self.deferred_overlay_seek_start.take() { - self.in_memory_cursor.seek_from(start, &last_key); - } - self.in_memory_cursor.first_after(&last_key); - if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { - self.cursor_next()?; - } else { - self.cursor_first_after(last_key)?; + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { + Some(db_key) if db_key == last_key => self.cursor_next()?, + Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + _ => self.cursor_first_after(last_key)?, } - let entry = self.choose_next_entry()?; + let entry = self.choose_next_entry(last_key, false)?; self.set_last_key(&entry); Ok(entry) } @@ -318,10 +260,9 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { fn reset(&mut self) { self.cursor.reset(); - self.in_memory_cursor.reset(); self.db_cursor_state.set_entry(None); - self.deferred_overlay_seek_start = None; + self.in_memory_cursor.reset(); self.last_key = None; #[cfg(debug_assertions)] { @@ -334,8 +275,8 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (in_memory_cursor, db_wiped) = self.trie_updates.storage_overlay(hashed_address); - self.in_memory_cursor = in_memory_cursor; + let (layers, db_wiped) = self.trie_updates.storage_overlay_layers(hashed_address); + self.in_memory_cursor.retarget(layers); self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ 
-343,268 +284,94 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { /// Trie updates overlays ordered from highest to lowest precedence. #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { - updates: Vec>, - storage_index: Arc, + account_overlay: Arc>, + storage_overlays: Arc>, } impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { - let storage_index = Arc::new(StorageOverlayIndexEntry::new(&updates)); - Self { updates, storage_index } - } - - /// Returns `true` if there are no trie update overlays. - pub const fn is_empty(&self) -> bool { - self.updates.is_empty() + let account_overlay = Self::build_account_overlay(&updates); + let storage_overlays = Self::build_storage_overlays(&updates); + Self { account_overlay, storage_overlays } } - /// Returns the number of trie update overlays. - pub const fn len(&self) -> usize { - self.updates.len() + /// Returns `true` if the overlay does not contain any trie updates. + pub fn is_empty(&self) -> bool { + self.account_overlay.is_empty() && self.storage_overlays.is_empty() } - /// Returns an iterator over trie update overlays. - pub fn iter(&self) -> impl Iterator> { - self.updates.iter() - } - - /// Push a trie update overlay at the end of the precedence stack. 
- pub fn push(&mut self, update: Arc) { - Arc::make_mut(&mut self.storage_index).append(self.updates.len(), update.as_ref()); - self.updates.push(update); - } - - fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - let Some(index) = self.storage_index.get(&hashed_address) else { - return (OverlayCursor::default(), false); - }; - - ( - OverlayCursor { - cursors: index - .indices - .iter() - .filter_map(|idx| self.updates[*idx].storage_tries_ref().get(&hashed_address)) - .map(|storage| SeekableInMemoryCursor::new(storage.storage_nodes_ref())) - .collect(), - }, - index.db_wiped, + fn build_account_overlay(updates: &[Arc]) -> Arc> { + Arc::new( + updates + .iter() + .filter(|update| !update.account_nodes_ref().is_empty()) + .map(|update| TrieOverlayLayer::new(Arc::clone(update), update.account_nodes_ref())) + .collect(), ) } -} - -impl From>> for TrieUpdatesOverlay { - fn from(updates: Vec>) -> Self { - Self::new(updates) - } -} - -impl IntoIterator for TrieUpdatesOverlay { - type IntoIter = std::vec::IntoIter; - type Item = Arc; - - fn into_iter(self) -> Self::IntoIter { - self.updates.into_iter() - } -} - -impl Index for TrieUpdatesOverlay { - type Output = Arc; - - fn index(&self, index: usize) -> &Self::Output { - &self.updates[index] - } -} - -impl Deref for TrieUpdatesOverlay { - type Target = [Arc]; - - fn deref(&self) -> &Self::Target { - &self.updates - } -} - -#[derive(Clone, Debug)] -enum TrieUpdatesSource<'a> { - Refs(Vec<&'a TrieUpdatesSorted>), - Indexed(&'a TrieUpdatesOverlay), -} -impl<'a> TrieUpdatesSource<'a> { - fn from_refs(trie_updates: impl IntoIterator) -> Self { - Self::Refs(trie_updates.into_iter().collect()) - } + fn build_storage_overlays( + updates: &[Arc], + ) -> Arc> { + let mut overlays: B256Map = B256Map::default(); - fn account_overlay(&self) -> OverlayCursor<'a> { - match self { - Self::Refs(trie_updates) => OverlayCursor::account(trie_updates), - Self::Indexed(trie_updates) => OverlayCursor { - cursors: 
trie_updates - .iter() - .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) - .collect(), - }, - } - } - - fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'a>, bool) { - match self { - Self::Refs(trie_updates) => OverlayCursor::storage(trie_updates, hashed_address), - Self::Indexed(trie_updates) => trie_updates.storage_overlay(hashed_address), - } - } -} - -#[derive(Debug)] -enum DbCursorState { - Unpositioned, - Positioned((Nibbles, BranchNodeCompact)), - Wiped, -} - -impl DbCursorState { - const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned - } - } - - const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - const fn entry(&self) -> Option<&(Nibbles, BranchNodeCompact)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, - } - } - - fn set_entry(&mut self, entry: Option<(Nibbles, BranchNodeCompact)>) { - if !self.is_wiped() { - *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); - } - } -} - -#[derive(Debug, Default)] -struct OverlayCursor<'a> { - cursors: Vec>, -} - -impl<'a> OverlayCursor<'a> { - fn account(trie_updates: &[&'a TrieUpdatesSorted]) -> Self { - Self { - cursors: trie_updates - .iter() - .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) - .collect(), - } - } + for update in updates { + for (hashed_address, storage) in update.storage_tries_ref() { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } - fn storage(trie_updates: &[&'a TrieUpdatesSorted], hashed_address: B256) -> (Self, bool) { - let mut cursors = Vec::new(); - let mut db_wiped = false; + if !storage.storage_nodes_ref().is_empty() { + overlay.layers.push(TrieOverlayLayer::new( + Arc::clone(update), + storage.storage_nodes_ref(), + )); + } - for updates in trie_updates { - if let Some(storage) = 
updates.storage_tries_ref().get(&hashed_address) { - cursors.push(SeekableInMemoryCursor::new(storage.storage_nodes_ref())); if storage.is_deleted() { - db_wiped = true; - break; + overlay.db_wiped = true; } } } - (Self { cursors }, db_wiped) + Arc::new(overlays) } - fn seek_from(&mut self, start: usize, key: &Nibbles) { - for cursor in self.cursors.iter_mut().skip(start) { - cursor.seek(key); - } - } - - fn seek_until_exact(&mut self, key: &Nibbles) -> Option<(usize, Option)> { - for (idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((cursor_key, value)) = cursor.seek(key) && - cursor_key == key - { - return Some((idx, value.clone())) - } - } - None - } - - fn first_after(&mut self, key: &Nibbles) { - for cursor in &mut self.cursors { - cursor.first_after(key); - } + fn account_overlay(&self) -> OverlayCursor<'_> { + OverlayCursor::new(self.account_overlay.as_slice()) } - fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); - } + fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { + let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); + (OverlayCursor::new(layers), db_wiped) } - fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() - } + fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { + let Some(overlay) = self.storage_overlays.get(&hashed_address) else { + return (&[], false); + }; - fn highest_priority_value_at(&self, key: &Nibbles) -> Option> { - self.cursors.iter().find_map(|cursor| { - let (cursor_key, value) = cursor.current()?; - (cursor_key == key).then(|| value.clone()) - }) + (overlay.layers.as_slice(), overlay.db_wiped) } +} - fn advance_key(&mut self, key: &Nibbles) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { - cursor.first_after(key); - } - } +impl AsRef for TrieUpdatesOverlay { + fn 
as_ref(&self) -> &Self { + self } } -#[derive(Debug)] -struct SeekableInMemoryCursor<'a> { - entries: &'a [(Nibbles, Option)], - idx: usize, +#[derive(Clone, Debug, Default)] +struct TrieStorageOverlay { + layers: Vec, + db_wiped: bool, } -impl<'a> SeekableInMemoryCursor<'a> { - const fn new(entries: &'a [(Nibbles, Option)]) -> Self { - Self { entries, idx: 0 } - } - - fn current(&self) -> Option<&'a (Nibbles, Option)> { - self.entries.get(self.idx) - } - - const fn reset(&mut self) { - self.idx = 0; - } - - fn seek(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { - self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); - self.current() - } - - fn first_after(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { - if self.current().is_some_and(|(entry_key, _)| entry_key > key) { - return self.current() - } - - let remaining = &self.entries[self.idx..]; - self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); - self.current() - } -} +type OverlayCursor<'a> = + PositionedOverlayCursor<'a, TrieUpdatesSorted, Nibbles, Option>; +type TrieOverlayLayer = OverlayLayer>; #[cfg(test)] mod tests { @@ -629,7 +396,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(test_case.in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); @@ -844,7 +612,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = 
InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek_exact(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -887,7 +656,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -954,10 +724,12 @@ mod tests { ], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account( - mock_cursor, - [&higher_priority, &exact_hit, &lower_priority], - ); + let overlay = TrieUpdatesOverlay::new(vec![ + Arc::new(higher_priority), + Arc::new(exact_hit), + Arc::new(lower_priority), + ]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); assert_eq!( @@ -967,12 +739,6 @@ mod tests { BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) )) ); - assert_eq!(cursor.in_memory_cursor.cursors[0].idx, 1); - assert_eq!(cursor.in_memory_cursor.cursors[1].idx, 0); - assert_eq!( - cursor.in_memory_cursor.cursors[2].idx, 0, - "lower-priority overlay should not be sought after an exact overlay hit" - ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -1004,7 +770,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let 
result = cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(); assert_eq!( @@ -1036,6 +803,58 @@ mod tests { ); } + #[test] + fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { + let db_nodes = vec![ + (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None)), + (Nibbles::from_nibbles([0x5]), BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None)), + ]; + + let in_memory_nodes = vec![( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None)), + )]; + + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x5]), + BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 1); + + let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x2]), + BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); + + let result = cursor.next().unwrap(); + assert_eq!( + result, + Some(( + Nibbles::from_nibbles([0x3]), + BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None) + )) + ); + assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); + } + #[test] fn test_multiple_consecutive_deletes() { let db_nodes: Vec<(Nibbles, BranchNodeCompact)> = (1..=10) @@ -1112,7 +931,8 @@ mod tests { let mock_cursor = 
MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); assert_eq!(cursor.current().unwrap(), None); @@ -1163,7 +983,8 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); // Seek to beginning should return None (all nodes are deleted) tracing::debug!("seeking to 0x"); @@ -1218,7 +1039,8 @@ mod tests { )], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); assert_eq!( cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), @@ -1234,6 +1056,35 @@ mod tests { ); } + #[test] + fn test_seek_reuses_exact_db_position() { + let db_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), branch_node(1)), + (Nibbles::from_nibbles([0x2]), branch_node(2)), + ]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = TrieUpdatesOverlay::default(); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), + Some((Nibbles::from_nibbles([0x1]), branch_node(1))) + ); + assert_eq!(visited_keys.lock().len(), 1); + + 
assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x2]), branch_node(2)))); + assert_eq!(visited_keys.lock().len(), 2); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), + Some((Nibbles::from_nibbles([0x2]), branch_node(2))) + ); + assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ @@ -1269,7 +1120,8 @@ mod tests { ], Default::default(), ); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&newest, &oldest]); + let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { @@ -1316,7 +1168,7 @@ mod tests { Default::default(), ); let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = InMemoryTrieCursor::new_account_from_overlay(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); let mut results = Vec::new(); if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { @@ -1397,11 +1249,9 @@ mod tests { ); let hidden = TrieUpdatesSorted::new(vec![], hidden_storage); - let mut cursor = InMemoryTrieCursor::new_storage( - mock_cursor, - [&newest, &deleting, &hidden], - hashed_address, - ); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1439,10 +1289,9 @@ mod tests { false, vec![(Nibbles::from_nibbles([0x3]), Some(branch_node(3)))], ); - let mut overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting)]); - overlay.push(Arc::new(hidden)); - let mut cursor = - 
InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, hashed_address); + let overlay = + TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1479,8 +1328,7 @@ mod tests { ); let overlay = TrieUpdatesOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = - InMemoryTrieCursor::new_storage_from_overlay(mock_cursor, &overlay, first_address); + let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, first_address); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1676,8 +1524,11 @@ mod tests { let trie_updates = overlays .into_iter() .map(|in_memory_nodes| TrieUpdatesSorted::new(in_memory_nodes, Default::default())) + .map(Arc::new) .collect::>(); - let mut test_cursor = InMemoryTrieCursor::new_account(mock_cursor, trie_updates.iter()); + let overlay = TrieUpdatesOverlay::new(trie_updates); + let mut test_cursor = + InMemoryTrieCursor::new_account(mock_cursor, &overlay); // Test: seek to the beginning first let control_first = From 52105c239ae4284c087d2b04863a55ed04704c6a Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 27 May 2026 17:17:57 +0200 Subject: [PATCH 07/40] perf(trie): fast-path overlay cursor seeks --- crates/chain-state/src/state_trie_overlay.rs | 30 ++++++++++----- crates/trie/trie/src/overlay_cursor.rs | 39 +++++++++++++++++++- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index a552e3068fb..d1a9b42a9cf 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -337,15 +337,27 @@ impl StateTrieOverlayManager { let span = tracing::Span::current(); worker_pool.spawn(move || { - let _span = tracing::debug_span!( - target: 
"chain_state::state_trie_overlay", - parent: span, - "compute_state_trie_overlay_cache_fill", - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - ) - .entered(); - manager.compute_and_cache_overlay(key, path); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let _span = tracing::debug_span!( + target: "chain_state::state_trie_overlay", + parent: &span, + "compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key, path); + })); + + if result.is_err() { + manager.remove_pending_overlay(key); + debug!( + target: "chain_state::state_trie_overlay", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + "state trie overlay cache fill panicked" + ); + } }); } } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 7796e1abbbc..ff1289c2fb3 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -1,6 +1,6 @@ use std::{fmt, slice, sync::Arc}; -const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; +const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 128; #[derive(Debug)] pub(crate) enum DbCursorState { @@ -168,6 +168,11 @@ where { let mut start = position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); + if entries.get(start).is_some_and(|(entry_key, _)| !mode.skips(entry_key, key)) && + (start == 0 || mode.skips(&entries[start - 1].0, key)) + { + return Some(start) + } if start > 0 && !mode.skips(&entries[start - 1].0, key) { start = 0; } @@ -221,3 +226,35 @@ impl fmt::Debug for OverlayLayer { f.debug_struct("OverlayLayer").field("entries_len", &self.entries_len).finish() } } + +#[cfg(test)] +mod tests { + use super::*; + + fn layer(entries: Arc>) -> OverlayLayer, u8, u8> { + OverlayLayer::new(Arc::clone(&entries), entries.as_slice()) + } + + #[test] + fn 
seek_reuses_current_position_when_it_already_satisfies_bound() { + let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); + let overlay = [layer(entries)]; + let mut cursor = PositionedOverlayCursor::new(&overlay); + + assert_eq!(cursor.next_key(&100, true), Some(100)); + assert_eq!(cursor.next_key(&100, true), Some(100)); + assert_eq!(cursor.next_key(&99, false), Some(100)); + assert_eq!(cursor.next_key(&100, false), Some(101)); + } + + #[test] + fn seek_can_move_backwards_from_current_position() { + let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); + let overlay = [layer(entries)]; + let mut cursor = PositionedOverlayCursor::new(&overlay); + + assert_eq!(cursor.next_key(&150, true), Some(150)); + assert_eq!(cursor.next_key(&75, true), Some(75)); + assert_eq!(cursor.seek_exact(&25), Some(&25)); + } +} From 2e03b98d7bb24ea38da49da15e37535fc622a83e Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 28 May 2026 16:23:41 +0200 Subject: [PATCH 08/40] fix(chain-state): guard overlay cache fill panics --- crates/chain-state/src/state_trie_overlay.rs | 30 ++++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index a552e3068fb..d1a9b42a9cf 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -337,15 +337,27 @@ impl StateTrieOverlayManager { let span = tracing::Span::current(); worker_pool.spawn(move || { - let _span = tracing::debug_span!( - target: "chain_state::state_trie_overlay", - parent: span, - "compute_state_trie_overlay_cache_fill", - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - ) - .entered(); - manager.compute_and_cache_overlay(key, path); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let _span = tracing::debug_span!( + target: "chain_state::state_trie_overlay", + parent: &span, + 
"compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key, path); + })); + + if result.is_err() { + manager.remove_pending_overlay(key); + debug!( + target: "chain_state::state_trie_overlay", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + "state trie overlay cache fill panicked" + ); + } }); } } From a2207bd63fcac10548ce11913fc0fdb3d1d80561 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 28 May 2026 16:23:46 +0200 Subject: [PATCH 09/40] perf(trie): restore positioned overlay merge --- .../trie/trie/src/hashed_cursor/post_state.rs | 63 ++++++--- crates/trie/trie/src/overlay_cursor.rs | 132 ++++++++++++++---- crates/trie/trie/src/trie_cursor/in_memory.rs | 51 ++++--- 3 files changed, 181 insertions(+), 65 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 046160b8aba..49f1a456f6a 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -97,6 +97,8 @@ where db_cursor_state: DbCursorState, /// In-memory cursors over post state overlays. post_state_cursor: PostStateOverlayCursor<'a, V>, + /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. + deferred_overlay_seek_start: Option, /// The last hashed key that was returned by the cursor. /// De facto, this is a current cursor position. 
last_key: Option, @@ -118,6 +120,7 @@ where cursor, db_cursor_state: DbCursorState::new(false), post_state_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -141,6 +144,7 @@ where cursor, db_cursor_state: DbCursorState::new(cursor_wiped), post_state_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -198,13 +202,9 @@ where } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. - fn choose_next_entry( - &mut self, - mut overlay_bound: B256, - mut overlay_bound_inclusive: bool, - ) -> Result, DatabaseError> { + fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_key = self.post_state_cursor.next_key(&overlay_bound, overlay_bound_inclusive); + let mem_key = self.post_state_cursor.min_current_key(); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); @@ -215,8 +215,7 @@ where return Ok(Some((next_key, value))) } - overlay_bound = next_key; - overlay_bound_inclusive = false; + self.post_state_cursor.advance_key(&next_key); if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -251,16 +250,24 @@ where self.seeked = true; } - if let Some(Some(value)) = self.post_state_cursor.seek_exact(&key) { - self.db_cursor_state.invalidate_position(); - let entry = Some((key, value)); - self.set_last_key(&entry); - return Ok(entry) + self.deferred_overlay_seek_start = None; + match self.post_state_cursor.seek_until_exact(&key) { + Some((idx, Some(value))) => { + self.db_cursor_state.invalidate_position(); + self.deferred_overlay_seek_start = Some(idx + 1); + let entry = Some((key, value)); + self.set_last_key(&entry); + return Ok(entry) + } + Some((idx, None)) => { + self.post_state_cursor.seek_from(idx + 1, &key); + } + None => {} } self.cursor_seek(key)?; - let entry = 
self.choose_next_entry(key, true)?; + let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) } @@ -282,13 +289,18 @@ where return Ok(None); }; + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.post_state_cursor.seek_from(start, &last_key); + } + self.post_state_cursor.first_after(&last_key); + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { Some(db_key) if db_key == last_key => self.cursor_next()?, Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} _ => self.cursor_first_after(last_key)?, } - let entry = self.choose_next_entry(last_key, false)?; + let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) } @@ -298,6 +310,7 @@ where self.db_cursor_state.set_entry(None); self.post_state_cursor.reset(); + self.deferred_overlay_seek_start = None; self.last_key = None; #[cfg(debug_assertions)] { @@ -461,18 +474,30 @@ impl<'a, V> PostStateOverlayCursor<'a, V> where V: HashedPostStateCursorValue, { - fn seek_exact(&mut self, key: &B256) -> Option> { - self.cursor.seek_exact(key).map(|value| (*value).into_option()) + fn seek_from(&mut self, start: usize, key: &B256) { + self.cursor.seek_from(start, key); + } + + fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { + self.cursor.seek_until_exact(key).map(|(idx, value)| (idx, (*value).into_option())) + } + + fn first_after(&mut self, key: &B256) { + self.cursor.first_after(key); } - fn next_key(&mut self, key: &B256, inclusive: bool) -> Option { - self.cursor.next_key(key, inclusive) + fn min_current_key(&self) -> Option { + self.cursor.min_current_key() } fn highest_priority_value_at(&self, key: &B256) -> Option> { self.cursor.highest_priority_value_at(key).map(|value| (*value).into_option()) } + fn advance_key(&mut self, key: &B256) { + self.cursor.advance_key(key); + } + const fn has_visible_value(&self) -> bool { self.has_visible_value } diff --git a/crates/trie/trie/src/overlay_cursor.rs 
b/crates/trie/trie/src/overlay_cursor.rs index 7796e1abbbc..984f308e05f 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -92,52 +92,83 @@ impl PositionedOverlayCursor<'_, O, K, V> where K: Ord, { - pub(crate) fn seek_exact(&mut self, key: &K) -> Option<&V> { - let Self { layers, positions } = self; + pub(crate) fn seek_from(&mut self, start: usize, key: &K) { + for layer_idx in start..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let _ = seek_overlay_entries( + entries, + self.positions.get_mut(layer_idx), + key, + OverlaySeekMode::Inclusive, + ); + } + } - layers.iter().enumerate().find_map(|(layer_idx, layer)| { - let entries = layer.entries(); - let idx = seek_overlay_entries( + pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let Some(idx) = seek_overlay_entries( entries, - positions.get_mut(layer_idx), + self.positions.get_mut(layer_idx), key, OverlaySeekMode::Inclusive, - )?; - (&entries[idx].0 == key).then_some(&entries[idx].1) - }) + ) else { + continue; + }; + if &entries[idx].0 == key { + return Some((layer_idx, &entries[idx].1)) + } + } + + None + } + + pub(crate) fn first_after(&mut self, key: &K) { + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let _ = seek_overlay_entries( + entries, + self.positions.get_mut(layer_idx), + key, + OverlaySeekMode::Exclusive, + ); + } } pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { - self.layers.iter().enumerate().find_map(|(layer_idx, layer)| { + self.layers.iter().zip(&self.positions).find_map(|(layer, position)| { let entries = layer.entries(); - if let Some(position) = self.positions.get(layer_idx) { - entries - .get(*position) - .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) - } else { - let idx = 
entries.binary_search_by(|(entry_key, _)| entry_key.cmp(key)).ok()?; - Some(&entries[idx].1) - } + entries + .get(*position) + .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) }) } + + pub(crate) fn advance_key(&mut self, key: &K) { + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) + { + let _ = seek_overlay_entries( + entries, + self.positions.get_mut(layer_idx), + key, + OverlaySeekMode::Exclusive, + ); + } + } + } } impl PositionedOverlayCursor<'_, O, K, V> where K: Copy + Ord, { - pub(crate) fn next_key(&mut self, key: &K, inclusive: bool) -> Option { - let mode = if inclusive { OverlaySeekMode::Inclusive } else { OverlaySeekMode::Exclusive }; - let Self { layers, positions } = self; - - layers + pub(crate) fn min_current_key(&self) -> Option { + self.layers .iter() - .enumerate() - .filter_map(|(layer_idx, layer)| { - let entries = layer.entries(); - let idx = seek_overlay_entries(entries, positions.get_mut(layer_idx), key, mode)?; - Some(entries[idx].0) - }) + .zip(&self.positions) + .filter_map(|(layer, position)| layer.entries().get(*position).map(|(key, _)| *key)) .min() } } @@ -168,6 +199,11 @@ where { let mut start = position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); + if entries.get(start).is_some_and(|(entry_key, _)| !mode.skips(entry_key, key)) && + (start == 0 || mode.skips(&entries[start - 1].0, key)) + { + return Some(start) + } if start > 0 && !mode.skips(&entries[start - 1].0, key) { start = 0; } @@ -221,3 +257,41 @@ impl fmt::Debug for OverlayLayer { f.debug_struct("OverlayLayer").field("entries_len", &self.entries_len).finish() } } + +#[cfg(test)] +mod tests { + use super::*; + + fn layer(entries: Arc>) -> OverlayLayer, u8, u8> { + OverlayLayer::new(Arc::clone(&entries), entries.as_slice()) + } + + #[test] + fn 
seek_reuses_current_position_when_it_already_satisfies_bound() { + let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); + let overlay = [layer(entries)]; + let mut cursor = PositionedOverlayCursor::new(&overlay); + + cursor.seek_from(0, &100); + assert_eq!(cursor.min_current_key(), Some(100)); + cursor.seek_from(0, &100); + assert_eq!(cursor.min_current_key(), Some(100)); + cursor.first_after(&99); + assert_eq!(cursor.min_current_key(), Some(100)); + cursor.first_after(&100); + assert_eq!(cursor.min_current_key(), Some(101)); + } + + #[test] + fn seek_can_move_backwards_from_current_position() { + let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); + let overlay = [layer(entries)]; + let mut cursor = PositionedOverlayCursor::new(&overlay); + + cursor.seek_from(0, &150); + assert_eq!(cursor.min_current_key(), Some(150)); + cursor.seek_from(0, &75); + assert_eq!(cursor.min_current_key(), Some(75)); + assert_eq!(cursor.seek_until_exact(&25), Some((0, &25))); + } +} diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 40fd74b1a58..1378bdf2013 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -64,6 +64,8 @@ pub struct InMemoryTrieCursor<'a, C> { db_cursor_state: DbCursorState, /// In-memory cursors over trie update overlays. in_memory_cursor: OverlayCursor<'a>, + /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. + deferred_overlay_seek_start: Option, /// The key most recently returned from the Cursor. 
last_key: Option, #[cfg(debug_assertions)] @@ -81,6 +83,7 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { cursor, db_cursor_state: DbCursorState::new(false), in_memory_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -99,6 +102,7 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { cursor, db_cursor_state: DbCursorState::new(db_wiped), in_memory_cursor, + deferred_overlay_seek_start: None, last_key: None, #[cfg(debug_assertions)] seeked: false, @@ -150,13 +154,9 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. - fn choose_next_entry( - &mut self, - mut overlay_bound: Nibbles, - mut overlay_bound_inclusive: bool, - ) -> Result, DatabaseError> { + fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_key = self.in_memory_cursor.next_key(&overlay_bound, overlay_bound_inclusive); + let mem_key = self.in_memory_cursor.min_current_key(); let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { return Ok(None); @@ -169,8 +169,7 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { return Ok(Some((next_key, node))) } - overlay_bound = next_key; - overlay_bound_inclusive = false; + self.in_memory_cursor.advance_key(&next_key); if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &next_key) { self.cursor_next()?; } @@ -194,9 +193,13 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - let entry = if let Some(mem_value) = self.in_memory_cursor.seek_exact(&key).cloned() { + self.deferred_overlay_seek_start = None; + let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { self.db_cursor_state.invalidate_position(); - mem_value.map(|node| (key, node)) + if mem_value.is_some() { + self.deferred_overlay_seek_start = Some(idx + 1); + } + 
mem_value.clone().map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(db_entry); @@ -216,15 +219,23 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.seeked = true; } - if let Some(Some(node)) = self.in_memory_cursor.seek_exact(&key).cloned() { - self.db_cursor_state.invalidate_position(); - let entry = Some((key, node)); - self.set_last_key(&entry); - return Ok(entry); + self.deferred_overlay_seek_start = None; + match self.in_memory_cursor.seek_until_exact(&key) { + Some((idx, Some(node))) => { + self.db_cursor_state.invalidate_position(); + self.deferred_overlay_seek_start = Some(idx + 1); + let entry = Some((key, node.clone())); + self.set_last_key(&entry); + return Ok(entry); + } + Some((idx, None)) => { + self.in_memory_cursor.seek_from(idx + 1, &key); + } + None => {} } self.cursor_seek(key)?; - let entry = self.choose_next_entry(key, true)?; + let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) } @@ -240,13 +251,18 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { return Ok(None); }; + if let Some(start) = self.deferred_overlay_seek_start.take() { + self.in_memory_cursor.seek_from(start, &last_key); + } + self.in_memory_cursor.first_after(&last_key); + match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { Some(db_key) if db_key == last_key => self.cursor_next()?, Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} _ => self.cursor_first_after(last_key)?, } - let entry = self.choose_next_entry(last_key, false)?; + let entry = self.choose_next_entry()?; self.set_last_key(&entry); Ok(entry) } @@ -263,6 +279,7 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.db_cursor_state.set_entry(None); self.in_memory_cursor.reset(); + self.deferred_overlay_seek_start = None; self.last_key = None; #[cfg(debug_assertions)] { From d0ebdcb2ed8423fa773c2abcccae2e435dc54edb Mon Sep 17 00:00:00 2001 From: 
Brian Date: Fri, 29 May 2026 13:37:02 +0200 Subject: [PATCH 10/40] perf(trie): restore overlay cursor seek fast path --- crates/trie/trie/src/overlay_cursor.rs | 134 +++++++++++++++++-------- 1 file changed, 93 insertions(+), 41 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 984f308e05f..12d593d960a 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -92,27 +92,21 @@ impl PositionedOverlayCursor<'_, O, K, V> where K: Ord, { + #[inline(always)] pub(crate) fn seek_from(&mut self, start: usize, key: &K) { for layer_idx in start..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries( - entries, - self.positions.get_mut(layer_idx), - key, - OverlaySeekMode::Inclusive, - ); + let _ = seek_overlay_entries_inclusive(entries, self.positions.get_mut(layer_idx), key); } } + #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let Some(idx) = seek_overlay_entries( - entries, - self.positions.get_mut(layer_idx), - key, - OverlaySeekMode::Inclusive, - ) else { + let Some(idx) = + seek_overlay_entries_inclusive(entries, self.positions.get_mut(layer_idx), key) + else { continue; }; if &entries[idx].0 == key { @@ -123,18 +117,15 @@ where None } + #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries( - entries, - self.positions.get_mut(layer_idx), - key, - OverlaySeekMode::Exclusive, - ); + let _ = seek_overlay_entries_exclusive(entries, self.positions.get_mut(layer_idx), key); } } + #[inline(always)] pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { self.layers.iter().zip(&self.positions).find_map(|(layer, position)| { let entries = 
layer.entries(); @@ -144,17 +135,14 @@ where }) } + #[inline(always)] pub(crate) fn advance_key(&mut self, key: &K) { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) { - let _ = seek_overlay_entries( - entries, - self.positions.get_mut(layer_idx), - key, - OverlaySeekMode::Exclusive, - ); + let _ = + seek_overlay_entries_exclusive(entries, self.positions.get_mut(layer_idx), key); } } } @@ -164,6 +152,7 @@ impl PositionedOverlayCursor<'_, O, K, V> where K: Copy + Ord, { + #[inline(always)] pub(crate) fn min_current_key(&self) -> Option { self.layers .iter() @@ -173,47 +162,88 @@ where } } -#[derive(Clone, Copy)] -enum OverlaySeekMode { - Inclusive, - Exclusive, -} +#[inline(always)] +fn seek_overlay_entries_inclusive( + entries: &[(K, V)], + mut position: Option<&mut usize>, + key: &K, +) -> Option +where + K: Ord, +{ + let mut start = + position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); -impl OverlaySeekMode { - fn skips(self, entry_key: &K, bound: &K) -> bool { - match self { - Self::Inclusive => entry_key < bound, - Self::Exclusive => entry_key <= bound, + if entries.get(start).is_some_and(|(entry_key, _)| entry_key >= key) && + (start == 0 || &entries[start - 1].0 < key) + { + return Some(start) + } + + if start > 0 && &entries[start - 1].0 >= key { + start = 0; + } + + if entries.last().is_none_or(|(entry_key, _)| entry_key < key) { + if let Some(position) = position.as_mut() { + **position = entries.len(); } + return None } + + let remaining = &entries[start..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|(entry_key, _)| entry_key < key) + } else { + let mut advance = 0; + while advance < remaining.len() && &remaining[advance].0 < key { + advance += 1; + } + advance + }; + + let idx = start + advance; + if let Some(position) = 
position.as_mut() { + **position = idx; + } + (idx < entries.len()).then_some(idx) } -fn seek_overlay_entries( +#[inline(always)] +fn seek_overlay_entries_exclusive( entries: &[(K, V)], mut position: Option<&mut usize>, key: &K, - mode: OverlaySeekMode, ) -> Option where K: Ord, { let mut start = position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); - if entries.get(start).is_some_and(|(entry_key, _)| !mode.skips(entry_key, key)) && - (start == 0 || mode.skips(&entries[start - 1].0, key)) + + if entries.get(start).is_some_and(|(entry_key, _)| entry_key > key) && + (start == 0 || &entries[start - 1].0 <= key) { return Some(start) } - if start > 0 && !mode.skips(&entries[start - 1].0, key) { + + if start > 0 && &entries[start - 1].0 > key { start = 0; } + if entries.last().is_none_or(|(entry_key, _)| entry_key <= key) { + if let Some(position) = position.as_mut() { + **position = entries.len(); + } + return None + } + let remaining = &entries[start..]; let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|(entry_key, _)| mode.skips(entry_key, key)) + remaining.partition_point(|(entry_key, _)| entry_key <= key) } else { let mut advance = 0; - while advance < remaining.len() && mode.skips(&remaining[advance].0, key) { + while advance < remaining.len() && &remaining[advance].0 <= key { advance += 1; } advance @@ -294,4 +324,26 @@ mod tests { assert_eq!(cursor.min_current_key(), Some(75)); assert_eq!(cursor.seek_until_exact(&25), Some((0, &25))); } + + #[test] + fn seek_can_recover_after_past_end_fast_path() { + let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); + let overlay = [layer(entries)]; + let mut cursor = PositionedOverlayCursor::new(&overlay); + + cursor.seek_from(0, &250); + assert_eq!(cursor.min_current_key(), None); + assert_eq!(cursor.positions, vec![201]); + + assert_eq!(cursor.seek_until_exact(&25), Some((0, &25))); + assert_eq!(cursor.positions, 
vec![25]); + + cursor.first_after(&250); + assert_eq!(cursor.min_current_key(), None); + assert_eq!(cursor.positions, vec![201]); + + cursor.first_after(&25); + assert_eq!(cursor.min_current_key(), Some(26)); + assert_eq!(cursor.positions, vec![26]); + } } From e8ffce0d31634395f8408c29647f9715bb145bd3 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 15:24:02 +0200 Subject: [PATCH 11/40] perf(trie): make overlay cursors forward-only --- .../trie/trie/src/hashed_cursor/post_state.rs | 57 ++------- crates/trie/trie/src/overlay_cursor.rs | 121 ++++-------------- crates/trie/trie/src/trie_cursor/in_memory.rs | 105 +++------------ 3 files changed, 54 insertions(+), 229 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 49f1a456f6a..2e1c36b19aa 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -170,7 +170,6 @@ where /// Positions the DB cursor state using the underlying cursor. 
fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { if self.db_cursor_state.is_positioned_at(&key) { - self.db_cursor_state.validate_position(); return Ok(()) } @@ -253,7 +252,6 @@ where self.deferred_overlay_seek_start = None; match self.post_state_cursor.seek_until_exact(&key) { Some((idx, Some(value))) => { - self.db_cursor_state.invalidate_position(); self.deferred_overlay_seek_start = Some(idx + 1); let entry = Some((key, value)); self.set_last_key(&entry); @@ -296,7 +294,7 @@ where match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { Some(db_key) if db_key == last_key => self.cursor_next()?, - Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + Some(db_key) if db_key > last_key => {} _ => self.cursor_first_after(last_key)?, } @@ -632,33 +630,6 @@ mod tests { assert_eq!(result, Some((key(0x03), U256::from(3)))); } - #[test] - fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { - let db_nodes = vec![(key(0x03), U256::from(3)), (key(0x05), U256::from(5))]; - let post_state_nodes = vec![(key(0x02), U256::from(2))]; - - let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); - let db_nodes_arc = Arc::new(db_nodes_map); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); - - let post_state = storage_post_state(post_state_nodes); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); - - let result = cursor.seek(key(0x05)).unwrap(); - assert_eq!(result, Some((key(0x05), U256::from(5)))); - assert_eq!(visited_keys.lock().len(), 1); - - let result = cursor.seek(key(0x02)).unwrap(); - assert_eq!(result, Some((key(0x02), U256::from(2)))); - assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); - - let result = cursor.next().unwrap(); - assert_eq!(result, Some((key(0x03), U256::from(3)))); - 
assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); - } - #[test] fn test_seek_overlay_exact_deletion_still_seeks_db() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -706,22 +677,6 @@ mod tests { assert!(!visited_keys.lock().is_empty(), "next should lazily position the DB cursor"); } - #[test] - fn test_seek_can_move_backwards() { - let db_nodes = BTreeMap::from([(key(0x01), U256::from(1)), (key(0x03), U256::from(3))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); - - let post_state = storage_post_state(vec![(key(0x02), U256::from(2))]); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); - - assert_eq!(cursor.seek(key(0x03)).unwrap(), Some((key(0x03), U256::from(3)))); - assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), U256::from(1)))); - assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); - } - #[test] fn test_seek_reuses_exact_db_position() { let db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x02), account(2))]); @@ -1067,9 +1022,14 @@ mod tests { "Initial seek returned", ); assert_eq!(control_first, test_first, "Initial seek mismatch"); + let mut seek_floor = control_first.as_ref().map(|(key, _)| *key); // Execute a sequence of random operations for op in ops { + if reference_position.is_none() { + break + } + match op { CursorOp::Next => { let control_result = @@ -1081,8 +1041,11 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); + let Some((key, _)) = control_result else { break }; + seek_floor = Some(key); } CursorOp::Seek(key) => { + let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek(&expected_combined, &mut reference_position, key); let 
test_result = test_cursor.seek(key).unwrap(); @@ -1093,6 +1056,8 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); + let Some((key, _)) = control_result else { break }; + seek_floor = Some(key); } } } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 12d593d960a..dd8759c979c 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -5,7 +5,7 @@ const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; #[derive(Debug)] pub(crate) enum DbCursorState { Unpositioned, - Positioned { entry: (K, V), position_valid: bool }, + Positioned((K, V)), Wiped, } @@ -24,39 +24,21 @@ impl DbCursorState { pub(crate) const fn entry(&self) -> Option<&(K, V)> { match self { - Self::Positioned { entry, .. } => Some(entry), + Self::Positioned(entry) => Some(entry), Self::Unpositioned | Self::Wiped => None, } } - pub(crate) const fn position_valid(&self) -> bool { - matches!(self, Self::Positioned { position_valid: true, .. }) - } - pub(crate) fn set_entry(&mut self, entry: Option<(K, V)>) { if !self.is_wiped() { - *self = entry - .map(|entry| Self::Positioned { entry, position_valid: true }) - .unwrap_or(Self::Unpositioned); - } - } - - pub(crate) const fn validate_position(&mut self) { - if let Self::Positioned { position_valid, .. } = self { - *position_valid = true; - } - } - - pub(crate) const fn invalidate_position(&mut self) { - if let Self::Positioned { position_valid, .. } = self { - *position_valid = false; + *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); } } } impl DbCursorState { pub(crate) fn is_positioned_at(&self, key: &K) -> bool { - matches!(self, Self::Positioned { entry: (db_key, _), .. 
} if db_key == key) + matches!(self, Self::Positioned((db_key, _)) if db_key == key) } } @@ -96,7 +78,7 @@ where pub(crate) fn seek_from(&mut self, start: usize, key: &K) { for layer_idx in start..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries_inclusive(entries, self.positions.get_mut(layer_idx), key); + let _ = seek_overlay_entries(entries, &mut self.positions[layer_idx], key); } } @@ -104,8 +86,7 @@ where pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let Some(idx) = - seek_overlay_entries_inclusive(entries, self.positions.get_mut(layer_idx), key) + let Some(idx) = seek_overlay_entries(entries, &mut self.positions[layer_idx], key) else { continue; }; @@ -121,7 +102,7 @@ where pub(crate) fn first_after(&mut self, key: &K) { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries_exclusive(entries, self.positions.get_mut(layer_idx), key); + let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); } } @@ -141,8 +122,7 @@ where let entries = self.layers[layer_idx].entries(); if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) { - let _ = - seek_overlay_entries_exclusive(entries, self.positions.get_mut(layer_idx), key); + let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); } } } @@ -163,35 +143,11 @@ where } #[inline(always)] -fn seek_overlay_entries_inclusive( - entries: &[(K, V)], - mut position: Option<&mut usize>, - key: &K, -) -> Option +fn seek_overlay_entries(entries: &[(K, V)], position: &mut usize, key: &K) -> Option where K: Ord, { - let mut start = - position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); - - if entries.get(start).is_some_and(|(entry_key, _)| entry_key >= key) && - (start == 0 || 
&entries[start - 1].0 < key) - { - return Some(start) - } - - if start > 0 && &entries[start - 1].0 >= key { - start = 0; - } - - if entries.last().is_none_or(|(entry_key, _)| entry_key < key) { - if let Some(position) = position.as_mut() { - **position = entries.len(); - } - return None - } - - let remaining = &entries[start..]; + let remaining = &entries[*position..]; let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { remaining.partition_point(|(entry_key, _)| entry_key < key) } else { @@ -202,43 +158,24 @@ where advance }; - let idx = start + advance; - if let Some(position) = position.as_mut() { - **position = idx; - } - (idx < entries.len()).then_some(idx) + *position += advance; + (*position < entries.len()).then_some(*position) } #[inline(always)] -fn seek_overlay_entries_exclusive( +fn seek_overlay_entries_after( entries: &[(K, V)], - mut position: Option<&mut usize>, + position: &mut usize, key: &K, ) -> Option where K: Ord, { - let mut start = - position.as_ref().map(|position| **position).unwrap_or_default().min(entries.len()); - - if entries.get(start).is_some_and(|(entry_key, _)| entry_key > key) && - (start == 0 || &entries[start - 1].0 <= key) - { - return Some(start) - } - - if start > 0 && &entries[start - 1].0 > key { - start = 0; - } - - if entries.last().is_none_or(|(entry_key, _)| entry_key <= key) { - if let Some(position) = position.as_mut() { - **position = entries.len(); - } - return None + if entries.get(*position).is_some_and(|(entry_key, _)| entry_key > key) { + return Some(*position) } - let remaining = &entries[start..]; + let remaining = &entries[*position..]; let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { remaining.partition_point(|(entry_key, _)| entry_key <= key) } else { @@ -249,11 +186,8 @@ where advance }; - let idx = start + advance; - if let Some(position) = position.as_mut() { - **position = idx; - } - (idx < entries.len()).then_some(idx) + *position += advance; + 
(*position < entries.len()).then_some(*position) } #[derive(Clone)] @@ -313,7 +247,7 @@ mod tests { } #[test] - fn seek_can_move_backwards_from_current_position() { + fn seek_does_not_move_backwards_from_current_position() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; let mut cursor = PositionedOverlayCursor::new(&overlay); @@ -321,12 +255,13 @@ mod tests { cursor.seek_from(0, &150); assert_eq!(cursor.min_current_key(), Some(150)); cursor.seek_from(0, &75); - assert_eq!(cursor.min_current_key(), Some(75)); - assert_eq!(cursor.seek_until_exact(&25), Some((0, &25))); + assert_eq!(cursor.min_current_key(), Some(150)); + assert_eq!(cursor.seek_until_exact(&25), None); + assert_eq!(cursor.min_current_key(), Some(150)); } #[test] - fn seek_can_recover_after_past_end_fast_path() { + fn seek_does_not_recover_after_past_end() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; let mut cursor = PositionedOverlayCursor::new(&overlay); @@ -335,15 +270,15 @@ mod tests { assert_eq!(cursor.min_current_key(), None); assert_eq!(cursor.positions, vec![201]); - assert_eq!(cursor.seek_until_exact(&25), Some((0, &25))); - assert_eq!(cursor.positions, vec![25]); + assert_eq!(cursor.seek_until_exact(&25), None); + assert_eq!(cursor.positions, vec![201]); cursor.first_after(&250); assert_eq!(cursor.min_current_key(), None); assert_eq!(cursor.positions, vec![201]); cursor.first_after(&25); - assert_eq!(cursor.min_current_key(), Some(26)); - assert_eq!(cursor.positions, vec![26]); + assert_eq!(cursor.min_current_key(), None); + assert_eq!(cursor.positions, vec![201]); } } diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 1378bdf2013..d3866f328dc 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -122,7 +122,6 @@ impl<'a, C: TrieCursor> 
InMemoryTrieCursor<'a, C> { /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { if self.db_cursor_state.is_positioned_at(&key) { - self.db_cursor_state.validate_position(); return Ok(()) } @@ -195,7 +194,6 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = None; let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { - self.db_cursor_state.invalidate_position(); if mem_value.is_some() { self.deferred_overlay_seek_start = Some(idx + 1); } @@ -222,7 +220,6 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = None; match self.in_memory_cursor.seek_until_exact(&key) { Some((idx, Some(node))) => { - self.db_cursor_state.invalidate_position(); self.deferred_overlay_seek_start = Some(idx + 1); let entry = Some((key, node.clone())); self.set_last_key(&entry); @@ -258,7 +255,7 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { Some(db_key) if db_key == last_key => self.cursor_next()?, - Some(db_key) if db_key > last_key && self.db_cursor_state.position_valid() => {} + Some(db_key) if db_key > last_key => {} _ => self.cursor_first_after(last_key)?, } @@ -820,58 +817,6 @@ mod tests { ); } - #[test] - fn test_seek_overlay_exact_hit_repositions_stale_ahead_db_on_next() { - let db_nodes = vec![ - (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None)), - (Nibbles::from_nibbles([0x5]), BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None)), - ]; - - let in_memory_nodes = vec![( - Nibbles::from_nibbles([0x2]), - Some(BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None)), - )]; - - let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); - let db_nodes_arc = Arc::new(db_nodes_map); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, 
visited_keys.clone()); - - let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); - - let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); - assert_eq!( - result, - Some(( - Nibbles::from_nibbles([0x5]), - BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) - )) - ); - assert_eq!(visited_keys.lock().len(), 1); - - let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); - assert_eq!( - result, - Some(( - Nibbles::from_nibbles([0x2]), - BranchNodeCompact::new(0b0010, 0b0010, 0, vec![], None) - )) - ); - assert_eq!(visited_keys.lock().len(), 1, "exact overlay hit should not seek the DB"); - - let result = cursor.next().unwrap(); - assert_eq!( - result, - Some(( - Nibbles::from_nibbles([0x3]), - BranchNodeCompact::new(0b0011, 0b0011, 0, vec![], None) - )) - ); - assert_eq!(visited_keys.lock().len(), 2, "next should reposition the stale DB cursor"); - } - #[test] fn test_multiple_consecutive_deletes() { let db_nodes: Vec<(Nibbles, BranchNodeCompact)> = (1..=10) @@ -1025,6 +970,7 @@ mod tests { ]; for seek_key in seek_keys { + cursor.reset(); tracing::debug!("seeking to {seek_key:?}"); let result = cursor.seek(seek_key).unwrap(); assert_eq!( @@ -1039,40 +985,6 @@ mod tests { assert_eq!(result, None, "Expected None from next() but got {:?}", result); } - #[test] - fn test_seek_can_move_backwards() { - let db_nodes = BTreeMap::from([ - (Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None)), - (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None)), - ]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); - - let trie_updates = TrieUpdatesSorted::new( - vec![( - Nibbles::from_nibbles([0x2]), - Some(BranchNodeCompact::new(2, 
2, 0, vec![], None)), - )], - Default::default(), - ); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); - - assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), - Some((Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None))) - ); - assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), - Some((Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None))) - ); - assert_eq!( - cursor.next().unwrap(), - Some((Nibbles::from_nibbles([0x2]), BranchNodeCompact::new(2, 2, 0, vec![], None))) - ); - } - #[test] fn test_seek_reuses_exact_db_position() { let db_nodes = BTreeMap::from([ @@ -1557,9 +1469,14 @@ mod tests { "Initial seek returned", ); assert_eq!(control_first, test_first, "Initial seek mismatch"); + let mut seek_floor = control_first.as_ref().map(|(key, _)| *key); // Execute a sequence of random operations for op in ops { + if reference_position.is_none() { + break + } + match op { CursorOp::Next => { let control_result = @@ -1571,8 +1488,11 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); + let Some((key, _)) = control_result else { break }; + seek_floor = Some(key); } CursorOp::Seek(key) => { + let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek(key).unwrap(); @@ -1583,8 +1503,11 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); + let Some((key, _)) = control_result else { break }; + seek_floor = Some(key); } CursorOp::SeekExact(key) => { + let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek_exact(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek_exact(key).unwrap(); @@ -1595,6 
+1518,8 @@ mod tests { "SeekExact returned", ); assert_eq!(control_result, test_result, "SeekExact operation mismatch for key {:?}", key); + let Some((key, _)) = control_result else { break }; + seek_floor = Some(key); } } } From b760c95637afad331be33e041492ffa5e2db4c23 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 18:14:10 +0200 Subject: [PATCH 12/40] perf: extend cached state trie overlays incrementally --- crates/chain-state/src/state_trie_overlay.rs | 78 ++++++++++++++++---- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index d1a9b42a9cf..e25e4a2f403 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -645,25 +645,58 @@ fn flatten_overlay( blocks: Vec>, parent_overlay: StateTrieOverlay, ) -> StateTrieOverlay { - let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); let (parent_trie_updates, parent_hashed_post_state) = parent_overlay.into_layers(); + if !parent_trie_updates.is_empty() || !parent_hashed_post_state.is_empty() { + let mut trie_updates = flatten_trie_update_layers(parent_trie_updates); + let mut hashed_post_state = flatten_hashed_post_state_layers(parent_hashed_post_state); + + for block in blocks.iter().rev() { + let trie_data = block.trie_data(); + + #[cfg(feature = "rayon")] + { + rayon::join( + || { + if !trie_data.trie_updates.is_empty() { + Arc::make_mut(&mut trie_updates) + .extend_ref_and_sort(&trie_data.trie_updates); + } + }, + || { + if !trie_data.hashed_state.is_empty() { + Arc::make_mut(&mut hashed_post_state) + .extend_ref_and_sort(&trie_data.hashed_state); + } + }, + ); + } + + #[cfg(not(feature = "rayon"))] + { + if !trie_data.trie_updates.is_empty() { + Arc::make_mut(&mut trie_updates).extend_ref_and_sort(&trie_data.trie_updates); + } + if !trie_data.hashed_state.is_empty() { + Arc::make_mut(&mut hashed_post_state) + 
.extend_ref_and_sort(&trie_data.hashed_state); + } + } + } + + return StateTrieOverlay::new(vec![trie_updates], vec![hashed_post_state]) + } + #[cfg(feature = "rayon")] let (trie_updates, hashed_post_state) = rayon::join( || { TrieUpdatesSorted::merge_batch( - trie_data - .iter() - .map(|data| Arc::clone(&data.trie_updates)) - .chain(parent_trie_updates), + blocks.iter().map(|block| Arc::clone(&block.trie_data().trie_updates)), ) }, || { HashedPostStateSorted::merge_batch( - trie_data - .iter() - .map(|data| Arc::clone(&data.hashed_state)) - .chain(parent_hashed_post_state), + blocks.iter().map(|block| Arc::clone(&block.trie_data().hashed_state)), ) }, ); @@ -671,19 +704,36 @@ fn flatten_overlay( #[cfg(not(feature = "rayon"))] let (trie_updates, hashed_post_state) = ( TrieUpdatesSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.trie_updates)).chain(parent_trie_updates), + blocks.iter().map(|block| Arc::clone(&block.trie_data().trie_updates)), ), HashedPostStateSorted::merge_batch( - trie_data - .iter() - .map(|data| Arc::clone(&data.hashed_state)) - .chain(parent_hashed_post_state), + blocks.iter().map(|block| Arc::clone(&block.trie_data().hashed_state)), ), ); StateTrieOverlay::new(vec![trie_updates], vec![hashed_post_state]) } +#[cfg(any(test, feature = "rayon"))] +fn flatten_trie_update_layers(layers: Vec>) -> Arc { + match layers.len() { + 0 => Arc::new(TrieUpdatesSorted::default()), + 1 => layers.into_iter().next().expect("len checked"), + _ => TrieUpdatesSorted::merge_batch(layers), + } +} + +#[cfg(any(test, feature = "rayon"))] +fn flatten_hashed_post_state_layers( + layers: Vec>, +) -> Arc { + match layers.len() { + 0 => Arc::new(HashedPostStateSorted::default()), + 1 => layers.into_iter().next().expect("len checked"), + _ => HashedPostStateSorted::merge_batch(layers), + } +} + #[cfg(test)] mod tests { use super::*; From a895635a71e99bcdc913fc88ae3945b46c45e0d8 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 19:33:12 +0200 
Subject: [PATCH 13/40] perf: skip redundant overlay seek searches --- crates/trie/trie/src/overlay_cursor.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index dd8759c979c..31e7366905f 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -147,6 +147,10 @@ fn seek_overlay_entries(entries: &[(K, V)], position: &mut usize, key: &K) where K: Ord, { + if entries.get(*position).is_some_and(|(entry_key, _)| entry_key >= key) { + return Some(*position) + } + let remaining = &entries[*position..]; let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { remaining.partition_point(|(entry_key, _)| entry_key < key) From 64a0d0276a6e5ac295e9b2a5ae699c1002c2644c Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 20:46:52 +0200 Subject: [PATCH 14/40] perf: avoid duplicate overlay seek comparisons --- crates/trie/trie/src/overlay_cursor.rs | 64 ++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 31e7366905f..7ca7f469b43 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -86,13 +86,12 @@ where pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { for layer_idx in 0..self.layers.len() { let entries = self.layers[layer_idx].entries(); - let Some(idx) = seek_overlay_entries(entries, &mut self.positions[layer_idx], key) + let Some(idx) = + seek_overlay_entries_exact(entries, &mut self.positions[layer_idx], key) else { continue; }; - if &entries[idx].0 == key { - return Some((layer_idx, &entries[idx].1)) - } + return Some((layer_idx, &entries[idx].1)) } None @@ -147,8 +146,11 @@ fn seek_overlay_entries(entries: &[(K, V)], position: &mut usize, key: &K) where K: Ord, { - if entries.get(*position).is_some_and(|(entry_key, _)| entry_key 
>= key) { - return Some(*position) + if let Some((entry_key, _)) = entries.get(*position) { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => *position += 1, + std::cmp::Ordering::Equal | std::cmp::Ordering::Greater => return Some(*position), + } } let remaining = &entries[*position..]; @@ -166,6 +168,49 @@ where (*position < entries.len()).then_some(*position) } +#[inline(always)] +fn seek_overlay_entries_exact( + entries: &[(K, V)], + position: &mut usize, + key: &K, +) -> Option +where + K: Ord, +{ + if let Some((entry_key, _)) = entries.get(*position) { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => *position += 1, + std::cmp::Ordering::Equal => return Some(*position), + std::cmp::Ordering::Greater => return None, + } + } + + let remaining = &entries[*position..]; + if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + *position += remaining.partition_point(|(entry_key, _)| entry_key < key); + return entries + .get(*position) + .and_then(|(entry_key, _)| (entry_key == key).then_some(*position)) + } + + for (advance, (entry_key, _)) in remaining.iter().enumerate() { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => {} + std::cmp::Ordering::Equal => { + *position += advance; + return Some(*position) + } + std::cmp::Ordering::Greater => { + *position += advance; + return None + } + } + } + + *position = entries.len(); + None +} + #[inline(always)] fn seek_overlay_entries_after( entries: &[(K, V)], @@ -175,8 +220,11 @@ fn seek_overlay_entries_after( where K: Ord, { - if entries.get(*position).is_some_and(|(entry_key, _)| entry_key > key) { - return Some(*position) + if let Some((entry_key, _)) = entries.get(*position) { + match entry_key.cmp(key) { + std::cmp::Ordering::Greater => return Some(*position), + std::cmp::Ordering::Less | std::cmp::Ordering::Equal => *position += 1, + } } let remaining = &entries[*position..]; From a52ffc342c92616a60fa5bbc962a1ba89279bdce Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 
21:50:39 +0200 Subject: [PATCH 15/40] perf: keep overlay cursor positions with layers --- .../trie/trie/src/hashed_cursor/post_state.rs | 2 +- crates/trie/trie/src/overlay_cursor.rs | 123 +++++++++++------- crates/trie/trie/src/trie_cursor/in_memory.rs | 3 +- 3 files changed, 79 insertions(+), 49 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 2e1c36b19aa..75c862eeade 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -443,7 +443,7 @@ impl AsRef for HashedPostStateOverlay { #[derive(Debug)] struct PostStateOverlayCursor<'a, V> { - cursor: PositionedOverlayCursor<'a, HashedPostStateSorted, B256, V>, + cursor: PositionedOverlayCursor<'a, B256, V>, has_visible_value: bool, } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 7ca7f469b43..3c7bb63c160 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -43,55 +43,53 @@ impl DbCursorState { } #[derive(Debug)] -pub(crate) struct PositionedOverlayCursor<'a, O, K, V> { - layers: &'a [OverlayLayer], - positions: Vec, +pub(crate) struct PositionedOverlayCursor<'a, K, V> { + layers: Vec>, } -impl Default for PositionedOverlayCursor<'_, O, K, V> { +impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self::new(&[]) + Self { layers: Vec::new() } } } -impl<'a, O, K, V> PositionedOverlayCursor<'a, O, K, V> { - pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { - Self { layers, positions: vec![0; layers.len()] } +impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { + pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { + let mut cursor = Self { layers: Vec::with_capacity(layers.len()) }; + cursor.retarget(layers); + cursor } pub(crate) fn reset(&mut self) { - self.positions.fill(0); + for layer in &mut self.layers { + layer.reset(); + } } - pub(crate) fn 
retarget(&mut self, layers: &'a [OverlayLayer]) { - self.layers = layers; - self.positions.clear(); - self.positions.resize(layers.len(), 0); + pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + self.layers.clear(); + self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); } } -impl PositionedOverlayCursor<'_, O, K, V> +impl PositionedOverlayCursor<'_, K, V> where K: Ord, { #[inline(always)] pub(crate) fn seek_from(&mut self, start: usize, key: &K) { - for layer_idx in start..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries(entries, &mut self.positions[layer_idx], key); + for layer in &mut self.layers[start..] { + let _ = layer.seek(key); } } #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let Some(idx) = - seek_overlay_entries_exact(entries, &mut self.positions[layer_idx], key) - else { + for (layer_idx, layer) in self.layers.iter_mut().enumerate() { + let Some(entry) = layer.seek_exact(key) else { continue; }; - return Some((layer_idx, &entries[idx].1)) + return Some((layer_idx, &entry.1)) } None @@ -99,45 +97,78 @@ where #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); + for layer in &mut self.layers { + let _ = layer.first_after(key); } } #[inline(always)] pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { - self.layers.iter().zip(&self.positions).find_map(|(layer, position)| { - let entries = layer.entries(); - entries - .get(*position) - .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) + self.layers.iter().find_map(|layer| { + layer.current().and_then(|(entry_key, value)| (entry_key == 
key).then_some(value)) }) } #[inline(always)] pub(crate) fn advance_key(&mut self, key: &K) { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) - { - let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); + for layer in &mut self.layers { + if layer.current().is_some_and(|(entry_key, _)| entry_key == key) { + let _ = layer.first_after(key); } } } } -impl PositionedOverlayCursor<'_, O, K, V> +impl PositionedOverlayCursor<'_, K, V> where K: Copy + Ord, { #[inline(always)] pub(crate) fn min_current_key(&self) -> Option { - self.layers - .iter() - .zip(&self.positions) - .filter_map(|(layer, position)| layer.entries().get(*position).map(|(key, _)| *key)) - .min() + self.layers.iter().filter_map(|layer| layer.current().map(|(key, _)| *key)).min() + } +} + +#[derive(Debug)] +struct PositionedOverlayLayer<'a, K, V> { + entries: &'a [(K, V)], + position: usize, +} + +impl<'a, K, V> PositionedOverlayLayer<'a, K, V> { + const fn new(entries: &'a [(K, V)]) -> Self { + Self { entries, position: 0 } + } + + fn current(&self) -> Option<&'a (K, V)> { + self.entries.get(self.position) + } + + const fn reset(&mut self) { + self.position = 0; + } +} + +impl<'a, K, V> PositionedOverlayLayer<'a, K, V> +where + K: Ord, +{ + #[inline(always)] + fn seek(&mut self, key: &K) -> Option<&'a (K, V)> { + let _ = seek_overlay_entries(self.entries, &mut self.position, key); + self.current() + } + + #[inline(always)] + fn seek_exact(&mut self, key: &K) -> Option<&'a (K, V)> { + let _ = seek_overlay_entries_exact(self.entries, &mut self.position, key)?; + self.current() + } + + #[inline(always)] + fn first_after(&mut self, key: &K) -> Option<&'a (K, V)> { + let _ = seek_overlay_entries_after(self.entries, &mut self.position, key); + self.current() } } @@ -320,17 +351,17 @@ mod tests { cursor.seek_from(0, &250); 
assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!(cursor.layers[0].position, 201); assert_eq!(cursor.seek_until_exact(&25), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!(cursor.layers[0].position, 201); cursor.first_after(&250); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!(cursor.layers[0].position, 201); cursor.first_after(&25); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!(cursor.layers[0].position, 201); } } diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index d3866f328dc..9cd49ad0000 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -383,8 +383,7 @@ struct TrieStorageOverlay { db_wiped: bool, } -type OverlayCursor<'a> = - PositionedOverlayCursor<'a, TrieUpdatesSorted, Nibbles, Option>; +type OverlayCursor<'a> = PositionedOverlayCursor<'a, Nibbles, Option>; type TrieOverlayLayer = OverlayLayer>; #[cfg(test)] From 5704819f43b3316872fc8a146351296ce2973d76 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 May 2026 22:46:53 +0200 Subject: [PATCH 16/40] perf: index overlay exact key lookups --- .../trie/trie/src/hashed_cursor/post_state.rs | 60 ++++++++++---- crates/trie/trie/src/overlay_cursor.rs | 78 +++++++++++++++++-- crates/trie/trie/src/trie_cursor/in_memory.rs | 38 ++++++--- 3 files changed, 139 insertions(+), 37 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 75c862eeade..23c1799c9ad 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,5 +1,8 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}; +use 
crate::overlay_cursor::{ + build_overlay_exact_index, DbCursorState, OverlayExactIndexEntry, OverlayLayer, + PositionedOverlayCursor, +}; use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; @@ -257,8 +260,8 @@ where self.set_last_key(&entry); return Ok(entry) } - Some((idx, None)) => { - self.post_state_cursor.seek_from(idx + 1, &key); + Some((_, None)) => { + self.post_state_cursor.seek_from(0, &key); } None => {} } @@ -341,9 +344,9 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, cursor_wiped, has_visible_value) = + let (layers, exact_index, cursor_wiped, has_visible_value) = self.post_states.storage_overlay_layers(hashed_address); - self.post_state_cursor.retarget(layers, has_visible_value); + self.post_state_cursor.retarget(layers, exact_index, has_visible_value); self.db_cursor_state = DbCursorState::new(cursor_wiped); } } @@ -352,6 +355,7 @@ where #[derive(Clone, Debug, Default)] pub struct HashedPostStateOverlay { account_overlay: Arc>>>, + account_exact_index: Arc>>, storage_overlays: Arc>, } @@ -359,8 +363,9 @@ impl HashedPostStateOverlay { /// Create a new indexed hashed post-state overlay stack. pub fn new(states: Vec>) -> Self { let account_overlay = Self::build_account_overlay(&states); + let account_exact_index = Arc::new(build_overlay_exact_index(account_overlay.as_slice())); let storage_overlays = Self::build_storage_overlays(&states); - Self { account_overlay, storage_overlays } + Self { account_overlay, account_exact_index, storage_overlays } } /// Returns `true` if the overlay does not contain any hashed post-state updates. 
@@ -408,6 +413,7 @@ impl HashedPostStateOverlay { } for overlay in overlays.values_mut() { + overlay.exact_index = build_overlay_exact_index(&overlay.layers); overlay.has_visible_value = has_visible_storage_value(&overlay.layers); } @@ -415,23 +421,33 @@ impl HashedPostStateOverlay { } fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { - PostStateOverlayCursor::new(self.account_overlay.as_slice(), false) + PostStateOverlayCursor::new( + self.account_overlay.as_slice(), + self.account_exact_index.as_slice(), + false, + ) } fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); - (PostStateOverlayCursor::new(layers, has_visible_value), db_wiped) + let (layers, exact_index, db_wiped, has_visible_value) = + self.storage_overlay_layers(hashed_address); + (PostStateOverlayCursor::new(layers, exact_index, has_visible_value), db_wiped) } fn storage_overlay_layers( &self, hashed_address: B256, - ) -> (&[PostStateOverlayLayer], bool, bool) { + ) -> (&[PostStateOverlayLayer], &[OverlayExactIndexEntry], bool, bool) { let Some(overlay) = self.storage_overlays.get(&hashed_address) else { - return (&[], false, false); + return (&[], &[], false, false); }; - (overlay.layers.as_slice(), overlay.db_wiped, overlay.has_visible_value) + ( + overlay.layers.as_slice(), + overlay.exact_index.as_slice(), + overlay.db_wiped, + overlay.has_visible_value, + ) } } @@ -449,21 +465,30 @@ struct PostStateOverlayCursor<'a, V> { impl Default for PostStateOverlayCursor<'_, V> { fn default() -> Self { - Self::new(&[], false) + Self::new(&[], &[], false) } } impl<'a, V> PostStateOverlayCursor<'a, V> { - fn new(layers: &'a [PostStateOverlayLayer], has_visible_value: bool) -> Self { - Self { cursor: PositionedOverlayCursor::new(layers), has_visible_value } + fn new( + layers: &'a [PostStateOverlayLayer], + exact_index: &'a [OverlayExactIndexEntry], + 
has_visible_value: bool, + ) -> Self { + Self { cursor: PositionedOverlayCursor::new(layers, exact_index), has_visible_value } } fn reset(&mut self) { self.cursor.reset(); } - fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { - self.cursor.retarget(layers); + fn retarget( + &mut self, + layers: &'a [PostStateOverlayLayer], + exact_index: &'a [OverlayExactIndexEntry], + has_visible_value: bool, + ) { + self.cursor.retarget(layers, exact_index); self.has_visible_value = has_visible_value; } } @@ -504,6 +529,7 @@ where #[derive(Clone, Debug, Default)] struct HashedStorageOverlay { layers: Vec>, + exact_index: Vec>, db_wiped: bool, has_visible_value: bool, } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 3c7bb63c160..a75fc768e80 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -45,18 +45,22 @@ impl DbCursorState { #[derive(Debug)] pub(crate) struct PositionedOverlayCursor<'a, K, V> { layers: Vec>, + exact_index: &'a [OverlayExactIndexEntry], } impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self { layers: Vec::new() } + Self { layers: Vec::new(), exact_index: &[] } } } impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { - pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { - let mut cursor = Self { layers: Vec::with_capacity(layers.len()) }; - cursor.retarget(layers); + pub(crate) fn new( + layers: &'a [OverlayLayer], + exact_index: &'a [OverlayExactIndexEntry], + ) -> Self { + let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; + cursor.retarget(layers, exact_index); cursor } @@ -66,9 +70,14 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { } } - pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + pub(crate) fn retarget( + &mut self, + layers: &'a [OverlayLayer], + exact_index: &'a [OverlayExactIndexEntry], + ) { self.layers.clear(); 
self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); + self.exact_index = exact_index; } } @@ -85,6 +94,21 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { + if !self.exact_index.is_empty() { + let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) + else { + self.seek_from(0, key); + return None; + }; + let entry = &self.exact_index[index_idx]; + let layer = &mut self.layers[entry.layer_idx]; + if layer.position > entry.entry_idx { + return None + } + layer.position = entry.entry_idx; + return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) + } + for (layer_idx, layer) in self.layers.iter_mut().enumerate() { let Some(entry) = layer.seek_exact(key) else { continue; @@ -273,6 +297,41 @@ where (*position < entries.len()).then_some(*position) } +#[derive(Clone, Copy, Debug)] +pub(crate) struct OverlayExactIndexEntry { + key: K, + layer_idx: usize, + entry_idx: usize, +} + +pub(crate) fn build_overlay_exact_index( + layers: &[OverlayLayer], +) -> Vec> +where + K: Copy + Ord, +{ + let entry_count = layers.iter().map(|layer| layer.entries_len).sum(); + let mut index = Vec::with_capacity(entry_count); + + for (layer_idx, layer) in layers.iter().enumerate() { + index.extend(layer.entries().iter().enumerate().map(|(entry_idx, (key, _))| { + OverlayExactIndexEntry { key: *key, layer_idx, entry_idx } + })); + } + + index.sort_unstable_by(|a, b| a.key.cmp(&b.key).then_with(|| a.layer_idx.cmp(&b.layer_idx))); + + let mut write_idx = 0; + for read_idx in 0..index.len() { + if write_idx == 0 || index[write_idx - 1].key != index[read_idx].key { + index[write_idx] = index[read_idx]; + write_idx += 1; + } + } + index.truncate(write_idx); + index +} + #[derive(Clone)] pub(crate) struct OverlayLayer { _owner: Arc, @@ -317,7 +376,8 @@ mod tests { fn seek_reuses_current_position_when_it_already_satisfies_bound() { let entries = 
Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); + let exact_index = build_overlay_exact_index(&overlay); + let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); cursor.seek_from(0, &100); assert_eq!(cursor.min_current_key(), Some(100)); @@ -333,7 +393,8 @@ mod tests { fn seek_does_not_move_backwards_from_current_position() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); + let exact_index = build_overlay_exact_index(&overlay); + let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); cursor.seek_from(0, &150); assert_eq!(cursor.min_current_key(), Some(150)); @@ -347,7 +408,8 @@ mod tests { fn seek_does_not_recover_after_past_end() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); + let exact_index = build_overlay_exact_index(&overlay); + let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); cursor.seek_from(0, &250); assert_eq!(cursor.min_current_key(), None); diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 9cd49ad0000..d246e27be4c 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,6 +1,9 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; use crate::{ - overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}, + overlay_cursor::{ + build_overlay_exact_index, DbCursorState, OverlayExactIndexEntry, OverlayLayer, + PositionedOverlayCursor, + }, updates::TrieUpdatesSorted, }; use alloy_primitives::{map::B256Map, B256}; @@ -225,8 +228,8 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.set_last_key(&entry); return 
Ok(entry); } - Some((idx, None)) => { - self.in_memory_cursor.seek_from(idx + 1, &key); + Some((_, None)) => { + self.in_memory_cursor.seek_from(0, &key); } None => {} } @@ -289,8 +292,9 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, db_wiped) = self.trie_updates.storage_overlay_layers(hashed_address); - self.in_memory_cursor.retarget(layers); + let (layers, exact_index, db_wiped) = + self.trie_updates.storage_overlay_layers(hashed_address); + self.in_memory_cursor.retarget(layers, exact_index); self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ -299,6 +303,7 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { account_overlay: Arc>, + account_exact_index: Arc>>, storage_overlays: Arc>, } @@ -306,8 +311,9 @@ impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { let account_overlay = Self::build_account_overlay(&updates); + let account_exact_index = Arc::new(build_overlay_exact_index(account_overlay.as_slice())); let storage_overlays = Self::build_storage_overlays(&updates); - Self { account_overlay, storage_overlays } + Self { account_overlay, account_exact_index, storage_overlays } } /// Returns `true` if the overlay does not contain any trie updates. 
@@ -350,24 +356,31 @@ impl TrieUpdatesOverlay { } } + for overlay in overlays.values_mut() { + overlay.exact_index = build_overlay_exact_index(&overlay.layers); + } + Arc::new(overlays) } fn account_overlay(&self) -> OverlayCursor<'_> { - OverlayCursor::new(self.account_overlay.as_slice()) + OverlayCursor::new(self.account_overlay.as_slice(), self.account_exact_index.as_slice()) } fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); - (OverlayCursor::new(layers), db_wiped) + let (layers, exact_index, db_wiped) = self.storage_overlay_layers(hashed_address); + (OverlayCursor::new(layers, exact_index), db_wiped) } - fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { + fn storage_overlay_layers( + &self, + hashed_address: B256, + ) -> (&[TrieOverlayLayer], &[OverlayExactIndexEntry], bool) { let Some(overlay) = self.storage_overlays.get(&hashed_address) else { - return (&[], false); + return (&[], &[], false); }; - (overlay.layers.as_slice(), overlay.db_wiped) + (overlay.layers.as_slice(), overlay.exact_index.as_slice(), overlay.db_wiped) } } @@ -380,6 +393,7 @@ impl AsRef for TrieUpdatesOverlay { #[derive(Clone, Debug, Default)] struct TrieStorageOverlay { layers: Vec, + exact_index: Vec>, db_wiped: bool, } From 1a1257c60d8eab83a281173040751682092d105a Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 00:14:14 +0200 Subject: [PATCH 17/40] perf: advance overlay exact index monotonically --- crates/trie/trie/src/overlay_cursor.rs | 50 +++++++++++++++++++++----- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index a75fc768e80..2a7a584ce43 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -46,11 +46,12 @@ impl DbCursorState { pub(crate) struct PositionedOverlayCursor<'a, K, V> { layers: 
Vec>, exact_index: &'a [OverlayExactIndexEntry], + exact_index_position: usize, } impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self { layers: Vec::new(), exact_index: &[] } + Self { layers: Vec::new(), exact_index: &[], exact_index_position: 0 } } } @@ -59,7 +60,11 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { layers: &'a [OverlayLayer], exact_index: &'a [OverlayExactIndexEntry], ) -> Self { - let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; + let mut cursor = Self { + layers: Vec::with_capacity(layers.len()), + exact_index: &[], + exact_index_position: 0, + }; cursor.retarget(layers, exact_index); cursor } @@ -68,6 +73,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { for layer in &mut self.layers { layer.reset(); } + self.exact_index_position = 0; } pub(crate) fn retarget( @@ -78,6 +84,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { self.layers.clear(); self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); self.exact_index = exact_index; + self.exact_index_position = 0; } } @@ -95,18 +102,16 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { if !self.exact_index.is_empty() { - let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) - else { + let Some((layer_idx, entry_idx)) = self.seek_exact_index(key) else { self.seek_from(0, key); return None; }; - let entry = &self.exact_index[index_idx]; - let layer = &mut self.layers[entry.layer_idx]; - if layer.position > entry.entry_idx { + let layer = &mut self.layers[layer_idx]; + if layer.position > entry_idx { return None } - layer.position = entry.entry_idx; - return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) + layer.position = entry_idx; + return Some((layer_idx, &layer.entries[entry_idx].1)) } for (layer_idx, layer) in self.layers.iter_mut().enumerate() { @@ -119,6 +124,33 @@ where None } + 
#[inline(always)] + fn seek_exact_index(&mut self, key: &K) -> Option<(usize, usize)> { + if let Some(entry) = self.exact_index.get(self.exact_index_position) { + match entry.key.cmp(key) { + std::cmp::Ordering::Less => self.exact_index_position += 1, + std::cmp::Ordering::Equal => return Some((entry.layer_idx, entry.entry_idx)), + std::cmp::Ordering::Greater => return None, + } + } + + let remaining = &self.exact_index[self.exact_index_position..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|entry| &entry.key < key) + } else { + let mut advance = 0; + while advance < remaining.len() && &remaining[advance].key < key { + advance += 1; + } + advance + }; + + self.exact_index_position += advance; + self.exact_index + .get(self.exact_index_position) + .and_then(|entry| (&entry.key == key).then_some((entry.layer_idx, entry.entry_idx))) + } + #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { for layer in &mut self.layers { From ee53b32f8b3e3f7c8b5d5635c09442097e8e2892 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 01:00:13 +0200 Subject: [PATCH 18/40] Revert "perf: advance overlay exact index monotonically" This reverts commit 1a1257c60d8eab83a281173040751682092d105a. 
--- crates/trie/trie/src/overlay_cursor.rs | 50 +++++--------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 2a7a584ce43..a75fc768e80 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -46,12 +46,11 @@ impl DbCursorState { pub(crate) struct PositionedOverlayCursor<'a, K, V> { layers: Vec>, exact_index: &'a [OverlayExactIndexEntry], - exact_index_position: usize, } impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self { layers: Vec::new(), exact_index: &[], exact_index_position: 0 } + Self { layers: Vec::new(), exact_index: &[] } } } @@ -60,11 +59,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { layers: &'a [OverlayLayer], exact_index: &'a [OverlayExactIndexEntry], ) -> Self { - let mut cursor = Self { - layers: Vec::with_capacity(layers.len()), - exact_index: &[], - exact_index_position: 0, - }; + let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; cursor.retarget(layers, exact_index); cursor } @@ -73,7 +68,6 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { for layer in &mut self.layers { layer.reset(); } - self.exact_index_position = 0; } pub(crate) fn retarget( @@ -84,7 +78,6 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { self.layers.clear(); self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); self.exact_index = exact_index; - self.exact_index_position = 0; } } @@ -102,16 +95,18 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { if !self.exact_index.is_empty() { - let Some((layer_idx, entry_idx)) = self.seek_exact_index(key) else { + let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) + else { self.seek_from(0, key); return None; }; - let layer = &mut self.layers[layer_idx]; - if layer.position > 
entry_idx { + let entry = &self.exact_index[index_idx]; + let layer = &mut self.layers[entry.layer_idx]; + if layer.position > entry.entry_idx { return None } - layer.position = entry_idx; - return Some((layer_idx, &layer.entries[entry_idx].1)) + layer.position = entry.entry_idx; + return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) } for (layer_idx, layer) in self.layers.iter_mut().enumerate() { @@ -124,33 +119,6 @@ where None } - #[inline(always)] - fn seek_exact_index(&mut self, key: &K) -> Option<(usize, usize)> { - if let Some(entry) = self.exact_index.get(self.exact_index_position) { - match entry.key.cmp(key) { - std::cmp::Ordering::Less => self.exact_index_position += 1, - std::cmp::Ordering::Equal => return Some((entry.layer_idx, entry.entry_idx)), - std::cmp::Ordering::Greater => return None, - } - } - - let remaining = &self.exact_index[self.exact_index_position..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|entry| &entry.key < key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].key < key { - advance += 1; - } - advance - }; - - self.exact_index_position += advance; - self.exact_index - .get(self.exact_index_position) - .and_then(|entry| (&entry.key == key).then_some((entry.layer_idx, entry.entry_idx))) - } - #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { for layer in &mut self.layers { From 9a7999f7eaa7b5d09f3f1e897eeea8a61b124429 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 01:01:59 +0200 Subject: [PATCH 19/40] perf: gate overlay exact index construction --- crates/trie/trie/src/overlay_cursor.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index a75fc768e80..2183d2f4f26 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -311,6 +311,10 @@ where K: Copy + Ord, { 
let entry_count = layers.iter().map(|layer| layer.entries_len).sum(); + if layers.len() < 2 || entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + return Vec::new() + } + let mut index = Vec::with_capacity(entry_count); for (layer_idx, layer) in layers.iter().enumerate() { From 534c845438062a2544f850c6757f4d7eeaff9cab Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 01:41:47 +0200 Subject: [PATCH 20/40] perf: use hybrid overlay exact index seeks --- crates/trie/trie/src/overlay_cursor.rs | 53 +++++++++++++++++++++----- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 2183d2f4f26..1cd112bb9af 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -46,11 +46,12 @@ impl DbCursorState { pub(crate) struct PositionedOverlayCursor<'a, K, V> { layers: Vec>, exact_index: &'a [OverlayExactIndexEntry], + exact_index_position: usize, } impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self { layers: Vec::new(), exact_index: &[] } + Self { layers: Vec::new(), exact_index: &[], exact_index_position: 0 } } } @@ -59,7 +60,11 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { layers: &'a [OverlayLayer], exact_index: &'a [OverlayExactIndexEntry], ) -> Self { - let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; + let mut cursor = Self { + layers: Vec::with_capacity(layers.len()), + exact_index: &[], + exact_index_position: 0, + }; cursor.retarget(layers, exact_index); cursor } @@ -68,6 +73,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { for layer in &mut self.layers { layer.reset(); } + self.exact_index_position = 0; } pub(crate) fn retarget( @@ -78,6 +84,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { self.layers.clear(); self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); self.exact_index = exact_index; 
+ self.exact_index_position = 0; } } @@ -95,18 +102,16 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { if !self.exact_index.is_empty() { - let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) - else { + let Some((layer_idx, entry_idx)) = self.seek_exact_index(key) else { self.seek_from(0, key); return None; }; - let entry = &self.exact_index[index_idx]; - let layer = &mut self.layers[entry.layer_idx]; - if layer.position > entry.entry_idx { + let layer = &mut self.layers[layer_idx]; + if layer.position > entry_idx { return None } - layer.position = entry.entry_idx; - return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) + layer.position = entry_idx; + return Some((layer_idx, &layer.entries[entry_idx].1)) } for (layer_idx, layer) in self.layers.iter_mut().enumerate() { @@ -119,6 +124,34 @@ where None } + #[inline(always)] + fn seek_exact_index(&mut self, key: &K) -> Option<(usize, usize)> { + let start = self.exact_index_position; + if self.exact_index.get(start).is_some_and(|entry| &entry.key <= key) { + let remaining = &self.exact_index[start..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|entry| &entry.key < key) + } else { + let mut advance = 0; + while advance < remaining.len() && &remaining[advance].key < key { + advance += 1; + } + advance + }; + + self.exact_index_position += advance; + return self + .exact_index + .get(self.exact_index_position) + .and_then(|entry| (&entry.key == key).then_some((entry.layer_idx, entry.entry_idx))) + } + + let index_idx = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)).ok()?; + self.exact_index_position = index_idx; + let entry = &self.exact_index[index_idx]; + Some((entry.layer_idx, entry.entry_idx)) + } + #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { for layer in &mut self.layers { @@ -311,7 +344,7 @@ where K: Copy + Ord, { let 
entry_count = layers.iter().map(|layer| layer.entries_len).sum(); - if layers.len() < 2 || entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + if entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { return Vec::new() } From 8a65f01e79e7088ce8372b0439f6e7d1c036ce1e Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 02:19:12 +0200 Subject: [PATCH 21/40] Revert "perf: use hybrid overlay exact index seeks" This reverts commit 534c845438062a2544f850c6757f4d7eeaff9cab. --- crates/trie/trie/src/overlay_cursor.rs | 53 +++++--------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 1cd112bb9af..2183d2f4f26 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -46,12 +46,11 @@ impl DbCursorState { pub(crate) struct PositionedOverlayCursor<'a, K, V> { layers: Vec>, exact_index: &'a [OverlayExactIndexEntry], - exact_index_position: usize, } impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self { layers: Vec::new(), exact_index: &[], exact_index_position: 0 } + Self { layers: Vec::new(), exact_index: &[] } } } @@ -60,11 +59,7 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { layers: &'a [OverlayLayer], exact_index: &'a [OverlayExactIndexEntry], ) -> Self { - let mut cursor = Self { - layers: Vec::with_capacity(layers.len()), - exact_index: &[], - exact_index_position: 0, - }; + let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; cursor.retarget(layers, exact_index); cursor } @@ -73,7 +68,6 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { for layer in &mut self.layers { layer.reset(); } - self.exact_index_position = 0; } pub(crate) fn retarget( @@ -84,7 +78,6 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { self.layers.clear(); self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); 
self.exact_index = exact_index; - self.exact_index_position = 0; } } @@ -102,16 +95,18 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { if !self.exact_index.is_empty() { - let Some((layer_idx, entry_idx)) = self.seek_exact_index(key) else { + let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) + else { self.seek_from(0, key); return None; }; - let layer = &mut self.layers[layer_idx]; - if layer.position > entry_idx { + let entry = &self.exact_index[index_idx]; + let layer = &mut self.layers[entry.layer_idx]; + if layer.position > entry.entry_idx { return None } - layer.position = entry_idx; - return Some((layer_idx, &layer.entries[entry_idx].1)) + layer.position = entry.entry_idx; + return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) } for (layer_idx, layer) in self.layers.iter_mut().enumerate() { @@ -124,34 +119,6 @@ where None } - #[inline(always)] - fn seek_exact_index(&mut self, key: &K) -> Option<(usize, usize)> { - let start = self.exact_index_position; - if self.exact_index.get(start).is_some_and(|entry| &entry.key <= key) { - let remaining = &self.exact_index[start..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|entry| &entry.key < key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].key < key { - advance += 1; - } - advance - }; - - self.exact_index_position += advance; - return self - .exact_index - .get(self.exact_index_position) - .and_then(|entry| (&entry.key == key).then_some((entry.layer_idx, entry.entry_idx))) - } - - let index_idx = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)).ok()?; - self.exact_index_position = index_idx; - let entry = &self.exact_index[index_idx]; - Some((entry.layer_idx, entry.entry_idx)) - } - #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { for layer in &mut self.layers { @@ -344,7 +311,7 @@ 
where K: Copy + Ord, { let entry_count = layers.iter().map(|layer| layer.entries_len).sum(); - if entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + if layers.len() < 2 || entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { return Vec::new() } From 3f79bd85499a41d1f7121d5c5eba86fd959cf39a Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 02:21:54 +0200 Subject: [PATCH 22/40] perf: avoid redundant overlay deletion seeks --- .../trie/trie/src/hashed_cursor/post_state.rs | 30 +++++++++++-------- crates/trie/trie/src/overlay_cursor.rs | 17 +++++++++-- crates/trie/trie/src/trie_cursor/in_memory.rs | 19 ++++++------ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 23c1799c9ad..13d00fc08b0 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,7 +1,7 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; use crate::overlay_cursor::{ - build_overlay_exact_index, DbCursorState, OverlayExactIndexEntry, OverlayLayer, - PositionedOverlayCursor, + build_overlay_exact_index, DbCursorState, OverlayExactHit, OverlayExactIndexEntry, + OverlayLayer, PositionedOverlayCursor, }; use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; @@ -254,14 +254,16 @@ where self.deferred_overlay_seek_start = None; match self.post_state_cursor.seek_until_exact(&key) { - Some((idx, Some(value))) => { - self.deferred_overlay_seek_start = Some(idx + 1); - let entry = Some((key, value)); - self.set_last_key(&entry); - return Ok(entry) - } - Some((_, None)) => { - self.post_state_cursor.seek_from(0, &key); + Some(hit) => { + if let Some(value) = hit.value { + self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); + let entry = Some((key, value)); + self.set_last_key(&entry); + return Ok(entry) + } + + let start = if hit.prefix_positioned { 
hit.layer_idx + 1 } else { 0 }; + self.post_state_cursor.seek_from(start, &key); } None => {} } @@ -501,8 +503,12 @@ where self.cursor.seek_from(start, key); } - fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { - self.cursor.seek_until_exact(key).map(|(idx, value)| (idx, (*value).into_option())) + fn seek_until_exact(&mut self, key: &B256) -> Option>> { + self.cursor.seek_until_exact(key).map(|hit| OverlayExactHit { + layer_idx: hit.layer_idx, + value: (*hit.value).into_option(), + prefix_positioned: hit.prefix_positioned, + }) } fn first_after(&mut self, key: &B256) { diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 2183d2f4f26..9efc8b7feb3 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -93,7 +93,7 @@ where } #[inline(always)] - pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { + pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option> { if !self.exact_index.is_empty() { let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) else { @@ -106,14 +106,18 @@ where return None } layer.position = entry.entry_idx; - return Some((entry.layer_idx, &layer.entries[entry.entry_idx].1)) + return Some(OverlayExactHit { + layer_idx: entry.layer_idx, + value: &layer.entries[entry.entry_idx].1, + prefix_positioned: false, + }) } for (layer_idx, layer) in self.layers.iter_mut().enumerate() { let Some(entry) = layer.seek_exact(key) else { continue; }; - return Some((layer_idx, &entry.1)) + return Some(OverlayExactHit { layer_idx, value: &entry.1, prefix_positioned: true }) } None @@ -304,6 +308,13 @@ pub(crate) struct OverlayExactIndexEntry { entry_idx: usize, } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct OverlayExactHit { + pub(crate) layer_idx: usize, + pub(crate) value: V, + pub(crate) prefix_positioned: bool, +} + pub(crate) fn build_overlay_exact_index( layers: &[OverlayLayer], 
) -> Vec> diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index d246e27be4c..76ccd8e33ea 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -196,11 +196,11 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { } self.deferred_overlay_seek_start = None; - let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { - if mem_value.is_some() { - self.deferred_overlay_seek_start = Some(idx + 1); + let entry = if let Some(hit) = self.in_memory_cursor.seek_until_exact(&key) { + if hit.value.is_some() { + self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); } - mem_value.clone().map(|node| (key, node)) + hit.value.clone().map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(db_entry); @@ -222,14 +222,15 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = None; match self.in_memory_cursor.seek_until_exact(&key) { - Some((idx, Some(node))) => { - self.deferred_overlay_seek_start = Some(idx + 1); - let entry = Some((key, node.clone())); + Some(hit) if hit.value.is_some() => { + self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); + let entry = hit.value.clone().map(|node| (key, node)); self.set_last_key(&entry); return Ok(entry); } - Some((_, None)) => { - self.in_memory_cursor.seek_from(0, &key); + Some(hit) => { + let start = if hit.prefix_positioned { hit.layer_idx + 1 } else { 0 }; + self.in_memory_cursor.seek_from(start, &key); } None => {} } From 80db761322f55ee26213ba0549d3f634baf1426b Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 02:50:40 +0200 Subject: [PATCH 23/40] perf: restore positioned overlay cursor layout --- .../trie/trie/src/hashed_cursor/post_state.rs | 84 +++---- crates/trie/trie/src/overlay_cursor.rs | 206 +++++------------- 
crates/trie/trie/src/trie_cursor/in_memory.rs | 56 ++--- 3 files changed, 96 insertions(+), 250 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 13d00fc08b0..2e1c36b19aa 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,8 +1,5 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::overlay_cursor::{ - build_overlay_exact_index, DbCursorState, OverlayExactHit, OverlayExactIndexEntry, - OverlayLayer, PositionedOverlayCursor, -}; +use crate::overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}; use alloy_primitives::{map::B256Map, B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; @@ -254,16 +251,14 @@ where self.deferred_overlay_seek_start = None; match self.post_state_cursor.seek_until_exact(&key) { - Some(hit) => { - if let Some(value) = hit.value { - self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); - let entry = Some((key, value)); - self.set_last_key(&entry); - return Ok(entry) - } - - let start = if hit.prefix_positioned { hit.layer_idx + 1 } else { 0 }; - self.post_state_cursor.seek_from(start, &key); + Some((idx, Some(value))) => { + self.deferred_overlay_seek_start = Some(idx + 1); + let entry = Some((key, value)); + self.set_last_key(&entry); + return Ok(entry) + } + Some((idx, None)) => { + self.post_state_cursor.seek_from(idx + 1, &key); } None => {} } @@ -346,9 +341,9 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, exact_index, cursor_wiped, has_visible_value) = + let (layers, cursor_wiped, has_visible_value) = self.post_states.storage_overlay_layers(hashed_address); - self.post_state_cursor.retarget(layers, exact_index, has_visible_value); + self.post_state_cursor.retarget(layers, has_visible_value); 
self.db_cursor_state = DbCursorState::new(cursor_wiped); } } @@ -357,7 +352,6 @@ where #[derive(Clone, Debug, Default)] pub struct HashedPostStateOverlay { account_overlay: Arc>>>, - account_exact_index: Arc>>, storage_overlays: Arc>, } @@ -365,9 +359,8 @@ impl HashedPostStateOverlay { /// Create a new indexed hashed post-state overlay stack. pub fn new(states: Vec>) -> Self { let account_overlay = Self::build_account_overlay(&states); - let account_exact_index = Arc::new(build_overlay_exact_index(account_overlay.as_slice())); let storage_overlays = Self::build_storage_overlays(&states); - Self { account_overlay, account_exact_index, storage_overlays } + Self { account_overlay, storage_overlays } } /// Returns `true` if the overlay does not contain any hashed post-state updates. @@ -415,7 +408,6 @@ impl HashedPostStateOverlay { } for overlay in overlays.values_mut() { - overlay.exact_index = build_overlay_exact_index(&overlay.layers); overlay.has_visible_value = has_visible_storage_value(&overlay.layers); } @@ -423,33 +415,23 @@ impl HashedPostStateOverlay { } fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { - PostStateOverlayCursor::new( - self.account_overlay.as_slice(), - self.account_exact_index.as_slice(), - false, - ) + PostStateOverlayCursor::new(self.account_overlay.as_slice(), false) } fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let (layers, exact_index, db_wiped, has_visible_value) = - self.storage_overlay_layers(hashed_address); - (PostStateOverlayCursor::new(layers, exact_index, has_visible_value), db_wiped) + let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); + (PostStateOverlayCursor::new(layers, has_visible_value), db_wiped) } fn storage_overlay_layers( &self, hashed_address: B256, - ) -> (&[PostStateOverlayLayer], &[OverlayExactIndexEntry], bool, bool) { + ) -> (&[PostStateOverlayLayer], bool, bool) { let Some(overlay) = 
self.storage_overlays.get(&hashed_address) else { - return (&[], &[], false, false); + return (&[], false, false); }; - ( - overlay.layers.as_slice(), - overlay.exact_index.as_slice(), - overlay.db_wiped, - overlay.has_visible_value, - ) + (overlay.layers.as_slice(), overlay.db_wiped, overlay.has_visible_value) } } @@ -461,36 +443,27 @@ impl AsRef for HashedPostStateOverlay { #[derive(Debug)] struct PostStateOverlayCursor<'a, V> { - cursor: PositionedOverlayCursor<'a, B256, V>, + cursor: PositionedOverlayCursor<'a, HashedPostStateSorted, B256, V>, has_visible_value: bool, } impl Default for PostStateOverlayCursor<'_, V> { fn default() -> Self { - Self::new(&[], &[], false) + Self::new(&[], false) } } impl<'a, V> PostStateOverlayCursor<'a, V> { - fn new( - layers: &'a [PostStateOverlayLayer], - exact_index: &'a [OverlayExactIndexEntry], - has_visible_value: bool, - ) -> Self { - Self { cursor: PositionedOverlayCursor::new(layers, exact_index), has_visible_value } + fn new(layers: &'a [PostStateOverlayLayer], has_visible_value: bool) -> Self { + Self { cursor: PositionedOverlayCursor::new(layers), has_visible_value } } fn reset(&mut self) { self.cursor.reset(); } - fn retarget( - &mut self, - layers: &'a [PostStateOverlayLayer], - exact_index: &'a [OverlayExactIndexEntry], - has_visible_value: bool, - ) { - self.cursor.retarget(layers, exact_index); + fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { + self.cursor.retarget(layers); self.has_visible_value = has_visible_value; } } @@ -503,12 +476,8 @@ where self.cursor.seek_from(start, key); } - fn seek_until_exact(&mut self, key: &B256) -> Option>> { - self.cursor.seek_until_exact(key).map(|hit| OverlayExactHit { - layer_idx: hit.layer_idx, - value: (*hit.value).into_option(), - prefix_positioned: hit.prefix_positioned, - }) + fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { + self.cursor.seek_until_exact(key).map(|(idx, value)| (idx, (*value).into_option())) 
} fn first_after(&mut self, key: &B256) { @@ -535,7 +504,6 @@ where #[derive(Clone, Debug, Default)] struct HashedStorageOverlay { layers: Vec>, - exact_index: Vec>, db_wiped: bool, has_visible_value: bool, } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 9efc8b7feb3..7ca7f469b43 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -43,81 +43,55 @@ impl DbCursorState { } #[derive(Debug)] -pub(crate) struct PositionedOverlayCursor<'a, K, V> { - layers: Vec>, - exact_index: &'a [OverlayExactIndexEntry], +pub(crate) struct PositionedOverlayCursor<'a, O, K, V> { + layers: &'a [OverlayLayer], + positions: Vec, } -impl Default for PositionedOverlayCursor<'_, K, V> { +impl Default for PositionedOverlayCursor<'_, O, K, V> { fn default() -> Self { - Self { layers: Vec::new(), exact_index: &[] } + Self::new(&[]) } } -impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { - pub(crate) fn new( - layers: &'a [OverlayLayer], - exact_index: &'a [OverlayExactIndexEntry], - ) -> Self { - let mut cursor = Self { layers: Vec::with_capacity(layers.len()), exact_index: &[] }; - cursor.retarget(layers, exact_index); - cursor +impl<'a, O, K, V> PositionedOverlayCursor<'a, O, K, V> { + pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { + Self { layers, positions: vec![0; layers.len()] } } pub(crate) fn reset(&mut self) { - for layer in &mut self.layers { - layer.reset(); - } + self.positions.fill(0); } - pub(crate) fn retarget( - &mut self, - layers: &'a [OverlayLayer], - exact_index: &'a [OverlayExactIndexEntry], - ) { - self.layers.clear(); - self.layers.extend(layers.iter().map(|layer| PositionedOverlayLayer::new(layer.entries()))); - self.exact_index = exact_index; + pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + self.layers = layers; + self.positions.clear(); + self.positions.resize(layers.len(), 0); } } -impl PositionedOverlayCursor<'_, K, V> +impl 
PositionedOverlayCursor<'_, O, K, V> where K: Ord, { #[inline(always)] pub(crate) fn seek_from(&mut self, start: usize, key: &K) { - for layer in &mut self.layers[start..] { - let _ = layer.seek(key); + for layer_idx in start..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let _ = seek_overlay_entries(entries, &mut self.positions[layer_idx], key); } } #[inline(always)] - pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option> { - if !self.exact_index.is_empty() { - let Ok(index_idx) = self.exact_index.binary_search_by(|entry| entry.key.cmp(key)) + pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let Some(idx) = + seek_overlay_entries_exact(entries, &mut self.positions[layer_idx], key) else { - self.seek_from(0, key); - return None; - }; - let entry = &self.exact_index[index_idx]; - let layer = &mut self.layers[entry.layer_idx]; - if layer.position > entry.entry_idx { - return None - } - layer.position = entry.entry_idx; - return Some(OverlayExactHit { - layer_idx: entry.layer_idx, - value: &layer.entries[entry.entry_idx].1, - prefix_positioned: false, - }) - } - - for (layer_idx, layer) in self.layers.iter_mut().enumerate() { - let Some(entry) = layer.seek_exact(key) else { continue; }; - return Some(OverlayExactHit { layer_idx, value: &entry.1, prefix_positioned: true }) + return Some((layer_idx, &entries[idx].1)) } None @@ -125,78 +99,45 @@ where #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { - for layer in &mut self.layers { - let _ = layer.first_after(key); + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); } } #[inline(always)] pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { - self.layers.iter().find_map(|layer| { - 
layer.current().and_then(|(entry_key, value)| (entry_key == key).then_some(value)) + self.layers.iter().zip(&self.positions).find_map(|(layer, position)| { + let entries = layer.entries(); + entries + .get(*position) + .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) }) } #[inline(always)] pub(crate) fn advance_key(&mut self, key: &K) { - for layer in &mut self.layers { - if layer.current().is_some_and(|(entry_key, _)| entry_key == key) { - let _ = layer.first_after(key); + for layer_idx in 0..self.layers.len() { + let entries = self.layers[layer_idx].entries(); + if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) + { + let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); } } } } -impl PositionedOverlayCursor<'_, K, V> +impl PositionedOverlayCursor<'_, O, K, V> where K: Copy + Ord, { #[inline(always)] pub(crate) fn min_current_key(&self) -> Option { - self.layers.iter().filter_map(|layer| layer.current().map(|(key, _)| *key)).min() - } -} - -#[derive(Debug)] -struct PositionedOverlayLayer<'a, K, V> { - entries: &'a [(K, V)], - position: usize, -} - -impl<'a, K, V> PositionedOverlayLayer<'a, K, V> { - const fn new(entries: &'a [(K, V)]) -> Self { - Self { entries, position: 0 } - } - - fn current(&self) -> Option<&'a (K, V)> { - self.entries.get(self.position) - } - - const fn reset(&mut self) { - self.position = 0; - } -} - -impl<'a, K, V> PositionedOverlayLayer<'a, K, V> -where - K: Ord, -{ - #[inline(always)] - fn seek(&mut self, key: &K) -> Option<&'a (K, V)> { - let _ = seek_overlay_entries(self.entries, &mut self.position, key); - self.current() - } - - #[inline(always)] - fn seek_exact(&mut self, key: &K) -> Option<&'a (K, V)> { - let _ = seek_overlay_entries_exact(self.entries, &mut self.position, key)?; - self.current() - } - - #[inline(always)] - fn first_after(&mut self, key: &K) -> Option<&'a (K, V)> { - let _ = seek_overlay_entries_after(self.entries, &mut 
self.position, key); - self.current() + self.layers + .iter() + .zip(&self.positions) + .filter_map(|(layer, position)| layer.entries().get(*position).map(|(key, _)| *key)) + .min() } } @@ -301,52 +242,6 @@ where (*position < entries.len()).then_some(*position) } -#[derive(Clone, Copy, Debug)] -pub(crate) struct OverlayExactIndexEntry { - key: K, - layer_idx: usize, - entry_idx: usize, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(crate) struct OverlayExactHit { - pub(crate) layer_idx: usize, - pub(crate) value: V, - pub(crate) prefix_positioned: bool, -} - -pub(crate) fn build_overlay_exact_index( - layers: &[OverlayLayer], -) -> Vec> -where - K: Copy + Ord, -{ - let entry_count = layers.iter().map(|layer| layer.entries_len).sum(); - if layers.len() < 2 || entry_count < OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - return Vec::new() - } - - let mut index = Vec::with_capacity(entry_count); - - for (layer_idx, layer) in layers.iter().enumerate() { - index.extend(layer.entries().iter().enumerate().map(|(entry_idx, (key, _))| { - OverlayExactIndexEntry { key: *key, layer_idx, entry_idx } - })); - } - - index.sort_unstable_by(|a, b| a.key.cmp(&b.key).then_with(|| a.layer_idx.cmp(&b.layer_idx))); - - let mut write_idx = 0; - for read_idx in 0..index.len() { - if write_idx == 0 || index[write_idx - 1].key != index[read_idx].key { - index[write_idx] = index[read_idx]; - write_idx += 1; - } - } - index.truncate(write_idx); - index -} - #[derive(Clone)] pub(crate) struct OverlayLayer { _owner: Arc, @@ -391,8 +286,7 @@ mod tests { fn seek_reuses_current_position_when_it_already_satisfies_bound() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let exact_index = build_overlay_exact_index(&overlay); - let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); + let mut cursor = PositionedOverlayCursor::new(&overlay); cursor.seek_from(0, &100); assert_eq!(cursor.min_current_key(), Some(100)); @@ 
-408,8 +302,7 @@ mod tests { fn seek_does_not_move_backwards_from_current_position() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let exact_index = build_overlay_exact_index(&overlay); - let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); + let mut cursor = PositionedOverlayCursor::new(&overlay); cursor.seek_from(0, &150); assert_eq!(cursor.min_current_key(), Some(150)); @@ -423,22 +316,21 @@ mod tests { fn seek_does_not_recover_after_past_end() { let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); let overlay = [layer(entries)]; - let exact_index = build_overlay_exact_index(&overlay); - let mut cursor = PositionedOverlayCursor::new(&overlay, &exact_index); + let mut cursor = PositionedOverlayCursor::new(&overlay); cursor.seek_from(0, &250); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.layers[0].position, 201); + assert_eq!(cursor.positions, vec![201]); assert_eq!(cursor.seek_until_exact(&25), None); - assert_eq!(cursor.layers[0].position, 201); + assert_eq!(cursor.positions, vec![201]); cursor.first_after(&250); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.layers[0].position, 201); + assert_eq!(cursor.positions, vec![201]); cursor.first_after(&25); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.layers[0].position, 201); + assert_eq!(cursor.positions, vec![201]); } } diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 76ccd8e33ea..d3866f328dc 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,9 +1,6 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; use crate::{ - overlay_cursor::{ - build_overlay_exact_index, DbCursorState, OverlayExactIndexEntry, OverlayLayer, - PositionedOverlayCursor, - }, + overlay_cursor::{DbCursorState, OverlayLayer, 
PositionedOverlayCursor}, updates::TrieUpdatesSorted, }; use alloy_primitives::{map::B256Map, B256}; @@ -196,11 +193,11 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { } self.deferred_overlay_seek_start = None; - let entry = if let Some(hit) = self.in_memory_cursor.seek_until_exact(&key) { - if hit.value.is_some() { - self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); + let entry = if let Some((idx, mem_value)) = self.in_memory_cursor.seek_until_exact(&key) { + if mem_value.is_some() { + self.deferred_overlay_seek_start = Some(idx + 1); } - hit.value.clone().map(|node| (key, node)) + mem_value.clone().map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(db_entry); @@ -222,15 +219,14 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = None; match self.in_memory_cursor.seek_until_exact(&key) { - Some(hit) if hit.value.is_some() => { - self.deferred_overlay_seek_start = Some(hit.layer_idx + 1); - let entry = hit.value.clone().map(|node| (key, node)); + Some((idx, Some(node))) => { + self.deferred_overlay_seek_start = Some(idx + 1); + let entry = Some((key, node.clone())); self.set_last_key(&entry); return Ok(entry); } - Some(hit) => { - let start = if hit.prefix_positioned { hit.layer_idx + 1 } else { 0 }; - self.in_memory_cursor.seek_from(start, &key); + Some((idx, None)) => { + self.in_memory_cursor.seek_from(idx + 1, &key); } None => {} } @@ -293,9 +289,8 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, exact_index, db_wiped) = - self.trie_updates.storage_overlay_layers(hashed_address); - self.in_memory_cursor.retarget(layers, exact_index); + let (layers, db_wiped) = self.trie_updates.storage_overlay_layers(hashed_address); + self.in_memory_cursor.retarget(layers); 
self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ -304,7 +299,6 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { account_overlay: Arc>, - account_exact_index: Arc>>, storage_overlays: Arc>, } @@ -312,9 +306,8 @@ impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { let account_overlay = Self::build_account_overlay(&updates); - let account_exact_index = Arc::new(build_overlay_exact_index(account_overlay.as_slice())); let storage_overlays = Self::build_storage_overlays(&updates); - Self { account_overlay, account_exact_index, storage_overlays } + Self { account_overlay, storage_overlays } } /// Returns `true` if the overlay does not contain any trie updates. @@ -357,31 +350,24 @@ impl TrieUpdatesOverlay { } } - for overlay in overlays.values_mut() { - overlay.exact_index = build_overlay_exact_index(&overlay.layers); - } - Arc::new(overlays) } fn account_overlay(&self) -> OverlayCursor<'_> { - OverlayCursor::new(self.account_overlay.as_slice(), self.account_exact_index.as_slice()) + OverlayCursor::new(self.account_overlay.as_slice()) } fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - let (layers, exact_index, db_wiped) = self.storage_overlay_layers(hashed_address); - (OverlayCursor::new(layers, exact_index), db_wiped) + let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); + (OverlayCursor::new(layers), db_wiped) } - fn storage_overlay_layers( - &self, - hashed_address: B256, - ) -> (&[TrieOverlayLayer], &[OverlayExactIndexEntry], bool) { + fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { let Some(overlay) = self.storage_overlays.get(&hashed_address) else { - return (&[], &[], false); + return (&[], false); }; - (overlay.layers.as_slice(), overlay.exact_index.as_slice(), overlay.db_wiped) + (overlay.layers.as_slice(), overlay.db_wiped) } } 
@@ -394,11 +380,11 @@ impl AsRef for TrieUpdatesOverlay { #[derive(Clone, Debug, Default)] struct TrieStorageOverlay { layers: Vec, - exact_index: Vec>, db_wiped: bool, } -type OverlayCursor<'a> = PositionedOverlayCursor<'a, Nibbles, Option>; +type OverlayCursor<'a> = + PositionedOverlayCursor<'a, TrieUpdatesSorted, Nibbles, Option>; type TrieOverlayLayer = OverlayLayer>; #[cfg(test)] From 22fb563bec5ab1b2538ae771cdec5afcf1d3ab7a Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 03:32:12 +0200 Subject: [PATCH 24/40] perf: reuse fresh cached overlay prefixes --- crates/chain-state/src/state_trie_overlay.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index e25e4a2f403..4b2f8dd0098 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -396,7 +396,9 @@ impl StateTrieOverlayManager { ResolvedOverlayPath { blocks, cached_prefix } } }; - let (blocks, parent_overlay) = match path.cached_prefix { + let cached_prefix = + self.largest_cached_prefix(key.anchor_hash, &path.blocks).or(path.cached_prefix); + let (blocks, parent_overlay) = match cached_prefix { Some((idx, parent_overlay)) => { let mut blocks = path.blocks; blocks.truncate(idx); From aa7e668889f7b96d3b7ee79ab2cc720e1ea558d5 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 04:28:46 +0200 Subject: [PATCH 25/40] perf: stop exact overlay seeks on hits --- crates/trie/trie/src/overlay_cursor.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 7ca7f469b43..fac2de78427 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -187,10 +187,16 @@ where let remaining = &entries[*position..]; if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - *position += 
remaining.partition_point(|(entry_key, _)| entry_key < key); - return entries - .get(*position) - .and_then(|(entry_key, _)| (entry_key == key).then_some(*position)) + return match remaining.binary_search_by(|(entry_key, _)| entry_key.cmp(key)) { + Ok(idx) => { + *position += idx; + Some(*position) + } + Err(idx) => { + *position += idx; + None + } + } } for (advance, (entry_key, _)) in remaining.iter().enumerate() { From 933a12f092452f54d9eaf84988fa5b04671fc6d9 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 05:15:56 +0200 Subject: [PATCH 26/40] perf: align overlay cursor seek threshold --- crates/trie/trie/src/overlay_cursor.rs | 42 ++------------------------ 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index fac2de78427..5a9641439e9 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -1,6 +1,6 @@ use std::{fmt, slice, sync::Arc}; -const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; +const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 128; #[derive(Debug)] pub(crate) enum DbCursorState { @@ -177,44 +177,8 @@ fn seek_overlay_entries_exact( where K: Ord, { - if let Some((entry_key, _)) = entries.get(*position) { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => *position += 1, - std::cmp::Ordering::Equal => return Some(*position), - std::cmp::Ordering::Greater => return None, - } - } - - let remaining = &entries[*position..]; - if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - return match remaining.binary_search_by(|(entry_key, _)| entry_key.cmp(key)) { - Ok(idx) => { - *position += idx; - Some(*position) - } - Err(idx) => { - *position += idx; - None - } - } - } - - for (advance, (entry_key, _)) in remaining.iter().enumerate() { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => {} - std::cmp::Ordering::Equal => { - *position += advance; - return Some(*position) - } - 
std::cmp::Ordering::Greater => { - *position += advance; - return None - } - } - } - - *position = entries.len(); - None + let idx = seek_overlay_entries(entries, position, key)?; + (&entries[idx].0 == key).then_some(idx) } #[inline(always)] From 0d5fa15e04d81d3cc9647db1a6d263712223f2d4 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 12:52:47 +0200 Subject: [PATCH 27/40] perf: reuse positioned overlay cursors --- .../trie/trie/src/hashed_cursor/post_state.rs | 23 +- crates/trie/trie/src/overlay_cursor.rs | 236 ++++++++++-------- crates/trie/trie/src/trie_cursor/in_memory.rs | 11 +- 3 files changed, 154 insertions(+), 116 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 2e1c36b19aa..09a28001b71 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -353,14 +353,16 @@ where pub struct HashedPostStateOverlay { account_overlay: Arc>>>, storage_overlays: Arc>, + layer_capacity: usize, } impl HashedPostStateOverlay { /// Create a new indexed hashed post-state overlay stack. pub fn new(states: Vec>) -> Self { + let layer_capacity = states.len(); let account_overlay = Self::build_account_overlay(&states); let storage_overlays = Self::build_storage_overlays(&states); - Self { account_overlay, storage_overlays } + Self { account_overlay, storage_overlays, layer_capacity } } /// Returns `true` if the overlay does not contain any hashed post-state updates. 
@@ -415,12 +417,12 @@ impl HashedPostStateOverlay { } fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { - PostStateOverlayCursor::new(self.account_overlay.as_slice(), false) + PostStateOverlayCursor::new(self.account_overlay.as_slice(), false, self.layer_capacity) } fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); - (PostStateOverlayCursor::new(layers, has_visible_value), db_wiped) + (PostStateOverlayCursor::new(layers, has_visible_value, self.layer_capacity), db_wiped) } fn storage_overlay_layers( @@ -443,19 +445,26 @@ impl AsRef for HashedPostStateOverlay { #[derive(Debug)] struct PostStateOverlayCursor<'a, V> { - cursor: PositionedOverlayCursor<'a, HashedPostStateSorted, B256, V>, + cursor: PositionedOverlayCursor<'a, B256, V>, has_visible_value: bool, } impl Default for PostStateOverlayCursor<'_, V> { fn default() -> Self { - Self::new(&[], false) + Self::new(&[], false, 0) } } impl<'a, V> PostStateOverlayCursor<'a, V> { - fn new(layers: &'a [PostStateOverlayLayer], has_visible_value: bool) -> Self { - Self { cursor: PositionedOverlayCursor::new(layers), has_visible_value } + fn new( + layers: &'a [PostStateOverlayLayer], + has_visible_value: bool, + layer_capacity: usize, + ) -> Self { + Self { + cursor: PositionedOverlayCursor::with_capacity(layers, layer_capacity), + has_visible_value, + } } fn reset(&mut self) { diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 5a9641439e9..a7fb53ef1d3 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -43,55 +43,59 @@ impl DbCursorState { } #[derive(Debug)] -pub(crate) struct PositionedOverlayCursor<'a, O, K, V> { - layers: &'a [OverlayLayer], - positions: Vec, +pub(crate) struct PositionedOverlayCursor<'a, K, V> { + cursors: Vec>, } -impl Default for PositionedOverlayCursor<'_, 
O, K, V> { +impl Default for PositionedOverlayCursor<'_, K, V> { fn default() -> Self { - Self::new(&[]) + Self { cursors: Vec::new() } } } -impl<'a, O, K, V> PositionedOverlayCursor<'a, O, K, V> { - pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { - Self { layers, positions: vec![0; layers.len()] } +impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { + pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { + Self::with_capacity(layers, layers.len()) + } + + pub(crate) fn with_capacity(layers: &'a [OverlayLayer], capacity: usize) -> Self { + let mut this = Self { cursors: Vec::with_capacity(capacity.max(layers.len())) }; + this.retarget(layers); + this } pub(crate) fn reset(&mut self) { - self.positions.fill(0); + for cursor in &mut self.cursors { + cursor.reset(); + } } - pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { - self.layers = layers; - self.positions.clear(); - self.positions.resize(layers.len(), 0); + pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { + debug_assert!(self.cursors.capacity() >= layers.len()); + self.cursors.clear(); + self.cursors.extend(layers.iter().map(PositionedOverlayLayerCursor::new)); } } -impl PositionedOverlayCursor<'_, O, K, V> +impl PositionedOverlayCursor<'_, K, V> where K: Ord, { #[inline(always)] pub(crate) fn seek_from(&mut self, start: usize, key: &K) { - for layer_idx in start..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries(entries, &mut self.positions[layer_idx], key); + for cursor in self.cursors.iter_mut().skip(start) { + let _ = cursor.seek(key); } } #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let Some(idx) = - seek_overlay_entries_exact(entries, &mut self.positions[layer_idx], key) - else { - continue; - }; - return Some((layer_idx, &entries[idx].1)) + for (layer_idx, cursor) in 
self.cursors.iter_mut().enumerate() { + if let Some((entry_key, value)) = cursor.seek(key) && + entry_key == key + { + return Some((layer_idx, value)) + } } None @@ -99,117 +103,110 @@ where #[inline(always)] pub(crate) fn first_after(&mut self, key: &K) { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); + for cursor in &mut self.cursors { + let _ = cursor.first_after(key); } } #[inline(always)] pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { - self.layers.iter().zip(&self.positions).find_map(|(layer, position)| { - let entries = layer.entries(); - entries - .get(*position) - .and_then(|(entry_key, value)| (entry_key == key).then_some(value)) + self.cursors.iter().find_map(|cursor| { + cursor.current().and_then(|(entry_key, value)| (entry_key == key).then_some(value)) }) } #[inline(always)] pub(crate) fn advance_key(&mut self, key: &K) { - for layer_idx in 0..self.layers.len() { - let entries = self.layers[layer_idx].entries(); - if entries.get(self.positions[layer_idx]).is_some_and(|(entry_key, _)| entry_key == key) - { - let _ = seek_overlay_entries_after(entries, &mut self.positions[layer_idx], key); + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(entry_key, _)| entry_key == key) { + let _ = cursor.first_after(key); } } } } -impl PositionedOverlayCursor<'_, O, K, V> +impl PositionedOverlayCursor<'_, K, V> where K: Copy + Ord, { #[inline(always)] pub(crate) fn min_current_key(&self) -> Option { - self.layers - .iter() - .zip(&self.positions) - .filter_map(|(layer, position)| layer.entries().get(*position).map(|(key, _)| *key)) - .min() + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() } } -#[inline(always)] -fn seek_overlay_entries(entries: &[(K, V)], position: &mut usize, key: &K) -> Option -where - K: Ord, -{ - if let Some((entry_key, _)) = 
entries.get(*position) { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => *position += 1, - std::cmp::Ordering::Equal | std::cmp::Ordering::Greater => return Some(*position), - } - } +#[derive(Debug)] +struct PositionedOverlayLayerCursor<'a, K, V> { + entries: &'a [(K, V)], + position: usize, +} - let remaining = &entries[*position..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|(entry_key, _)| entry_key < key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].0 < key { - advance += 1; - } - advance - }; +impl<'a, K, V> PositionedOverlayLayerCursor<'a, K, V> { + fn new(layer: &'a OverlayLayer) -> Self { + Self { entries: layer.entries(), position: 0 } + } - *position += advance; - (*position < entries.len()).then_some(*position) -} + #[inline(always)] + fn current(&self) -> Option<&'a (K, V)> { + self.entries.get(self.position) + } -#[inline(always)] -fn seek_overlay_entries_exact( - entries: &[(K, V)], - position: &mut usize, - key: &K, -) -> Option -where - K: Ord, -{ - let idx = seek_overlay_entries(entries, position, key)?; - (&entries[idx].0 == key).then_some(idx) + const fn reset(&mut self) { + self.position = 0; + } } -#[inline(always)] -fn seek_overlay_entries_after( - entries: &[(K, V)], - position: &mut usize, - key: &K, -) -> Option +impl<'a, K, V> PositionedOverlayLayerCursor<'a, K, V> where K: Ord, { - if let Some((entry_key, _)) = entries.get(*position) { - match entry_key.cmp(key) { - std::cmp::Ordering::Greater => return Some(*position), - std::cmp::Ordering::Less | std::cmp::Ordering::Equal => *position += 1, + #[inline(always)] + fn seek(&mut self, key: &K) -> Option<&'a (K, V)> { + if let Some((entry_key, _)) = self.current() { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => self.position += 1, + std::cmp::Ordering::Equal | std::cmp::Ordering::Greater => return self.current(), + } } + + let remaining = 
&self.entries[self.position..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|(entry_key, _)| entry_key < key) + } else { + let mut advance = 0; + while advance < remaining.len() && &remaining[advance].0 < key { + advance += 1; + } + advance + }; + + self.position += advance; + self.current() } - let remaining = &entries[*position..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|(entry_key, _)| entry_key <= key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].0 <= key { - advance += 1; + #[inline(always)] + fn first_after(&mut self, key: &K) -> Option<&'a (K, V)> { + if let Some((entry_key, _)) = self.current() { + match entry_key.cmp(key) { + std::cmp::Ordering::Greater => return self.current(), + std::cmp::Ordering::Less | std::cmp::Ordering::Equal => self.position += 1, + } } - advance - }; - *position += advance; - (*position < entries.len()).then_some(*position) + let remaining = &self.entries[self.position..]; + let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + remaining.partition_point(|(entry_key, _)| entry_key <= key) + } else { + let mut advance = 0; + while advance < remaining.len() && &remaining[advance].0 <= key { + advance += 1; + } + advance + }; + + self.position += advance; + self.current() + } } #[derive(Clone)] @@ -290,17 +287,48 @@ mod tests { cursor.seek_from(0, &250); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!( + cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), + vec![201] + ); assert_eq!(cursor.seek_until_exact(&25), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!( + cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), + vec![201] + ); cursor.first_after(&250); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + 
assert_eq!( + cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), + vec![201] + ); cursor.first_after(&25); assert_eq!(cursor.min_current_key(), None); - assert_eq!(cursor.positions, vec![201]); + assert_eq!( + cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), + vec![201] + ); + } + + #[test] + fn retarget_reuses_cursor_allocation() { + let first_entries = Arc::new(vec![(1, 1)]); + let second_entries = Arc::new(vec![(2, 2)]); + let first_overlay = [layer(Arc::clone(&first_entries))]; + let second_overlay = [layer(first_entries), layer(second_entries)]; + let mut cursor = PositionedOverlayCursor::with_capacity(&first_overlay, 2); + let capacity = cursor.cursors.capacity(); + let ptr = cursor.cursors.as_ptr(); + + cursor.retarget(&second_overlay); + assert_eq!(cursor.cursors.capacity(), capacity); + assert_eq!(cursor.cursors.as_ptr(), ptr); + + cursor.reset(); + assert_eq!(cursor.cursors.capacity(), capacity); + assert_eq!(cursor.cursors.as_ptr(), ptr); } } diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index d3866f328dc..96d15d498f5 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -300,14 +300,16 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { pub struct TrieUpdatesOverlay { account_overlay: Arc>, storage_overlays: Arc>, + layer_capacity: usize, } impl TrieUpdatesOverlay { /// Create a new indexed trie updates overlay stack. pub fn new(updates: Vec>) -> Self { + let layer_capacity = updates.len(); let account_overlay = Self::build_account_overlay(&updates); let storage_overlays = Self::build_storage_overlays(&updates); - Self { account_overlay, storage_overlays } + Self { account_overlay, storage_overlays, layer_capacity } } /// Returns `true` if the overlay does not contain any trie updates. 
@@ -354,12 +356,12 @@ impl TrieUpdatesOverlay { } fn account_overlay(&self) -> OverlayCursor<'_> { - OverlayCursor::new(self.account_overlay.as_slice()) + OverlayCursor::with_capacity(self.account_overlay.as_slice(), self.layer_capacity) } fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); - (OverlayCursor::new(layers), db_wiped) + (OverlayCursor::with_capacity(layers, self.layer_capacity), db_wiped) } fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { @@ -383,8 +385,7 @@ struct TrieStorageOverlay { db_wiped: bool, } -type OverlayCursor<'a> = - PositionedOverlayCursor<'a, TrieUpdatesSorted, Nibbles, Option>; +type OverlayCursor<'a> = PositionedOverlayCursor<'a, Nibbles, Option>; type TrieOverlayLayer = OverlayLayer>; #[cfg(test)] From 9096525f48428396e0e9b958e3eb1aafd5640ed0 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 13:56:07 +0200 Subject: [PATCH 28/40] perf: restore overlay cursor search threshold --- crates/trie/trie/src/overlay_cursor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index a7fb53ef1d3..cf1d5b48b0b 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -1,6 +1,6 @@ use std::{fmt, slice, sync::Arc}; -const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 128; +const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; #[derive(Debug)] pub(crate) enum DbCursorState { From 707a353e93d37521fa1eaa8c222af37913281255 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 14:43:51 +0200 Subject: [PATCH 29/40] perf: fold sparse trie final state accumulation --- .../src/tree/payload_processor/sparse_trie.rs | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs 
b/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs index 73e3fcd81f3..e3909c2628d 100644 --- a/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs +++ b/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs @@ -472,14 +472,34 @@ where skip_all )] fn on_hashed_state_update(&mut self, hashed_state_update: HashedPostState) { + let Self { + trie, + new_storage_updates, + storage_updates, + new_account_updates, + pending_account_updates, + final_hashed_state, + .. + } = self; + + final_hashed_state.accounts.reserve(hashed_state_update.accounts.len()); + final_hashed_state.storages.reserve(hashed_state_update.storages.len()); + for (&address, storage) in &hashed_state_update.storages { + let final_storage = final_hashed_state.storages.entry(address).or_default(); + if storage.wiped { + final_storage.wiped = true; + final_storage.storage.clear(); + } + final_storage.storage.reserve(storage.storage.len()); + if !storage.storage.is_empty() { // Look up outer maps once per address instead of once per slot. - let new_updates = self.new_storage_updates.entry(address).or_default(); - let mut existing_updates = self.storage_updates.get_mut(&address); + let new_updates = new_storage_updates.entry(address).or_default(); + let mut existing_updates = storage_updates.get_mut(&address); for (&slot, &value) in &storage.storage { - self.trie.record_slot_touch(address, slot); + trie.record_slot_touch(address, slot); let encoded = if value.is_zero() { Vec::new() @@ -487,6 +507,7 @@ where alloy_rlp::encode_fixed_size(&value).to_vec() }; new_updates.insert(slot, LeafUpdate::Changed(encoded)); + final_storage.storage.insert(slot, value); // Remove an existing storage update if it exists. if let Some(ref mut existing) = existing_updates { @@ -497,28 +518,27 @@ where // Make sure account is tracked in `account_updates` so that it is revealed in accounts // trie for storage root update. 
- self.new_account_updates.entry(address).or_insert(LeafUpdate::Touched); + new_account_updates.entry(address).or_insert(LeafUpdate::Touched); // Make sure account is tracked in `pending_account_updates` so that once storage root // is computed, it will be updated in the accounts trie. - self.pending_account_updates.entry(address).or_insert(None); + pending_account_updates.entry(address).or_insert(None); } for (&address, &account) in &hashed_state_update.accounts { - self.trie.record_account_touch(address); + trie.record_account_touch(address); + final_hashed_state.accounts.insert(address, account); // Track account as touched. // // This might overwrite an existing update, which is fine, because storage root from it // is already tracked in the trie and can be easily fetched again. - self.new_account_updates.insert(address, LeafUpdate::Touched); + new_account_updates.insert(address, LeafUpdate::Touched); // Track account in `pending_account_updates` so that once storage root is computed, // it will be updated in the accounts trie. 
- self.pending_account_updates.insert(address, Some(account)); + pending_account_updates.insert(address, Some(account)); } - - self.final_hashed_state.extend(hashed_state_update); } fn on_proof_result( From e55cc61d3b8cfb6d6edaa17ecdc3bc19482a85ac Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 15:10:18 +0200 Subject: [PATCH 30/40] perf: update overlay indexes incrementally --- crates/chain-state/src/state_trie_overlay.rs | 12 +- .../trie/trie/src/hashed_cursor/post_state.rs | 133 +++++++++++++++--- crates/trie/trie/src/trie_cursor/in_memory.rs | 80 ++++++++--- 3 files changed, 186 insertions(+), 39 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 4b2f8dd0098..b011b046770 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -513,20 +513,20 @@ impl StateTrieOverlay { /// Add a trie updates layer at the end of the precedence stack. pub fn push_trie_updates(&mut self, trie_updates: Arc) { - self.trie_update_layers.push(trie_updates); - self.trie_updates = TrieUpdatesOverlay::new(self.trie_update_layers.clone()); + self.trie_update_layers.push(Arc::clone(&trie_updates)); + self.trie_updates.push_layer(trie_updates); } /// Add a hashed post-state layer at the end of the precedence stack. pub fn push_hashed_post_state(&mut self, hashed_post_state: Arc) { - self.hashed_post_state_layers.push(hashed_post_state); - self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self.hashed_post_state_layers.push(Arc::clone(&hashed_post_state)); + self.hashed_post_state.push_layer(hashed_post_state); } /// Add a hashed post-state layer at the beginning of the precedence stack. 
pub fn prepend_hashed_post_state(&mut self, hashed_post_state: Arc) { - self.hashed_post_state_layers.insert(0, hashed_post_state); - self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self.hashed_post_state_layers.insert(0, Arc::clone(&hashed_post_state)); + self.hashed_post_state.prepend_layer(hashed_post_state); } /// Total number of trie update entries across all layers. diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 09a28001b71..c15b86b871c 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -390,30 +390,75 @@ impl HashedPostStateOverlay { let mut overlays: B256Map = B256Map::default(); for state in states { - for (hashed_address, storage) in &state.storages { - let overlay = overlays.entry(*hashed_address).or_default(); - if overlay.db_wiped { - continue; - } + Self::push_storage_layer(&mut overlays, state); + } - if !storage.storage_slots_ref().is_empty() { - overlay.layers.push(PostStateOverlayLayer::new( - Arc::clone(state), - storage.storage_slots_ref(), - )); - } + Arc::new(overlays) + } - if storage.is_wiped() { - overlay.db_wiped = true; - } - } + /// Add a hashed post-state layer at the end of the precedence stack. + pub fn push_layer(&mut self, state: Arc) { + self.layer_capacity += 1; + if !state.accounts.is_empty() { + Arc::make_mut(&mut self.account_overlay) + .push(PostStateOverlayLayer::new(Arc::clone(&state), state.accounts.as_slice())); } + Self::push_storage_layer(Arc::make_mut(&mut self.storage_overlays), &state); + } + + /// Add a hashed post-state layer at the beginning of the precedence stack. 
+ pub fn prepend_layer(&mut self, state: Arc) { + self.layer_capacity += 1; + if !state.accounts.is_empty() { + Arc::make_mut(&mut self.account_overlay).insert( + 0, + PostStateOverlayLayer::new(Arc::clone(&state), state.accounts.as_slice()), + ); + } + + for (hashed_address, storage) in &state.storages { + let overlay = + Arc::make_mut(&mut self.storage_overlays).entry(*hashed_address).or_default(); + + if storage.is_wiped() { + overlay.layers.clear(); + overlay.db_wiped = true; + } + + if !storage.storage_slots_ref().is_empty() { + overlay.layers.insert( + 0, + PostStateOverlayLayer::new(Arc::clone(&state), storage.storage_slots_ref()), + ); + } - for overlay in overlays.values_mut() { overlay.has_visible_value = has_visible_storage_value(&overlay.layers); } + } - Arc::new(overlays) + fn push_storage_layer( + overlays: &mut B256Map, + state: &Arc, + ) { + for (hashed_address, storage) in &state.storages { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } + + if !storage.storage_slots_ref().is_empty() { + overlay.layers.push(PostStateOverlayLayer::new( + Arc::clone(state), + storage.storage_slots_ref(), + )); + } + + if storage.is_wiped() { + overlay.db_wiped = true; + } + + overlay.has_visible_value = has_visible_storage_value(&overlay.layers); + } } fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { @@ -590,6 +635,60 @@ mod tests { HashedPostStateCursor::new_storage(cursor, overlay, hashed_address) } + fn storage_overlay_snapshot( + overlay: &HashedPostStateOverlay, + hashed_address: B256, + ) -> (Vec>, bool, bool) { + let (layers, db_wiped, has_visible_value) = overlay.storage_overlay_layers(hashed_address); + (layers.iter().map(|layer| layer.entries().to_vec()).collect(), db_wiped, has_visible_value) + } + + #[test] + fn test_incremental_storage_push_matches_rebuilt_overlay() { + let hashed_address = key(0x01); + let top = Arc::new(storage_post_state_for_address( + hashed_address, + 
vec![(key(0x10), U256::from(1))], + )); + let lower = Arc::new(storage_post_state_with_wipe_for_address( + hashed_address, + vec![(key(0x20), U256::from(2))], + true, + )); + + let mut incremental = HashedPostStateOverlay::new(vec![Arc::clone(&top)]); + incremental.push_layer(Arc::clone(&lower)); + let rebuilt = HashedPostStateOverlay::new(vec![top, lower]); + + assert_eq!( + storage_overlay_snapshot(&incremental, hashed_address), + storage_overlay_snapshot(&rebuilt, hashed_address) + ); + } + + #[test] + fn test_incremental_storage_prepend_matches_rebuilt_overlay() { + let hashed_address = key(0x01); + let lower = Arc::new(storage_post_state_with_wipe_for_address( + hashed_address, + vec![(key(0x10), U256::from(1))], + true, + )); + let top = Arc::new(storage_post_state_for_address( + hashed_address, + vec![(key(0x10), U256::ZERO), (key(0x20), U256::from(2))], + )); + + let mut incremental = HashedPostStateOverlay::new(vec![Arc::clone(&lower)]); + incremental.prepend_layer(Arc::clone(&top)); + let rebuilt = HashedPostStateOverlay::new(vec![top, lower]); + + assert_eq!( + storage_overlay_snapshot(&incremental, hashed_address), + storage_overlay_snapshot(&rebuilt, hashed_address) + ); + } + #[test] fn test_seek_overlay_exact_hit_does_not_touch_db_until_next() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 96d15d498f5..031d9810b41 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -333,26 +333,42 @@ impl TrieUpdatesOverlay { let mut overlays: B256Map = B256Map::default(); for update in updates { - for (hashed_address, storage) in update.storage_tries_ref() { - let overlay = overlays.entry(*hashed_address).or_default(); - if overlay.db_wiped { - continue; - } + Self::push_storage_layer(&mut overlays, update); + } - if !storage.storage_nodes_ref().is_empty() { - 
overlay.layers.push(TrieOverlayLayer::new( - Arc::clone(update), - storage.storage_nodes_ref(), - )); - } + Arc::new(overlays) + } - if storage.is_deleted() { - overlay.db_wiped = true; - } - } + /// Add a trie updates layer at the end of the precedence stack. + pub fn push_layer(&mut self, update: Arc) { + self.layer_capacity += 1; + if !update.account_nodes_ref().is_empty() { + Arc::make_mut(&mut self.account_overlay) + .push(TrieOverlayLayer::new(Arc::clone(&update), update.account_nodes_ref())); } + Self::push_storage_layer(Arc::make_mut(&mut self.storage_overlays), &update); + } - Arc::new(overlays) + fn push_storage_layer( + overlays: &mut B256Map, + update: &Arc, + ) { + for (hashed_address, storage) in update.storage_tries_ref() { + let overlay = overlays.entry(*hashed_address).or_default(); + if overlay.db_wiped { + continue; + } + + if !storage.storage_nodes_ref().is_empty() { + overlay + .layers + .push(TrieOverlayLayer::new(Arc::clone(update), storage.storage_nodes_ref())); + } + + if storage.is_deleted() { + overlay.db_wiped = true; + } + } } fn account_overlay(&self) -> OverlayCursor<'_> { @@ -466,6 +482,38 @@ mod tests { .unwrap() } + fn storage_overlay_snapshot( + overlay: &TrieUpdatesOverlay, + hashed_address: B256, + ) -> (Vec)>>, bool) { + let (layers, db_wiped) = overlay.storage_overlay_layers(hashed_address); + (layers.iter().map(|layer| layer.entries().to_vec()).collect(), db_wiped) + } + + #[test] + fn test_incremental_storage_push_matches_rebuilt_overlay() { + let hashed_address = B256::with_last_byte(1); + let top = Arc::new(storage_trie_updates( + hashed_address, + false, + vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], + )); + let lower = Arc::new(storage_trie_updates( + hashed_address, + true, + vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], + )); + + let mut incremental = TrieUpdatesOverlay::new(vec![Arc::clone(&top)]); + incremental.push_layer(Arc::clone(&lower)); + let rebuilt = 
TrieUpdatesOverlay::new(vec![top, lower]); + + assert_eq!( + storage_overlay_snapshot(&incremental, hashed_address), + storage_overlay_snapshot(&rebuilt, hashed_address) + ); + } + #[test] fn test_empty_db_and_memory() { let test_case = InMemoryTrieCursorTestCase { From fdc8bcfe4f0da05134d3432c731fa3221bf1860e Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 15:10:25 +0200 Subject: [PATCH 31/40] Revert "perf: fold sparse trie final state accumulation" This reverts commit 707a353e93d37521fa1eaa8c222af37913281255. --- .../src/tree/payload_processor/sparse_trie.rs | 40 +++++-------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs b/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs index e3909c2628d..73e3fcd81f3 100644 --- a/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs +++ b/crates/engine/tree/src/tree/payload_processor/sparse_trie.rs @@ -472,34 +472,14 @@ where skip_all )] fn on_hashed_state_update(&mut self, hashed_state_update: HashedPostState) { - let Self { - trie, - new_storage_updates, - storage_updates, - new_account_updates, - pending_account_updates, - final_hashed_state, - .. - } = self; - - final_hashed_state.accounts.reserve(hashed_state_update.accounts.len()); - final_hashed_state.storages.reserve(hashed_state_update.storages.len()); - for (&address, storage) in &hashed_state_update.storages { - let final_storage = final_hashed_state.storages.entry(address).or_default(); - if storage.wiped { - final_storage.wiped = true; - final_storage.storage.clear(); - } - final_storage.storage.reserve(storage.storage.len()); - if !storage.storage.is_empty() { // Look up outer maps once per address instead of once per slot. 
- let new_updates = new_storage_updates.entry(address).or_default(); - let mut existing_updates = storage_updates.get_mut(&address); + let new_updates = self.new_storage_updates.entry(address).or_default(); + let mut existing_updates = self.storage_updates.get_mut(&address); for (&slot, &value) in &storage.storage { - trie.record_slot_touch(address, slot); + self.trie.record_slot_touch(address, slot); let encoded = if value.is_zero() { Vec::new() @@ -507,7 +487,6 @@ where alloy_rlp::encode_fixed_size(&value).to_vec() }; new_updates.insert(slot, LeafUpdate::Changed(encoded)); - final_storage.storage.insert(slot, value); // Remove an existing storage update if it exists. if let Some(ref mut existing) = existing_updates { @@ -518,27 +497,28 @@ where // Make sure account is tracked in `account_updates` so that it is revealed in accounts // trie for storage root update. - new_account_updates.entry(address).or_insert(LeafUpdate::Touched); + self.new_account_updates.entry(address).or_insert(LeafUpdate::Touched); // Make sure account is tracked in `pending_account_updates` so that once storage root // is computed, it will be updated in the accounts trie. - pending_account_updates.entry(address).or_insert(None); + self.pending_account_updates.entry(address).or_insert(None); } for (&address, &account) in &hashed_state_update.accounts { - trie.record_account_touch(address); - final_hashed_state.accounts.insert(address, account); + self.trie.record_account_touch(address); // Track account as touched. // // This might overwrite an existing update, which is fine, because storage root from it // is already tracked in the trie and can be easily fetched again. - new_account_updates.insert(address, LeafUpdate::Touched); + self.new_account_updates.insert(address, LeafUpdate::Touched); // Track account in `pending_account_updates` so that once storage root is computed, // it will be updated in the accounts trie. 
- pending_account_updates.insert(address, Some(account)); + self.pending_account_updates.insert(address, Some(account)); } + + self.final_hashed_state.extend(hashed_state_update); } fn on_proof_result( From d929303a6906d97622f7911f4bcc8065dcbb186e Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 16:34:53 +0200 Subject: [PATCH 32/40] perf: avoid duplicate exact overlay comparisons --- crates/trie/trie/src/overlay_cursor.rs | 39 ++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index cf1d5b48b0b..7ab969feea6 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -91,9 +91,7 @@ where #[inline(always)] pub(crate) fn seek_until_exact(&mut self, key: &K) -> Option<(usize, &V)> { for (layer_idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((entry_key, value)) = cursor.seek(key) && - entry_key == key - { + if let Some((_, value)) = cursor.seek_exact(key) { return Some((layer_idx, value)) } } @@ -184,6 +182,41 @@ where self.current() } + #[inline(always)] + fn seek_exact(&mut self, key: &K) -> Option<&'a (K, V)> { + if let Some(current @ (entry_key, _)) = self.current() { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => self.position += 1, + std::cmp::Ordering::Equal => return Some(current), + std::cmp::Ordering::Greater => return None, + } + } + + let remaining = &self.entries[self.position..]; + if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { + self.position += remaining.partition_point(|(entry_key, _)| entry_key < key); + let current = self.current()?; + return (&current.0 == key).then_some(current) + } + + for (advance, (entry_key, _)) in remaining.iter().enumerate() { + match entry_key.cmp(key) { + std::cmp::Ordering::Less => {} + std::cmp::Ordering::Equal => { + self.position += advance; + return self.current() + } + std::cmp::Ordering::Greater => { + 
self.position += advance; + return None + } + } + } + + self.position = self.entries.len(); + None + } + #[inline(always)] fn first_after(&mut self, key: &K) -> Option<&'a (K, V)> { if let Some((entry_key, _)) = self.current() { From 8bed90b24a843b8707fbb61e1f14a7350ad8965c Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 16:49:58 +0200 Subject: [PATCH 33/40] perf: share cached state trie overlays --- .../provider/src/providers/state/overlay.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 95422f361ee..6119550cf0f 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -424,7 +424,7 @@ pub struct OverlayStateProviderFactory { /// A cache which maps `db_tip -> StateTrieOverlay`. If the db tip changes during usage of the /// factory then a new entry will get added to this, but in most cases only one entry is /// present. - overlay_cache: Arc>, + overlay_cache: Arc>>, } impl OverlayStateProviderFactory { @@ -453,7 +453,7 @@ impl OverlayStateProviderFactory { /// Fetches a [`StateTrieOverlay`] from the cache based on the current db tip block. If there is /// no cached value then this calculates the [`StateTrieOverlay`] and populates the cache. 
#[instrument(level = "debug", target = "providers::state::overlay", skip_all)] - fn get_overlay(&self, provider: &Provider) -> ProviderResult + fn get_overlay(&self, provider: &Provider) -> ProviderResult> where Provider: StageCheckpointReader + PruneCheckpointReader @@ -466,11 +466,11 @@ impl OverlayStateProviderFactory { let db_tip_block = self.overlay_builder.get_db_tip_block(provider)?; let overlay = match self.overlay_cache.entry(db_tip_block.hash) { - dashmap::Entry::Occupied(entry) => entry.get().clone(), + dashmap::Entry::Occupied(entry) => Arc::clone(entry.get()), dashmap::Entry::Vacant(entry) => { self.overlay_builder.metrics.overlay_cache_misses.increment(1); - let overlay = self.overlay_builder.build_overlay(provider)?; - entry.insert(overlay.clone()); + let overlay = Arc::new(self.overlay_builder.build_overlay(provider)?); + entry.insert(Arc::clone(&overlay)); overlay } }; @@ -522,7 +522,7 @@ where #[derive(Debug)] pub struct OverlayStateProvider { provider: Provider, - overlay: StateTrieOverlay, + overlay: Arc, is_v2: bool, } @@ -532,7 +532,7 @@ where { /// Create new overlay state provider. The `Provider` must be cloneable, which generally means /// it should be wrapped in an `Arc`. 
- pub const fn new(provider: Provider, overlay: StateTrieOverlay, is_v2: bool) -> Self { + pub const fn new(provider: Provider, overlay: Arc, is_v2: bool) -> Self { Self { provider, overlay, is_v2 } } } From e330aab061682aa23828ab9b19ceae0fa6814792 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 18:10:02 +0200 Subject: [PATCH 34/40] perf: restore overlay cursor forward fast paths --- .../trie/trie/src/hashed_cursor/post_state.rs | 133 ++++++++++++++---- crates/trie/trie/src/overlay_cursor.rs | 37 ++++- crates/trie/trie/src/trie_cursor/in_memory.rs | 48 ++++++- 3 files changed, 184 insertions(+), 34 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index c15b86b871c..9accfaa5813 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -169,7 +169,7 @@ where /// Positions the DB cursor state using the underlying cursor. fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { - if self.db_cursor_state.is_positioned_at(&key) { + if !self.db_cursor_state.should_seek(&key) { return Ok(()) } @@ -306,7 +306,7 @@ where fn reset(&mut self) { self.cursor.reset(); - self.db_cursor_state.set_entry(None); + self.db_cursor_state.reset_position(); self.post_state_cursor.reset(); self.deferred_overlay_seek_start = None; self.last_key = None; @@ -419,20 +419,25 @@ impl HashedPostStateOverlay { for (hashed_address, storage) in &state.storages { let overlay = Arc::make_mut(&mut self.storage_overlays).entry(*hashed_address).or_default(); + let storage_slots = storage.storage_slots_ref(); if storage.is_wiped() { overlay.layers.clear(); overlay.db_wiped = true; + overlay.has_visible_value = false; } - if !storage.storage_slots_ref().is_empty() { - overlay.layers.insert( - 0, - PostStateOverlayLayer::new(Arc::clone(&state), storage.storage_slots_ref()), - ); - } + if !storage_slots.is_empty() { + let has_nonzero_slot = 
storage_slots.iter().any(|(_, value)| !value.is_zero()); + let recompute_after_insert = !has_nonzero_slot && overlay.has_visible_value; - overlay.has_visible_value = has_visible_storage_value(&overlay.layers); + overlay + .layers + .insert(0, PostStateOverlayLayer::new(Arc::clone(&state), storage_slots)); + + overlay.has_visible_value = has_nonzero_slot || + (recompute_after_insert && has_visible_storage_value(&overlay.layers)); + } } } @@ -446,18 +451,19 @@ impl HashedPostStateOverlay { continue; } - if !storage.storage_slots_ref().is_empty() { - overlay.layers.push(PostStateOverlayLayer::new( - Arc::clone(state), - storage.storage_slots_ref(), - )); + let storage_slots = storage.storage_slots_ref(); + if !storage_slots.is_empty() { + if !overlay.has_visible_value && + layer_has_visible_storage_value(storage_slots, &overlay.layers) + { + overlay.has_visible_value = true; + } + overlay.layers.push(PostStateOverlayLayer::new(Arc::clone(state), storage_slots)); } if storage.is_wiped() { overlay.db_wiped = true; } - - overlay.has_visible_value = has_visible_storage_value(&overlay.layers); } } @@ -564,19 +570,25 @@ struct HashedStorageOverlay { type PostStateOverlayLayer = OverlayLayer; +fn layer_has_visible_storage_value( + entries: &[(B256, U256)], + higher_priority_layers: &[PostStateOverlayLayer], +) -> bool { + entries.iter().any(|(key, value)| { + !value.is_zero() && + !higher_priority_layers.iter().any(|higher_layer| { + higher_layer + .entries() + .binary_search_by_key(key, |(entry_key, _)| *entry_key) + .is_ok() + }) + }) +} + fn has_visible_storage_value(layers: &[PostStateOverlayLayer]) -> bool { for (layer_idx, layer) in layers.iter().enumerate() { - for (key, value) in layer.entries() { - if !value.is_zero() && - !layers[..layer_idx].iter().any(|higher_layer| { - higher_layer - .entries() - .binary_search_by_key(key, |(entry_key, _)| *entry_key) - .is_ok() - }) - { - return true - } + if layer_has_visible_storage_value(layer.entries(), 
&layers[..layer_idx]) { + return true } } false @@ -689,6 +701,39 @@ mod tests { ); } + #[test] + fn test_storage_visible_value_tracks_shadowed_lower_layers() { + let hashed_address = key(0x01); + let top_delete = + Arc::new(storage_post_state_for_address(hashed_address, vec![(key(0x10), U256::ZERO)])); + let lower_visible = Arc::new(storage_post_state_for_address( + hashed_address, + vec![(key(0x10), U256::from(1))], + )); + + let overlay = HashedPostStateOverlay::new(vec![top_delete, lower_visible]); + + let (_, _, has_visible_value) = overlay.storage_overlay_layers(hashed_address); + assert!(!has_visible_value); + } + + #[test] + fn test_storage_visible_value_tracks_shadowing_prepend() { + let hashed_address = key(0x01); + let lower_visible = Arc::new(storage_post_state_for_address( + hashed_address, + vec![(key(0x10), U256::from(1))], + )); + let top_delete = + Arc::new(storage_post_state_for_address(hashed_address, vec![(key(0x10), U256::ZERO)])); + + let mut overlay = HashedPostStateOverlay::new(vec![lower_visible]); + overlay.prepend_layer(top_delete); + + let (_, _, has_visible_value) = overlay.storage_overlay_layers(hashed_address); + assert!(!has_visible_value); + } + #[test] fn test_seek_overlay_exact_hit_does_not_touch_db_until_next() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -805,6 +850,40 @@ mod tests { assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); } + #[test] + fn test_seek_reuses_ahead_db_position() { + let db_nodes = BTreeMap::from([(key(0x03), account(3))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = HashedPostStateOverlay::default(); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); + + assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x03), account(3)))); + 
assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x03), account(3)))); + assert_eq!(visited_keys.lock().len(), 1, "seek should reuse an ahead DB position"); + } + + #[test] + fn test_seek_does_not_reseek_exhausted_db() { + let db_nodes = BTreeMap::from([(key(0x01), account(1))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = HashedPostStateOverlay::default(); + let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); + + assert_eq!(cursor.seek(key(0x02)).unwrap(), None); + assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!(cursor.seek(key(0x03)).unwrap(), None); + assert_eq!(visited_keys.lock().len(), 1, "exhausted DB cursor should stay exhausted"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 7ab969feea6..ce8e238dccf 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -6,6 +6,7 @@ const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; pub(crate) enum DbCursorState { Unpositioned, Positioned((K, V)), + Exhausted, Wiped, } @@ -25,20 +26,45 @@ impl DbCursorState { pub(crate) const fn entry(&self) -> Option<&(K, V)> { match self { Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Wiped => None, + Self::Unpositioned | Self::Exhausted | Self::Wiped => None, } } pub(crate) fn set_entry(&mut self, entry: Option<(K, V)>) { if !self.is_wiped() { - *self = entry.map(Self::Positioned).unwrap_or(Self::Unpositioned); + *self = entry.map(Self::Positioned).unwrap_or(Self::Exhausted); + } + } + + pub(crate) fn reset_position(&mut self) { + if !self.is_wiped() { + *self = Self::Unpositioned; } } } -impl DbCursorState { - pub(crate) fn 
is_positioned_at(&self, key: &K) -> bool { - matches!(self, Self::Positioned((db_key, _)) if db_key == key) +impl DbCursorState { + pub(crate) fn should_seek(&self, key: &K) -> bool { + match self { + Self::Unpositioned => true, + Self::Positioned((db_key, _)) => db_key < key, + Self::Exhausted | Self::Wiped => false, + } + } + + pub(crate) fn exact_entry(&self, key: &K) -> Option<&(K, V)> { + match self { + Self::Positioned((db_key, _)) if db_key == key => self.entry(), + Self::Unpositioned | Self::Positioned(_) | Self::Exhausted | Self::Wiped => None, + } + } + + pub(crate) fn may_contain_exact(&self, key: &K) -> bool { + match self { + Self::Unpositioned => true, + Self::Positioned((db_key, _)) => db_key <= key, + Self::Exhausted | Self::Wiped => false, + } } } @@ -54,6 +80,7 @@ impl Default for PositionedOverlayCursor<'_, K, V> { } impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { + #[cfg(test)] pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { Self::with_capacity(layers, layers.len()) } diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 031d9810b41..c7c6ff5305c 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -121,7 +121,7 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Positions the DB cursor state using the underlying cursor. 
fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { - if self.db_cursor_state.is_positioned_at(&key) { + if !self.db_cursor_state.should_seek(&key) { return Ok(()) } @@ -198,6 +198,10 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = Some(idx + 1); } mem_value.clone().map(|node| (key, node)) + } else if let Some(db_entry) = self.db_cursor_state.exact_entry(&key) { + Some(db_entry.clone()) + } else if !self.db_cursor_state.may_contain_exact(&key) { + None } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(db_entry); @@ -274,7 +278,7 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { fn reset(&mut self) { self.cursor.reset(); - self.db_cursor_state.set_entry(None); + self.db_cursor_state.reset_position(); self.in_memory_cursor.reset(); self.deferred_overlay_seek_start = None; self.last_key = None; @@ -1063,6 +1067,46 @@ mod tests { assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); } + #[test] + fn test_seek_reuses_ahead_db_position() { + let db_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x3]), branch_node(3))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = TrieUpdatesOverlay::default(); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), + Some((Nibbles::from_nibbles([0x3]), branch_node(3))) + ); + assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!( + cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), + Some((Nibbles::from_nibbles([0x3]), branch_node(3))) + ); + assert_eq!(visited_keys.lock().len(), 1, "seek should reuse an ahead DB position"); + } + + #[test] + fn test_seek_does_not_reseek_exhausted_db() { + let db_nodes = 
BTreeMap::from([(Nibbles::from_nibbles([0x1]), branch_node(1))]); + let db_nodes_arc = Arc::new(db_nodes); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); + + let overlay = TrieUpdatesOverlay::default(); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + + assert_eq!(cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), None); + assert_eq!(visited_keys.lock().len(), 1); + + assert_eq!(cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), None); + assert_eq!(visited_keys.lock().len(), 1, "exhausted DB cursor should stay exhausted"); + } + #[test] fn test_multiple_overlays_resolve_by_precedence() { let db_nodes = BTreeMap::from([ From 7a0c5d21452c5ea23d62b8123c21c58e914043d4 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 18:15:40 +0200 Subject: [PATCH 35/40] perf: pre-size storage overlay indexes --- crates/trie/trie/src/hashed_cursor/post_state.rs | 4 +++- crates/trie/trie/src/trie_cursor/in_memory.rs | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 9accfaa5813..e74676c168d 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -387,7 +387,9 @@ impl HashedPostStateOverlay { fn build_storage_overlays( states: &[Arc], ) -> Arc> { - let mut overlays: B256Map = B256Map::default(); + let storage_overlay_capacity = states.iter().map(|state| state.storages.len()).sum(); + let mut overlays: B256Map = + B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); for state in states { Self::push_storage_layer(&mut overlays, state); diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index c7c6ff5305c..2522aab319c 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ 
b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -334,7 +334,10 @@ impl TrieUpdatesOverlay { fn build_storage_overlays( updates: &[Arc], ) -> Arc> { - let mut overlays: B256Map = B256Map::default(); + let storage_overlay_capacity = + updates.iter().map(|update| update.storage_tries_ref().len()).sum(); + let mut overlays: B256Map = + B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); for update in updates { Self::push_storage_layer(&mut overlays, update); From fa12aed83f2652a2e016b1795673a4123f69abf1 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 19:11:10 +0200 Subject: [PATCH 36/40] perf: avoid cloning cached overlay indexes --- crates/chain-state/src/state_trie_overlay.rs | 27 +++++++++++++++++++ .../provider/src/providers/state/overlay.rs | 16 +++++------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index b011b046770..61780a8842e 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -517,18 +517,45 @@ impl StateTrieOverlay { self.trie_updates.push_layer(trie_updates); } + /// Return this overlay with a trie updates layer appended to the precedence stack. + pub fn with_pushed_trie_updates(mut self, trie_updates: Arc) -> Self { + self.trie_update_layers.push(trie_updates); + self.trie_updates = TrieUpdatesOverlay::new(self.trie_update_layers.clone()); + self + } + /// Add a hashed post-state layer at the end of the precedence stack. pub fn push_hashed_post_state(&mut self, hashed_post_state: Arc) { self.hashed_post_state_layers.push(Arc::clone(&hashed_post_state)); self.hashed_post_state.push_layer(hashed_post_state); } + /// Return this overlay with a hashed post-state layer appended to the precedence stack. 
+ pub fn with_pushed_hashed_post_state( + mut self, + hashed_post_state: Arc, + ) -> Self { + self.hashed_post_state_layers.push(hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self + } + /// Add a hashed post-state layer at the beginning of the precedence stack. pub fn prepend_hashed_post_state(&mut self, hashed_post_state: Arc) { self.hashed_post_state_layers.insert(0, Arc::clone(&hashed_post_state)); self.hashed_post_state.prepend_layer(hashed_post_state); } + /// Return this overlay with a hashed post-state layer prepended to the precedence stack. + pub fn with_prepended_hashed_post_state( + mut self, + hashed_post_state: Arc, + ) -> Self { + self.hashed_post_state_layers.insert(0, hashed_post_state); + self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self + } + /// Total number of trie update entries across all layers. pub fn trie_updates_total_len(&self) -> usize { self.trie_update_layers.iter().map(|updates| updates.total_len()).sum() diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 6119550cf0f..c488165890e 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -168,7 +168,7 @@ impl OverlayBuilder { fn resolve_overlays(&self, anchor_hash: BlockHash) -> ProviderResult { match &self.overlay_source { Some(OverlaySource::Managed { manager, state }) => { - let mut overlay = if anchor_hash == self.parent_hash { + let overlay = if anchor_hash == self.parent_hash { StateTrieOverlay::default() } else { manager @@ -176,11 +176,11 @@ impl OverlayBuilder { .map_err(ProviderError::other)? 
}; - if !state.is_empty() { - overlay.prepend_hashed_post_state(Arc::clone(state)); - } - - Ok(overlay) + Ok(if state.is_empty() { + overlay + } else { + overlay.with_prepended_hashed_post_state(Arc::clone(state)) + }) } Some(OverlaySource::Immediate { trie, state }) => { if anchor_hash != self.parent_hash { @@ -348,11 +348,11 @@ impl OverlayBuilder { let mut overlay = self.resolve_overlays(anchor_hash)?; if !trie_reverts.is_empty() { - overlay.push_trie_updates(Arc::new(trie_reverts)); + overlay = overlay.with_pushed_trie_updates(Arc::new(trie_reverts)); } if !hashed_state_reverts.is_empty() { - overlay.push_hashed_post_state(Arc::new(hashed_state_reverts)); + overlay = overlay.with_pushed_hashed_post_state(Arc::new(hashed_state_reverts)); } trie_updates_total_len = overlay.trie_updates_total_len(); From a71c0f66afcbe239d402ad7caa97590fcf17140c Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 20:10:17 +0200 Subject: [PATCH 37/40] perf: avoid duplicate overlay cursor scans --- .../trie/trie/src/hashed_cursor/post_state.rs | 23 ++++++++++--------- crates/trie/trie/src/overlay_cursor.rs | 23 ++++++++++++------- crates/trie/trie/src/trie_cursor/in_memory.rs | 16 ++++++++----- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index e74676c168d..385e7d2b72e 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -203,13 +203,18 @@ where /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. 
fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_key = self.post_state_cursor.min_current_key(); - let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); - let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { - return Ok(None); + let mem_entry = self.post_state_cursor.min_current_entry(); + let db_entry = self.db_cursor_state.entry(); + let next_key = match (mem_entry, db_entry) { + (Some((mem_key, _)), Some((db_key, _))) => mem_key.min(*db_key), + (Some((mem_key, _)), None) => mem_key, + (None, Some((db_key, _))) => *db_key, + (None, None) => return Ok(None), }; - if let Some(mem_value) = self.post_state_cursor.highest_priority_value_at(&next_key) { + if let Some((mem_key, mem_value)) = mem_entry && + mem_key == next_key + { if let Some(value) = mem_value { return Ok(Some((next_key, value))) } @@ -546,12 +551,8 @@ where self.cursor.first_after(key); } - fn min_current_key(&self) -> Option { - self.cursor.min_current_key() - } - - fn highest_priority_value_at(&self, key: &B256) -> Option> { - self.cursor.highest_priority_value_at(key).map(|value| (*value).into_option()) + fn min_current_entry(&self) -> Option<(B256, Option)> { + self.cursor.min_current_entry().map(|(key, value)| (key, (*value).into_option())) } fn advance_key(&mut self, key: &B256) { diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index ce8e238dccf..21c244026f8 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -133,13 +133,6 @@ where } } - #[inline(always)] - pub(crate) fn highest_priority_value_at(&self, key: &K) -> Option<&V> { - self.cursors.iter().find_map(|cursor| { - cursor.current().and_then(|(entry_key, value)| (entry_key == key).then_some(value)) - }) - } - #[inline(always)] pub(crate) fn advance_key(&mut self, key: &K) { for cursor in &mut self.cursors { @@ -154,10 +147,24 @@ impl PositionedOverlayCursor<'_, K, V> where K: Copy + Ord, { - 
#[inline(always)] + #[cfg(test)] pub(crate) fn min_current_key(&self) -> Option { self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() } + + #[inline(always)] + pub(crate) fn min_current_entry(&self) -> Option<(K, &V)> { + let mut min_entry = None; + for cursor in &self.cursors { + if let Some((key, value)) = cursor.current() { + match min_entry { + Some((min_key, _)) if key >= &min_key => {} + _ => min_entry = Some((*key, value)), + } + } + } + min_entry + } } #[derive(Debug)] diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 2522aab319c..7f754e9a719 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -155,15 +155,19 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_key = self.in_memory_cursor.min_current_key(); - let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); - let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { - return Ok(None); + let mem_entry = self.in_memory_cursor.min_current_entry(); + let db_entry = self.db_cursor_state.entry(); + let next_key = match (mem_entry, db_entry) { + (Some((mem_key, _)), Some((db_key, _))) => mem_key.min(*db_key), + (Some((mem_key, _)), None) => mem_key, + (None, Some((db_key, _))) => *db_key, + (None, None) => return Ok(None), }; - if let Some(mem_value) = - self.in_memory_cursor.highest_priority_value_at(&next_key).cloned() + if let Some((mem_key, mem_value)) = mem_entry && + mem_key == next_key { + let mem_value = mem_value.clone(); if let Some(node) = mem_value { return Ok(Some((next_key, node))) } From eb1b6d9da3156c406dc21cc5a974237b98aa6dc1 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 21:01:17 +0200 Subject: [PATCH 38/40] perf: skip single-layer storage 
overlay indexes --- .../trie/trie/src/hashed_cursor/post_state.rs | 241 ++++++++++++++---- crates/trie/trie/src/overlay_cursor.rs | 22 +- crates/trie/trie/src/trie_cursor/in_memory.rs | 130 ++++++++-- 3 files changed, 328 insertions(+), 65 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 385e7d2b72e..b8e38bf5be8 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -346,9 +346,8 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, cursor_wiped, has_visible_value) = - self.post_states.storage_overlay_layers(hashed_address); - self.post_state_cursor.retarget(layers, has_visible_value); + let cursor_wiped = + self.post_states.retarget_storage_overlay(&mut self.post_state_cursor, hashed_address); self.db_cursor_state = DbCursorState::new(cursor_wiped); } } @@ -357,7 +356,7 @@ where #[derive(Clone, Debug, Default)] pub struct HashedPostStateOverlay { account_overlay: Arc>>>, - storage_overlays: Arc>, + storage_overlays: HashedStorageOverlays, layer_capacity: usize, } @@ -389,9 +388,11 @@ impl HashedPostStateOverlay { ) } - fn build_storage_overlays( - states: &[Arc], - ) -> Arc> { + fn build_storage_overlays(states: &[Arc]) -> HashedStorageOverlays { + if let [state] = states { + return HashedStorageOverlays::Single(Arc::clone(state)) + } + let storage_overlay_capacity = states.iter().map(|state| state.storages.len()).sum(); let mut overlays: B256Map = B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); @@ -400,7 +401,7 @@ impl HashedPostStateOverlay { Self::push_storage_layer(&mut overlays, state); } - Arc::new(overlays) + HashedStorageOverlays::Indexed(Arc::new(overlays)) } /// Add a hashed post-state layer at the end of the precedence stack. 
@@ -410,7 +411,7 @@ impl HashedPostStateOverlay { Arc::make_mut(&mut self.account_overlay) .push(PostStateOverlayLayer::new(Arc::clone(&state), state.accounts.as_slice())); } - Self::push_storage_layer(Arc::make_mut(&mut self.storage_overlays), &state); + self.storage_overlays.push_layer(state); } /// Add a hashed post-state layer at the beginning of the precedence stack. @@ -423,29 +424,7 @@ impl HashedPostStateOverlay { ); } - for (hashed_address, storage) in &state.storages { - let overlay = - Arc::make_mut(&mut self.storage_overlays).entry(*hashed_address).or_default(); - let storage_slots = storage.storage_slots_ref(); - - if storage.is_wiped() { - overlay.layers.clear(); - overlay.db_wiped = true; - overlay.has_visible_value = false; - } - - if !storage_slots.is_empty() { - let has_nonzero_slot = storage_slots.iter().any(|(_, value)| !value.is_zero()); - let recompute_after_insert = !has_nonzero_slot && overlay.has_visible_value; - - overlay - .layers - .insert(0, PostStateOverlayLayer::new(Arc::clone(&state), storage_slots)); - - overlay.has_visible_value = has_nonzero_slot || - (recompute_after_insert && has_visible_storage_value(&overlay.layers)); - } - } + self.storage_overlays.prepend_layer(state); } fn push_storage_layer( @@ -474,24 +453,97 @@ impl HashedPostStateOverlay { } } + fn prepend_storage_layer( + overlays: &mut B256Map, + state: &Arc, + ) { + for (hashed_address, storage) in &state.storages { + let overlay = overlays.entry(*hashed_address).or_default(); + let storage_slots = storage.storage_slots_ref(); + + if storage.is_wiped() { + overlay.layers.clear(); + overlay.db_wiped = true; + overlay.has_visible_value = false; + } + + if !storage_slots.is_empty() { + let has_nonzero_slot = storage_slots.iter().any(|(_, value)| !value.is_zero()); + let recompute_after_insert = !has_nonzero_slot && overlay.has_visible_value; + + overlay + .layers + .insert(0, PostStateOverlayLayer::new(Arc::clone(state), storage_slots)); + + overlay.has_visible_value = 
has_nonzero_slot || + (recompute_after_insert && has_visible_storage_value(&overlay.layers)); + } + } + } + fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { PostStateOverlayCursor::new(self.account_overlay.as_slice(), false, self.layer_capacity) } fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - let (layers, db_wiped, has_visible_value) = self.storage_overlay_layers(hashed_address); - (PostStateOverlayCursor::new(layers, has_visible_value, self.layer_capacity), db_wiped) + match &self.storage_overlays { + HashedStorageOverlays::Single(state) => { + let Some(storage) = state.storages.get(&hashed_address) else { + return (PostStateOverlayCursor::with_capacity(self.layer_capacity), false) + }; + let storage_slots = storage.storage_slots_ref(); + let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); + ( + PostStateOverlayCursor::from_entries( + storage_slots, + has_visible_value, + self.layer_capacity, + ), + storage.is_wiped(), + ) + } + HashedStorageOverlays::Indexed(overlays) => { + let Some(overlay) = overlays.get(&hashed_address) else { + return (PostStateOverlayCursor::with_capacity(self.layer_capacity), false) + }; + + ( + PostStateOverlayCursor::new( + overlay.layers.as_slice(), + overlay.has_visible_value, + self.layer_capacity, + ), + overlay.db_wiped, + ) + } + } } - fn storage_overlay_layers( - &self, + fn retarget_storage_overlay<'a>( + &'a self, + cursor: &mut PostStateOverlayCursor<'a, U256>, hashed_address: B256, - ) -> (&[PostStateOverlayLayer], bool, bool) { - let Some(overlay) = self.storage_overlays.get(&hashed_address) else { - return (&[], false, false); - }; - - (overlay.layers.as_slice(), overlay.db_wiped, overlay.has_visible_value) + ) -> bool { + match &self.storage_overlays { + HashedStorageOverlays::Single(state) => { + let Some(storage) = state.storages.get(&hashed_address) else { + cursor.retarget_entries(&[], false); + return false + }; + let 
storage_slots = storage.storage_slots_ref(); + let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); + cursor.retarget_entries(storage_slots, has_visible_value); + storage.is_wiped() + } + HashedStorageOverlays::Indexed(overlays) => { + let Some(overlay) = overlays.get(&hashed_address) else { + cursor.retarget(&[], false); + return false + }; + cursor.retarget(overlay.layers.as_slice(), overlay.has_visible_value); + overlay.db_wiped + } + } } } @@ -529,10 +581,33 @@ impl<'a, V> PostStateOverlayCursor<'a, V> { self.cursor.reset(); } + fn with_capacity(layer_capacity: usize) -> Self { + Self { + cursor: PositionedOverlayCursor::with_entries(&[], layer_capacity), + has_visible_value: false, + } + } + + fn from_entries( + entries: &'a [(B256, V)], + has_visible_value: bool, + layer_capacity: usize, + ) -> Self { + Self { + cursor: PositionedOverlayCursor::with_entries(entries, layer_capacity), + has_visible_value, + } + } + fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { self.cursor.retarget(layers); self.has_visible_value = has_visible_value; } + + fn retarget_entries(&mut self, entries: &'a [(B256, V)], has_visible_value: bool) { + self.cursor.retarget_entries(entries); + self.has_visible_value = has_visible_value; + } } impl<'a, V> PostStateOverlayCursor<'a, V> @@ -571,6 +646,59 @@ struct HashedStorageOverlay { has_visible_value: bool, } +#[derive(Clone, Debug)] +enum HashedStorageOverlays { + Single(Arc), + Indexed(Arc>), +} + +impl Default for HashedStorageOverlays { + fn default() -> Self { + Self::Indexed(Default::default()) + } +} + +impl HashedStorageOverlays { + fn is_empty(&self) -> bool { + match self { + Self::Single(state) => state.storages.is_empty(), + Self::Indexed(overlays) => overlays.is_empty(), + } + } + + fn push_layer(&mut self, state: Arc) { + match self { + Self::Single(existing) => { + let storage_overlay_capacity = existing.storages.len() + state.storages.len(); + let mut 
overlays: B256Map = + B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); + HashedPostStateOverlay::push_storage_layer(&mut overlays, existing); + HashedPostStateOverlay::push_storage_layer(&mut overlays, &state); + *self = Self::Indexed(Arc::new(overlays)); + } + Self::Indexed(overlays) => { + HashedPostStateOverlay::push_storage_layer(Arc::make_mut(overlays), &state); + } + } + } + + fn prepend_layer(&mut self, state: Arc) { + match self { + Self::Single(existing) => { + let storage_overlay_capacity = existing.storages.len() + state.storages.len(); + let mut overlays: B256Map = + B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); + HashedPostStateOverlay::push_storage_layer(&mut overlays, &state); + HashedPostStateOverlay::push_storage_layer(&mut overlays, existing); + *self = Self::Indexed(Arc::new(overlays)); + } + Self::Indexed(overlays) => { + HashedPostStateOverlay::prepend_storage_layer(Arc::make_mut(overlays), &state); + } + } + } +} + type PostStateOverlayLayer = OverlayLayer; fn layer_has_visible_storage_value( @@ -654,8 +782,29 @@ mod tests { overlay: &HashedPostStateOverlay, hashed_address: B256, ) -> (Vec>, bool, bool) { - let (layers, db_wiped, has_visible_value) = overlay.storage_overlay_layers(hashed_address); - (layers.iter().map(|layer| layer.entries().to_vec()).collect(), db_wiped, has_visible_value) + match &overlay.storage_overlays { + HashedStorageOverlays::Single(state) => { + let Some(storage) = state.storages.get(&hashed_address) else { + return (Vec::new(), false, false) + }; + let storage_slots = storage.storage_slots_ref(); + let layers = (!storage_slots.is_empty()) + .then(|| vec![storage_slots.to_vec()]) + .unwrap_or_default(); + let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); + (layers, storage.is_wiped(), has_visible_value) + } + HashedStorageOverlays::Indexed(overlays) => { + let Some(overlay) = overlays.get(&hashed_address) else { + return 
(Vec::new(), false, false) + }; + ( + overlay.layers.iter().map(|layer| layer.entries().to_vec()).collect(), + overlay.db_wiped, + overlay.has_visible_value, + ) + } + } } #[test] @@ -716,7 +865,7 @@ mod tests { let overlay = HashedPostStateOverlay::new(vec![top_delete, lower_visible]); - let (_, _, has_visible_value) = overlay.storage_overlay_layers(hashed_address); + let (_, _, has_visible_value) = storage_overlay_snapshot(&overlay, hashed_address); assert!(!has_visible_value); } @@ -733,7 +882,7 @@ mod tests { let mut overlay = HashedPostStateOverlay::new(vec![lower_visible]); overlay.prepend_layer(top_delete); - let (_, _, has_visible_value) = overlay.storage_overlay_layers(hashed_address); + let (_, _, has_visible_value) = storage_overlay_snapshot(&overlay, hashed_address); assert!(!has_visible_value); } diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs index 21c244026f8..643276aaaa3 100644 --- a/crates/trie/trie/src/overlay_cursor.rs +++ b/crates/trie/trie/src/overlay_cursor.rs @@ -91,6 +91,14 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { this } + pub(crate) fn with_entries(entries: &'a [(K, V)], capacity: usize) -> Self { + let mut cursors = Vec::with_capacity(capacity.max(usize::from(!entries.is_empty()))); + if !entries.is_empty() { + cursors.push(PositionedOverlayLayerCursor::from_entries(entries)); + } + Self { cursors } + } + pub(crate) fn reset(&mut self) { for cursor in &mut self.cursors { cursor.reset(); @@ -102,6 +110,14 @@ impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { self.cursors.clear(); self.cursors.extend(layers.iter().map(PositionedOverlayLayerCursor::new)); } + + pub(crate) fn retarget_entries(&mut self, entries: &'a [(K, V)]) { + debug_assert!(self.cursors.capacity() >= usize::from(!entries.is_empty())); + self.cursors.clear(); + if !entries.is_empty() { + self.cursors.push(PositionedOverlayLayerCursor::from_entries(entries)); + } + } } impl PositionedOverlayCursor<'_, K, V> @@ -175,7 
+191,11 @@ struct PositionedOverlayLayerCursor<'a, K, V> { impl<'a, K, V> PositionedOverlayLayerCursor<'a, K, V> { fn new(layer: &'a OverlayLayer) -> Self { - Self { entries: layer.entries(), position: 0 } + Self::from_entries(layer.entries()) + } + + fn from_entries(entries: &'a [(K, V)]) -> Self { + Self { entries, position: 0 } } #[inline(always)] diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 7f754e9a719..cd18965ac25 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -297,8 +297,8 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let (layers, db_wiped) = self.trie_updates.storage_overlay_layers(hashed_address); - self.in_memory_cursor.retarget(layers); + let db_wiped = + self.trie_updates.retarget_storage_overlay(&mut self.in_memory_cursor, hashed_address); self.db_cursor_state = DbCursorState::new(db_wiped); } } @@ -307,7 +307,7 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { #[derive(Clone, Debug, Default)] pub struct TrieUpdatesOverlay { account_overlay: Arc>, - storage_overlays: Arc>, + storage_overlays: TrieStorageOverlays, layer_capacity: usize, } @@ -335,9 +335,11 @@ impl TrieUpdatesOverlay { ) } - fn build_storage_overlays( - updates: &[Arc], - ) -> Arc> { + fn build_storage_overlays(updates: &[Arc]) -> TrieStorageOverlays { + if let [update] = updates { + return TrieStorageOverlays::Single(Arc::clone(update)) + } + let storage_overlay_capacity = updates.iter().map(|update| update.storage_tries_ref().len()).sum(); let mut overlays: B256Map = @@ -347,7 +349,7 @@ impl TrieUpdatesOverlay { Self::push_storage_layer(&mut overlays, update); } - Arc::new(overlays) + TrieStorageOverlays::Indexed(Arc::new(overlays)) } /// Add a trie updates layer at the end of the precedence stack. 
@@ -357,7 +359,7 @@ impl TrieUpdatesOverlay { Arc::make_mut(&mut self.account_overlay) .push(TrieOverlayLayer::new(Arc::clone(&update), update.account_nodes_ref())); } - Self::push_storage_layer(Arc::make_mut(&mut self.storage_overlays), &update); + self.storage_overlays.push_layer(update); } fn push_storage_layer( @@ -387,16 +389,52 @@ impl TrieUpdatesOverlay { } fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - let (layers, db_wiped) = self.storage_overlay_layers(hashed_address); - (OverlayCursor::with_capacity(layers, self.layer_capacity), db_wiped) - } + match &self.storage_overlays { + TrieStorageOverlays::Single(update) => { + let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { + return (OverlayCursor::with_entries(&[], self.layer_capacity), false) + }; + ( + OverlayCursor::with_entries(storage.storage_nodes_ref(), self.layer_capacity), + storage.is_deleted(), + ) + } + TrieStorageOverlays::Indexed(overlays) => { + let Some(overlay) = overlays.get(&hashed_address) else { + return (OverlayCursor::with_entries(&[], self.layer_capacity), false) + }; - fn storage_overlay_layers(&self, hashed_address: B256) -> (&[TrieOverlayLayer], bool) { - let Some(overlay) = self.storage_overlays.get(&hashed_address) else { - return (&[], false); - }; + ( + OverlayCursor::with_capacity(overlay.layers.as_slice(), self.layer_capacity), + overlay.db_wiped, + ) + } + } + } - (overlay.layers.as_slice(), overlay.db_wiped) + fn retarget_storage_overlay<'a>( + &'a self, + cursor: &mut OverlayCursor<'a>, + hashed_address: B256, + ) -> bool { + match &self.storage_overlays { + TrieStorageOverlays::Single(update) => { + let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { + cursor.retarget_entries(&[]); + return false + }; + cursor.retarget_entries(storage.storage_nodes_ref()); + storage.is_deleted() + } + TrieStorageOverlays::Indexed(overlays) => { + let Some(overlay) = overlays.get(&hashed_address) else { + 
cursor.retarget_entries(&[]); + return false + }; + cursor.retarget(overlay.layers.as_slice()); + overlay.db_wiped + } + } } } @@ -412,6 +450,44 @@ struct TrieStorageOverlay { db_wiped: bool, } +#[derive(Clone, Debug)] +enum TrieStorageOverlays { + Single(Arc), + Indexed(Arc>), +} + +impl Default for TrieStorageOverlays { + fn default() -> Self { + Self::Indexed(Default::default()) + } +} + +impl TrieStorageOverlays { + fn is_empty(&self) -> bool { + match self { + Self::Single(update) => update.storage_tries_ref().is_empty(), + Self::Indexed(overlays) => overlays.is_empty(), + } + } + + fn push_layer(&mut self, update: Arc) { + match self { + Self::Single(existing) => { + let storage_overlay_capacity = + existing.storage_tries_ref().len() + update.storage_tries_ref().len(); + let mut overlays: B256Map = + B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); + TrieUpdatesOverlay::push_storage_layer(&mut overlays, existing); + TrieUpdatesOverlay::push_storage_layer(&mut overlays, &update); + *self = Self::Indexed(Arc::new(overlays)); + } + Self::Indexed(overlays) => { + TrieUpdatesOverlay::push_storage_layer(Arc::make_mut(overlays), &update); + } + } + } +} + type OverlayCursor<'a> = PositionedOverlayCursor<'a, Nibbles, Option>; type TrieOverlayLayer = OverlayLayer>; @@ -497,8 +573,26 @@ mod tests { overlay: &TrieUpdatesOverlay, hashed_address: B256, ) -> (Vec)>>, bool) { - let (layers, db_wiped) = overlay.storage_overlay_layers(hashed_address); - (layers.iter().map(|layer| layer.entries().to_vec()).collect(), db_wiped) + match &overlay.storage_overlays { + TrieStorageOverlays::Single(update) => { + let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { + return (Vec::new(), false) + }; + let layers = (!storage.storage_nodes_ref().is_empty()) + .then(|| vec![storage.storage_nodes_ref().to_vec()]) + .unwrap_or_default(); + (layers, storage.is_deleted()) + } + TrieStorageOverlays::Indexed(overlays) => { + let 
Some(overlay) = overlays.get(&hashed_address) else { + return (Vec::new(), false) + }; + ( + overlay.layers.iter().map(|layer| layer.entries().to_vec()).collect(), + overlay.db_wiped, + ) + } + } } #[test] From de17b428b15e172b7552b2cc276e53d8dd1c1a5f Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 30 May 2026 22:09:10 +0200 Subject: [PATCH 39/40] perf: reuse state trie overlay indexes --- crates/chain-state/src/state_trie_overlay.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 61780a8842e..5eb8c34bf88 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -519,8 +519,7 @@ impl StateTrieOverlay { /// Return this overlay with a trie updates layer appended to the precedence stack. pub fn with_pushed_trie_updates(mut self, trie_updates: Arc) -> Self { - self.trie_update_layers.push(trie_updates); - self.trie_updates = TrieUpdatesOverlay::new(self.trie_update_layers.clone()); + self.push_trie_updates(trie_updates); self } @@ -535,8 +534,7 @@ impl StateTrieOverlay { mut self, hashed_post_state: Arc, ) -> Self { - self.hashed_post_state_layers.push(hashed_post_state); - self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self.push_hashed_post_state(hashed_post_state); self } @@ -551,8 +549,7 @@ impl StateTrieOverlay { mut self, hashed_post_state: Arc, ) -> Self { - self.hashed_post_state_layers.insert(0, hashed_post_state); - self.hashed_post_state = HashedPostStateOverlay::new(self.hashed_post_state_layers.clone()); + self.prepend_hashed_post_state(hashed_post_state); self } From a76ce35c8be44dc20f57bcb7f621f0dd5ec39fc8 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 31 May 2026 11:26:40 +0200 Subject: [PATCH 40/40] perf: revert overlay cursor experiments --- crates/chain-state/src/state_trie_overlay.rs | 592 ++++------ 
.../src/providers/state/historical.rs | 16 +- .../provider/src/providers/state/latest.rs | 14 +- .../provider/src/providers/state/overlay.rs | 176 +-- crates/trie/db/src/changesets.rs | 9 +- crates/trie/db/src/proof.rs | 39 +- crates/trie/db/src/state.rs | 32 +- crates/trie/db/src/storage.rs | 10 +- crates/trie/db/tests/fuzz_in_memory_nodes.rs | 11 +- crates/trie/db/tests/post_state.rs | 107 +- crates/trie/sparse/src/parallel.rs | 6 +- crates/trie/trie/src/forward_cursor.rs | 187 +++ .../trie/trie/src/hashed_cursor/post_state.rs | 1006 +++++------------ crates/trie/trie/src/lib.rs | 3 +- crates/trie/trie/src/node_iter.rs | 7 +- crates/trie/trie/src/overlay_cursor.rs | 421 ------- crates/trie/trie/src/test_utils.rs | 6 +- crates/trie/trie/src/trie_cursor/in_memory.rs | 788 ++++--------- 18 files changed, 1107 insertions(+), 2323 deletions(-) create mode 100644 crates/trie/trie/src/forward_cursor.rs delete mode 100644 crates/trie/trie/src/overlay_cursor.rs diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs index 5eb8c34bf88..3027f0997ac 100644 --- a/crates/chain-state/src/state_trie_overlay.rs +++ b/crates/chain-state/src/state_trie_overlay.rs @@ -1,8 +1,8 @@ -//! State trie overlay stacks for in-memory blocks. +//! Flattened state trie overlays for in-memory blocks. //! //! Payload validation needs a view of the state trie as of an in-memory parent block even when that //! parent has not been persisted yet. [`StateTrieOverlayManager`] tracks those in-memory blocks and -//! builds reusable state trie overlays on demand. +//! builds reusable flattened state trie overlays on demand. 
use crate::{EthPrimitives, ExecutedBlock}; use alloy_primitives::B256; @@ -16,23 +16,22 @@ use reth_primitives_traits::{ }; #[cfg(feature = "rayon")] use reth_tasks::WorkerPool; -use reth_trie::{ - hashed_cursor::HashedPostStateOverlay, trie_cursor::TrieUpdatesOverlay, - updates::TrieUpdatesSorted, HashedPostStateSorted, -}; -#[cfg(any(test, feature = "rayon"))] -use std::time::Instant; -use std::{fmt, sync::Arc}; -use tracing::debug; +use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; +use std::{fmt, sync::Arc, time::Instant}; +use tracing::{debug, trace, warn}; + +/// State trie overlays ordered from highest to lowest precedence. +pub type StateTrieOverlay = (Vec>, Vec>); -/// Manages state trie overlays for in-memory blocks. +/// Manages flattened state trie overlays for in-memory blocks. /// -/// The manager owns the in-memory block graph and a cache of state trie overlays keyed by -/// `(anchor_hash, tip_hash)`. Cache entries can also mark in-flight background computations. +/// The manager owns the in-memory block graph and a cache of flattened state trie overlays keyed by +/// `(anchor_hash, tip_hash)`. 
#[derive(Clone)] pub struct StateTrieOverlayManager { blocks: Arc>>, - overlays: Arc>, + overlays: Arc>>, + pending_overlays: Arc>, #[cfg(feature = "rayon")] worker_pool: Option>, metrics: StateTrieOverlayMetrics, @@ -55,6 +54,7 @@ impl Default for StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), + pending_overlays: Default::default(), #[cfg(feature = "rayon")] worker_pool: None, metrics: Default::default(), @@ -67,6 +67,7 @@ impl std::fmt::Debug for StateTrieOverlayManager { f.debug_struct("StateTrieOverlayManager") .field("blocks", &self.blocks.len()) .field("overlays", &self.overlays.len()) + .field("pending_overlays", &self.pending_overlays.len()) .finish() } } @@ -78,6 +79,7 @@ impl StateTrieOverlayManager { Self { blocks: Default::default(), overlays: Default::default(), + pending_overlays: Default::default(), worker_pool: Some(worker_pool), metrics: Default::default(), } @@ -123,7 +125,7 @@ impl StateTrieOverlayManager { .iter() .filter_map(|entry| { let key = *entry.key(); - (key.tip_hash == parent_hash && entry.value().is_ready()).then_some(key.anchor_hash) + (key.tip_hash == parent_hash).then_some(key.anchor_hash) }) .collect::>(); @@ -139,7 +141,7 @@ impl StateTrieOverlayManager { let _guard = span.enter(); for anchor_hash in cached_parent_overlays { - self.spawn_overlay_cache_fill(OverlayCacheKey { anchor_hash, tip_hash: hash }, None); + self.spawn_overlay_cache_fill(OverlayCacheKey { anchor_hash, tip_hash: hash }); } } @@ -171,13 +173,20 @@ impl StateTrieOverlayManager { if removed_blocks > 0 { let overlays_before = self.overlays.len(); + let pending_overlays_before = self.pending_overlays.len(); let blocks = Arc::clone(&self.blocks); self.overlays.retain(|key, _| { key.tip_hash != key.anchor_hash && Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == Some(key.anchor_hash) }); + self.pending_overlays.retain(|key, _| { + key.tip_hash != key.anchor_hash && + 
Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == + Some(key.anchor_hash) + }); pruned_overlays = overlays_before.saturating_sub(self.overlays.len()); + pruned_overlays += pending_overlays_before.saturating_sub(self.pending_overlays.len()); span.record("pruned_overlays", pruned_overlays); } debug!( @@ -207,8 +216,8 @@ impl StateTrieOverlayManager { %anchor_hash, "loading state trie overlay for parent" ); - let overlay = self.get_overlay(parent_hash, anchor_hash)?; - Ok(overlay) + let input = self.get_overlay(parent_hash, anchor_hash)?; + Ok(input) } #[tracing::instrument( @@ -231,26 +240,25 @@ impl StateTrieOverlayManager { let key = OverlayCacheKey { anchor_hash, tip_hash }; let span = tracing::Span::current(); - if let Some(overlay) = self.ready_overlay(key) { + if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { self.metrics.overlay_cache_reuses.increment(1); span.record("cache_reused", true); - return Ok(overlay) + return Ok((vec![Arc::clone(&input.nodes)], vec![Arc::clone(&input.state)])) } span.record("cache_reused", false); let blocks = self.resolve_block_path(tip_hash, anchor_hash)?; span.record("block_count", blocks.len()); if blocks.is_empty() { - return Ok(StateTrieOverlay::default()) + return Ok((Vec::new(), Vec::new())) } let cached_prefix = self.largest_cached_prefix(anchor_hash, &blocks); span.record("parent_overlay_reused", cached_prefix.is_some()); - let overlay = Self::overlay_stack_from_path(&blocks, cached_prefix.as_ref()); - self.spawn_overlay_cache_fill(key, Some(ResolvedOverlayPath { blocks, cached_prefix })); + self.spawn_overlay_cache_fill(key); - Ok(overlay) + Ok(Self::overlay_stack_from_path(&blocks, cached_prefix)) } fn resolve_block_path( @@ -282,20 +290,21 @@ impl StateTrieOverlayManager { &self, anchor_hash: B256, blocks_newest_to_oldest: &[ExecutedBlock], - ) -> Option<(usize, StateTrieOverlay)> { + ) -> Option<(usize, Arc)> { 
blocks_newest_to_oldest.iter().enumerate().find_map(|(idx, block)| { let tip_hash = block.recovered_block().hash(); - self.ready_overlay(OverlayCacheKey { anchor_hash, tip_hash }) - .map(|overlay| (idx, overlay)) + self.overlays + .get(&OverlayCacheKey { anchor_hash, tip_hash }) + .map(|entry| (idx, Arc::clone(entry.value()))) }) } fn overlay_stack_from_path( blocks_newest_to_oldest: &[ExecutedBlock], - cached_prefix: Option<&(usize, StateTrieOverlay)>, + cached_prefix: Option<(usize, Arc)>, ) -> StateTrieOverlay { let individual_block_count = - cached_prefix.map_or(blocks_newest_to_oldest.len(), |(idx, _)| *idx); + cached_prefix.as_ref().map_or(blocks_newest_to_oldest.len(), |(idx, _)| *idx); let mut trie_updates = Vec::with_capacity(individual_block_count + cached_prefix.is_some() as usize); let mut hashed_post_state = @@ -307,71 +316,73 @@ impl StateTrieOverlayManager { hashed_post_state.push(trie_data.hashed_state); } - if let Some((_, cached_overlay)) = cached_prefix { - trie_updates.extend(cached_overlay.trie_update_layers.iter().cloned()); - hashed_post_state.extend(cached_overlay.hashed_post_state_layers.iter().cloned()); + if let Some((_, input)) = cached_prefix { + trie_updates.push(Arc::clone(&input.nodes)); + hashed_post_state.push(Arc::clone(&input.state)); } - StateTrieOverlay::new(trie_updates, hashed_post_state) + (trie_updates, hashed_post_state) } - fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey, path: Option>) { - #[cfg(not(feature = "rayon"))] - { - let _ = key; - let _ = path; + fn spawn_overlay_cache_fill(&self, key: OverlayCacheKey) { + if self.overlays.contains_key(&key) { + return } - - #[cfg(feature = "rayon")] - { - let Some(worker_pool) = self.worker_pool.clone() else { return }; - - match self.overlays.entry(key) { - Entry::Occupied(_) => return, - Entry::Vacant(entry) => { - entry.insert(OverlayCacheEntry::Pending); - } + match self.pending_overlays.entry(key) { + Entry::Occupied(_) => return, + Entry::Vacant(entry) => { + 
entry.insert(()); } + } - let manager = ::clone(self); - let span = tracing::Span::current(); + let manager = ::clone(self); + let span = tracing::Span::current(); + #[cfg(feature = "rayon")] + if let Some(worker_pool) = self.worker_pool.clone() { worker_pool.spawn(move || { - let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { - let _span = tracing::debug_span!( - target: "chain_state::state_trie_overlay", - parent: &span, - "compute_state_trie_overlay_cache_fill", - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - ) - .entered(); - manager.compute_and_cache_overlay(key, path); - })); - - if result.is_err() { - manager.remove_pending_overlay(key); - debug!( - target: "chain_state::state_trie_overlay", - tip_hash = %key.tip_hash, - anchor_hash = %key.anchor_hash, - "state trie overlay cache fill panicked" - ); - } + let _span = tracing::trace_span!( + target: "chain_state::state_trie_overlay", + parent: span, + "compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key); }); + return + } + + if let Err(error) = + std::thread::Builder::new().name("state-ovly-cache-fill".to_string()).spawn(move || { + let _span = tracing::trace_span!( + target: "chain_state::state_trie_overlay", + parent: span, + "compute_state_trie_overlay_cache_fill", + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + ) + .entered(); + manager.compute_and_cache_overlay(key); + }) + { + self.pending_overlays.remove(&key); + warn!( + target: "chain_state::state_trie_overlay", + ?error, + tip_hash = %key.tip_hash, + anchor_hash = %key.anchor_hash, + "failed to spawn state trie overlay cache fill" + ); } } - #[cfg(any(test, feature = "rayon"))] - fn compute_and_cache_overlay( - &self, - key: OverlayCacheKey, - path: Option>, - ) { - let result = self.compute_overlay_for_key(key, path); + fn compute_and_cache_overlay(&self, key: OverlayCacheKey) { + let 
result = self.compute_overlay_for_key(key); + self.pending_overlays.remove(&key); if let Err(error) = result { - self.remove_pending_overlay(key); debug!( target: "chain_state::state_trie_overlay", ?error, @@ -382,31 +393,24 @@ impl StateTrieOverlayManager { } } - #[cfg(any(test, feature = "rayon"))] fn compute_overlay_for_key( &self, key: OverlayCacheKey, - path: Option>, - ) -> Result { - let path = match path { - Some(path) => path, - None => { - let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; - let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); - ResolvedOverlayPath { blocks, cached_prefix } - } - }; - let cached_prefix = - self.largest_cached_prefix(key.anchor_hash, &path.blocks).or(path.cached_prefix); - let (blocks, parent_overlay) = match cached_prefix { - Some((idx, parent_overlay)) => { - let mut blocks = path.blocks; - blocks.truncate(idx); - (blocks, parent_overlay) + ) -> Result, StateTrieOverlayError> { + if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { + self.metrics.overlay_cache_reuses.increment(1); + return Ok(input) + } + + let blocks = self.resolve_block_path(key.tip_hash, key.anchor_hash)?; + let cached_prefix = self.largest_cached_prefix(key.anchor_hash, &blocks); + let compute_input = match cached_prefix { + Some((idx, parent_input)) => { + ComputeOverlayInput::ExtendCached { blocks: blocks[..idx].to_vec(), parent_input } } - None => (path.blocks, StateTrieOverlay::default()), + None => ComputeOverlayInput::MergeBlocks(blocks), }; - let overlay = compute_overlay(blocks, parent_overlay, key.anchor_hash, &self.metrics); + let input = Arc::new(compute_overlay(compute_input, key.anchor_hash, &self.metrics)); if key.tip_hash != key.anchor_hash && Self::anchor_for_parent_in(self.blocks.as_ref(), key.tip_hash, key.anchor_hash) != @@ -418,35 +422,19 @@ impl StateTrieOverlayManager { }); } - let overlay = match self.overlays.entry(key) { - Entry::Occupied(mut entry) => 
match entry.get() { - OverlayCacheEntry::Ready(overlay) => { - self.metrics.overlay_cache_reuses.increment(1); - overlay.clone() - } - OverlayCacheEntry::Pending => { - self.metrics.overlay_cache_fills.increment(1); - entry.insert(OverlayCacheEntry::Ready(overlay.clone())); - overlay - } - }, + let input = match self.overlays.entry(key) { + Entry::Occupied(entry) => { + self.metrics.overlay_cache_reuses.increment(1); + Arc::clone(entry.get()) + } Entry::Vacant(entry) => { self.metrics.overlay_cache_fills.increment(1); - entry.insert(OverlayCacheEntry::Ready(overlay.clone())); - overlay + entry.insert(Arc::clone(&input)); + input } }; - Ok(overlay) - } - - fn ready_overlay(&self, key: OverlayCacheKey) -> Option { - self.overlays.get(&key).and_then(|entry| entry.value().ready()) - } - - #[cfg(any(test, feature = "rayon"))] - fn remove_pending_overlay(&self, key: OverlayCacheKey) { - self.overlays.remove_if(&key, |_, entry| matches!(entry, OverlayCacheEntry::Pending)); + Ok(input) } /// Returns `preferred_anchor` if it is on the parent chain, otherwise the first missing parent. @@ -481,94 +469,6 @@ impl StateTrieOverlayManager { } } -/// State trie overlays ordered from highest to lowest precedence. -#[derive(Clone, Debug, Default)] -pub struct StateTrieOverlay { - /// Trie updates overlays. - pub trie_updates: TrieUpdatesOverlay, - /// Hashed post state overlays. - pub hashed_post_state: HashedPostStateOverlay, - trie_update_layers: Vec>, - hashed_post_state_layers: Vec>, -} - -impl StateTrieOverlay { - /// Create a new state trie overlay. - pub fn new( - trie_updates: Vec>, - hashed_post_state: Vec>, - ) -> Self { - Self { - trie_updates: TrieUpdatesOverlay::new(trie_updates.clone()), - hashed_post_state: HashedPostStateOverlay::new(hashed_post_state.clone()), - trie_update_layers: trie_updates, - hashed_post_state_layers: hashed_post_state, - } - } - - /// Returns `true` if this overlay has no layers. 
- pub const fn is_empty(&self) -> bool { - self.trie_update_layers.is_empty() && self.hashed_post_state_layers.is_empty() - } - - /// Add a trie updates layer at the end of the precedence stack. - pub fn push_trie_updates(&mut self, trie_updates: Arc) { - self.trie_update_layers.push(Arc::clone(&trie_updates)); - self.trie_updates.push_layer(trie_updates); - } - - /// Return this overlay with a trie updates layer appended to the precedence stack. - pub fn with_pushed_trie_updates(mut self, trie_updates: Arc) -> Self { - self.push_trie_updates(trie_updates); - self - } - - /// Add a hashed post-state layer at the end of the precedence stack. - pub fn push_hashed_post_state(&mut self, hashed_post_state: Arc) { - self.hashed_post_state_layers.push(Arc::clone(&hashed_post_state)); - self.hashed_post_state.push_layer(hashed_post_state); - } - - /// Return this overlay with a hashed post-state layer appended to the precedence stack. - pub fn with_pushed_hashed_post_state( - mut self, - hashed_post_state: Arc, - ) -> Self { - self.push_hashed_post_state(hashed_post_state); - self - } - - /// Add a hashed post-state layer at the beginning of the precedence stack. - pub fn prepend_hashed_post_state(&mut self, hashed_post_state: Arc) { - self.hashed_post_state_layers.insert(0, Arc::clone(&hashed_post_state)); - self.hashed_post_state.prepend_layer(hashed_post_state); - } - - /// Return this overlay with a hashed post-state layer prepended to the precedence stack. - pub fn with_prepended_hashed_post_state( - mut self, - hashed_post_state: Arc, - ) -> Self { - self.prepend_hashed_post_state(hashed_post_state); - self - } - - /// Total number of trie update entries across all layers. - pub fn trie_updates_total_len(&self) -> usize { - self.trie_update_layers.iter().map(|updates| updates.total_len()).sum() - } - - /// Total number of hashed post-state entries across all layers. 
- pub fn hashed_post_state_total_len(&self) -> usize { - self.hashed_post_state_layers.iter().map(|state| state.total_len()).sum() - } - - /// Consume the overlay into its original layer stacks. - pub fn into_layers(self) -> (Vec>, Vec>) { - (self.trie_update_layers, self.hashed_post_state_layers) - } -} - /// Error returned when a state trie overlay cannot be built from the manager's current block set. #[derive(Debug)] pub struct StateTrieOverlayError { @@ -596,33 +496,9 @@ struct OverlayCacheKey { tip_hash: B256, } -#[cfg_attr(not(any(test, feature = "rayon")), allow(dead_code))] -struct ResolvedOverlayPath { - blocks: Vec>, - cached_prefix: Option<(usize, StateTrieOverlay)>, -} - -#[cfg_attr(not(any(test, feature = "rayon")), allow(dead_code))] -enum OverlayCacheEntry { - /// An in-flight background cache fill. - /// - /// Read paths treat this as a cache miss so they can still return a lazy overlay stack. - Pending, - /// A flattened overlay ready for reuse. - Ready(StateTrieOverlay), -} - -impl OverlayCacheEntry { - const fn is_ready(&self) -> bool { - matches!(self, Self::Ready(_)) - } - - fn ready(&self) -> Option { - match self { - Self::Pending => None, - Self::Ready(overlay) => Some(overlay.clone()), - } - } +enum ComputeOverlayInput { + ExtendCached { blocks: Vec>, parent_input: Arc }, + MergeBlocks(Vec>), } #[tracing::instrument( @@ -636,20 +512,38 @@ impl OverlayCacheEntry { elapsed_us = tracing::field::Empty, ) )] -#[cfg(any(test, feature = "rayon"))] fn compute_overlay( - blocks: Vec>, - parent_overlay: StateTrieOverlay, + input: ComputeOverlayInput, anchor_hash: B256, metrics: &StateTrieOverlayMetrics, -) -> StateTrieOverlay { +) -> TrieInputSorted { let started_at = Instant::now(); - let block_count = blocks.len(); - let parent_overlay_reused = !parent_overlay.is_empty(); + let block_count = match &input { + ComputeOverlayInput::ExtendCached { blocks, .. 
} => blocks.len(), + ComputeOverlayInput::MergeBlocks(blocks) => blocks.len(), + }; + let parent_overlay = matches!(&input, ComputeOverlayInput::ExtendCached { .. }); tracing::Span::current().record("block_count", block_count); - tracing::Span::current().record("parent_overlay", parent_overlay_reused); + tracing::Span::current().record("parent_overlay", parent_overlay); - let overlay = flatten_overlay(blocks, parent_overlay); + let overlay = match input { + ComputeOverlayInput::ExtendCached { blocks, parent_input } => { + trace!( + target: "chain_state::state_trie_overlay", + %anchor_hash, + block_count = blocks.len(), + "extending cached parent state trie overlay" + ); + + let mut overlay = parent_input.as_ref().clone(); + for block in blocks.iter().rev() { + let trie_data = block.trie_data(); + extend_overlay(&mut overlay, &trie_data.hashed_state, &trie_data.trie_updates); + } + overlay + } + ComputeOverlayInput::MergeBlocks(blocks) => merge_blocks(blocks), + }; let elapsed = started_at.elapsed(); metrics.overlay_computation_duration_seconds.record(elapsed.as_secs_f64()); @@ -658,7 +552,7 @@ fn compute_overlay( target: "chain_state::state_trie_overlay", %anchor_hash, block_count, - parent_overlay = parent_overlay_reused, + parent_overlay, ?elapsed, "computed state trie overlay" ); @@ -666,97 +560,63 @@ fn compute_overlay( overlay } -#[cfg(any(test, feature = "rayon"))] -fn flatten_overlay( - blocks: Vec>, - parent_overlay: StateTrieOverlay, -) -> StateTrieOverlay { - let (parent_trie_updates, parent_hashed_post_state) = parent_overlay.into_layers(); - - if !parent_trie_updates.is_empty() || !parent_hashed_post_state.is_empty() { - let mut trie_updates = flatten_trie_update_layers(parent_trie_updates); - let mut hashed_post_state = flatten_hashed_post_state_layers(parent_hashed_post_state); - - for block in blocks.iter().rev() { - let trie_data = block.trie_data(); - - #[cfg(feature = "rayon")] - { - rayon::join( - || { - if !trie_data.trie_updates.is_empty() { - 
Arc::make_mut(&mut trie_updates) - .extend_ref_and_sort(&trie_data.trie_updates); - } - }, - || { - if !trie_data.hashed_state.is_empty() { - Arc::make_mut(&mut hashed_post_state) - .extend_ref_and_sort(&trie_data.hashed_state); - } - }, - ); - } - - #[cfg(not(feature = "rayon"))] - { - if !trie_data.trie_updates.is_empty() { - Arc::make_mut(&mut trie_updates).extend_ref_and_sort(&trie_data.trie_updates); - } - if !trie_data.hashed_state.is_empty() { - Arc::make_mut(&mut hashed_post_state) - .extend_ref_and_sort(&trie_data.hashed_state); - } - } - } - - return StateTrieOverlay::new(vec![trie_updates], vec![hashed_post_state]) - } +fn merge_blocks(blocks: Vec>) -> TrieInputSorted { + let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); #[cfg(feature = "rayon")] - let (trie_updates, hashed_post_state) = rayon::join( + let (nodes, state) = rayon::join( || { TrieUpdatesSorted::merge_batch( - blocks.iter().map(|block| Arc::clone(&block.trie_data().trie_updates)), + trie_data.iter().map(|data| Arc::clone(&data.trie_updates)), ) }, || { HashedPostStateSorted::merge_batch( - blocks.iter().map(|block| Arc::clone(&block.trie_data().hashed_state)), + trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), ) }, ); #[cfg(not(feature = "rayon"))] - let (trie_updates, hashed_post_state) = ( - TrieUpdatesSorted::merge_batch( - blocks.iter().map(|block| Arc::clone(&block.trie_data().trie_updates)), - ), + let (nodes, state) = ( + TrieUpdatesSorted::merge_batch(trie_data.iter().map(|data| Arc::clone(&data.trie_updates))), HashedPostStateSorted::merge_batch( - blocks.iter().map(|block| Arc::clone(&block.trie_data().hashed_state)), + trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), ), ); - StateTrieOverlay::new(vec![trie_updates], vec![hashed_post_state]) + TrieInputSorted::new(nodes, state, Default::default()) } -#[cfg(any(test, feature = "rayon"))] -fn flatten_trie_update_layers(layers: Vec>) -> Arc { - match layers.len() { - 0 => 
Arc::new(TrieUpdatesSorted::default()), - 1 => layers.into_iter().next().expect("len checked"), - _ => TrieUpdatesSorted::merge_batch(layers), +fn extend_overlay( + overlay: &mut TrieInputSorted, + hashed_state: &HashedPostStateSorted, + trie_updates: &TrieUpdatesSorted, +) { + #[cfg(feature = "rayon")] + { + rayon::join( + || { + if !hashed_state.is_empty() { + Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); + } + }, + || { + if !trie_updates.is_empty() { + Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); + } + }, + ); } -} -#[cfg(any(test, feature = "rayon"))] -fn flatten_hashed_post_state_layers( - layers: Vec>, -) -> Arc { - match layers.len() { - 0 => Arc::new(HashedPostStateSorted::default()), - 1 => layers.into_iter().next().expect("len checked"), - _ => HashedPostStateSorted::merge_batch(layers), + #[cfg(not(feature = "rayon"))] + { + if !hashed_state.is_empty() { + Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); + } + if !trie_updates.is_empty() { + Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); + } } } @@ -829,28 +689,22 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); - let overlay = + let (_, state) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 3); - assert_eq!(state_account_count(state), 3); + assert_eq!(state_account_count(&state), 3); let short_anchor = blocks[1].recovered_block().hash(); - let short_overlay = + let (_, short) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - let short = &short_overlay.hashed_post_state_layers; assert_eq!(short.len(), 1); - assert_eq!(state_account_count(short), 1); - manager.compute_and_cache_overlay( - OverlayCacheKey { - anchor_hash: short_anchor, - tip_hash: blocks[2].recovered_block().hash(), - }, - None, - ); - let cached_short_overlay = + 
assert_eq!(state_account_count(&short), 1); + manager.compute_and_cache_overlay(OverlayCacheKey { + anchor_hash: short_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }); + let (_, cached_short) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - let cached_short = &cached_short_overlay.hashed_post_state_layers; assert_eq!(cached_short.len(), 1); assert_eq!(cached_short[0].accounts.len(), 1); } @@ -865,40 +719,14 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let prefix_tip = blocks[1].recovered_block().hash(); - manager - .compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }, None); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }); - let overlay = + let (_, state) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - let state = &overlay.hashed_post_state_layers; assert_eq!(state.len(), 2); assert_eq!(state[0].accounts.len(), 1); assert_eq!(state[1].accounts.len(), 2); - assert_eq!(state_account_count(state), 3); - } - - #[test] - fn pending_overlay_entries_are_ignored_by_read_path() { - let manager = StateTrieOverlayManager::default(); - let blocks = test_blocks(); - for block in &blocks { - manager.insert_block(block.clone()); - } - - let anchor_hash = blocks[0].recovered_block().parent_hash(); - let prefix_tip = blocks[1].recovered_block().hash(); - let prefix_key = OverlayCacheKey { anchor_hash, tip_hash: prefix_tip }; - manager.overlays.insert(prefix_key, OverlayCacheEntry::Pending); - - let overlay = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - let state = &overlay.hashed_post_state_layers; - assert_eq!(state.len(), 3); - assert_eq!(state_account_count(state), 3); - assert!(matches!( - manager.overlays.get(&prefix_key).as_deref(), - Some(OverlayCacheEntry::Pending) - )); + assert_eq!(state_account_count(&state), 3); } #[test] @@ 
-949,17 +777,14 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let parent_hash = blocks[0].recovered_block().hash(); - manager.compute_and_cache_overlay( - OverlayCacheKey { anchor_hash, tip_hash: parent_hash }, - None, - ); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); let child_hash = blocks[1].recovered_block().hash(); manager.insert_block(blocks[1].clone()); let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; let deadline = Instant::now() + Duration::from_secs(5); - while manager.ready_overlay(child_key).is_none() { + while !manager.overlays.contains_key(&child_key) { assert!( Instant::now() < deadline, "timed out waiting for optimistically prepared child overlay" @@ -967,8 +792,7 @@ mod tests { thread::sleep(Duration::from_millis(10)); } - let overlay = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); - let state = &overlay.hashed_post_state_layers; + let (_, state) = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); assert_eq!(state.len(), 1); assert_eq!(state[0].accounts.len(), 2); } @@ -983,24 +807,18 @@ mod tests { let anchor_hash = blocks[0].recovered_block().parent_hash(); let parent_hash = blocks[0].recovered_block().hash(); - manager.compute_and_cache_overlay( - OverlayCacheKey { anchor_hash, tip_hash: parent_hash }, - None, - ); + manager.compute_and_cache_overlay(OverlayCacheKey { anchor_hash, tip_hash: parent_hash }); let child_hash = blocks[1].recovered_block().hash(); let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; - manager.overlays.insert(child_key, OverlayCacheEntry::Pending); + manager.pending_overlays.insert(child_key, ()); manager.insert_block(blocks[1].clone()); thread::sleep(Duration::from_millis(100)); - assert!(manager.ready_overlay(child_key).is_none()); - assert!(matches!( - manager.overlays.get(&child_key).as_deref(), - Some(OverlayCacheEntry::Pending) - )); - 
manager.overlays.remove(&child_key); + assert!(!manager.overlays.contains_key(&child_key)); + assert!(manager.pending_overlays.contains_key(&child_key)); + manager.pending_overlays.remove(&child_key); } #[test] @@ -1012,13 +830,10 @@ mod tests { } let original_anchor = blocks[0].recovered_block().parent_hash(); - manager.compute_and_cache_overlay( - OverlayCacheKey { - anchor_hash: original_anchor, - tip_hash: blocks[2].recovered_block().hash(), - }, - None, - ); + manager.compute_and_cache_overlay(OverlayCacheKey { + anchor_hash: original_anchor, + tip_hash: blocks[2].recovered_block().hash(), + }); manager.remove_blocks([ blocks[0].recovered_block().hash(), @@ -1030,9 +845,8 @@ mod tests { .overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor) .is_err()); - let overlay = + let (_, state) = manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - let state = &overlay.hashed_post_state_layers; - assert_eq!(state_account_count(state), 1); + assert_eq!(state_account_count(&state), 1); } } diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index fd2f4de85e5..3707e7ac2fe 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -1,4 +1,4 @@ -use super::overlay::{OverlayBuilder, OverlaySource}; +use super::overlay::{Overlay, OverlayBuilder, OverlaySource}; use crate::{ AccountReader, BlockHashReader, ChangeSetReader, EitherReader, HashedPostStateProvider, ProviderError, RocksDBProviderFactory, StateProvider, StateRootProvider, @@ -20,9 +20,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + hashed_cursor::HashedPostStateCursorFactory, proof::{Proof, StorageProof}, - trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + 
trie_cursor::InMemoryTrieCursorFactory, updates::{TrieUpdates, TrieUpdatesSorted}, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedPostStateSorted, HashedStorage, @@ -311,8 +311,8 @@ where let TrieInputSorted { nodes, state, prefix_sets } = input; let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) .with_overlay_source(Some(OverlaySource::Immediate { trie: nodes, state })); - let overlay = overlay_builder.build_overlay(self.provider)?; - let (trie_updates, hashed_post_state) = overlay.into_layers(); + let Overlay { trie_updates, hashed_post_state } = + overlay_builder.build_overlay(self.provider)?; Ok(TrieInputSorted::new( TrieUpdatesSorted::merge_batch(trie_updates), @@ -617,16 +617,14 @@ where reth_trie_db::with_adapter!(self.provider, |A| { let TrieInputSorted { nodes, state, prefix_sets } = self.build_overlay(TrieInputSorted::from_unsorted(input))?; - let nodes_overlay = TrieUpdatesOverlay::new(vec![nodes]); - let state_overlay = HashedPostStateOverlay::new(vec![state]); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - &nodes_overlay, + [nodes.as_ref()], ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - &state_overlay, + [state.as_ref()], ), ) .with_prefix_sets_mut(prefix_sets) diff --git a/crates/storage/provider/src/providers/state/latest.rs b/crates/storage/provider/src/providers/state/latest.rs index 9ae013e83ea..3908a41c454 100644 --- a/crates/storage/provider/src/providers/state/latest.rs +++ b/crates/storage/provider/src/providers/state/latest.rs @@ -9,9 +9,9 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ - hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + hashed_cursor::HashedPostStateCursorFactory, proof::{Proof, StorageProof}, - 
trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + trie_cursor::InMemoryTrieCursorFactory, updates::TrieUpdates, witness::TrieWitness, AccountProof, ExecutionWitnessMode, HashedPostState, HashedStorage, KeccakKeyHasher, @@ -19,7 +19,6 @@ use reth_trie::{ TrieInputSorted, }; use reth_trie_db::{DatabaseProof, DatabaseStateRoot, DatabaseStorageProof, DatabaseStorageRoot}; -use std::sync::Arc; type DbStateRoot<'a, TX, A> = StateRoot< reth_trie_db::DatabaseTrieCursorFactory<&'a TX, A>, @@ -227,17 +226,16 @@ impl StateProofProvider mode: ExecutionWitnessMode, ) -> ProviderResult> { reth_trie_db::with_adapter!(self.0, |A| { - let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); - let state_overlay = - HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); + let nodes_sorted = input.nodes.into_sorted(); + let state_sorted = input.state.into_sorted(); let witness = TrieWitness::new( InMemoryTrieCursorFactory::new( reth_trie_db::DatabaseTrieCursorFactory::<_, A>::new(self.tx()), - &nodes_overlay, + [&nodes_sorted], ), HashedPostStateCursorFactory::new( reth_trie_db::DatabaseHashedCursorFactory::new(self.tx()), - &state_overlay, + [&state_sorted], ), ) .with_prefix_sets_mut(input.prefix_sets) diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index c488165890e..c08b887759a 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -1,13 +1,13 @@ use alloy_eips::BlockNumHash; -use alloy_primitives::{BlockHash, BlockNumber, B256, U256}; +use alloy_primitives::{BlockHash, BlockNumber, B256}; use metrics::{Counter, Histogram}; -use reth_chain_state::{EthPrimitives, StateTrieOverlay, StateTrieOverlayManager}; +use reth_chain_state::{EthPrimitives, StateTrieOverlayManager}; use reth_db_api::{tables, transaction::DbTx, DatabaseError}; use reth_errors::{ProviderError, 
ProviderResult}; use reth_metrics::Metrics; use reth_primitives_traits::{ dashmap::{self, DashMap}, - Account, NodePrimitives, + NodePrimitives, }; use reth_prune_types::PruneSegment; use reth_stages_types::StageId; @@ -17,7 +17,7 @@ use reth_storage_api::{ StorageChangeSetReader, StorageSettingsCache, }; use reth_trie::{ - hashed_cursor::{HashedCursorFactory, HashedPostStateCursor}, + hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory}, trie_cursor::{InMemoryTrieCursor, TrieCursor, TrieCursorFactory, TrieStorageCursor}, updates::TrieUpdatesSorted, HashedPostStateSorted, @@ -50,10 +50,17 @@ pub(crate) struct OverlayStateProviderMetrics { hashed_state_size: Histogram, /// Overall duration of the [`OverlayStateProviderFactory::database_provider_ro`] call database_provider_ro_duration: Histogram, - /// Number of cache misses when fetching [`StateTrieOverlay`]s from the overlay cache. + /// Number of cache misses when fetching [`Overlay`]s from the overlay cache. overlay_cache_misses: Counter, } +/// Contains all fields required to initialize an [`OverlayStateProvider`]. +#[derive(Debug, Clone)] +pub(super) struct Overlay { + pub(super) trie_updates: Vec>, + pub(super) hashed_post_state: Vec>, +} + /// Source of overlay data for [`OverlayStateProviderFactory`]. #[derive(Debug, Clone)] pub(super) enum OverlaySource { @@ -165,22 +172,25 @@ impl OverlayBuilder { } /// Resolves the effective overlay (trie updates, hashed state). - fn resolve_overlays(&self, anchor_hash: BlockHash) -> ProviderResult { + fn resolve_overlays( + &self, + anchor_hash: BlockHash, + ) -> ProviderResult<(Vec>, Vec>)> { match &self.overlay_source { Some(OverlaySource::Managed { manager, state }) => { - let overlay = if anchor_hash == self.parent_hash { - StateTrieOverlay::default() + let (trie, mut overlay_state) = if anchor_hash == self.parent_hash { + (Vec::new(), Vec::new()) } else { manager .overlay_for_parent(self.parent_hash, anchor_hash) .map_err(ProviderError::other)? 
}; - Ok(if state.is_empty() { - overlay - } else { - overlay.with_prepended_hashed_post_state(Arc::clone(state)) - }) + if !state.is_empty() { + overlay_state.insert(0, Arc::clone(state)); + } + + Ok((trie, overlay_state)) } Some(OverlaySource::Immediate { trie, state }) => { if anchor_hash != self.parent_hash { @@ -191,9 +201,9 @@ impl OverlayBuilder { } let trie = (!trie.is_empty()).then(|| Arc::clone(trie)).into_iter().collect(); let state = (!state.is_empty()).then(|| Arc::clone(state)).into_iter().collect(); - Ok(StateTrieOverlay::new(trie, state)) + Ok((trie, state)) } - None => Ok(StateTrieOverlay::default()), + None => Ok((Vec::new(), Vec::new())), } } @@ -262,7 +272,7 @@ impl OverlayBuilder { Ok(Some(anchor_number + 1..=db_tip_block.number)) } - /// Calculates a new [`StateTrieOverlay`] given a transaction and the current db tip. + /// Calculates a new [`Overlay`] given a transaction and the current db tip. #[instrument( level = "debug", target = "providers::state::overlay", @@ -273,7 +283,7 @@ impl OverlayBuilder { &self, provider: &Provider, db_tip_block: BlockNumHash, - ) -> ProviderResult + ) -> ProviderResult where Provider: ChangeSetReader + StorageChangeSetReader @@ -307,7 +317,7 @@ impl OverlayBuilder { }; // Collect any reverts which are required to bring the DB view back to the anchor hash. - let overlay = if let Some(revert_blocks) = + let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = self.reverts_required(provider, db_tip_block, anchor_hash)? { debug!( @@ -345,18 +355,28 @@ impl OverlayBuilder { // Resolve overlays and extend reverts with them. // If reverts are empty, use overlays directly to avoid cloning. 
- let mut overlay = self.resolve_overlays(anchor_hash)?; - - if !trie_reverts.is_empty() { - overlay = overlay.with_pushed_trie_updates(Arc::new(trie_reverts)); - } + let (overlay_trie, overlay_state) = self.resolve_overlays(anchor_hash)?; + + let trie_updates = if trie_reverts.is_empty() { + overlay_trie + } else { + let mut trie_updates = overlay_trie; + trie_updates.push(Arc::new(trie_reverts)); + trie_updates + }; - if !hashed_state_reverts.is_empty() { - overlay = overlay.with_pushed_hashed_post_state(Arc::new(hashed_state_reverts)); - } + let hashed_state_updates = if hashed_state_reverts.is_empty() { + overlay_state + } else { + let mut hashed_state_updates = overlay_state; + hashed_state_updates.push(Arc::new(hashed_state_reverts)); + hashed_state_updates + }; - trie_updates_total_len = overlay.trie_updates_total_len(); - hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); + trie_updates_total_len = + trie_updates.iter().map(|updates| updates.total_len()).sum::(); + hashed_state_updates_total_len = + hashed_state_updates.iter().map(|state| state.total_len()).sum::(); debug!( target: "providers::state::overlay", @@ -365,17 +385,19 @@ impl OverlayBuilder { "Reverted to anchor block", ); - overlay + (trie_updates, hashed_state_updates) } else { // If no reverts are needed then the db tip is the anchor hash. Use overlays directly. 
- let overlay = self.resolve_overlays(db_tip_block.hash)?; + let (trie_updates, hashed_state) = self.resolve_overlays(db_tip_block.hash)?; retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; - trie_updates_total_len = overlay.trie_updates_total_len(); - hashed_state_updates_total_len = overlay.hashed_post_state_total_len(); + trie_updates_total_len = + trie_updates.iter().map(|updates| updates.total_len()).sum::(); + hashed_state_updates_total_len = + hashed_state.iter().map(|state| state.total_len()).sum::(); - overlay + (trie_updates, hashed_state) }; // Record metrics @@ -388,15 +410,12 @@ impl OverlayBuilder { self.metrics.trie_updates_size.record(trie_updates_total_len as f64); self.metrics.hashed_state_size.record(hashed_state_updates_total_len as f64); - Ok(overlay) + Ok(Overlay { trie_updates, hashed_post_state }) } /// Builds the effective overlay for the given provider. #[instrument(level = "debug", target = "providers::state::overlay", skip_all)] - pub(super) fn build_overlay( - &self, - provider: &Provider, - ) -> ProviderResult + pub(super) fn build_overlay(&self, provider: &Provider) -> ProviderResult where Provider: StageCheckpointReader + PruneCheckpointReader @@ -421,10 +440,9 @@ pub struct OverlayStateProviderFactory { factory: F, /// Overlay builder containing the configuration and overlay calculation logic. overlay_builder: OverlayBuilder, - /// A cache which maps `db_tip -> StateTrieOverlay`. If the db tip changes during usage of the - /// factory then a new entry will get added to this, but in most cases only one entry is - /// present. - overlay_cache: Arc>>, + /// A cache which maps `db_tip -> Overlay`. If the db tip changes during usage of the factory + /// then a new entry will get added to this, but in most cases only one entry is present. 
+ overlay_cache: Arc>, } impl OverlayStateProviderFactory { @@ -450,10 +468,10 @@ impl OverlayStateProviderFactory { self } - /// Fetches a [`StateTrieOverlay`] from the cache based on the current db tip block. If there is - /// no cached value then this calculates the [`StateTrieOverlay`] and populates the cache. + /// Fetches an [`Overlay`] from the cache based on the current db tip block. If there is no + /// cached value then this calculates the [`Overlay`] and populates the cache. #[instrument(level = "debug", target = "providers::state::overlay", skip_all)] - fn get_overlay(&self, provider: &Provider) -> ProviderResult> + fn get_overlay(&self, provider: &Provider) -> ProviderResult where Provider: StageCheckpointReader + PruneCheckpointReader @@ -466,11 +484,11 @@ impl OverlayStateProviderFactory { let db_tip_block = self.overlay_builder.get_db_tip_block(provider)?; let overlay = match self.overlay_cache.entry(db_tip_block.hash) { - dashmap::Entry::Occupied(entry) => Arc::clone(entry.get()), + dashmap::Entry::Occupied(entry) => entry.get().clone(), dashmap::Entry::Vacant(entry) => { self.overlay_builder.metrics.overlay_cache_misses.increment(1); - let overlay = Arc::new(self.overlay_builder.build_overlay(provider)?); - entry.insert(Arc::clone(&overlay)); + let overlay = self.overlay_builder.build_overlay(provider)?; + entry.insert(overlay.clone()); overlay } }; @@ -506,11 +524,11 @@ where res }; - let overlay = self.get_overlay(&provider)?; + let Overlay { trie_updates, hashed_post_state } = self.get_overlay(&provider)?; let is_v2 = provider.cached_storage_settings().is_v2(); self.overlay_builder.metrics.database_provider_ro_duration.record(overall_start.elapsed()); - Ok(OverlayStateProvider::new(provider, overlay, is_v2)) + Ok(OverlayStateProvider::new(provider, trie_updates, hashed_post_state, is_v2)) } } @@ -522,7 +540,8 @@ where #[derive(Debug)] pub struct OverlayStateProvider { provider: Provider, - overlay: Arc, + trie_updates: Vec>, + 
hashed_post_state: Vec>, is_v2: bool, } @@ -532,8 +551,13 @@ where { /// Create new overlay state provider. The `Provider` must be cloneable, which generally means /// it should be wrapped in an `Arc`. - pub const fn new(provider: Provider, overlay: Arc, is_v2: bool) -> Self { - Self { provider, overlay, is_v2 } + pub fn new( + provider: Provider, + trie_updates: Vec>, + hashed_post_state: Vec>, + is_v2: bool, + ) -> Self { + Self { provider, trie_updates, hashed_post_state, is_v2 } } } @@ -563,7 +587,7 @@ where tx.cursor_read::()?, )) }; - Ok(InMemoryTrieCursor::new_account(cursor, &self.overlay.trie_updates)) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.iter().map(Arc::as_ref))) } fn storage_trie_cursor( @@ -582,7 +606,11 @@ where hashed_address, )) }; - Ok(InMemoryTrieCursor::new_storage(cursor, &self.overlay.trie_updates, hashed_address)) + Ok(InMemoryTrieCursor::new_storage( + cursor, + self.trie_updates.iter().map(Arc::as_ref), + hashed_address, + )) } } @@ -591,27 +619,29 @@ where Provider: DBProvider, { type AccountCursor<'a> - = HashedPostStateCursor< + = as HashedCursorFactory>::AccountCursor<'a>, - Option, - > + DatabaseHashedCursorFactory<&'a Provider::Tx>, + Vec<&'a HashedPostStateSorted>, + > as HashedCursorFactory>::AccountCursor<'a> where Self: 'a; type StorageCursor<'a> - = HashedPostStateCursor< + = as HashedCursorFactory>::StorageCursor<'a>, - U256, - > + DatabaseHashedCursorFactory<&'a Provider::Tx>, + Vec<&'a HashedPostStateSorted>, + > as HashedCursorFactory>::StorageCursor<'a> where Self: 'a; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let cursor = db_hashed_cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account(cursor, &self.overlay.hashed_post_state)) + let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); + let hashed_cursor_factory = + 
HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); + hashed_cursor_factory.hashed_account_cursor() } fn hashed_storage_cursor( @@ -619,12 +649,10 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let db_hashed_cursor_factory = DatabaseHashedCursorFactory::new(self.provider.tx_ref()); - let cursor = db_hashed_cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage( - cursor, - &self.overlay.hashed_post_state, - hashed_address, - )) + let hashed_post_state = self.hashed_post_state.iter().map(Arc::as_ref).collect::>(); + let hashed_cursor_factory = + HashedPostStateCursorFactory::new(db_hashed_cursor_factory, hashed_post_state); + hashed_cursor_factory.hashed_storage_cursor(hashed_address) } } @@ -640,9 +668,9 @@ mod tests { let builder = OverlayBuilder::::new(parent_hash, ChangesetCache::default()) .with_state_trie_overlay_manager(StateTrieOverlayManager::default()); - let overlay = builder.resolve_overlays(parent_hash).unwrap(); - assert!(overlay.trie_updates.is_empty()); - assert!(overlay.hashed_post_state.is_empty()); + let (trie, state) = builder.resolve_overlays(parent_hash).unwrap(); + assert!(trie.is_empty()); + assert!(state.is_empty()); } #[test] diff --git a/crates/trie/db/src/changesets.rs b/crates/trie/db/src/changesets.rs index 75cd2304212..e8ab5eb31b7 100644 --- a/crates/trie/db/src/changesets.rs +++ b/crates/trie/db/src/changesets.rs @@ -20,7 +20,7 @@ use reth_storage_api::{ use reth_storage_errors::provider::{ProviderError, ProviderResult}; use reth_trie::{ changesets::compute_trie_changesets, - trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory, TrieUpdatesOverlay}, + trie_cursor::{InMemoryTrieCursorFactory, TrieCursor, TrieCursorFactory}, TrieInputSorted, }; use reth_trie_common::updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}; @@ -155,8 +155,8 @@ where // Step 5: Compute changesets using cumulative trie updates for block-1 as overlay // Create 
an overlay cursor factory that has the trie state from after block-1 let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()); - let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(cumulative_trie_updates_prev)]); - let overlay_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); + let overlay_factory = + InMemoryTrieCursorFactory::new(db_cursor_factory, [&cumulative_trie_updates_prev]); let changesets = compute_trie_changesets(&overlay_factory, &trie_updates).map_err(ProviderError::other)?; @@ -262,8 +262,7 @@ where // Step 4: Create an InMemoryTrieCursorFactory with the reverts // This gives us the trie state as it was after the target block was processed let db_cursor_factory = DatabaseTrieCursorFactory::<_, A>::new(tx); - let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(reverts)]); - let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, &trie_overlay); + let cursor_factory = InMemoryTrieCursorFactory::new(db_cursor_factory, [&reverts]); // Step 5: Collect all account trie nodes that changed in the target block let account_nodes_ref = changesets.account_nodes_ref(); diff --git a/crates/trie/db/src/proof.rs b/crates/trie/db/src/proof.rs index 5e07a57aebc..f44dc19cc03 100644 --- a/crates/trie/db/src/proof.rs +++ b/crates/trie/db/src/proof.rs @@ -3,13 +3,12 @@ use alloy_primitives::{keccak256, map::HashMap, Address, B256}; use reth_db_api::transaction::DbTx; use reth_execution_errors::StateProofError; use reth_trie::{ - hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, + hashed_cursor::HashedPostStateCursorFactory, proof::{Proof, StorageProof}, - trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + trie_cursor::InMemoryTrieCursorFactory, AccountProof, HashedPostStateSorted, HashedStorage, MultiProof, MultiProofTargets, StorageMultiProof, TrieInput, }; -use std::sync::Arc; /// Extends [`Proof`] with operations specific for working with a database 
transaction. pub trait DatabaseProof<'a> { @@ -49,11 +48,14 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> address: Address, slots: &[B256], ) -> Result { - let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); + let nodes_sorted = input.nodes.into_sorted(); + let state_sorted = input.state.into_sorted(); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), - HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), + HashedPostStateCursorFactory::new( + self.hashed_cursor_factory().clone(), + [&state_sorted], + ), ) .with_prefix_sets_mut(input.prefix_sets) .account_proof(address, slots) @@ -64,11 +66,14 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseProof<'a> input: TrieInput, targets: MultiProofTargets, ) -> Result { - let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::new(input.nodes.into_sorted())]); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(input.state.into_sorted())]); + let nodes_sorted = input.nodes.into_sorted(); + let state_sorted = input.state.into_sorted(); Proof::new( - InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), &nodes_overlay), - HashedPostStateCursorFactory::new(self.hashed_cursor_factory().clone(), &state_overlay), + InMemoryTrieCursorFactory::new(self.trie_cursor_factory().clone(), [&nodes_sorted]), + HashedPostStateCursorFactory::new( + self.hashed_cursor_factory().clone(), + [&state_sorted], + ), ) .with_prefix_sets_mut(input.prefix_sets) .multiproof(targets) @@ -124,10 +129,12 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); - let state_overlay = 
HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, ) .with_prefix_set_mut(prefix_set) @@ -147,10 +154,12 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageProof<'a, TX> Default::default(), HashMap::from_iter([(hashed_address, storage.into_sorted())]), ); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageProof::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, ) .with_prefix_set_mut(prefix_set) diff --git a/crates/trie/db/src/state.rs b/crates/trie/db/src/state.rs index 4763cc16302..0a618b30f4e 100644 --- a/crates/trie/db/src/state.rs +++ b/crates/trie/db/src/state.rs @@ -10,15 +10,13 @@ use reth_storage_api::{ }; use reth_storage_errors::provider::ProviderError; use reth_trie::{ - hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, - trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, - updates::TrieUpdates, - HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, TrieInputSorted, + hashed_cursor::HashedPostStateCursorFactory, trie_cursor::InMemoryTrieCursorFactory, + updates::TrieUpdates, HashedPostStateSorted, HashedStorageSorted, StateRoot, StateRootProgress, + TrieInputSorted, }; use std::{ collections::HashSet, ops::{Bound, RangeBounds, RangeInclusive}, - sync::Arc, }; use tracing::{debug, instrument}; @@ -210,10 +208,9 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result { let prefix_sets = post_state.construct_prefix_sets().freeze(); 
- let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), ) .with_prefix_sets(prefix_sets) .root() @@ -224,24 +221,24 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> post_state: &HashedPostStateSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { let prefix_sets = post_state.construct_prefix_sets().freeze(); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state.clone())]); StateRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), [post_state]), ) .with_prefix_sets(prefix_sets) .root_with_updates() } fn overlay_root_from_nodes(tx: &'a TX, input: TrieInputSorted) -> Result { - let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - &nodes_overlay, + [input.nodes.as_ref()], + ), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [input.state.as_ref()], ), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root() @@ -251,14 +248,15 @@ impl<'a, TX: DbTx, A: crate::TrieTableAdapter> DatabaseStateRoot<'a, TX> tx: &'a TX, input: TrieInputSorted, ) -> Result<(B256, TrieUpdates), StateRootError> { - let nodes_overlay = TrieUpdatesOverlay::new(vec![Arc::clone(&input.nodes)]); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::clone(&input.state)]); StateRoot::new( 
InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - &nodes_overlay, + [input.nodes.as_ref()], + ), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [input.state.as_ref()], ), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), ) .with_prefix_sets(input.prefix_sets.freeze()) .root_with_updates() diff --git a/crates/trie/db/src/storage.rs b/crates/trie/db/src/storage.rs index 2247036f9e5..6e3edd42eae 100644 --- a/crates/trie/db/src/storage.rs +++ b/crates/trie/db/src/storage.rs @@ -5,10 +5,8 @@ use reth_execution_errors::StorageRootError; use reth_storage_api::{BlockNumReader, StorageChangeSetReader}; use reth_storage_errors::provider::ProviderResult; use reth_trie::{ - hashed_cursor::{HashedPostStateCursorFactory, HashedPostStateOverlay}, - HashedPostState, HashedStorage, StorageRoot, + hashed_cursor::HashedPostStateCursorFactory, HashedPostState, HashedStorage, StorageRoot, }; -use std::sync::Arc; #[cfg(feature = "metrics")] use reth_trie::metrics::TrieRootMetrics; @@ -92,10 +90,12 @@ impl<'a, TX: DbTx, A: TrieTableAdapter> DatabaseStorageRoot<'a, TX> let prefix_set = hashed_storage.construct_prefix_set().freeze(); let state_sorted = HashedPostState::from_hashed_storage(keccak256(address), hashed_storage).into_sorted(); - let state_overlay = HashedPostStateOverlay::new(vec![Arc::new(state_sorted)]); StorageRoot::new( DatabaseTrieCursorFactory::<_, A>::new(tx), - HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_overlay), + HashedPostStateCursorFactory::new( + DatabaseHashedCursorFactory::new(tx), + [&state_sorted], + ), address, prefix_set, #[cfg(feature = "metrics")] diff --git a/crates/trie/db/tests/fuzz_in_memory_nodes.rs b/crates/trie/db/tests/fuzz_in_memory_nodes.rs index 04099cb9b5f..dd58f5f3967 100644 --- a/crates/trie/db/tests/fuzz_in_memory_nodes.rs +++ b/crates/trie/db/tests/fuzz_in_memory_nodes.rs @@ -12,14 +12,14 @@ use 
reth_provider::test_utils::create_test_provider_factory; use reth_storage_api::StorageSettingsCache; use reth_trie::{ test_utils::{state_root_prehashed, storage_root_prehashed}, - trie_cursor::{InMemoryTrieCursorFactory, TrieUpdatesOverlay}, + trie_cursor::InMemoryTrieCursorFactory, updates::TrieUpdates, HashedPostState, HashedStorage, StateRoot, StorageRoot, }; use reth_trie_db::{ DatabaseHashedCursorFactory, DatabaseStateRoot, DatabaseStorageRoot, DatabaseTrieCursorFactory, }; -use std::{collections::BTreeMap, sync::Arc}; +use std::collections::BTreeMap; type DbStateRoot<'a, TX, A> = StateRoot, DatabaseHashedCursorFactory<&'a TX>>; @@ -65,13 +65,11 @@ proptest! { } // Compute root with in-memory trie nodes overlay - let trie_overlay = - TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.clone().into_sorted())]); let (state_root, trie_updates) = DbStateRoot::<_, A>::from_tx(provider.tx_ref()) .with_prefix_sets(hashed_state.construct_prefix_sets().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - trie_overlay, + [&trie_nodes.clone().into_sorted()], )) .root_with_updates() .unwrap(); @@ -124,13 +122,12 @@ proptest! 
{ // Compute root with in-memory trie nodes overlay let mut trie_nodes = TrieUpdates::default(); trie_nodes.insert_storage_updates(hashed_address, storage_trie_nodes.clone()); - let trie_overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_nodes.into_sorted())]); let (storage_root, _, trie_updates) = DbStorageRoot::<_, A>::from_tx_hashed(provider.tx_ref(), hashed_address) .with_prefix_set(hashed_storage.construct_prefix_set().freeze()) .with_trie_cursor_factory(InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::<_, A>::new(provider.tx_ref()), - trie_overlay, + [&trie_nodes.into_sorted()], )) .root_with_updates() .unwrap(); diff --git a/crates/trie/db/tests/post_state.rs b/crates/trie/db/tests/post_state.rs index 9187b0386c9..a3ee272d05a 100644 --- a/crates/trie/db/tests/post_state.rs +++ b/crates/trie/db/tests/post_state.rs @@ -8,17 +8,12 @@ use reth_db_api::{database::Database, transaction::DbTxMut}; use reth_primitives_traits::{Account, StorageEntry}; use reth_trie::{ hashed_cursor::{ - HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedPostStateOverlay, - HashedStorageCursor, + HashedCursor, HashedCursorFactory, HashedPostStateCursorFactory, HashedStorageCursor, }, - HashedPostState, HashedPostStateSorted, HashedStorage, + HashedPostState, HashedStorage, }; use reth_trie_db::DatabaseHashedCursorFactory; -use std::{collections::BTreeMap, sync::Arc}; - -fn post_state_overlay(sorted: &HashedPostStateSorted) -> HashedPostStateOverlay { - HashedPostStateOverlay::new(vec![Arc::new(sorted.clone())]) -} +use std::collections::BTreeMap; fn assert_account_cursor_order( factory: &impl HashedCursorFactory, @@ -71,10 +66,8 @@ fn post_state_only_accounts() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), 
[&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -95,7 +88,7 @@ fn db_only_accounts() { let tx = db.tx().unwrap(); let factory = HashedPostStateCursorFactory::new( DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted_post_state), + [&sorted_post_state], ); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -121,10 +114,8 @@ fn account_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -154,10 +145,8 @@ fn removed_accounts_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); assert_account_cursor_order(&factory, expected); } @@ -184,10 +173,8 @@ fn post_state_accounts_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -219,7 +206,7 @@ fn fuzz_hashed_account_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); + let factory = 
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_account_cursor_order(&factory, expected.into_iter()); } ); @@ -246,10 +233,8 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -269,10 +254,8 @@ fn storage_is_empty() { { let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -288,10 +271,8 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -306,10 +287,8 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); 
assert!(cursor.is_storage_empty().unwrap()); } @@ -325,10 +304,8 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(cursor.is_storage_empty().unwrap()); } @@ -344,10 +321,8 @@ fn storage_is_empty() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); assert!(!cursor.is_storage_empty().unwrap()); } @@ -383,10 +358,8 @@ fn storage_cursor_correct_order() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, db_storage.into_iter().chain(post_state_storage).collect())); assert_storage_cursor_order(&factory, expected); @@ -426,10 +399,8 @@ fn zero_value_storage_entries_are_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once(( address, post_state_storage.into_iter().filter(|(_, value)| *value > U256::ZERO).collect(), @@ -466,10 
+437,8 @@ fn wiped_storage_is_discarded() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, post_state_storage)); assert_storage_cursor_order(&factory, expected); } @@ -504,10 +473,8 @@ fn post_state_storages_take_precedence() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let expected = std::iter::once((address, storage)); assert_storage_cursor_order(&factory, expected); } @@ -554,7 +521,7 @@ fn fuzz_hashed_storage_cursor() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), post_state_overlay(&sorted)); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); assert_storage_cursor_order(&factory, expected.into_iter()); }); } @@ -601,10 +568,8 @@ fn all_storage_slots_deleted_not_wiped_exact_keys() { let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new( - DatabaseHashedCursorFactory::new(&tx), - post_state_overlay(&sorted), - ); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), [&sorted]); let mut cursor = factory.hashed_storage_cursor(address).unwrap(); diff --git a/crates/trie/sparse/src/parallel.rs b/crates/trie/sparse/src/parallel.rs index d4ccd56e2e1..7539a490c7b 100644 --- a/crates/trie/sparse/src/parallel.rs +++ b/crates/trie/sparse/src/parallel.rs @@ 
-3426,7 +3426,7 @@ mod tests { test_utils::create_test_provider_factory, StorageSettingsCache, TrieWriter, }; use reth_trie::{ - hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor, HashedPostStateOverlay}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopAccountTrieCursor, TrieCursor, TrieCursorFactory}, walker::TrieWalker, @@ -3728,13 +3728,11 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); - let hashed_post_state = - HashedPostStateOverlay::new(vec![alloc::sync::Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - &hashed_post_state, + [&hashed_post_state], ), ); diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs new file mode 100644 index 00000000000..eafdfbb8ed5 --- /dev/null +++ b/crates/trie/trie/src/forward_cursor.rs @@ -0,0 +1,187 @@ +/// The implementation of forward-only in memory cursor over the entries. +/// +/// The cursor operates under the assumption that the supplied collection is pre-sorted. +#[derive(Debug)] +pub struct ForwardInMemoryCursor<'a, K, V> { + /// The reference to the pre-sorted collection of entries. + entries: &'a [(K, V)], + /// Current index in the collection. + idx: usize, +} + +impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { + /// Create new forward cursor positioned at the beginning of the collection. + /// + /// The cursor expects all of the entries to have been sorted in advance. + #[inline] + pub const fn new(entries: &'a [(K, V)]) -> Self { + Self { entries, idx: 0 } + } + + /// Returns `true` if the cursor is empty, regardless of its position. + #[inline] + pub const fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Returns `true` if any entry satisfies the predicate. 
+ #[inline] + pub fn has_any(&self, predicate: F) -> bool + where + F: Fn(&(K, V)) -> bool, + { + self.entries.iter().any(predicate) + } + + /// Returns the current entry pointed to be the cursor, or `None` if no entries are left. + #[inline] + pub fn current(&self) -> Option<&(K, V)> { + self.entries.get(self.idx) + } + + /// Resets the cursor to the beginning of the collection. + #[inline] + pub const fn reset(&mut self) { + self.idx = 0; + } + + #[inline] + fn next(&mut self) -> Option<&(K, V)> { + let entry = self.entries.get(self.idx)?; + self.idx += 1; + Some(entry) + } +} + +/// Threshold for remaining entries above which binary search is used instead of linear scan. +/// For small slices, linear scan has better cache locality and lower overhead. +const BINARY_SEARCH_THRESHOLD: usize = 64; + +impl ForwardInMemoryCursor<'_, K, V> { + /// Returns the first entry from the current cursor position that's greater or equal to the + /// provided key. This method advances the cursor forward. + pub fn seek(&mut self, key: &K) -> Option<&(K, V)> { + self.advance_while(|k| k < key) + } + + /// Returns the first entry from the current cursor position that's greater than the provided + /// key. This method advances the cursor forward. + pub fn first_after(&mut self, key: &K) -> Option<&(K, V)> { + self.advance_while(|k| k <= key) + } + + /// Advances the cursor forward while `predicate` returns `true` or until the collection is + /// exhausted. + /// + /// Uses binary search for large remaining slices (>= 64 entries), linear scan for small ones. + /// + /// Returns the first entry for which `predicate` returns `false` or `None`. The cursor will + /// point to the returned entry. 
+ fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<&(K, V)> { + let remaining = self.entries.len().saturating_sub(self.idx); + if remaining >= BINARY_SEARCH_THRESHOLD { + let slice = &self.entries[self.idx..]; + let pos = slice.partition_point(|(k, _)| predicate(k)); + self.idx += pos; + } else { + while self.current().is_some_and(|(k, _)| predicate(k)) { + self.next(); + } + } + self.current() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cursor_small() { + let mut cursor = ForwardInMemoryCursor::new(&[(1, ()), (2, ()), (3, ()), (4, ()), (5, ())]); + assert_eq!(cursor.current(), Some(&(1, ()))); + + assert_eq!(cursor.seek(&0), Some(&(1, ()))); + assert_eq!(cursor.current(), Some(&(1, ()))); + + assert_eq!(cursor.seek(&3), Some(&(3, ()))); + assert_eq!(cursor.current(), Some(&(3, ()))); + + assert_eq!(cursor.seek(&3), Some(&(3, ()))); + assert_eq!(cursor.current(), Some(&(3, ()))); + + assert_eq!(cursor.seek(&4), Some(&(4, ()))); + assert_eq!(cursor.current(), Some(&(4, ()))); + + assert_eq!(cursor.seek(&6), None); + assert_eq!(cursor.current(), None); + } + + #[test] + fn test_cursor_large_binary_search() { + // Create a large enough collection to trigger binary search + let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); + let mut cursor = ForwardInMemoryCursor::new(&entries); + + // Seek to beginning + assert_eq!(cursor.seek(&0), Some(&(0, ()))); + assert_eq!(cursor.idx, 0); + + // Seek to middle (should use binary search) + assert_eq!(cursor.seek(&100), Some(&(100, ()))); + assert_eq!(cursor.idx, 50); + + // Seek to non-existent key (should find next greater) + assert_eq!(cursor.seek(&101), Some(&(102, ()))); + assert_eq!(cursor.idx, 51); + + // Seek to end + assert_eq!(cursor.seek(&398), Some(&(398, ()))); + assert_eq!(cursor.idx, 199); + + // Seek past end + assert_eq!(cursor.seek(&1000), None); + } + + #[test] + fn test_first_after_large() { + let entries: Vec<(i32, ())> = (0..200).map(|i| 
(i * 2, ())).collect(); + let mut cursor = ForwardInMemoryCursor::new(&entries); + + // first_after should find strictly greater + assert_eq!(cursor.first_after(&0), Some(&(2, ()))); + assert_eq!(cursor.idx, 1); + + // Reset and test from beginning + cursor.reset(); + assert_eq!(cursor.first_after(&99), Some(&(100, ()))); + + // first_after on exact match + cursor.reset(); + assert_eq!(cursor.first_after(&100), Some(&(102, ()))); + } + + #[test] + fn test_cursor_consistency() { + // Verify binary search and linear scan produce same results + let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 3, ())).collect(); + + for search_key in [0, 1, 3, 50, 150, 299, 300, 597, 598, 599, 1000] { + // Test with fresh cursor (binary search path) + let mut cursor1 = ForwardInMemoryCursor::new(&entries); + let result1 = cursor1.seek(&search_key); + + // Manually advance to trigger linear path by getting close first + let mut cursor2 = ForwardInMemoryCursor::new(&entries); + if search_key > 100 { + cursor2.seek(&(search_key - 50)); + } + let result2 = cursor2.seek(&search_key); + + assert_eq!( + result1, result2, + "Mismatch for key {search_key}: binary={result1:?}, linear={result2:?}" + ); + } + } +} diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index b8e38bf5be8..b235d1f5a7d 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,10 +1,9 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}; -use alloy_primitives::{map::B256Map, B256, U256}; +use alloy_primitives::{B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; use reth_trie_common::HashedPostStateSorted; -use std::{marker::PhantomData, sync::Arc}; +use std::marker::PhantomData; /// The hashed cursor factory for the post state. 
#[derive(Clone, Debug)] @@ -24,20 +23,20 @@ impl<'overlay, CF, T> HashedPostStateCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> HashedCursorFactory for HashedPostStateCursorFactory<'overlay, CF, T> where CF: HashedCursorFactory + 'overlay, - T: AsRef, + T: AsRef<[&'overlay HashedPostStateSorted]>, { type AccountCursor<'cursor> - = HashedPostStateCursor<'cursor, CF::AccountCursor<'cursor>, Option> + = HashedPostStateCursor<'overlay, CF::AccountCursor<'cursor>, Option> where Self: 'cursor; type StorageCursor<'cursor> - = HashedPostStateCursor<'cursor, CF::StorageCursor<'cursor>, U256> + = HashedPostStateCursor<'overlay, CF::StorageCursor<'cursor>, U256> where Self: 'cursor; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref())) + Ok(HashedPostStateCursor::new_account(cursor, self.post_state.as_ref().iter().copied())) } fn hashed_storage_cursor( @@ -45,7 +44,11 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateCursor::new_storage(cursor, self.post_state.as_ref(), hashed_address)) + Ok(HashedPostStateCursor::new_storage( + cursor, + self.post_state.as_ref().iter().copied(), + hashed_address, + )) } } @@ -94,7 +97,7 @@ where /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over post state overlays. post_state_cursor: PostStateOverlayCursor<'a, V>, /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. @@ -105,17 +108,189 @@ where #[cfg(debug_assertions)] /// Tracks whether `seek` has been called. seeked: bool, - /// Source of post-state overlays. - post_states: &'a HashedPostStateOverlay, + /// Reference to the full post state. 
+ post_states: Vec<&'a HashedPostStateSorted>, +} + +#[derive(Debug)] +enum DbCursorState { + Active(Option<(B256, V)>), + Wiped, +} + +impl DbCursorState { + const fn new(cursor_wiped: bool) -> Self { + if cursor_wiped { + Self::Wiped + } else { + Self::Active(None) + } + } + + const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + + const fn entry(&self) -> Option<&(B256, V)> { + match self { + Self::Active(entry) => entry.as_ref(), + Self::Wiped => None, + } + } + + fn set_entry(&mut self, entry: Option<(B256, V)>) { + if let Self::Active(current) = self { + *current = entry; + } + } +} + +#[derive(Clone, Debug)] +struct PostStateOverlayCursor<'a, V> { + cursors: Vec>, +} + +impl<'a> PostStateOverlayCursor<'a, Option> { + fn account(post_states: &[&'a HashedPostStateSorted]) -> Self { + Self { + cursors: post_states + .iter() + .map(|post_state| SeekablePostStateCursor::new(post_state.accounts.as_slice())) + .collect(), + } + } +} + +impl<'a> PostStateOverlayCursor<'a, U256> { + fn storage(post_states: &[&'a HashedPostStateSorted], hashed_address: B256) -> (Self, bool) { + let mut cursors = Vec::new(); + let mut db_wiped = false; + + for post_state in post_states { + if let Some(storage) = post_state.storages.get(&hashed_address) { + cursors.push(SeekablePostStateCursor::new(storage.storage_slots_ref())); + if storage.is_wiped() { + db_wiped = true; + break; + } + } + } + + (Self { cursors }, db_wiped) + } +} + +impl<'a, V> PostStateOverlayCursor<'a, V> +where + V: HashedPostStateCursorValue, +{ + fn seek_from(&mut self, start: usize, key: &B256) { + for cursor in self.cursors.iter_mut().skip(start) { + cursor.seek(key); + } + } + + fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { + for (idx, cursor) in self.cursors.iter_mut().enumerate() { + if let Some((cursor_key, value)) = cursor.seek(key) && + cursor_key == key + { + return Some((idx, value.into_option())) + } + } + None + } + + fn first_after(&mut self, key: &B256) { 
+ for cursor in &mut self.cursors { + cursor.first_after(key); + } + } + + fn reset(&mut self) { + for cursor in &mut self.cursors { + cursor.reset(); + } + } + + fn min_current_key(&self) -> Option { + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + } + + fn highest_priority_value_at(&self, key: &B256) -> Option> { + self.cursors.iter().find_map(|cursor| { + let (cursor_key, value) = cursor.current()?; + (cursor_key == key).then(|| value.into_option()) + }) + } + + fn advance_key(&mut self, key: &B256) { + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { + cursor.first_after(key); + } + } + } + + fn has_visible_value(&self) -> bool { + let mut cursor = self.clone(); + cursor.reset(); + while let Some(key) = cursor.min_current_key() { + if cursor.highest_priority_value_at(&key).flatten().is_some() { + return true + } + cursor.advance_key(&key); + } + false + } +} + +#[derive(Clone, Debug)] +struct SeekablePostStateCursor<'a, V> { + entries: &'a [(B256, V)], + idx: usize, +} + +impl<'a, V> SeekablePostStateCursor<'a, V> { + const fn new(entries: &'a [(B256, V)]) -> Self { + Self { entries, idx: 0 } + } + + fn current(&self) -> Option<&'a (B256, V)> { + self.entries.get(self.idx) + } + + const fn reset(&mut self) { + self.idx = 0; + } + + fn seek(&mut self, key: &B256) -> Option<&'a (B256, V)> { + self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); + self.current() + } + + fn first_after(&mut self, key: &B256) -> Option<&'a (B256, V)> { + if self.current().is_some_and(|(entry_key, _)| entry_key > key) { + return self.current() + } + + let remaining = &self.entries[self.idx..]; + self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); + self.current() + } } impl<'a, C> HashedPostStateCursor<'a, C, Option> where C: HashedCursor, { - /// Create new account cursor from an indexed hashed post-state overlay. 
- pub fn new_account(cursor: C, post_states: &'a HashedPostStateOverlay) -> Self { - let post_state_cursor = post_states.account_overlay(); + /// Create new account cursor which combines a DB cursor and the post state. + pub fn new_account( + cursor: C, + post_states: impl IntoIterator, + ) -> Self { + let post_states = post_states.into_iter().collect::>(); + let post_state_cursor = PostStateOverlayCursor::account(&post_states); Self { cursor, db_cursor_state: DbCursorState::new(false), @@ -133,13 +308,16 @@ impl<'a, C> HashedPostStateCursor<'a, C, U256> where C: HashedStorageCursor, { - /// Create new storage cursor from an indexed hashed post-state overlay. + /// Create new storage cursor with full post state reference. + /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. pub fn new_storage( cursor: C, - post_states: &'a HashedPostStateOverlay, + post_states: impl IntoIterator, hashed_address: B256, ) -> Self { - let (post_state_cursor, cursor_wiped) = post_states.storage_overlay(hashed_address); + let post_states = post_states.into_iter().collect::>(); + let (post_state_cursor, cursor_wiped) = + Self::get_storage_overlay(&post_states, hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(cursor_wiped), @@ -151,6 +329,14 @@ where post_states, } } + + /// Returns the storage overlay for `hashed_address` and whether it was wiped. + fn get_storage_overlay( + post_states: &[&'a HashedPostStateSorted], + hashed_address: B256, + ) -> (PostStateOverlayCursor<'a, U256>, bool) { + PostStateOverlayCursor::storage(post_states, hashed_address) + } } impl<'a, C, V> HashedPostStateCursor<'a, C, V> @@ -169,10 +355,6 @@ where /// Positions the DB cursor state using the underlying cursor. 
fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { - if !self.db_cursor_state.should_seek(&key) { - return Ok(()) - } - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -203,18 +385,13 @@ where /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_entry = self.post_state_cursor.min_current_entry(); - let db_entry = self.db_cursor_state.entry(); - let next_key = match (mem_entry, db_entry) { - (Some((mem_key, _)), Some((db_key, _))) => mem_key.min(*db_key), - (Some((mem_key, _)), None) => mem_key, - (None, Some((db_key, _))) => *db_key, - (None, None) => return Ok(None), + let mem_key = self.post_state_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); }; - if let Some((mem_key, mem_value)) = mem_entry && - mem_key == next_key - { + if let Some(mem_value) = self.post_state_cursor.highest_priority_value_at(&next_key) { if let Some(value) = mem_value { return Ok(Some((next_key, value))) } @@ -257,8 +434,8 @@ where self.deferred_overlay_seek_start = None; match self.post_state_cursor.seek_until_exact(&key) { Some((idx, Some(value))) => { - self.deferred_overlay_seek_start = Some(idx + 1); let entry = Some((key, value)); + self.deferred_overlay_seek_start = Some(idx + 1); self.set_last_key(&entry); return Ok(entry) } @@ -297,10 +474,10 @@ where } self.post_state_cursor.first_after(&last_key); - match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { - Some(db_key) if db_key == last_key => self.cursor_next()?, - Some(db_key) if db_key > last_key => {} - _ => self.cursor_first_after(last_key)?, + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { + self.cursor_next()?; + } else { + 
self.cursor_first_after(last_key)?; } let entry = self.choose_next_entry()?; @@ -310,9 +487,9 @@ where fn reset(&mut self) { self.cursor.reset(); - - self.db_cursor_state.reset_position(); self.post_state_cursor.reset(); + + self.db_cursor_state.set_entry(None); self.deferred_overlay_seek_start = None; self.last_key = None; #[cfg(debug_assertions)] @@ -346,390 +523,20 @@ where fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let cursor_wiped = - self.post_states.retarget_storage_overlay(&mut self.post_state_cursor, hashed_address); - self.db_cursor_state = DbCursorState::new(cursor_wiped); - } -} - -/// Hashed post-state overlays ordered from highest to lowest precedence. -#[derive(Clone, Debug, Default)] -pub struct HashedPostStateOverlay { - account_overlay: Arc>>>, - storage_overlays: HashedStorageOverlays, - layer_capacity: usize, -} - -impl HashedPostStateOverlay { - /// Create a new indexed hashed post-state overlay stack. - pub fn new(states: Vec>) -> Self { - let layer_capacity = states.len(); - let account_overlay = Self::build_account_overlay(&states); - let storage_overlays = Self::build_storage_overlays(&states); - Self { account_overlay, storage_overlays, layer_capacity } - } - - /// Returns `true` if the overlay does not contain any hashed post-state updates. 
- pub fn is_empty(&self) -> bool { - self.account_overlay.is_empty() && self.storage_overlays.is_empty() - } - - fn build_account_overlay( - states: &[Arc], - ) -> Arc>>> { - Arc::new( - states - .iter() - .filter(|state| !state.accounts.is_empty()) - .map(|state| { - PostStateOverlayLayer::new(Arc::clone(state), state.accounts.as_slice()) - }) - .collect(), - ) - } - - fn build_storage_overlays(states: &[Arc]) -> HashedStorageOverlays { - if let [state] = states { - return HashedStorageOverlays::Single(Arc::clone(state)) - } - - let storage_overlay_capacity = states.iter().map(|state| state.storages.len()).sum(); - let mut overlays: B256Map = - B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); - - for state in states { - Self::push_storage_layer(&mut overlays, state); - } - - HashedStorageOverlays::Indexed(Arc::new(overlays)) - } - - /// Add a hashed post-state layer at the end of the precedence stack. - pub fn push_layer(&mut self, state: Arc) { - self.layer_capacity += 1; - if !state.accounts.is_empty() { - Arc::make_mut(&mut self.account_overlay) - .push(PostStateOverlayLayer::new(Arc::clone(&state), state.accounts.as_slice())); - } - self.storage_overlays.push_layer(state); - } - - /// Add a hashed post-state layer at the beginning of the precedence stack. 
- pub fn prepend_layer(&mut self, state: Arc) { - self.layer_capacity += 1; - if !state.accounts.is_empty() { - Arc::make_mut(&mut self.account_overlay).insert( - 0, - PostStateOverlayLayer::new(Arc::clone(&state), state.accounts.as_slice()), + let (post_state_cursor, cursor_wiped) = + HashedPostStateCursor::::get_storage_overlay( + &self.post_states, + hashed_address, ); - } - - self.storage_overlays.prepend_layer(state); - } - - fn push_storage_layer( - overlays: &mut B256Map, - state: &Arc, - ) { - for (hashed_address, storage) in &state.storages { - let overlay = overlays.entry(*hashed_address).or_default(); - if overlay.db_wiped { - continue; - } - - let storage_slots = storage.storage_slots_ref(); - if !storage_slots.is_empty() { - if !overlay.has_visible_value && - layer_has_visible_storage_value(storage_slots, &overlay.layers) - { - overlay.has_visible_value = true; - } - overlay.layers.push(PostStateOverlayLayer::new(Arc::clone(state), storage_slots)); - } - - if storage.is_wiped() { - overlay.db_wiped = true; - } - } - } - - fn prepend_storage_layer( - overlays: &mut B256Map, - state: &Arc, - ) { - for (hashed_address, storage) in &state.storages { - let overlay = overlays.entry(*hashed_address).or_default(); - let storage_slots = storage.storage_slots_ref(); - - if storage.is_wiped() { - overlay.layers.clear(); - overlay.db_wiped = true; - overlay.has_visible_value = false; - } - - if !storage_slots.is_empty() { - let has_nonzero_slot = storage_slots.iter().any(|(_, value)| !value.is_zero()); - let recompute_after_insert = !has_nonzero_slot && overlay.has_visible_value; - - overlay - .layers - .insert(0, PostStateOverlayLayer::new(Arc::clone(state), storage_slots)); - - overlay.has_visible_value = has_nonzero_slot || - (recompute_after_insert && has_visible_storage_value(&overlay.layers)); - } - } - } - - fn account_overlay(&self) -> PostStateOverlayCursor<'_, Option> { - PostStateOverlayCursor::new(self.account_overlay.as_slice(), false, 
self.layer_capacity) - } - - fn storage_overlay(&self, hashed_address: B256) -> (PostStateOverlayCursor<'_, U256>, bool) { - match &self.storage_overlays { - HashedStorageOverlays::Single(state) => { - let Some(storage) = state.storages.get(&hashed_address) else { - return (PostStateOverlayCursor::with_capacity(self.layer_capacity), false) - }; - let storage_slots = storage.storage_slots_ref(); - let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); - ( - PostStateOverlayCursor::from_entries( - storage_slots, - has_visible_value, - self.layer_capacity, - ), - storage.is_wiped(), - ) - } - HashedStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - return (PostStateOverlayCursor::with_capacity(self.layer_capacity), false) - }; - - ( - PostStateOverlayCursor::new( - overlay.layers.as_slice(), - overlay.has_visible_value, - self.layer_capacity, - ), - overlay.db_wiped, - ) - } - } - } - - fn retarget_storage_overlay<'a>( - &'a self, - cursor: &mut PostStateOverlayCursor<'a, U256>, - hashed_address: B256, - ) -> bool { - match &self.storage_overlays { - HashedStorageOverlays::Single(state) => { - let Some(storage) = state.storages.get(&hashed_address) else { - cursor.retarget_entries(&[], false); - return false - }; - let storage_slots = storage.storage_slots_ref(); - let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); - cursor.retarget_entries(storage_slots, has_visible_value); - storage.is_wiped() - } - HashedStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - cursor.retarget(&[], false); - return false - }; - cursor.retarget(overlay.layers.as_slice(), overlay.has_visible_value); - overlay.db_wiped - } - } - } -} - -impl AsRef for HashedPostStateOverlay { - fn as_ref(&self) -> &Self { - self - } -} - -#[derive(Debug)] -struct PostStateOverlayCursor<'a, V> { - cursor: PositionedOverlayCursor<'a, B256, V>, - 
has_visible_value: bool, -} - -impl Default for PostStateOverlayCursor<'_, V> { - fn default() -> Self { - Self::new(&[], false, 0) - } -} - -impl<'a, V> PostStateOverlayCursor<'a, V> { - fn new( - layers: &'a [PostStateOverlayLayer], - has_visible_value: bool, - layer_capacity: usize, - ) -> Self { - Self { - cursor: PositionedOverlayCursor::with_capacity(layers, layer_capacity), - has_visible_value, - } - } - - fn reset(&mut self) { - self.cursor.reset(); - } - - fn with_capacity(layer_capacity: usize) -> Self { - Self { - cursor: PositionedOverlayCursor::with_entries(&[], layer_capacity), - has_visible_value: false, - } - } - - fn from_entries( - entries: &'a [(B256, V)], - has_visible_value: bool, - layer_capacity: usize, - ) -> Self { - Self { - cursor: PositionedOverlayCursor::with_entries(entries, layer_capacity), - has_visible_value, - } - } - - fn retarget(&mut self, layers: &'a [PostStateOverlayLayer], has_visible_value: bool) { - self.cursor.retarget(layers); - self.has_visible_value = has_visible_value; - } - - fn retarget_entries(&mut self, entries: &'a [(B256, V)], has_visible_value: bool) { - self.cursor.retarget_entries(entries); - self.has_visible_value = has_visible_value; - } -} - -impl<'a, V> PostStateOverlayCursor<'a, V> -where - V: HashedPostStateCursorValue, -{ - fn seek_from(&mut self, start: usize, key: &B256) { - self.cursor.seek_from(start, key); - } - - fn seek_until_exact(&mut self, key: &B256) -> Option<(usize, Option)> { - self.cursor.seek_until_exact(key).map(|(idx, value)| (idx, (*value).into_option())) - } - - fn first_after(&mut self, key: &B256) { - self.cursor.first_after(key); - } - - fn min_current_entry(&self) -> Option<(B256, Option)> { - self.cursor.min_current_entry().map(|(key, value)| (key, (*value).into_option())) - } - - fn advance_key(&mut self, key: &B256) { - self.cursor.advance_key(key); - } - - const fn has_visible_value(&self) -> bool { - self.has_visible_value - } -} - -#[derive(Clone, Debug, Default)] -struct 
HashedStorageOverlay { - layers: Vec>, - db_wiped: bool, - has_visible_value: bool, -} - -#[derive(Clone, Debug)] -enum HashedStorageOverlays { - Single(Arc), - Indexed(Arc>), -} - -impl Default for HashedStorageOverlays { - fn default() -> Self { - Self::Indexed(Default::default()) - } -} - -impl HashedStorageOverlays { - fn is_empty(&self) -> bool { - match self { - Self::Single(state) => state.storages.is_empty(), - Self::Indexed(overlays) => overlays.is_empty(), - } - } - - fn push_layer(&mut self, state: Arc) { - match self { - Self::Single(existing) => { - let storage_overlay_capacity = existing.storages.len() + state.storages.len(); - let mut overlays: B256Map = - B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); - HashedPostStateOverlay::push_storage_layer(&mut overlays, existing); - HashedPostStateOverlay::push_storage_layer(&mut overlays, &state); - *self = Self::Indexed(Arc::new(overlays)); - } - Self::Indexed(overlays) => { - HashedPostStateOverlay::push_storage_layer(Arc::make_mut(overlays), &state); - } - } - } - - fn prepend_layer(&mut self, state: Arc) { - match self { - Self::Single(existing) => { - let storage_overlay_capacity = existing.storages.len() + state.storages.len(); - let mut overlays: B256Map = - B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); - HashedPostStateOverlay::push_storage_layer(&mut overlays, &state); - HashedPostStateOverlay::push_storage_layer(&mut overlays, existing); - *self = Self::Indexed(Arc::new(overlays)); - } - Self::Indexed(overlays) => { - HashedPostStateOverlay::prepend_storage_layer(Arc::make_mut(overlays), &state); - } - } - } -} - -type PostStateOverlayLayer = OverlayLayer; - -fn layer_has_visible_storage_value( - entries: &[(B256, U256)], - higher_priority_layers: &[PostStateOverlayLayer], -) -> bool { - entries.iter().any(|(key, value)| { - !value.is_zero() && - !higher_priority_layers.iter().any(|higher_layer| { - higher_layer - .entries() - 
.binary_search_by_key(key, |(entry_key, _)| *entry_key) - .is_ok() - }) - }) -} - -fn has_visible_storage_value(layers: &[PostStateOverlayLayer]) -> bool { - for (layer_idx, layer) in layers.iter().enumerate() { - if layer_has_visible_storage_value(layer.entries(), &layers[..layer_idx]) { - return true - } + self.post_state_cursor = post_state_cursor; + self.db_cursor_state = DbCursorState::new(cursor_wiped); } - false } #[cfg(test)] mod tests { use super::*; use crate::hashed_cursor::mock::MockHashedCursor; - use alloy_primitives::map::B256Map; use parking_lot::Mutex; use std::{collections::BTreeMap, sync::Arc}; @@ -737,155 +544,20 @@ mod tests { B256::repeat_byte(byte) } - fn account(nonce: u64) -> Account { - Account { nonce, balance: U256::from(nonce), bytecode_hash: None } - } - fn storage_post_state(storage_slots: Vec<(B256, U256)>) -> HashedPostStateSorted { - storage_post_state_for_address(B256::ZERO, storage_slots) + storage_post_state_with_wipe(storage_slots, false) } fn storage_post_state_with_wipe( storage_slots: Vec<(B256, U256)>, wiped: bool, - ) -> HashedPostStateSorted { - storage_post_state_with_wipe_for_address(B256::ZERO, storage_slots, wiped) - } - - fn storage_post_state_for_address( - hashed_address: B256, - storage_slots: Vec<(B256, U256)>, - ) -> HashedPostStateSorted { - storage_post_state_with_wipe_for_address(hashed_address, storage_slots, false) - } - - fn storage_post_state_with_wipe_for_address( - hashed_address: B256, - storage_slots: Vec<(B256, U256)>, - wiped: bool, ) -> HashedPostStateSorted { let storage_sorted = reth_trie_common::HashedStorageSorted { storage_slots, wiped }; let mut storages = alloy_primitives::map::B256Map::default(); - storages.insert(hashed_address, storage_sorted); + storages.insert(B256::ZERO, storage_sorted); HashedPostStateSorted::new(Vec::new(), storages) } - fn storage_cursor<'a>( - cursor: MockHashedCursor, - overlay: &'a HashedPostStateOverlay, - hashed_address: B256, - ) -> HashedPostStateCursor<'a, 
MockHashedCursor, U256> { - HashedPostStateCursor::new_storage(cursor, overlay, hashed_address) - } - - fn storage_overlay_snapshot( - overlay: &HashedPostStateOverlay, - hashed_address: B256, - ) -> (Vec>, bool, bool) { - match &overlay.storage_overlays { - HashedStorageOverlays::Single(state) => { - let Some(storage) = state.storages.get(&hashed_address) else { - return (Vec::new(), false, false) - }; - let storage_slots = storage.storage_slots_ref(); - let layers = (!storage_slots.is_empty()) - .then(|| vec![storage_slots.to_vec()]) - .unwrap_or_default(); - let has_visible_value = storage_slots.iter().any(|(_, value)| !value.is_zero()); - (layers, storage.is_wiped(), has_visible_value) - } - HashedStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - return (Vec::new(), false, false) - }; - ( - overlay.layers.iter().map(|layer| layer.entries().to_vec()).collect(), - overlay.db_wiped, - overlay.has_visible_value, - ) - } - } - } - - #[test] - fn test_incremental_storage_push_matches_rebuilt_overlay() { - let hashed_address = key(0x01); - let top = Arc::new(storage_post_state_for_address( - hashed_address, - vec![(key(0x10), U256::from(1))], - )); - let lower = Arc::new(storage_post_state_with_wipe_for_address( - hashed_address, - vec![(key(0x20), U256::from(2))], - true, - )); - - let mut incremental = HashedPostStateOverlay::new(vec![Arc::clone(&top)]); - incremental.push_layer(Arc::clone(&lower)); - let rebuilt = HashedPostStateOverlay::new(vec![top, lower]); - - assert_eq!( - storage_overlay_snapshot(&incremental, hashed_address), - storage_overlay_snapshot(&rebuilt, hashed_address) - ); - } - - #[test] - fn test_incremental_storage_prepend_matches_rebuilt_overlay() { - let hashed_address = key(0x01); - let lower = Arc::new(storage_post_state_with_wipe_for_address( - hashed_address, - vec![(key(0x10), U256::from(1))], - true, - )); - let top = Arc::new(storage_post_state_for_address( - hashed_address, - 
vec![(key(0x10), U256::ZERO), (key(0x20), U256::from(2))], - )); - - let mut incremental = HashedPostStateOverlay::new(vec![Arc::clone(&lower)]); - incremental.prepend_layer(Arc::clone(&top)); - let rebuilt = HashedPostStateOverlay::new(vec![top, lower]); - - assert_eq!( - storage_overlay_snapshot(&incremental, hashed_address), - storage_overlay_snapshot(&rebuilt, hashed_address) - ); - } - - #[test] - fn test_storage_visible_value_tracks_shadowed_lower_layers() { - let hashed_address = key(0x01); - let top_delete = - Arc::new(storage_post_state_for_address(hashed_address, vec![(key(0x10), U256::ZERO)])); - let lower_visible = Arc::new(storage_post_state_for_address( - hashed_address, - vec![(key(0x10), U256::from(1))], - )); - - let overlay = HashedPostStateOverlay::new(vec![top_delete, lower_visible]); - - let (_, _, has_visible_value) = storage_overlay_snapshot(&overlay, hashed_address); - assert!(!has_visible_value); - } - - #[test] - fn test_storage_visible_value_tracks_shadowing_prepend() { - let hashed_address = key(0x01); - let lower_visible = Arc::new(storage_post_state_for_address( - hashed_address, - vec![(key(0x10), U256::from(1))], - )); - let top_delete = - Arc::new(storage_post_state_for_address(hashed_address, vec![(key(0x10), U256::ZERO)])); - - let mut overlay = HashedPostStateOverlay::new(vec![lower_visible]); - overlay.prepend_layer(top_delete); - - let (_, _, has_visible_value) = storage_overlay_snapshot(&overlay, hashed_address); - assert!(!has_visible_value); - } - #[test] fn test_seek_overlay_exact_hit_does_not_touch_db_until_next() { let db_nodes = vec![(key(0x02), U256::from(2)), (key(0x03), U256::from(3))]; @@ -897,8 +569,7 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + let mut cursor = 
HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x02), U256::from(42)))); @@ -920,8 +591,7 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); let result = cursor.seek(key(0x01)).unwrap(); assert_eq!(result, Some((key(0x01), U256::from(1)))); @@ -946,8 +616,7 @@ mod tests { let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); let post_state = storage_post_state(post_state_nodes); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(post_state)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); let result = cursor.seek(key(0x02)).unwrap(); assert_eq!(result, Some((key(0x03), U256::from(3)))); @@ -966,15 +635,20 @@ mod tests { let exact_hit = storage_post_state(vec![(key(0x05), U256::from(5))]); let lower_priority = storage_post_state(vec![(key(0x01), U256::from(10)), (key(0x07), U256::from(7))]); - let overlay = HashedPostStateOverlay::new(vec![ - Arc::new(higher_priority), - Arc::new(exact_hit), - Arc::new(lower_priority), - ]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage( + mock_cursor, + [&higher_priority, &exact_hit, &lower_priority], + B256::ZERO, + ); let result = cursor.seek(key(0x05)).unwrap(); assert_eq!(result, Some((key(0x05), U256::from(5)))); + assert_eq!(cursor.post_state_cursor.cursors[0].idx, 1); + assert_eq!(cursor.post_state_cursor.cursors[1].idx, 0); + assert_eq!( + 
cursor.post_state_cursor.cursors[2].idx, 0, + "lower-priority overlay should not be sought after an exact overlay hit" + ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -983,57 +657,18 @@ mod tests { } #[test] - fn test_seek_reuses_exact_db_position() { - let db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x02), account(2))]); + fn test_seek_can_move_backwards() { + let db_nodes = BTreeMap::from([(key(0x01), U256::from(1)), (key(0x03), U256::from(3))]); let db_nodes_arc = Arc::new(db_nodes); let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = HashedPostStateOverlay::default(); - let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); - - assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), account(1)))); - assert_eq!(visited_keys.lock().len(), 1); - - assert_eq!(cursor.next().unwrap(), Some((key(0x02), account(2)))); - assert_eq!(visited_keys.lock().len(), 2); - - assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x02), account(2)))); - assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); - } - - #[test] - fn test_seek_reuses_ahead_db_position() { - let db_nodes = BTreeMap::from([(key(0x03), account(3))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = HashedPostStateOverlay::default(); - let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); - - assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x03), account(3)))); - assert_eq!(visited_keys.lock().len(), 1); - - assert_eq!(cursor.seek(key(0x02)).unwrap(), Some((key(0x03), account(3)))); - assert_eq!(visited_keys.lock().len(), 1, "seek should reuse an ahead DB position"); - } - - #[test] - 
fn test_seek_does_not_reseek_exhausted_db() { - let db_nodes = BTreeMap::from([(key(0x01), account(1))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = HashedPostStateOverlay::default(); - let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); - assert_eq!(cursor.seek(key(0x02)).unwrap(), None); - assert_eq!(visited_keys.lock().len(), 1); + let post_state = storage_post_state(vec![(key(0x02), U256::from(2))]); + let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, [&post_state], B256::ZERO); - assert_eq!(cursor.seek(key(0x03)).unwrap(), None); - assert_eq!(visited_keys.lock().len(), 1, "exhausted DB cursor should stay exhausted"); + assert_eq!(cursor.seek(key(0x03)).unwrap(), Some((key(0x03), U256::from(3)))); + assert_eq!(cursor.seek(key(0x01)).unwrap(), Some((key(0x01), U256::from(1)))); + assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); } #[test] @@ -1053,8 +688,8 @@ mod tests { (key(0x02), U256::from(20)), (key(0x03), U256::from(3)), ]); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); + let mut cursor = + HashedPostStateCursor::new_storage(mock_cursor, [&newest, &oldest], B256::ZERO); let mut results = Vec::new(); if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { @@ -1074,35 +709,6 @@ mod tests { ); } - #[test] - fn test_indexed_account_overlay_resolves_by_precedence() { - let db_nodes = BTreeMap::from([(key(0x01), account(1)), (key(0x03), account(3))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); - - let newest = HashedPostStateSorted::new( - vec![(key(0x01), 
None), (key(0x02), Some(account(20)))], - Default::default(), - ); - let oldest = HashedPostStateSorted::new( - vec![(key(0x01), Some(account(10))), (key(0x03), Some(account(30)))], - Default::default(), - ); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = HashedPostStateCursor::new_account(mock_cursor, &overlay); - - let mut results = Vec::new(); - if let Some(entry) = cursor.seek(B256::ZERO).unwrap() { - results.push(entry); - while let Some(entry) = cursor.next().unwrap() { - results.push(entry); - } - } - - assert_eq!(results, vec![(key(0x02), account(20)), (key(0x03), account(30))]); - } - #[test] fn test_storage_wipe_overlay_hides_lower_precedence_sources() { let db_nodes = BTreeMap::from([(key(0x04), U256::from(4))]); @@ -1113,96 +719,17 @@ mod tests { let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let overlay = - HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); - let mut cursor = storage_cursor(mock_cursor, &overlay, B256::ZERO); - - assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); - assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); - assert_eq!(cursor.next().unwrap(), None); - } - - #[test] - fn test_indexed_storage_wipe_overlay_hides_lower_precedence_sources() { - let db_nodes = BTreeMap::from([(key(0x04), U256::from(4))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); - - let newest = storage_post_state(vec![(key(0x02), U256::from(2))]); - let wiping = storage_post_state_with_wipe(vec![(key(0x01), U256::from(1))], true); - let hidden = storage_post_state(vec![(key(0x03), U256::from(3))]); - let overlay = - 
HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(wiping), Arc::new(hidden)]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); + let mut cursor = HashedPostStateCursor::new_storage( + mock_cursor, + [&newest, &wiping, &hidden], + B256::ZERO, + ); assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); assert_eq!(cursor.next().unwrap(), Some((key(0x02), U256::from(2)))); assert_eq!(cursor.next().unwrap(), None); } - #[test] - fn test_indexed_storage_overlay_switches_hashed_address() { - let first_address = B256::with_last_byte(1); - let second_address = B256::with_last_byte(2); - let mut db_storage = B256Map::default(); - db_storage.insert(first_address, BTreeMap::from([(key(0x04), U256::from(4))])); - db_storage.insert(second_address, BTreeMap::from([(key(0x05), U256::from(5))])); - let visited_keys = - Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); - let mock_cursor = - MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, first_address) - .unwrap(); - - let first_overlay = - storage_post_state_for_address(first_address, vec![(key(0x01), U256::from(1))]); - let second_overlay = - storage_post_state_for_address(second_address, vec![(key(0x02), U256::from(2))]); - let overlay = - HashedPostStateOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, first_address); - - assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x01), U256::from(1)))); - assert_eq!(cursor.next().unwrap(), Some((key(0x04), U256::from(4)))); - - cursor.set_hashed_address(second_address); - - assert_eq!(cursor.seek(B256::ZERO).unwrap(), Some((key(0x02), U256::from(2)))); - assert_eq!(cursor.next().unwrap(), Some((key(0x05), U256::from(5)))); - assert_eq!(cursor.next().unwrap(), None); - } - - #[test] - fn test_storage_empty_respects_layer_precedence() { - let mut db_storage = 
B256Map::default(); - db_storage.insert(B256::ZERO, BTreeMap::new()); - let visited_keys = - Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); - let mock_cursor = - MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); - - let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); - let hidden = storage_post_state(vec![(key(0x01), U256::from(1))]); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(hidden)]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); - - assert!(cursor.is_storage_empty().unwrap()); - - let mut db_storage = B256Map::default(); - db_storage.insert(B256::ZERO, BTreeMap::new()); - let visited_keys = - Arc::new(db_storage.keys().map(|key| (*key, Default::default())).collect()); - let mock_cursor = - MockHashedCursor::new_storage(Arc::new(db_storage), visited_keys, B256::ZERO).unwrap(); - - let newest = storage_post_state(vec![(key(0x01), U256::ZERO)]); - let visible = storage_post_state(vec![(key(0x02), U256::from(2))]); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(newest), Arc::new(visible)]); - let mut cursor = HashedPostStateCursor::new_storage(mock_cursor, &overlay, B256::ZERO); - - assert!(!cursor.is_storage_empty().unwrap()); - } - mod proptest_tests { use super::*; use proptest::prelude::*; @@ -1218,7 +745,7 @@ mod tests { db_nodes: &[(B256, U256)], overlays: &[Vec<(B256, U256)>], ) -> Vec<(B256, U256)> { - let mut merged: BTreeMap = db_nodes.iter().copied().collect(); + let mut merged: BTreeMap = db_nodes.iter().cloned().collect(); for overlay in overlays.iter().rev() { for (key, value) in overlay { @@ -1252,7 +779,9 @@ mod tests { entries: &[(B256, U256)], position: &mut Option, ) -> Option<(B256, U256)> { - let next_idx = position.and_then(|idx| idx.checked_add(1))?; + let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { + return None; + }; if next_idx < entries.len() { 
*position = Some(next_idx); @@ -1337,19 +866,16 @@ mod tests { let mut reference_position = None; // Create the HashedPostStateCursor being tested - let db_nodes_map: BTreeMap = db_nodes.iter().copied().collect(); + let db_nodes_map: BTreeMap = db_nodes.iter().cloned().collect(); let db_nodes_arc = Arc::new(db_nodes_map); let visited_keys = Arc::new(Mutex::new(Vec::new())); let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); let hashed_address = B256::ZERO; - let post_states = overlays - .into_iter() - .map(storage_post_state) - .map(Arc::new) - .collect::>(); - let overlay = HashedPostStateOverlay::new(post_states); - let mut test_cursor = storage_cursor(mock_cursor, &overlay, hashed_address); + let post_states = + overlays.into_iter().map(storage_post_state).collect::>(); + let mut test_cursor = + HashedPostStateCursor::new_storage(mock_cursor, post_states.iter(), hashed_address); // Test: seek to the beginning first let control_first = @@ -1361,14 +887,9 @@ mod tests { "Initial seek returned", ); assert_eq!(control_first, test_first, "Initial seek mismatch"); - let mut seek_floor = control_first.as_ref().map(|(key, _)| *key); // Execute a sequence of random operations for op in ops { - if reference_position.is_none() { - break - } - match op { CursorOp::Next => { let control_result = @@ -1380,11 +901,8 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); - let Some((key, _)) = control_result else { break }; - seek_floor = Some(key); } CursorOp::Seek(key) => { - let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek(key).unwrap(); @@ -1395,8 +913,6 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); - let Some((key, _)) = control_result else { break }; - seek_floor = Some(key); } } } diff --git 
a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index a91048036c5..90c54fbce28 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -14,7 +14,8 @@ )] #![cfg_attr(docsrs, feature(doc_cfg))] -mod overlay_cursor; +/// The implementation of forward-only in-memory cursor. +pub mod forward_cursor; /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor; diff --git a/crates/trie/trie/src/node_iter.rs b/crates/trie/trie/src/node_iter.rs index facedbb4dce..45d26238984 100644 --- a/crates/trie/trie/src/node_iter.rs +++ b/crates/trie/trie/src/node_iter.rs @@ -310,7 +310,7 @@ mod tests { use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, noop::NoopHashedCursor, HashedCursorFactory, - HashedPostStateCursor, HashedPostStateOverlay, + HashedPostStateCursor, }, mock::{KeyVisit, KeyVisitType}, trie_cursor::{ @@ -331,7 +331,7 @@ mod tests { prefix_set::PrefixSetMut, updates::TrieUpdates, BranchNode, HashedPostState, LeafNode, RlpNode, }; - use std::{collections::BTreeMap, sync::Arc}; + use std::collections::BTreeMap; /// Calculate the branch node stored in the database by feeding the provided state to the hash /// builder and taking the trie updates. 
@@ -349,13 +349,12 @@ mod tests { (nibbles.pack().into_inner().unwrap().into(), Some(account)) })) .into_sorted(); - let hashed_post_state = HashedPostStateOverlay::new(vec![Arc::new(hashed_post_state)]); let mut node_iter = TrieNodeIter::state_trie( walker, HashedPostStateCursor::new_account( NoopHashedCursor::::default(), - &hashed_post_state, + [&hashed_post_state], ), ); diff --git a/crates/trie/trie/src/overlay_cursor.rs b/crates/trie/trie/src/overlay_cursor.rs deleted file mode 100644 index 643276aaaa3..00000000000 --- a/crates/trie/trie/src/overlay_cursor.rs +++ /dev/null @@ -1,421 +0,0 @@ -use std::{fmt, slice, sync::Arc}; - -const OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN: usize = 64; - -#[derive(Debug)] -pub(crate) enum DbCursorState { - Unpositioned, - Positioned((K, V)), - Exhausted, - Wiped, -} - -impl DbCursorState { - pub(crate) const fn new(cursor_wiped: bool) -> Self { - if cursor_wiped { - Self::Wiped - } else { - Self::Unpositioned - } - } - - pub(crate) const fn is_wiped(&self) -> bool { - matches!(self, Self::Wiped) - } - - pub(crate) const fn entry(&self) -> Option<&(K, V)> { - match self { - Self::Positioned(entry) => Some(entry), - Self::Unpositioned | Self::Exhausted | Self::Wiped => None, - } - } - - pub(crate) fn set_entry(&mut self, entry: Option<(K, V)>) { - if !self.is_wiped() { - *self = entry.map(Self::Positioned).unwrap_or(Self::Exhausted); - } - } - - pub(crate) fn reset_position(&mut self) { - if !self.is_wiped() { - *self = Self::Unpositioned; - } - } -} - -impl DbCursorState { - pub(crate) fn should_seek(&self, key: &K) -> bool { - match self { - Self::Unpositioned => true, - Self::Positioned((db_key, _)) => db_key < key, - Self::Exhausted | Self::Wiped => false, - } - } - - pub(crate) fn exact_entry(&self, key: &K) -> Option<&(K, V)> { - match self { - Self::Positioned((db_key, _)) if db_key == key => self.entry(), - Self::Unpositioned | Self::Positioned(_) | Self::Exhausted | Self::Wiped => None, - } - } - - pub(crate) fn 
may_contain_exact(&self, key: &K) -> bool { - match self { - Self::Unpositioned => true, - Self::Positioned((db_key, _)) => db_key <= key, - Self::Exhausted | Self::Wiped => false, - } - } -} - -#[derive(Debug)] -pub(crate) struct PositionedOverlayCursor<'a, K, V> { - cursors: Vec>, -} - -impl Default for PositionedOverlayCursor<'_, K, V> { - fn default() -> Self { - Self { cursors: Vec::new() } - } -} - -impl<'a, K, V> PositionedOverlayCursor<'a, K, V> { - #[cfg(test)] - pub(crate) fn new(layers: &'a [OverlayLayer]) -> Self { - Self::with_capacity(layers, layers.len()) - } - - pub(crate) fn with_capacity(layers: &'a [OverlayLayer], capacity: usize) -> Self { - let mut this = Self { cursors: Vec::with_capacity(capacity.max(layers.len())) }; - this.retarget(layers); - this - } - - pub(crate) fn with_entries(entries: &'a [(K, V)], capacity: usize) -> Self { - let mut cursors = Vec::with_capacity(capacity.max(usize::from(!entries.is_empty()))); - if !entries.is_empty() { - cursors.push(PositionedOverlayLayerCursor::from_entries(entries)); - } - Self { cursors } - } - - pub(crate) fn reset(&mut self) { - for cursor in &mut self.cursors { - cursor.reset(); - } - } - - pub(crate) fn retarget(&mut self, layers: &'a [OverlayLayer]) { - debug_assert!(self.cursors.capacity() >= layers.len()); - self.cursors.clear(); - self.cursors.extend(layers.iter().map(PositionedOverlayLayerCursor::new)); - } - - pub(crate) fn retarget_entries(&mut self, entries: &'a [(K, V)]) { - debug_assert!(self.cursors.capacity() >= usize::from(!entries.is_empty())); - self.cursors.clear(); - if !entries.is_empty() { - self.cursors.push(PositionedOverlayLayerCursor::from_entries(entries)); - } - } -} - -impl PositionedOverlayCursor<'_, K, V> -where - K: Ord, -{ - #[inline(always)] - pub(crate) fn seek_from(&mut self, start: usize, key: &K) { - for cursor in self.cursors.iter_mut().skip(start) { - let _ = cursor.seek(key); - } - } - - #[inline(always)] - pub(crate) fn seek_until_exact(&mut self, key: 
&K) -> Option<(usize, &V)> { - for (layer_idx, cursor) in self.cursors.iter_mut().enumerate() { - if let Some((_, value)) = cursor.seek_exact(key) { - return Some((layer_idx, value)) - } - } - - None - } - - #[inline(always)] - pub(crate) fn first_after(&mut self, key: &K) { - for cursor in &mut self.cursors { - let _ = cursor.first_after(key); - } - } - - #[inline(always)] - pub(crate) fn advance_key(&mut self, key: &K) { - for cursor in &mut self.cursors { - if cursor.current().is_some_and(|(entry_key, _)| entry_key == key) { - let _ = cursor.first_after(key); - } - } - } -} - -impl PositionedOverlayCursor<'_, K, V> -where - K: Copy + Ord, -{ - #[cfg(test)] - pub(crate) fn min_current_key(&self) -> Option { - self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() - } - - #[inline(always)] - pub(crate) fn min_current_entry(&self) -> Option<(K, &V)> { - let mut min_entry = None; - for cursor in &self.cursors { - if let Some((key, value)) = cursor.current() { - match min_entry { - Some((min_key, _)) if key >= &min_key => {} - _ => min_entry = Some((*key, value)), - } - } - } - min_entry - } -} - -#[derive(Debug)] -struct PositionedOverlayLayerCursor<'a, K, V> { - entries: &'a [(K, V)], - position: usize, -} - -impl<'a, K, V> PositionedOverlayLayerCursor<'a, K, V> { - fn new(layer: &'a OverlayLayer) -> Self { - Self::from_entries(layer.entries()) - } - - fn from_entries(entries: &'a [(K, V)]) -> Self { - Self { entries, position: 0 } - } - - #[inline(always)] - fn current(&self) -> Option<&'a (K, V)> { - self.entries.get(self.position) - } - - const fn reset(&mut self) { - self.position = 0; - } -} - -impl<'a, K, V> PositionedOverlayLayerCursor<'a, K, V> -where - K: Ord, -{ - #[inline(always)] - fn seek(&mut self, key: &K) -> Option<&'a (K, V)> { - if let Some((entry_key, _)) = self.current() { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => self.position += 1, - std::cmp::Ordering::Equal | std::cmp::Ordering::Greater => 
return self.current(), - } - } - - let remaining = &self.entries[self.position..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|(entry_key, _)| entry_key < key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].0 < key { - advance += 1; - } - advance - }; - - self.position += advance; - self.current() - } - - #[inline(always)] - fn seek_exact(&mut self, key: &K) -> Option<&'a (K, V)> { - if let Some(current @ (entry_key, _)) = self.current() { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => self.position += 1, - std::cmp::Ordering::Equal => return Some(current), - std::cmp::Ordering::Greater => return None, - } - } - - let remaining = &self.entries[self.position..]; - if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - self.position += remaining.partition_point(|(entry_key, _)| entry_key < key); - let current = self.current()?; - return (¤t.0 == key).then_some(current) - } - - for (advance, (entry_key, _)) in remaining.iter().enumerate() { - match entry_key.cmp(key) { - std::cmp::Ordering::Less => {} - std::cmp::Ordering::Equal => { - self.position += advance; - return self.current() - } - std::cmp::Ordering::Greater => { - self.position += advance; - return None - } - } - } - - self.position = self.entries.len(); - None - } - - #[inline(always)] - fn first_after(&mut self, key: &K) -> Option<&'a (K, V)> { - if let Some((entry_key, _)) = self.current() { - match entry_key.cmp(key) { - std::cmp::Ordering::Greater => return self.current(), - std::cmp::Ordering::Less | std::cmp::Ordering::Equal => self.position += 1, - } - } - - let remaining = &self.entries[self.position..]; - let advance = if remaining.len() >= OVERLAY_CURSOR_PARTITION_POINT_MIN_LEN { - remaining.partition_point(|(entry_key, _)| entry_key <= key) - } else { - let mut advance = 0; - while advance < remaining.len() && &remaining[advance].0 <= key { - advance += 1; - } - 
advance - }; - - self.position += advance; - self.current() - } -} - -#[derive(Clone)] -pub(crate) struct OverlayLayer { - _owner: Arc, - entries_ptr: *const (K, V), - entries_len: usize, -} - -impl OverlayLayer { - pub(crate) const fn new(owner: Arc, entries: &[(K, V)]) -> Self { - Self { _owner: owner, entries_ptr: entries.as_ptr(), entries_len: entries.len() } - } - - pub(crate) const fn entries(&self) -> &[(K, V)] { - // SAFETY: `entries_ptr` and `entries_len` are captured from a slice inside `_owner`. - // The `Arc` keeps that allocation alive, and the overlay owners are never mutated through - // this layer. - unsafe { slice::from_raw_parts(self.entries_ptr, self.entries_len) } - } -} - -// SAFETY: the raw pointer only targets immutable data owned by `_owner`, and `_owner` is retained -// for at least as long as the pointer is used. -unsafe impl Send for OverlayLayer {} -// SAFETY: see the `Send` impl; shared access only exposes immutable slices. -unsafe impl Sync for OverlayLayer {} - -impl fmt::Debug for OverlayLayer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("OverlayLayer").field("entries_len", &self.entries_len).finish() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn layer(entries: Arc>) -> OverlayLayer, u8, u8> { - OverlayLayer::new(Arc::clone(&entries), entries.as_slice()) - } - - #[test] - fn seek_reuses_current_position_when_it_already_satisfies_bound() { - let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); - let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); - - cursor.seek_from(0, &100); - assert_eq!(cursor.min_current_key(), Some(100)); - cursor.seek_from(0, &100); - assert_eq!(cursor.min_current_key(), Some(100)); - cursor.first_after(&99); - assert_eq!(cursor.min_current_key(), Some(100)); - cursor.first_after(&100); - assert_eq!(cursor.min_current_key(), Some(101)); - } - - #[test] - fn 
seek_does_not_move_backwards_from_current_position() { - let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); - let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); - - cursor.seek_from(0, &150); - assert_eq!(cursor.min_current_key(), Some(150)); - cursor.seek_from(0, &75); - assert_eq!(cursor.min_current_key(), Some(150)); - assert_eq!(cursor.seek_until_exact(&25), None); - assert_eq!(cursor.min_current_key(), Some(150)); - } - - #[test] - fn seek_does_not_recover_after_past_end() { - let entries = Arc::new((0..=200).map(|value| (value, value)).collect::>()); - let overlay = [layer(entries)]; - let mut cursor = PositionedOverlayCursor::new(&overlay); - - cursor.seek_from(0, &250); - assert_eq!(cursor.min_current_key(), None); - assert_eq!( - cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), - vec![201] - ); - - assert_eq!(cursor.seek_until_exact(&25), None); - assert_eq!( - cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), - vec![201] - ); - - cursor.first_after(&250); - assert_eq!(cursor.min_current_key(), None); - assert_eq!( - cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), - vec![201] - ); - - cursor.first_after(&25); - assert_eq!(cursor.min_current_key(), None); - assert_eq!( - cursor.cursors.iter().map(|cursor| cursor.position).collect::>(), - vec![201] - ); - } - - #[test] - fn retarget_reuses_cursor_allocation() { - let first_entries = Arc::new(vec![(1, 1)]); - let second_entries = Arc::new(vec![(2, 2)]); - let first_overlay = [layer(Arc::clone(&first_entries))]; - let second_overlay = [layer(first_entries), layer(second_entries)]; - let mut cursor = PositionedOverlayCursor::with_capacity(&first_overlay, 2); - let capacity = cursor.cursors.capacity(); - let ptr = cursor.cursors.as_ptr(); - - cursor.retarget(&second_overlay); - assert_eq!(cursor.cursors.capacity(), capacity); - assert_eq!(cursor.cursors.as_ptr(), ptr); - - cursor.reset(); - 
assert_eq!(cursor.cursors.capacity(), capacity); - assert_eq!(cursor.cursors.as_ptr(), ptr); - } -} diff --git a/crates/trie/trie/src/test_utils.rs b/crates/trie/trie/src/test_utils.rs index 966d90ac3d2..8d3f2f6659f 100644 --- a/crates/trie/trie/src/test_utils.rs +++ b/crates/trie/trie/src/test_utils.rs @@ -55,7 +55,6 @@ pub fn storage_root_prehashed>(storage: I) use crate::{ hashed_cursor::{ mock::MockHashedCursorFactory, HashedCursorFactory, HashedPostStateCursorFactory, - HashedPostStateOverlay, }, proof_v2::StorageProofCalculator, trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, @@ -66,7 +65,7 @@ use reth_trie_common::{ prefix_set::PrefixSetMut, updates::StorageTrieUpdates, BranchNodeCompact, HashedPostStateSorted, HashedStorage, Nibbles, ProofTrieNodeV2, ProofV2Target, }; -use std::{collections::BTreeMap, iter::once, sync::Arc}; +use std::{collections::BTreeMap, iter::once}; /// General-purpose test harness for storage trie tests. /// @@ -126,9 +125,8 @@ impl TrieTestHarness { Vec::new(), once((self.hashed_address(), hashed_storage.into_sorted())).collect(), ); - let overlay = HashedPostStateOverlay::new(vec![Arc::new(overlay)]); let overlay_cursor_factory = - HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), &overlay); + HashedPostStateCursorFactory::new(self.hashed_cursor_factory.clone(), [&overlay]); let (root, _, updates) = StorageRoot::new_hashed( self.trie_cursor_factory.clone(), diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index cd18965ac25..36ea3ac1764 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -1,12 +1,9 @@ use super::{TrieCursor, TrieCursorFactory, TrieStorageCursor}; -use crate::{ - overlay_cursor::{DbCursorState, OverlayLayer, PositionedOverlayCursor}, - updates::TrieUpdatesSorted, -}; -use alloy_primitives::{map::B256Map, B256}; +use crate::updates::TrieUpdatesSorted; +use 
alloy_primitives::B256; use reth_storage_errors::db::DatabaseError; use reth_trie_common::{BranchNodeCompact, Nibbles}; -use std::{marker::PhantomData, sync::Arc}; +use std::marker::PhantomData; /// The trie cursor factory for the trie updates. #[derive(Debug, Clone)] @@ -28,21 +25,21 @@ impl<'overlay, CF, T> InMemoryTrieCursorFactory<'overlay, CF, T> { impl<'overlay, CF, T> TrieCursorFactory for InMemoryTrieCursorFactory<'overlay, CF, T> where CF: TrieCursorFactory + 'overlay, - T: AsRef, + T: AsRef<[&'overlay TrieUpdatesSorted]>, { type AccountTrieCursor<'cursor> - = InMemoryTrieCursor<'cursor, CF::AccountTrieCursor<'cursor>> + = InMemoryTrieCursor<'overlay, CF::AccountTrieCursor<'cursor>> where Self: 'cursor; type StorageTrieCursor<'cursor> - = InMemoryTrieCursor<'cursor, CF::StorageTrieCursor<'cursor>> + = InMemoryTrieCursor<'overlay, CF::StorageTrieCursor<'cursor>> where Self: 'cursor; fn account_trie_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.account_trie_cursor()?; - Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref())) + Ok(InMemoryTrieCursor::new_account(cursor, self.trie_updates.as_ref().iter().copied())) } fn storage_trie_cursor( @@ -50,7 +47,11 @@ where hashed_address: B256, ) -> Result, DatabaseError> { let cursor = self.cursor_factory.storage_trie_cursor(hashed_address)?; - Ok(InMemoryTrieCursor::new_storage(cursor, self.trie_updates.as_ref(), hashed_address)) + Ok(InMemoryTrieCursor::new_storage( + cursor, + self.trie_updates.as_ref().iter().copied(), + hashed_address, + )) } } @@ -61,7 +62,7 @@ pub struct InMemoryTrieCursor<'a, C> { /// The underlying cursor. cursor: C, /// The current DB cursor state. - db_cursor_state: DbCursorState, + db_cursor_state: DbCursorState, /// In-memory cursors over trie update overlays. in_memory_cursor: OverlayCursor<'a>, /// Lower-priority overlays that still need positioning after a lazy exact overlay hit. 
@@ -71,14 +72,167 @@ pub struct InMemoryTrieCursor<'a, C> { #[cfg(debug_assertions)] /// Whether an initial seek was called. seeked: bool, - /// Source of trie update overlays. - trie_updates: &'a TrieUpdatesOverlay, + /// Reference to the full trie updates. + trie_updates: Vec<&'a TrieUpdatesSorted>, +} + +#[derive(Debug)] +enum DbCursorState { + Active(Option<(Nibbles, BranchNodeCompact)>), + Wiped, +} + +impl DbCursorState { + const fn new(cursor_wiped: bool) -> Self { + if cursor_wiped { + Self::Wiped + } else { + Self::Active(None) + } + } + + const fn is_wiped(&self) -> bool { + matches!(self, Self::Wiped) + } + + const fn entry(&self) -> Option<&(Nibbles, BranchNodeCompact)> { + match self { + Self::Active(entry) => entry.as_ref(), + Self::Wiped => None, + } + } + + fn set_entry(&mut self, entry: Option<(Nibbles, BranchNodeCompact)>) { + if let Self::Active(current) = self { + *current = entry; + } + } +} + +#[derive(Debug)] +struct OverlayCursor<'a> { + cursors: Vec>, +} + +impl<'a> OverlayCursor<'a> { + fn account(trie_updates: &[&'a TrieUpdatesSorted]) -> Self { + Self { + cursors: trie_updates + .iter() + .map(|updates| SeekableInMemoryCursor::new(updates.account_nodes_ref())) + .collect(), + } + } + + fn storage(trie_updates: &[&'a TrieUpdatesSorted], hashed_address: B256) -> (Self, bool) { + let mut cursors = Vec::new(); + let mut db_wiped = false; + + for updates in trie_updates { + if let Some(storage) = updates.storage_tries_ref().get(&hashed_address) { + cursors.push(SeekableInMemoryCursor::new(storage.storage_nodes_ref())); + if storage.is_deleted() { + db_wiped = true; + break; + } + } + } + + (Self { cursors }, db_wiped) + } + + fn seek_from(&mut self, start: usize, key: &Nibbles) { + for cursor in self.cursors.iter_mut().skip(start) { + cursor.seek(key); + } + } + + fn seek_until_exact(&mut self, key: &Nibbles) -> Option<(usize, Option)> { + for (idx, cursor) in self.cursors.iter_mut().enumerate() { + if let Some((cursor_key, value)) = 
cursor.seek(key) && + cursor_key == key + { + return Some((idx, value.clone())) + } + } + None + } + + fn first_after(&mut self, key: &Nibbles) { + for cursor in &mut self.cursors { + cursor.first_after(key); + } + } + + fn reset(&mut self) { + for cursor in &mut self.cursors { + cursor.reset(); + } + } + + fn min_current_key(&self) -> Option { + self.cursors.iter().filter_map(|cursor| cursor.current().map(|(key, _)| *key)).min() + } + + fn highest_priority_value_at(&self, key: &Nibbles) -> Option> { + self.cursors.iter().find_map(|cursor| { + let (cursor_key, value) = cursor.current()?; + (cursor_key == key).then(|| value.clone()) + }) + } + + fn advance_key(&mut self, key: &Nibbles) { + for cursor in &mut self.cursors { + if cursor.current().is_some_and(|(cursor_key, _)| cursor_key == key) { + cursor.first_after(key); + } + } + } +} + +#[derive(Debug)] +struct SeekableInMemoryCursor<'a> { + entries: &'a [(Nibbles, Option)], + idx: usize, +} + +impl<'a> SeekableInMemoryCursor<'a> { + const fn new(entries: &'a [(Nibbles, Option)]) -> Self { + Self { entries, idx: 0 } + } + + fn current(&self) -> Option<&'a (Nibbles, Option)> { + self.entries.get(self.idx) + } + + const fn reset(&mut self) { + self.idx = 0; + } + + fn seek(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { + self.idx = self.entries.partition_point(|(entry_key, _)| entry_key < key); + self.current() + } + + fn first_after(&mut self, key: &Nibbles) -> Option<&'a (Nibbles, Option)> { + if self.current().is_some_and(|(entry_key, _)| entry_key > key) { + return self.current() + } + + let remaining = &self.entries[self.idx..]; + self.idx += remaining.partition_point(|(entry_key, _)| entry_key <= key); + self.current() + } } impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { - /// Create new account trie cursor from an indexed trie updates overlay. 
- pub fn new_account(cursor: C, trie_updates: &'a TrieUpdatesOverlay) -> Self { - let in_memory_cursor = trie_updates.account_overlay(); + /// Create new account trie cursor which combines a DB cursor and the trie updates. + pub fn new_account( + cursor: C, + trie_updates: impl IntoIterator, + ) -> Self { + let trie_updates = trie_updates.into_iter().collect::>(); + let in_memory_cursor = OverlayCursor::account(&trie_updates); Self { cursor, db_cursor_state: DbCursorState::new(false), @@ -91,13 +245,15 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } } - /// Create new storage trie cursor from an indexed trie updates overlay. + /// Create new storage trie cursor with full trie updates reference. + /// This allows the cursor to switch between storage tries when `set_hashed_address` is called. pub fn new_storage( cursor: C, - trie_updates: &'a TrieUpdatesOverlay, + trie_updates: impl IntoIterator, hashed_address: B256, ) -> Self { - let (in_memory_cursor, db_wiped) = trie_updates.storage_overlay(hashed_address); + let trie_updates = trie_updates.into_iter().collect::>(); + let (in_memory_cursor, db_wiped) = Self::get_storage_overlay(&trie_updates, hashed_address); Self { cursor, db_cursor_state: DbCursorState::new(db_wiped), @@ -110,6 +266,14 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { } } + /// Returns the storage overlay for `hashed_address` and whether it was deleted. + fn get_storage_overlay( + trie_updates: &[&'a TrieUpdatesSorted], + hashed_address: B256, + ) -> (OverlayCursor<'a>, bool) { + OverlayCursor::storage(trie_updates, hashed_address) + } + /// Returns a mutable reference to the underlying cursor if it's not wiped, None otherwise. fn get_cursor_mut(&mut self) -> Option<&mut C> { (!self.db_cursor_state.is_wiped()).then_some(&mut self.cursor) @@ -121,10 +285,6 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Positions the DB cursor state using the underlying cursor. 
fn cursor_seek(&mut self, key: Nibbles) -> Result<(), DatabaseError> { - if !self.db_cursor_state.should_seek(&key) { - return Ok(()) - } - let entry = self.get_cursor_mut().map(|c| c.seek(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(entry); Ok(()) @@ -155,19 +315,13 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { /// Performs a k-way merge over the positioned overlay cursors and the DB cursor. fn choose_next_entry(&mut self) -> Result, DatabaseError> { loop { - let mem_entry = self.in_memory_cursor.min_current_entry(); - let db_entry = self.db_cursor_state.entry(); - let next_key = match (mem_entry, db_entry) { - (Some((mem_key, _)), Some((db_key, _))) => mem_key.min(*db_key), - (Some((mem_key, _)), None) => mem_key, - (None, Some((db_key, _))) => *db_key, - (None, None) => return Ok(None), + let mem_key = self.in_memory_cursor.min_current_key(); + let db_key = self.db_cursor_state.entry().map(|(key, _)| *key); + let Some(next_key) = mem_key.into_iter().chain(db_key).min() else { + return Ok(None); }; - if let Some((mem_key, mem_value)) = mem_entry && - mem_key == next_key - { - let mem_value = mem_value.clone(); + if let Some(mem_value) = self.in_memory_cursor.highest_priority_value_at(&next_key) { if let Some(node) = mem_value { return Ok(Some((next_key, node))) } @@ -201,11 +355,7 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { if mem_value.is_some() { self.deferred_overlay_seek_start = Some(idx + 1); } - mem_value.clone().map(|node| (key, node)) - } else if let Some(db_entry) = self.db_cursor_state.exact_entry(&key) { - Some(db_entry.clone()) - } else if !self.db_cursor_state.may_contain_exact(&key) { - None + mem_value.map(|node| (key, node)) } else { let db_entry = self.get_cursor_mut().map(|c| c.seek_exact(key)).transpose()?.flatten(); self.db_cursor_state.set_entry(db_entry); @@ -228,8 +378,8 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.deferred_overlay_seek_start = None; match 
self.in_memory_cursor.seek_until_exact(&key) { Some((idx, Some(node))) => { + let entry = Some((key, node)); self.deferred_overlay_seek_start = Some(idx + 1); - let entry = Some((key, node.clone())); self.set_last_key(&entry); return Ok(entry); } @@ -260,11 +410,10 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { self.in_memory_cursor.seek_from(start, &last_key); } self.in_memory_cursor.first_after(&last_key); - - match self.db_cursor_state.entry().map(|(db_key, _)| *db_key) { - Some(db_key) if db_key == last_key => self.cursor_next()?, - Some(db_key) if db_key > last_key => {} - _ => self.cursor_first_after(last_key)?, + if self.db_cursor_state.entry().is_some_and(|(db_key, _)| db_key == &last_key) { + self.cursor_next()?; + } else { + self.cursor_first_after(last_key)?; } let entry = self.choose_next_entry()?; @@ -281,9 +430,9 @@ impl TrieCursor for InMemoryTrieCursor<'_, C> { fn reset(&mut self) { self.cursor.reset(); - - self.db_cursor_state.reset_position(); self.in_memory_cursor.reset(); + + self.db_cursor_state.set_entry(None); self.deferred_overlay_seek_start = None; self.last_key = None; #[cfg(debug_assertions)] @@ -297,205 +446,17 @@ impl TrieStorageCursor for InMemoryTrieCursor<'_, C> { fn set_hashed_address(&mut self, hashed_address: B256) { self.reset(); self.cursor.set_hashed_address(hashed_address); - let db_wiped = - self.trie_updates.retarget_storage_overlay(&mut self.in_memory_cursor, hashed_address); + let (in_memory_cursor, db_wiped) = + Self::get_storage_overlay(&self.trie_updates, hashed_address); + self.in_memory_cursor = in_memory_cursor; self.db_cursor_state = DbCursorState::new(db_wiped); } } -/// Trie updates overlays ordered from highest to lowest precedence. -#[derive(Clone, Debug, Default)] -pub struct TrieUpdatesOverlay { - account_overlay: Arc>, - storage_overlays: TrieStorageOverlays, - layer_capacity: usize, -} - -impl TrieUpdatesOverlay { - /// Create a new indexed trie updates overlay stack. 
- pub fn new(updates: Vec>) -> Self { - let layer_capacity = updates.len(); - let account_overlay = Self::build_account_overlay(&updates); - let storage_overlays = Self::build_storage_overlays(&updates); - Self { account_overlay, storage_overlays, layer_capacity } - } - - /// Returns `true` if the overlay does not contain any trie updates. - pub fn is_empty(&self) -> bool { - self.account_overlay.is_empty() && self.storage_overlays.is_empty() - } - - fn build_account_overlay(updates: &[Arc]) -> Arc> { - Arc::new( - updates - .iter() - .filter(|update| !update.account_nodes_ref().is_empty()) - .map(|update| TrieOverlayLayer::new(Arc::clone(update), update.account_nodes_ref())) - .collect(), - ) - } - - fn build_storage_overlays(updates: &[Arc]) -> TrieStorageOverlays { - if let [update] = updates { - return TrieStorageOverlays::Single(Arc::clone(update)) - } - - let storage_overlay_capacity = - updates.iter().map(|update| update.storage_tries_ref().len()).sum(); - let mut overlays: B256Map = - B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); - - for update in updates { - Self::push_storage_layer(&mut overlays, update); - } - - TrieStorageOverlays::Indexed(Arc::new(overlays)) - } - - /// Add a trie updates layer at the end of the precedence stack. 
- pub fn push_layer(&mut self, update: Arc) { - self.layer_capacity += 1; - if !update.account_nodes_ref().is_empty() { - Arc::make_mut(&mut self.account_overlay) - .push(TrieOverlayLayer::new(Arc::clone(&update), update.account_nodes_ref())); - } - self.storage_overlays.push_layer(update); - } - - fn push_storage_layer( - overlays: &mut B256Map, - update: &Arc, - ) { - for (hashed_address, storage) in update.storage_tries_ref() { - let overlay = overlays.entry(*hashed_address).or_default(); - if overlay.db_wiped { - continue; - } - - if !storage.storage_nodes_ref().is_empty() { - overlay - .layers - .push(TrieOverlayLayer::new(Arc::clone(update), storage.storage_nodes_ref())); - } - - if storage.is_deleted() { - overlay.db_wiped = true; - } - } - } - - fn account_overlay(&self) -> OverlayCursor<'_> { - OverlayCursor::with_capacity(self.account_overlay.as_slice(), self.layer_capacity) - } - - fn storage_overlay(&self, hashed_address: B256) -> (OverlayCursor<'_>, bool) { - match &self.storage_overlays { - TrieStorageOverlays::Single(update) => { - let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { - return (OverlayCursor::with_entries(&[], self.layer_capacity), false) - }; - ( - OverlayCursor::with_entries(storage.storage_nodes_ref(), self.layer_capacity), - storage.is_deleted(), - ) - } - TrieStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - return (OverlayCursor::with_entries(&[], self.layer_capacity), false) - }; - - ( - OverlayCursor::with_capacity(overlay.layers.as_slice(), self.layer_capacity), - overlay.db_wiped, - ) - } - } - } - - fn retarget_storage_overlay<'a>( - &'a self, - cursor: &mut OverlayCursor<'a>, - hashed_address: B256, - ) -> bool { - match &self.storage_overlays { - TrieStorageOverlays::Single(update) => { - let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { - cursor.retarget_entries(&[]); - return false - }; - 
cursor.retarget_entries(storage.storage_nodes_ref()); - storage.is_deleted() - } - TrieStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - cursor.retarget_entries(&[]); - return false - }; - cursor.retarget(overlay.layers.as_slice()); - overlay.db_wiped - } - } - } -} - -impl AsRef for TrieUpdatesOverlay { - fn as_ref(&self) -> &Self { - self - } -} - -#[derive(Clone, Debug, Default)] -struct TrieStorageOverlay { - layers: Vec, - db_wiped: bool, -} - -#[derive(Clone, Debug)] -enum TrieStorageOverlays { - Single(Arc), - Indexed(Arc>), -} - -impl Default for TrieStorageOverlays { - fn default() -> Self { - Self::Indexed(Default::default()) - } -} - -impl TrieStorageOverlays { - fn is_empty(&self) -> bool { - match self { - Self::Single(update) => update.storage_tries_ref().is_empty(), - Self::Indexed(overlays) => overlays.is_empty(), - } - } - - fn push_layer(&mut self, update: Arc) { - match self { - Self::Single(existing) => { - let storage_overlay_capacity = - existing.storage_tries_ref().len() + update.storage_tries_ref().len(); - let mut overlays: B256Map = - B256Map::with_capacity_and_hasher(storage_overlay_capacity, Default::default()); - TrieUpdatesOverlay::push_storage_layer(&mut overlays, existing); - TrieUpdatesOverlay::push_storage_layer(&mut overlays, &update); - *self = Self::Indexed(Arc::new(overlays)); - } - Self::Indexed(overlays) => { - TrieUpdatesOverlay::push_storage_layer(Arc::make_mut(overlays), &update); - } - } - } -} - -type OverlayCursor<'a> = PositionedOverlayCursor<'a, Nibbles, Option>; -type TrieOverlayLayer = OverlayLayer>; - #[cfg(test)] mod tests { use super::*; use crate::trie_cursor::mock::MockTrieCursor; - use alloy_primitives::map::B256Map; use parking_lot::Mutex; use std::{collections::BTreeMap, sync::Arc}; @@ -514,8 +475,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(test_case.in_memory_nodes, 
Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let mut results = Vec::new(); @@ -538,87 +498,6 @@ mod tests { ); } - fn branch_node(id: u16) -> BranchNodeCompact { - BranchNodeCompact::new(id, id, 0, vec![], None) - } - - fn storage_trie_updates( - hashed_address: B256, - is_deleted: bool, - storage_nodes: Vec<(Nibbles, Option)>, - ) -> TrieUpdatesSorted { - let mut storage_tries = B256Map::default(); - storage_tries.insert( - hashed_address, - crate::updates::StorageTrieUpdatesSorted { is_deleted, storage_nodes }, - ); - TrieUpdatesSorted::new(vec![], storage_tries) - } - - fn mock_storage_cursor( - hashed_address: B256, - storage_tries: B256Map>, - ) -> MockTrieCursor { - let visited_storage_keys = - storage_tries.keys().map(|key| (*key, Default::default())).collect(); - MockTrieCursor::new_storage( - Arc::new(storage_tries), - Arc::new(visited_storage_keys), - hashed_address, - ) - .unwrap() - } - - fn storage_overlay_snapshot( - overlay: &TrieUpdatesOverlay, - hashed_address: B256, - ) -> (Vec)>>, bool) { - match &overlay.storage_overlays { - TrieStorageOverlays::Single(update) => { - let Some(storage) = update.storage_tries_ref().get(&hashed_address) else { - return (Vec::new(), false) - }; - let layers = (!storage.storage_nodes_ref().is_empty()) - .then(|| vec![storage.storage_nodes_ref().to_vec()]) - .unwrap_or_default(); - (layers, storage.is_deleted()) - } - TrieStorageOverlays::Indexed(overlays) => { - let Some(overlay) = overlays.get(&hashed_address) else { - return (Vec::new(), false) - }; - ( - overlay.layers.iter().map(|layer| layer.entries().to_vec()).collect(), - overlay.db_wiped, - ) - } - } - } - - #[test] - fn test_incremental_storage_push_matches_rebuilt_overlay() { - let hashed_address = B256::with_last_byte(1); - let top = 
Arc::new(storage_trie_updates( - hashed_address, - false, - vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], - )); - let lower = Arc::new(storage_trie_updates( - hashed_address, - true, - vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], - )); - - let mut incremental = TrieUpdatesOverlay::new(vec![Arc::clone(&top)]); - incremental.push_layer(Arc::clone(&lower)); - let rebuilt = TrieUpdatesOverlay::new(vec![top, lower]); - - assert_eq!( - storage_overlay_snapshot(&incremental, hashed_address), - storage_overlay_snapshot(&rebuilt, hashed_address) - ); - } - #[test] fn test_empty_db_and_memory() { let test_case = InMemoryTrieCursorTestCase { @@ -780,8 +659,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek_exact(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -824,8 +702,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(); assert_eq!( @@ -892,12 +769,10 @@ mod tests { ], Default::default(), ); - let overlay = TrieUpdatesOverlay::new(vec![ - Arc::new(higher_priority), - Arc::new(exact_hit), - Arc::new(lower_priority), - ]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account( + mock_cursor, + 
[&higher_priority, &exact_hit, &lower_priority], + ); let result = cursor.seek(Nibbles::from_nibbles([0x5])).unwrap(); assert_eq!( @@ -907,6 +782,12 @@ mod tests { BranchNodeCompact::new(0b0101, 0b0101, 0, vec![], None) )) ); + assert_eq!(cursor.in_memory_cursor.cursors[0].idx, 1); + assert_eq!(cursor.in_memory_cursor.cursors[1].idx, 0); + assert_eq!( + cursor.in_memory_cursor.cursors[2].idx, 0, + "lower-priority overlay should not be sought after an exact overlay hit" + ); assert!(visited_keys.lock().is_empty(), "exact overlay hit should not touch the DB cursor"); let result = cursor.next().unwrap(); @@ -938,8 +819,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); let result = cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(); assert_eq!( @@ -1047,8 +927,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); assert_eq!(cursor.current().unwrap(), None); @@ -1099,8 +978,7 @@ mod tests { let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); let trie_updates = TrieUpdatesSorted::new(in_memory_nodes, Default::default()); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(trie_updates)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); // Seek to beginning should return 
None (all nodes are deleted) tracing::debug!("seeking to 0x"); @@ -1124,7 +1002,6 @@ mod tests { ]; for seek_key in seek_keys { - cursor.reset(); tracing::debug!("seeking to {seek_key:?}"); let result = cursor.seek(seek_key).unwrap(); assert_eq!( @@ -1140,72 +1017,36 @@ mod tests { } #[test] - fn test_seek_reuses_exact_db_position() { + fn test_seek_can_move_backwards() { let db_nodes = BTreeMap::from([ - (Nibbles::from_nibbles([0x1]), branch_node(1)), - (Nibbles::from_nibbles([0x2]), branch_node(2)), + (Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None)), + (Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None)), ]); let db_nodes_arc = Arc::new(db_nodes); let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = TrieUpdatesOverlay::default(); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); - assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), - Some((Nibbles::from_nibbles([0x1]), branch_node(1))) + let trie_updates = TrieUpdatesSorted::new( + vec![( + Nibbles::from_nibbles([0x2]), + Some(BranchNodeCompact::new(2, 2, 0, vec![], None)), + )], + Default::default(), ); - assert_eq!(visited_keys.lock().len(), 1); - - assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x2]), branch_node(2)))); - assert_eq!(visited_keys.lock().len(), 2); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&trie_updates]); assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), - Some((Nibbles::from_nibbles([0x2]), branch_node(2))) + cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), + Some((Nibbles::from_nibbles([0x3]), BranchNodeCompact::new(3, 3, 0, vec![], None))) ); - assert_eq!(visited_keys.lock().len(), 2, "seek should reuse the exact DB position"); - } - - #[test] - fn 
test_seek_reuses_ahead_db_position() { - let db_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x3]), branch_node(3))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = TrieUpdatesOverlay::default(); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); - assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), - Some((Nibbles::from_nibbles([0x3]), branch_node(3))) + cursor.seek(Nibbles::from_nibbles([0x1])).unwrap(), + Some((Nibbles::from_nibbles([0x1]), BranchNodeCompact::new(1, 1, 0, vec![], None))) ); - assert_eq!(visited_keys.lock().len(), 1); - assert_eq!( - cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), - Some((Nibbles::from_nibbles([0x3]), branch_node(3))) + cursor.next().unwrap(), + Some((Nibbles::from_nibbles([0x2]), BranchNodeCompact::new(2, 2, 0, vec![], None))) ); - assert_eq!(visited_keys.lock().len(), 1, "seek should reuse an ahead DB position"); - } - - #[test] - fn test_seek_does_not_reseek_exhausted_db() { - let db_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x1]), branch_node(1))]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys.clone()); - - let overlay = TrieUpdatesOverlay::default(); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); - - assert_eq!(cursor.seek(Nibbles::from_nibbles([0x2])).unwrap(), None); - assert_eq!(visited_keys.lock().len(), 1); - - assert_eq!(cursor.seek(Nibbles::from_nibbles([0x3])).unwrap(), None); - assert_eq!(visited_keys.lock().len(), 1, "exhausted DB cursor should stay exhausted"); } #[test] @@ -1243,8 +1084,7 @@ mod tests { ], Default::default(), ); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, 
&overlay); + let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, [&newest, &oldest]); let mut results = Vec::new(); if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { @@ -1264,56 +1104,10 @@ mod tests { ); } - #[test] - fn test_indexed_account_overlay_resolves_by_precedence() { - let db_nodes = BTreeMap::from([ - (Nibbles::from_nibbles([0x1]), branch_node(1)), - (Nibbles::from_nibbles([0x2]), branch_node(2)), - (Nibbles::from_nibbles([0x4]), branch_node(4)), - ]); - let db_nodes_arc = Arc::new(db_nodes); - let visited_keys = Arc::new(Mutex::new(Vec::new())); - let mock_cursor = MockTrieCursor::new(db_nodes_arc, visited_keys); - - let newest = TrieUpdatesSorted::new( - vec![ - (Nibbles::from_nibbles([0x2]), None), - (Nibbles::from_nibbles([0x3]), Some(branch_node(30))), - ], - Default::default(), - ); - let oldest = TrieUpdatesSorted::new( - vec![ - (Nibbles::from_nibbles([0x1]), Some(branch_node(10))), - (Nibbles::from_nibbles([0x2]), Some(branch_node(20))), - (Nibbles::from_nibbles([0x3]), Some(branch_node(3))), - ], - Default::default(), - ); - let overlay = TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(oldest)]); - let mut cursor = InMemoryTrieCursor::new_account(mock_cursor, &overlay); - - let mut results = Vec::new(); - if let Some(entry) = cursor.seek(Nibbles::default()).unwrap() { - results.push(entry); - while let Some(entry) = cursor.next().unwrap() { - results.push(entry); - } - } - - assert_eq!( - results, - vec![ - (Nibbles::from_nibbles([0x1]), branch_node(10)), - (Nibbles::from_nibbles([0x3]), branch_node(30)), - (Nibbles::from_nibbles([0x4]), branch_node(4)), - ] - ); - } - #[test] fn test_storage_deletion_overlay_hides_lower_precedence_sources() { use crate::updates::StorageTrieUpdatesSorted; + use alloy_primitives::map::B256Map; let hashed_address = B256::with_last_byte(1); let mut db_storage = B256Map::default(); @@ -1372,9 +1166,11 @@ mod tests { ); let hidden = TrieUpdatesSorted::new(vec![], hidden_storage); - 
let overlay = - TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); - let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); + let mut cursor = InMemoryTrieCursor::new_storage( + mock_cursor, + [&newest, &deleting, &hidden], + hashed_address, + ); assert_eq!( cursor.seek(Nibbles::default()).unwrap(), @@ -1387,88 +1183,6 @@ mod tests { assert_eq!(cursor.next().unwrap(), None); } - #[test] - fn test_indexed_storage_deletion_overlay_hides_lower_precedence_sources() { - let hashed_address = B256::with_last_byte(1); - let mut db_storage = B256Map::default(); - db_storage.insert( - hashed_address, - BTreeMap::from([(Nibbles::from_nibbles([0x4]), branch_node(4))]), - ); - let mock_cursor = mock_storage_cursor(hashed_address, db_storage); - - let newest = storage_trie_updates( - hashed_address, - false, - vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], - ); - let deleting = storage_trie_updates( - hashed_address, - true, - vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], - ); - let hidden = storage_trie_updates( - hashed_address, - false, - vec![(Nibbles::from_nibbles([0x3]), Some(branch_node(3)))], - ); - let overlay = - TrieUpdatesOverlay::new(vec![Arc::new(newest), Arc::new(deleting), Arc::new(hidden)]); - let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, hashed_address); - - assert_eq!( - cursor.seek(Nibbles::default()).unwrap(), - Some((Nibbles::from_nibbles([0x1]), branch_node(1))) - ); - assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x2]), branch_node(2)))); - assert_eq!(cursor.next().unwrap(), None); - } - - #[test] - fn test_indexed_storage_overlay_switches_hashed_address() { - let first_address = B256::with_last_byte(1); - let second_address = B256::with_last_byte(2); - let mut db_storage = B256Map::default(); - db_storage.insert( - first_address, - BTreeMap::from([(Nibbles::from_nibbles([0x4]), branch_node(4))]), - ); - 
db_storage.insert( - second_address, - BTreeMap::from([(Nibbles::from_nibbles([0x5]), branch_node(5))]), - ); - let mock_cursor = mock_storage_cursor(first_address, db_storage); - - let first_overlay = storage_trie_updates( - first_address, - false, - vec![(Nibbles::from_nibbles([0x1]), Some(branch_node(1)))], - ); - let second_overlay = storage_trie_updates( - second_address, - false, - vec![(Nibbles::from_nibbles([0x2]), Some(branch_node(2)))], - ); - let overlay = - TrieUpdatesOverlay::new(vec![Arc::new(first_overlay), Arc::new(second_overlay)]); - let mut cursor = InMemoryTrieCursor::new_storage(mock_cursor, &overlay, first_address); - - assert_eq!( - cursor.seek(Nibbles::default()).unwrap(), - Some((Nibbles::from_nibbles([0x1]), branch_node(1))) - ); - assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x4]), branch_node(4)))); - - cursor.set_hashed_address(second_address); - - assert_eq!( - cursor.seek(Nibbles::default()).unwrap(), - Some((Nibbles::from_nibbles([0x2]), branch_node(2))) - ); - assert_eq!(cursor.next().unwrap(), Some((Nibbles::from_nibbles([0x5]), branch_node(5)))); - assert_eq!(cursor.next().unwrap(), None); - } - mod proptest_tests { use super::*; use proptest::prelude::*; @@ -1540,7 +1254,9 @@ mod tests { entries: &[(Nibbles, BranchNodeCompact)], position: &mut Option, ) -> Option<(Nibbles, BranchNodeCompact)> { - let next_idx = position.and_then(|idx| idx.checked_add(1))?; + let Some(next_idx) = position.and_then(|idx| idx.checked_add(1)) else { + return None; + }; if next_idx < entries.len() { *position = Some(next_idx); @@ -1647,11 +1363,8 @@ mod tests { let trie_updates = overlays .into_iter() .map(|in_memory_nodes| TrieUpdatesSorted::new(in_memory_nodes, Default::default())) - .map(Arc::new) .collect::>(); - let overlay = TrieUpdatesOverlay::new(trie_updates); - let mut test_cursor = - InMemoryTrieCursor::new_account(mock_cursor, &overlay); + let mut test_cursor = InMemoryTrieCursor::new_account(mock_cursor, 
trie_updates.iter()); // Test: seek to the beginning first let control_first = @@ -1663,14 +1376,9 @@ mod tests { "Initial seek returned", ); assert_eq!(control_first, test_first, "Initial seek mismatch"); - let mut seek_floor = control_first.as_ref().map(|(key, _)| *key); // Execute a sequence of random operations for op in ops { - if reference_position.is_none() { - break - } - match op { CursorOp::Next => { let control_result = @@ -1682,11 +1390,8 @@ mod tests { "Next returned", ); assert_eq!(control_result, test_result, "Next operation mismatch"); - let Some((key, _)) = control_result else { break }; - seek_floor = Some(key); } CursorOp::Seek(key) => { - let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek(key).unwrap(); @@ -1697,11 +1402,8 @@ mod tests { "Seek returned", ); assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); - let Some((key, _)) = control_result else { break }; - seek_floor = Some(key); } CursorOp::SeekExact(key) => { - let key = seek_floor.map_or(key, |floor| key.max(floor)); let control_result = reference_seek_exact(&expected_combined, &mut reference_position, key); let test_result = test_cursor.seek_exact(key).unwrap(); @@ -1712,8 +1414,6 @@ mod tests { "SeekExact returned", ); assert_eq!(control_result, test_result, "SeekExact operation mismatch for key {:?}", key); - let Some((key, _)) = control_result else { break }; - seek_floor = Some(key); } } }