diff --git a/Cargo.lock b/Cargo.lock index c38a00a0973..0a8a54f7643 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7637,7 +7637,6 @@ dependencies = [ "reth-metrics", "reth-primitives-traits", "reth-storage-api", - "reth-tasks", "reth-testing-utils", "reth-trie", "revm-database", diff --git a/crates/chain-state/Cargo.toml b/crates/chain-state/Cargo.toml index ab85cc8fce9..a4ce20fd434 100644 --- a/crates/chain-state/Cargo.toml +++ b/crates/chain-state/Cargo.toml @@ -20,7 +20,6 @@ reth-metrics.workspace = true reth-ethereum-primitives.workspace = true reth-primitives-traits = { workspace = true, features = ["dashmap"] } reth-storage-api.workspace = true -reth-tasks = { workspace = true, features = ["rayon"], optional = true } reth-trie.workspace = true # ethereum @@ -84,6 +83,5 @@ test-utils = [ "reth-primitives-traits/test-utils", "reth-trie/test-utils", "reth-ethereum-primitives/test-utils", - "reth-tasks?/test-utils", ] -rayon = ["dep:rayon", "dep:reth-tasks"] +rayon = ["dep:rayon"] diff --git a/crates/chain-state/src/deferred_trie.rs b/crates/chain-state/src/deferred_trie.rs index ea71b92c15d..5dea55b7be3 100644 --- a/crates/chain-state/src/deferred_trie.rs +++ b/crates/chain-state/src/deferred_trie.rs @@ -1,8 +1,9 @@ +use alloy_primitives::B256; use parking_lot::Mutex; use reth_metrics::{metrics::Counter, Metrics}; use reth_trie::{ updates::{TrieUpdates, TrieUpdatesSorted}, - HashedPostState, HashedPostStateSorted, + HashedPostState, HashedPostStateSorted, TrieInputSorted, }; use std::{ fmt, @@ -10,26 +11,46 @@ use std::{ }; use tracing::{debug_span, instrument}; -/// Shared handle to asynchronously populated per-block trie data. +/// Shared handle to asynchronously populated trie data. /// -/// If the background task has not completed by the time trie data is needed, the caller computes -/// the sorted data synchronously from the retained unsorted inputs and caches the result. +/// Uses a lock + fallback computation approach for deadlock-free access. 
+/// If the deferred task hasn't completed, computes trie data synchronously +/// from stored unsorted inputs rather than blocking. #[derive(Clone)] pub struct DeferredTrieData { /// Shared deferred state holding either raw inputs (pending) or computed result (ready). - state: Arc>, + state: Arc>, } -/// Sorted trie data computed for one executed block. -/// -/// Cumulative overlays are intentionally managed by -/// [`StateTrieOverlayManager`](crate::StateTrieOverlayManager), not by each block. +/// Sorted trie data computed for an executed block. +/// These represent the complete set of sorted trie data required to persist +/// block state for, and generate proofs on top of, a block. #[derive(Clone, Debug, Default)] pub struct ComputedTrieData { /// Sorted hashed post-state produced by execution. pub hashed_state: Arc, /// Sorted trie updates produced by state root computation. pub trie_updates: Arc, + /// Trie input bundled with its anchor hash, if available. + pub anchored_trie_input: Option, +} + +/// Trie input bundled with its anchor hash. +/// +/// The `trie_input` contains the **cumulative** overlay of all in-memory ancestor blocks, +/// not just this block's changes. Child blocks reuse the parent's overlay in O(1) by +/// cloning the Arc-wrapped data. +/// +/// The `anchor_hash` is metadata indicating which persisted base state this overlay +/// sits on top of. It is CRITICAL for overlay reuse decisions: an overlay built on top +/// of Anchor A cannot be reused for a block anchored to Anchor B, as it would result +/// in an incorrect state. +#[derive(Clone, Debug)] +pub struct AnchoredTrieInput { + /// The persisted ancestor hash this trie input is anchored to. + pub anchor_hash: B256, + /// Cumulative trie input overlay from all in-memory ancestors. + pub trie_input: Arc, } /// Metrics for deferred trie computation. 
@@ -46,9 +67,8 @@ static DEFERRED_TRIE_METRICS: LazyLock = LazyLock::new(DeferredTrieMetrics::default); /// Internal state for deferred trie data. -enum DeferredTrieDataInner { +enum DeferredState { /// Data is not yet available; raw inputs stored for fallback computation. - /// /// Wrapped in `Option` to allow taking ownership during computation. Pending(Option), /// Data has been computed and is ready. @@ -62,16 +82,20 @@ struct PendingInputs { hashed_state: Arc, /// Unsorted trie updates from state root computation. trie_updates: Arc, + /// The persisted ancestor hash this trie input is anchored to. + anchor_hash: B256, + /// Deferred trie data from ancestor blocks for merging. + ancestors: Vec, } impl fmt::Debug for DeferredTrieData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let state = self.state.lock(); match &*state { - DeferredTrieDataInner::Pending(_) => { + DeferredState::Pending(_) => { f.debug_struct("DeferredTrieData").field("state", &"pending").finish() } - DeferredTrieDataInner::Ready(_) => { + DeferredState::Ready(_) => { f.debug_struct("DeferredTrieData").field("state", &"ready").finish() } } @@ -80,24 +104,64 @@ impl fmt::Debug for DeferredTrieData { impl DeferredTrieData { /// Create a new pending handle with fallback inputs for synchronous computation. - pub fn pending(hashed_state: Arc, trie_updates: Arc) -> Self { + /// + /// If the async task hasn't completed when `wait_cloned` is called, the trie data + /// will be computed synchronously from these inputs. This eliminates deadlock risk. 
+ /// + /// # Arguments + /// * `hashed_state` - Unsorted hashed post-state from execution + /// * `trie_updates` - Unsorted trie updates from state root computation + /// * `anchor_hash` - The persisted ancestor hash this trie input is anchored to + /// * `ancestors` - Deferred trie data from ancestor blocks for merging + pub fn pending( + hashed_state: Arc, + trie_updates: Arc, + anchor_hash: B256, + ancestors: Vec, + ) -> Self { Self { - state: Arc::new(Mutex::new(DeferredTrieDataInner::Pending(Some(PendingInputs { + state: Arc::new(Mutex::new(DeferredState::Pending(Some(PendingInputs { hashed_state, trie_updates, + anchor_hash, + ancestors, })))), } } /// Create a handle that is already populated with the given [`ComputedTrieData`]. + /// + /// Useful when trie data is available immediately. + /// [`Self::wait_cloned`] will return without any computation. pub fn ready(bundle: ComputedTrieData) -> Self { - Self { state: Arc::new(Mutex::new(DeferredTrieDataInner::Ready(bundle))) } + Self { state: Arc::new(Mutex::new(DeferredState::Ready(bundle))) } } - /// Sorts block execution outputs. - pub fn sort( + /// Sort block execution outputs and build a [`TrieInputSorted`] overlay. + /// + /// The trie input overlay accumulates sorted hashed state (account/storage changes) and + /// trie node updates from all in-memory ancestor blocks. This overlay is required for: + /// - Computing state roots on top of in-memory blocks + /// - Generating storage/account proofs for unpersisted state + /// + /// # Process + /// 1. Sort the current block's hashed state and trie updates + /// 2. Reuse parent's cached overlay if available (O(1) - the common case) + /// 3. Otherwise, rebuild overlay from ancestors (rare fallback) + /// 4. Extend the overlay with this block's sorted data + /// + /// Used by both the async background task and the synchronous fallback path. 
+ /// + /// # Arguments + /// * `hashed_state` - Unsorted hashed post-state (account/storage changes) from execution + /// * `trie_updates` - Unsorted trie node updates from state root computation + /// * `anchor_hash` - The persisted ancestor hash this trie input is anchored to + /// * `ancestors` - Deferred trie data from ancestor blocks for merging (oldest -> newest) + pub fn sort_and_build_trie_input( hashed_state: Arc, trie_updates: Arc, + anchor_hash: B256, + ancestors: &[Self], ) -> ComputedTrieData { let _span = debug_span!(target: "engine::tree::deferred_trie", "sort_inputs").entered(); @@ -125,24 +189,200 @@ impl DeferredTrieData { }, ); - ComputedTrieData::new(Arc::new(sorted_hashed_state), Arc::new(sorted_trie_updates)) + drop(_span); + + let _span = debug_span!(target: "engine::tree::deferred_trie", "build_overlay").entered(); + + // Reuse parent's overlay if available and anchors match. + // We can only reuse the parent's overlay if it was built on top of the same + // persisted anchor. If the anchor has changed (e.g., due to persistence), + // the parent's overlay is relative to an old state and cannot be used. + let overlay = if let Some(parent) = ancestors.last() { + let parent_data = parent.wait_cloned(); + + match &parent_data.anchored_trie_input { + // Case 1: Parent has cached overlay AND anchors match. + Some(AnchoredTrieInput { anchor_hash: parent_anchor, trie_input }) + if *parent_anchor == anchor_hash => + { + // O(1): Reuse parent's overlay, extend with current block's data. + let mut overlay = TrieInputSorted::new( + Arc::clone(&trie_input.nodes), + Arc::clone(&trie_input.state), + Default::default(), // prefix_sets are per-block, not cumulative + ); + let _span = + debug_span!(target: "engine::tree::deferred_trie", "extend_overlay") + .entered(); + // Only trigger COW clone if there's actually data to add. 
+ #[cfg(feature = "rayon")] + { + rayon::join( + || { + if !sorted_hashed_state.is_empty() { + Arc::make_mut(&mut overlay.state) + .extend_ref_and_sort(&sorted_hashed_state); + } + }, + || { + if !sorted_trie_updates.is_empty() { + Arc::make_mut(&mut overlay.nodes) + .extend_ref_and_sort(&sorted_trie_updates); + } + }, + ); + } + #[cfg(not(feature = "rayon"))] + { + if !sorted_hashed_state.is_empty() { + Arc::make_mut(&mut overlay.state) + .extend_ref_and_sort(&sorted_hashed_state); + } + if !sorted_trie_updates.is_empty() { + Arc::make_mut(&mut overlay.nodes) + .extend_ref_and_sort(&sorted_trie_updates); + } + } + overlay + } + // Case 2: Parent exists but anchor mismatch or no cached overlay. + // We must rebuild from the ancestors list (which only contains unpersisted blocks). + _ => Self::merge_ancestors_into_overlay( + ancestors, + &sorted_hashed_state, + &sorted_trie_updates, + ), + } + } else { + // Case 3: No in-memory ancestors (first block after persisted anchor). + // Build overlay with just this block's data. + Self::merge_ancestors_into_overlay(&[], &sorted_hashed_state, &sorted_trie_updates) + }; + + ComputedTrieData::with_trie_input( + Arc::new(sorted_hashed_state), + Arc::new(sorted_trie_updates), + anchor_hash, + Arc::new(overlay), + ) + } + + /// Merge all ancestors and current block's data into a single overlay. + /// + /// This is the rebuild path, used when the parent has no cached + /// `anchored_trie_input`, when its anchor differs from this block's anchor, + /// or when there are no in-memory ancestors (first block after the anchor). + /// When the parent's cached overlay matches, it is reused and this is skipped. + /// + /// When the `rayon` feature is enabled: + /// 1. Collects ancestor data (states and updates) + /// 2. 
Merges states and trie updates in parallel using k-way merge + #[cfg(feature = "rayon")] + fn merge_ancestors_into_overlay( + ancestors: &[Self], + sorted_hashed_state: &HashedPostStateSorted, + sorted_trie_updates: &TrieUpdatesSorted, + ) -> TrieInputSorted { + // Early exit: no ancestors means just wrap current block's data + if ancestors.is_empty() { + return TrieInputSorted::new( + Arc::new(sorted_trie_updates.clone()), + Arc::new(sorted_hashed_state.clone()), + Default::default(), + ); + } + + // Collect ancestor data in reverse (newest to oldest) for merge_slice + let (states, updates): (Vec<_>, Vec<_>) = ancestors + .iter() + .rev() + .map(|a| { + // Note: we can assume that this trie data has already been computed + let data = a.wait_cloned(); + (data.hashed_state, data.trie_updates) + }) + .unzip(); + + // Merge state and nodes in parallel using k-way merge + let (state, nodes) = rayon::join( + || { + let mut merged = HashedPostStateSorted::merge_slice(&states); + merged.extend_ref_and_sort(sorted_hashed_state); + merged + }, + || { + let mut merged = TrieUpdatesSorted::merge_slice(&updates); + merged.extend_ref_and_sort(sorted_trie_updates); + merged + }, + ); + + TrieInputSorted::new(Arc::new(nodes), Arc::new(state), Default::default()) + } + + /// Sequential fallback when rayon is not available. 
+ #[cfg(not(feature = "rayon"))] + fn merge_ancestors_into_overlay( + ancestors: &[Self], + sorted_hashed_state: &HashedPostStateSorted, + sorted_trie_updates: &TrieUpdatesSorted, + ) -> TrieInputSorted { + let _span = debug_span!(target: "engine::tree::deferred_trie", "merge_ancestors", num_ancestors = ancestors.len()).entered(); + let mut overlay = TrieInputSorted::default(); + + let state_mut = Arc::make_mut(&mut overlay.state); + let nodes_mut = Arc::make_mut(&mut overlay.nodes); + + for ancestor in ancestors { + let ancestor_data = ancestor.wait_cloned(); + state_mut.extend_ref_and_sort(ancestor_data.hashed_state.as_ref()); + nodes_mut.extend_ref_and_sort(ancestor_data.trie_updates.as_ref()); + } + + state_mut.extend_ref_and_sort(sorted_hashed_state); + nodes_mut.extend_ref_and_sort(sorted_trie_updates); + + overlay } /// Returns trie data, computing synchronously if the async task hasn't completed. + /// + /// - If the async task has completed (`Ready`), returns the cached result. + /// - If pending, computes synchronously from stored inputs. + /// + /// Deadlock is avoided as long as the provided ancestors form a true ancestor chain (a DAG): + /// - Each block only waits on its ancestors (blocks on the path to the persisted root) + /// - Sibling blocks (forks) are never in each other's ancestor lists + /// - A block never waits on its descendants + /// + /// Given that invariant, circular wait dependencies are impossible. #[instrument(level = "debug", target = "engine::tree::deferred_trie", skip_all)] pub fn wait_cloned(&self) -> ComputedTrieData { let mut state = self.state.lock(); match &mut *state { - DeferredTrieDataInner::Ready(bundle) => { + // If the deferred trie data is ready, return the cached result. 
+ DeferredState::Ready(bundle) => { DEFERRED_TRIE_METRICS.deferred_trie_async_ready.increment(1); bundle.clone() } - DeferredTrieDataInner::Pending(maybe_inputs) => { + // If the deferred trie data is pending, compute the trie data synchronously and return + // the result. This is the fallback path if the async task hasn't completed. + DeferredState::Pending(maybe_inputs) => { DEFERRED_TRIE_METRICS.deferred_trie_sync_fallback.increment(1); let inputs = maybe_inputs.take().expect("inputs must be present in Pending state"); - let computed = Self::sort(inputs.hashed_state, inputs.trie_updates); - *state = DeferredTrieDataInner::Ready(computed.clone()); + + let computed = Self::sort_and_build_trie_input( + inputs.hashed_state, + inputs.trie_updates, + inputs.anchor_hash, + &inputs.ancestors, + ); + *state = DeferredState::Ready(computed.clone()); + + // Release lock before inputs (and its ancestors) drop to avoid holding it + // while their potential last Arc refs drop (which could trigger recursive locking) + drop(state); computed } @@ -151,102 +391,586 @@ impl DeferredTrieData { } impl ComputedTrieData { - /// Construct sorted trie data for one block. + /// Construct sorted trie data without an accumulated trie input overlay. pub const fn new( hashed_state: Arc, trie_updates: Arc, ) -> Self { - Self { hashed_state, trie_updates } + Self::without_trie_input(hashed_state, trie_updates) + } + + /// Construct a bundle that includes trie input anchored to a persisted ancestor. + pub const fn with_trie_input( + hashed_state: Arc, + trie_updates: Arc, + anchor_hash: B256, + trie_input: Arc, + ) -> Self { + Self { + hashed_state, + trie_updates, + anchored_trie_input: Some(AnchoredTrieInput { anchor_hash, trie_input }), + } + } + + /// Construct a bundle without trie input or anchor information. + /// + /// Unlike [`Self::with_trie_input`], this constructor omits the accumulated trie input overlay + /// and its anchor hash. 
Use this when the trie input is not needed, such as in block builders + /// or sequencers that don't require proof generation on top of in-memory state. + /// + /// The trie input anchor identifies the persisted block hash from which the in-memory overlay + /// was built. Without it, consumers cannot determine which on-disk state to combine with. + pub const fn without_trie_input( + hashed_state: Arc, + trie_updates: Arc, + ) -> Self { + Self { hashed_state, trie_updates, anchored_trie_input: None } + } + + /// Returns the anchor hash, if present. + pub fn anchor_hash(&self) -> Option { + self.anchored_trie_input.as_ref().map(|anchored| anchored.anchor_hash) + } + + /// Returns the trie input, if present. + pub fn trie_input(&self) -> Option<&Arc> { + self.anchored_trie_input.as_ref().map(|anchored| &anchored.trie_input) } } #[cfg(test)] mod tests { use super::*; - use alloy_primitives::{map::B256Map, B256, U256}; + use alloy_primitives::{map::B256Map, U256}; use reth_primitives_traits::Account; - use reth_trie::{updates::TrieUpdates, HashedStorage}; + use reth_trie::updates::TrieUpdates; use std::{ + sync::Arc, thread, time::{Duration, Instant}, }; + fn empty_bundle() -> ComputedTrieData { + ComputedTrieData { + hashed_state: Arc::default(), + trie_updates: Arc::default(), + anchored_trie_input: None, + } + } + fn empty_pending() -> DeferredTrieData { + empty_pending_with_anchor(B256::ZERO) + } + + fn empty_pending_with_anchor(anchor: B256) -> DeferredTrieData { DeferredTrieData::pending( Arc::new(HashedPostState::default()), Arc::new(TrieUpdates::default()), + anchor, + Vec::new(), ) } + /// Verifies that a ready handle returns immediately without computation. 
#[test] fn ready_returns_immediately() { - let bundle = ComputedTrieData::default(); + let bundle = empty_bundle(); let deferred = DeferredTrieData::ready(bundle.clone()); + let start = Instant::now(); + let result = deferred.wait_cloned(); + let elapsed = start.elapsed(); + + assert_eq!(result.hashed_state, bundle.hashed_state); + assert_eq!(result.trie_updates, bundle.trie_updates); + assert_eq!(result.anchor_hash(), bundle.anchor_hash()); + assert!(elapsed < Duration::from_millis(20)); + } + + /// Verifies that a pending handle computes trie data synchronously via fallback. + #[test] + fn pending_computes_fallback() { + let deferred = empty_pending(); + + // wait_cloned should compute from inputs without blocking + let start = Instant::now(); let result = deferred.wait_cloned(); + let elapsed = start.elapsed(); - assert_eq!(result.hashed_state.total_len(), bundle.hashed_state.total_len()); - assert_eq!(result.trie_updates.total_len(), bundle.trie_updates.total_len()); + // Should return quickly (fallback computation) + assert!(elapsed < Duration::from_millis(100)); + assert!(result.hashed_state.is_empty()); } + /// Verifies that fallback computation result is cached for subsequent calls. #[test] - fn pending_computes_and_caches_result() { + fn fallback_result_is_cached() { let deferred = empty_pending(); + // First call computes and should stash the result let first = deferred.wait_cloned(); + // Second call should reuse the cached result (same Arc pointer) let second = deferred.wait_cloned(); assert!(Arc::ptr_eq(&first.hashed_state, &second.hashed_state)); assert!(Arc::ptr_eq(&first.trie_updates, &second.trie_updates)); + assert_eq!(first.anchor_hash(), second.anchor_hash()); } + /// Verifies that concurrent `wait_cloned` calls result in only one computation, + /// with all callers receiving the same cached result. 
#[test] - fn concurrent_waits_share_computed_result() { + fn concurrent_wait_cloned_computes_once() { let deferred = empty_pending(); - let deferred2 = deferred.clone(); - let handle = thread::spawn(move || deferred2.wait_cloned()); - let result1 = deferred.wait_cloned(); - let result2 = handle.join().unwrap(); + // Spawn multiple threads that all call wait_cloned concurrently + let handles: Vec<_> = (0..10) + .map(|_| { + let d = deferred.clone(); + thread::spawn(move || d.wait_cloned()) + }) + .collect(); + + // Collect all results + let results: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect(); + + // All results should share the same Arc pointers (same computed result) + let first = &results[0]; + for result in &results[1..] { + assert!(Arc::ptr_eq(&first.hashed_state, &result.hashed_state)); + assert!(Arc::ptr_eq(&first.trie_updates, &result.trie_updates)); + } + } + + /// Tests that ancestor trie data is merged during fallback computation and that the + /// resulting `ComputedTrieData` uses the current block's anchor hash, not the ancestor's. 
+ #[test] + fn ancestors_are_merged() { + // Create ancestor with some data + let ancestor_bundle = ComputedTrieData { + hashed_state: Arc::default(), + trie_updates: Arc::default(), + anchored_trie_input: Some(AnchoredTrieInput { + anchor_hash: B256::with_last_byte(1), + trie_input: Arc::new(TrieInputSorted::default()), + }), + }; + let ancestor = DeferredTrieData::ready(ancestor_bundle); + + // Create pending with ancestor + let deferred = DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + B256::with_last_byte(2), + vec![ancestor], + ); - assert!(Arc::ptr_eq(&result1.hashed_state, &result2.hashed_state)); - assert!(Arc::ptr_eq(&result1.trie_updates, &result2.trie_updates)); + let result = deferred.wait_cloned(); + // Should have the current block's anchor, not the ancestor's + assert_eq!(result.anchor_hash(), Some(B256::with_last_byte(2))); } + /// Ensures ancestor overlays are merged oldest -> newest so latest state wins (no overwrite by + /// older ancestors). 
#[test] - fn sorts_non_empty_inputs() { - let hashed_address = B256::with_last_byte(1); - let hashed_slot = B256::with_last_byte(2); - let hashed_state = HashedPostState::default() - .with_accounts([(hashed_address, Some(Account::default()))]) - .with_storages([( - hashed_address, - HashedStorage::from_iter(false, [(hashed_slot, U256::from(1))]), - )]); - - let deferred = - DeferredTrieData::pending(Arc::new(hashed_state), Arc::new(TrieUpdates::default())); + fn ancestors_merge_in_chronological_order() { + let key = B256::with_last_byte(1); + // Oldest ancestor sets nonce to 1 + let oldest_state = HashedPostStateSorted::new( + vec![(key, Some(Account { nonce: 1, balance: U256::ZERO, bytecode_hash: None }))], + B256Map::default(), + ); + // Newest ancestor overwrites nonce to 2 + let newest_state = HashedPostStateSorted::new( + vec![(key, Some(Account { nonce: 2, balance: U256::ZERO, bytecode_hash: None }))], + B256Map::default(), + ); + + let oldest = ComputedTrieData { + hashed_state: Arc::new(oldest_state), + trie_updates: Arc::default(), + anchored_trie_input: None, + }; + let newest = ComputedTrieData { + hashed_state: Arc::new(newest_state), + trie_updates: Arc::default(), + anchored_trie_input: None, + }; + + // Pass ancestors oldest -> newest; newest should take precedence + let deferred = DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + B256::ZERO, + vec![DeferredTrieData::ready(oldest), DeferredTrieData::ready(newest)], + ); + let result = deferred.wait_cloned(); + let overlay_state = &result.anchored_trie_input.as_ref().unwrap().trie_input.state.accounts; + assert_eq!(overlay_state.len(), 1); + let (_, account) = &overlay_state[0]; + assert_eq!(account.unwrap().nonce, 2); + } - assert_eq!(result.hashed_state.total_len(), 2); - assert_eq!(result.trie_updates.total_len(), 0); + /// Helper to create a ready block with anchored trie input containing specific state. 
+ fn ready_block_with_state( + anchor_hash: B256, + accounts: Vec<(B256, Option)>, + ) -> DeferredTrieData { + let hashed_state = Arc::new(HashedPostStateSorted::new(accounts, B256Map::default())); + let trie_updates = Arc::default(); + let mut overlay = TrieInputSorted::default(); + Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state.as_ref()); + + DeferredTrieData::ready(ComputedTrieData { + hashed_state, + trie_updates, + anchored_trie_input: Some(AnchoredTrieInput { + anchor_hash, + trie_input: Arc::new(overlay), + }), + }) } + /// Verifies that first block after anchor (no ancestors) creates empty base overlay. #[test] - fn wait_does_not_block_after_first_compute() { - let mut accounts = B256Map::default(); - for i in 0..100 { - accounts.insert(B256::with_last_byte(i), Some(Account::default())); - } - let deferred = DeferredTrieData::pending( - Arc::new(HashedPostState { accounts, storages: Default::default() }), + fn first_block_after_anchor_creates_empty_base() { + let anchor = B256::with_last_byte(1); + let key = B256::with_last_byte(42); + let account = Account { nonce: 1, balance: U256::ZERO, bytecode_hash: None }; + + // First block after anchor - no ancestors + let first_block = DeferredTrieData::pending( + Arc::new(HashedPostState::default().with_accounts([(key, Some(account))])), Arc::new(TrieUpdates::default()), + anchor, + vec![], // No ancestors ); - let _ = deferred.wait_cloned(); + let result = first_block.wait_cloned(); + + // Should have overlay with just this block's data + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.anchor_hash, anchor); + assert_eq!(overlay.trie_input.state.accounts.len(), 1); + let (found_key, found_account) = &overlay.trie_input.state.accounts[0]; + assert_eq!(*found_key, key); + assert_eq!(found_account.unwrap().nonce, 1); + } + + /// Verifies that parent's overlay is reused when the anchors match. 
+ #[test] + fn reuses_parent_overlay() { + let anchor = B256::with_last_byte(1); + let key = B256::with_last_byte(42); + let account = Account { nonce: 100, balance: U256::ZERO, bytecode_hash: None }; + + // Create parent with anchored trie input + let parent = ready_block_with_state(anchor, vec![(key, Some(account))]); + + // Create child - should reuse parent's overlay + let child = DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + anchor, + vec![parent], + ); + + let result = child.wait_cloned(); + + // Verify parent's account is in the overlay + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.anchor_hash, anchor); + assert_eq!(overlay.trie_input.state.accounts.len(), 1); + let (found_key, found_account) = &overlay.trie_input.state.accounts[0]; + assert_eq!(*found_key, key); + assert_eq!(found_account.unwrap().nonce, 100); + } + + /// Verifies that parent's overlay is NOT reused when anchor changes (after persist). + /// The overlay data is dependent on the anchor, so it must be rebuilt from the + /// remaining ancestors. 
+ #[test] + fn rebuilds_overlay_when_anchor_changes() { + let old_anchor = B256::with_last_byte(1); + let new_anchor = B256::with_last_byte(2); + let key = B256::with_last_byte(42); + let account = Account { nonce: 50, balance: U256::ZERO, bytecode_hash: None }; + + // Create parent with OLD anchor + let parent = ready_block_with_state(old_anchor, vec![(key, Some(account))]); + + // Create child with NEW anchor (simulates after persist) + // Should NOT reuse parent's overlay because anchor changed + let child = DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + new_anchor, + vec![parent], + ); + + let result = child.wait_cloned(); + + // Verify result uses new anchor + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.anchor_hash, new_anchor); + + // Crucially, since we provided `parent` in ancestors but it has a different anchor, + // the code falls back to `merge_ancestors_into_overlay`. + // `merge_ancestors_into_overlay` reads `parent.hashed_state` (which has the account). + // So the account IS present, but it was obtained via REBUILD, not REUSE. + // We can check `DEFERRED_TRIE_METRICS` if we want to be sure, but functionally: + assert_eq!(overlay.trie_input.state.accounts.len(), 1); + let (found_key, found_account) = &overlay.trie_input.state.accounts[0]; + assert_eq!(*found_key, key); + assert_eq!(found_account.unwrap().nonce, 50); + } + + /// Verifies that parent without `anchored_trie_input` triggers rebuild path. 
+ #[test] + fn rebuilds_when_parent_has_no_anchored_input() { + let anchor = B256::with_last_byte(1); + let key = B256::with_last_byte(42); + let account = Account { nonce: 25, balance: U256::ZERO, bytecode_hash: None }; + + // Create parent WITHOUT anchored trie input (e.g., from without_trie_input constructor) + let parent_state = + HashedPostStateSorted::new(vec![(key, Some(account))], B256Map::default()); + let parent = DeferredTrieData::ready(ComputedTrieData { + hashed_state: Arc::new(parent_state), + trie_updates: Arc::default(), + anchored_trie_input: None, // No anchored input + }); + + // Create child - should rebuild from parent's hashed_state + let child = DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + anchor, + vec![parent], + ); + + let result = child.wait_cloned(); + + // Verify overlay is built and contains parent's data + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.anchor_hash, anchor); + assert_eq!(overlay.trie_input.state.accounts.len(), 1); + } + + /// Verifies that a chain of blocks with matching anchors builds correct cumulative overlay. 
+ #[test] + fn chain_of_blocks_builds_cumulative_overlay() { + let anchor = B256::with_last_byte(1); + let key1 = B256::with_last_byte(1); + let key2 = B256::with_last_byte(2); + let key3 = B256::with_last_byte(3); + + // Block 1: sets account at key1 + let block1 = ready_block_with_state( + anchor, + vec![(key1, Some(Account { nonce: 1, balance: U256::ZERO, bytecode_hash: None }))], + ); + + // Block 2: adds account at key2, ancestor is block1 + let block2_hashed = HashedPostState::default().with_accounts([( + key2, + Some(Account { nonce: 2, balance: U256::ZERO, bytecode_hash: None }), + )]); + let block2 = DeferredTrieData::pending( + Arc::new(block2_hashed), + Arc::new(TrieUpdates::default()), + anchor, + vec![block1.clone()], + ); + // Compute block2's trie data + let block2_computed = block2.wait_cloned(); + let block2_ready = DeferredTrieData::ready(block2_computed); + + // Block 3: adds account at key3, ancestor is block2 (which includes block1) + let block3_hashed = HashedPostState::default().with_accounts([( + key3, + Some(Account { nonce: 3, balance: U256::ZERO, bytecode_hash: None }), + )]); + let block3 = DeferredTrieData::pending( + Arc::new(block3_hashed), + Arc::new(TrieUpdates::default()), + anchor, + vec![block1, block2_ready], + ); + + let result = block3.wait_cloned(); + + // Verify all three accounts are in the cumulative overlay + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.trie_input.state.accounts.len(), 3); + + // Accounts should be sorted by key (B256 ordering) + let accounts = &overlay.trie_input.state.accounts; + assert!(accounts.iter().any(|(k, a)| *k == key1 && a.unwrap().nonce == 1)); + assert!(accounts.iter().any(|(k, a)| *k == key2 && a.unwrap().nonce == 2)); + assert!(accounts.iter().any(|(k, a)| *k == key3 && a.unwrap().nonce == 3)); + } + + /// Verifies that child block's state overwrites parent's state for the same key. 
+ #[test] + fn child_state_overwrites_parent() { + let anchor = B256::with_last_byte(1); + let key = B256::with_last_byte(42); + + // Parent sets nonce to 10 + let parent = ready_block_with_state( + anchor, + vec![(key, Some(Account { nonce: 10, balance: U256::ZERO, bytecode_hash: None }))], + ); + + // Child overwrites nonce to 99 + let child_hashed = HashedPostState::default().with_accounts([( + key, + Some(Account { nonce: 99, balance: U256::ZERO, bytecode_hash: None }), + )]); + let child = DeferredTrieData::pending( + Arc::new(child_hashed), + Arc::new(TrieUpdates::default()), + anchor, + vec![parent], + ); + + let result = child.wait_cloned(); + + // Verify child's value wins (extend_ref uses later value) + let overlay = result.anchored_trie_input.as_ref().unwrap(); + // Note: extend_ref may result in duplicate keys; check the last occurrence + let accounts = &overlay.trie_input.state.accounts; + let last_account = accounts.iter().rfind(|(k, _)| *k == key).unwrap(); + assert_eq!(last_account.1.unwrap().nonce, 99); + } + + /// Stress test: verify O(N) behavior by building a chain of many blocks. + /// This test ensures the fix doesn't regress - previously this would be O(N²). 
+ #[test] + fn long_chain_builds_in_linear_time() { + let anchor = B256::with_last_byte(1); + let num_blocks = 50; // Enough to notice O(N²) vs O(N) difference + + let mut ancestors: Vec = Vec::new(); + let start = Instant::now(); - let _ = deferred.wait_cloned(); - assert!(start.elapsed() < Duration::from_millis(10)); + for i in 0..num_blocks { + let key = B256::with_last_byte(i as u8); + let account = Account { nonce: i as u64, balance: U256::ZERO, bytecode_hash: None }; + let hashed = HashedPostState::default().with_accounts([(key, Some(account))]); + + let block = DeferredTrieData::pending( + Arc::new(hashed), + Arc::new(TrieUpdates::default()), + anchor, + ancestors.clone(), + ); + + // Compute and add to ancestors for next iteration + let computed = block.wait_cloned(); + ancestors.push(DeferredTrieData::ready(computed)); + } + + let elapsed = start.elapsed(); + + // With O(N) fix, 50 blocks should complete quickly (< 1 second) + // With O(N²), this would take significantly longer + assert!( + elapsed < Duration::from_secs(2), + "Chain of {num_blocks} blocks took {:?}, possible O(N²) regression", + elapsed + ); + + // Verify final overlay has all accounts + let final_result = ancestors.last().unwrap().wait_cloned(); + let overlay = final_result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.trie_input.state.accounts.len(), num_blocks); + } + + /// Verifies that a multi-ancestor overlay is rebuilt when anchor changes. + /// This simulates the "persist prefix then keep building" scenario where: + /// 1. A chain of blocks is built with anchor A + /// 2. Some blocks are persisted, changing anchor to B + /// 3. 
New blocks must rebuild the overlay from the remaining ancestors + #[test] + fn multi_ancestor_overlay_rebuilt_after_anchor_change() { + let old_anchor = B256::with_last_byte(1); + let new_anchor = B256::with_last_byte(2); + let key1 = B256::with_last_byte(1); + let key2 = B256::with_last_byte(2); + let key3 = B256::with_last_byte(3); + let key4 = B256::with_last_byte(4); + + // Build a chain of 3 blocks with old_anchor + let block1 = ready_block_with_state( + old_anchor, + vec![(key1, Some(Account { nonce: 1, balance: U256::ZERO, bytecode_hash: None }))], + ); + + let block2_hashed = HashedPostState::default().with_accounts([( + key2, + Some(Account { nonce: 2, balance: U256::ZERO, bytecode_hash: None }), + )]); + let block2 = DeferredTrieData::pending( + Arc::new(block2_hashed), + Arc::new(TrieUpdates::default()), + old_anchor, + vec![block1.clone()], + ); + let block2_ready = DeferredTrieData::ready(block2.wait_cloned()); + + let block3_hashed = HashedPostState::default().with_accounts([( + key3, + Some(Account { nonce: 3, balance: U256::ZERO, bytecode_hash: None }), + )]); + let block3 = DeferredTrieData::pending( + Arc::new(block3_hashed), + Arc::new(TrieUpdates::default()), + old_anchor, + vec![block1.clone(), block2_ready.clone()], + ); + let block3_ready = DeferredTrieData::ready(block3.wait_cloned()); + + // Verify block3's overlay has all 3 accounts with old_anchor + let block3_overlay = block3_ready.wait_cloned().anchored_trie_input.unwrap(); + assert_eq!(block3_overlay.anchor_hash, old_anchor); + assert_eq!(block3_overlay.trie_input.state.accounts.len(), 3); + + // Now simulate persist: create block4 with NEW anchor but same ancestors. + // To verify correct rebuilding, we must provide ALL unpersisted ancestors. + // If we only provided block3, the rebuild would only see block3's state. + // We pass block1, block2, block3 to simulate that they are all still in memory + // but the anchor check forces a rebuild (e.g. artificial anchor change). 
+ let block4_hashed = HashedPostState::default().with_accounts([( + key4, + Some(Account { nonce: 4, balance: U256::ZERO, bytecode_hash: None }), + )]); + let block4 = DeferredTrieData::pending( + Arc::new(block4_hashed), + Arc::new(TrieUpdates::default()), + new_anchor, // Different anchor - simulates post-persist + vec![block1, block2_ready, block3_ready], + ); + + let result = block4.wait_cloned(); + + // Verify: + // 1. New anchor is used in result + assert_eq!(result.anchor_hash(), Some(new_anchor)); + + // 2. All 4 accounts are in the overlay (rebuilt from ancestors + extended) + let overlay = result.anchored_trie_input.as_ref().unwrap(); + assert_eq!(overlay.trie_input.state.accounts.len(), 4); + + // 3. All accounts have correct values + let accounts = &overlay.trie_input.state.accounts; + assert!(accounts.iter().any(|(k, a)| *k == key1 && a.unwrap().nonce == 1)); + assert!(accounts.iter().any(|(k, a)| *k == key2 && a.unwrap().nonce == 2)); + assert!(accounts.iter().any(|(k, a)| *k == key3 && a.unwrap().nonce == 3)); + assert!(accounts.iter().any(|(k, a)| *k == key4 && a.unwrap().nonce == 4)); } } diff --git a/crates/chain-state/src/in_memory.rs b/crates/chain-state/src/in_memory.rs index 60a85e44781..702031adfbb 100644 --- a/crates/chain-state/src/in_memory.rs +++ b/crates/chain-state/src/in_memory.rs @@ -17,7 +17,10 @@ use reth_primitives_traits::{ SignedTransaction, }; use reth_storage_api::StateProviderBox; -use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, LazyTrieData, SortedTrieData}; +use reth_trie::{ + updates::TrieUpdatesSorted, HashedPostStateSorted, LazyTrieData, SortedTrieData, + TrieInputSorted, +}; use std::{collections::BTreeMap, sync::Arc, time::Instant}; use tokio::sync::{broadcast, watch}; @@ -317,6 +320,19 @@ impl CanonicalInMemoryState { /// This will update the links between blocks and remove all blocks that are [.. /// `persisted_height`]. 
pub fn remove_persisted_blocks(&self, persisted_num_hash: BlockNumHash) { + self.remove_persisted_blocks_until(persisted_num_hash, persisted_num_hash.number); + } + + /// Removes blocks from the in-memory state through `remove_until` while still reporting the + /// provided block as the persisted tip. + /// + /// This is used when block bodies/plain state have been persisted further than trie data, so a + /// suffix still needs to remain in memory for trie-backed operations. + pub fn remove_persisted_blocks_until( + &self, + persisted_num_hash: BlockNumHash, + remove_until: BlockNumber, + ) { self.set_persisted(persisted_num_hash); // if the persisted hash is not in the canonical in memory state, do nothing, because it // means canonical blocks were not actually persisted. @@ -334,16 +350,15 @@ impl CanonicalInMemoryState { let mut numbers = self.inner.in_memory_state.numbers.write(); let mut blocks = self.inner.in_memory_state.blocks.write(); - let BlockNumHash { number: persisted_height, hash: _ } = persisted_num_hash; + let remove_until = remove_until.min(persisted_num_hash.number); // clear all numbers numbers.clear(); - // drain all blocks and only keep the ones that are not persisted (below the persisted - // height) + // Drain all blocks and keep only the suffix that still has to stay in memory. let mut old_blocks = blocks .drain() - .filter(|(_, b)| b.block_ref().recovered_block().number() > persisted_height) + .filter(|(_, b)| b.block_ref().recovered_block().number() > remove_until) .map(|(_, b)| b.block.clone()) .collect::>(); @@ -803,9 +818,10 @@ impl ExecutedBlock { /// This is useful if the trie data is populated somewhere else, e.g. asynchronously /// after the block was validated. /// - /// The [`DeferredTrieData`] handle allows expensive trie operations (sorting hashed state and - /// trie updates) to be performed outside the critical validation path. This can improve latency - /// for time-sensitive operations like block validation. 
+ /// The [`DeferredTrieData`] handle allows expensive trie operations (sorting hashed state, + /// sorting trie updates, and building the accumulated trie input overlay) to be performed + /// outside the critical validation path. This can improve latency for time-sensitive + /// operations like block validation. /// /// If the data hasn't been populated when [`Self::trie_data()`] is called, computation /// occurs synchronously from stored inputs, so there is no blocking or deadlock risk. @@ -874,6 +890,20 @@ impl ExecutedBlock { self.trie_data().trie_updates } + /// Returns the trie input anchored to the persisted ancestor. + /// + /// May compute trie data synchronously if the deferred task hasn't completed. + #[inline] + pub fn trie_input(&self) -> Option> { + self.trie_data().trie_input().cloned() + } + + /// Returns the anchor hash of the trie input, if present. + #[inline] + pub fn anchor_hash(&self) -> Option { + self.trie_data().anchor_hash() + } + /// Returns a [`BlockNumber`] of the block. #[inline] pub fn block_number(&self) -> BlockNumber { diff --git a/crates/chain-state/src/lazy_overlay.rs b/crates/chain-state/src/lazy_overlay.rs new file mode 100644 index 00000000000..d0a779613c7 --- /dev/null +++ b/crates/chain-state/src/lazy_overlay.rs @@ -0,0 +1,328 @@ +//! Lazy overlay computation for trie input. +//! +//! This module provides [`LazyOverlay`], a type that computes the [`TrieInputSorted`] +//! lazily on first access. This allows execution to start before the trie overlay +//! is fully computed. + +use crate::{EthPrimitives, ExecutedBlock}; +use alloy_primitives::B256; +use reth_primitives_traits::{ + dashmap::{self, DashMap}, + AlloyBlockHeader, NodePrimitives, +}; +use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; +use std::sync::Arc; +use tracing::debug; + +/// Inputs captured for lazy overlay computation. +#[derive(Clone)] +struct LazyOverlayInputs { + /// In-memory blocks from tip to anchor child. 
+ /// + /// Blocks must be provided in reverse chain order (newest to oldest). + blocks: Vec>, +} + +/// Lazily computed trie overlay. +/// +/// Captures the inputs needed to compute a [`TrieInputSorted`] and defers the actual +/// computation until first access. +/// +/// Blocks must be provided in reverse chain order (newest to oldest), so the first block is the +/// chain tip and the last block is the oldest in-memory block in the chain segment. +/// +/// # Fast Path vs Slow Path +/// +/// - **Fast path**: If the tip block's cached `anchored_trie_input` is ready and its `anchor_hash` +/// matches our expected anchor, we can reuse it directly (O(1)). +/// - **Slow path**: Otherwise, we merge all ancestor blocks' trie data into a new overlay. +#[derive(Clone)] +pub struct LazyOverlay { + /// Computed results, cached by requested anchor hash. + inner: Arc>>, + /// Inputs for lazy computation. + inputs: LazyOverlayInputs, +} + +impl std::fmt::Debug for LazyOverlay { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LazyOverlay") + .field( + "oldest_block_parent_hash", + &self.inputs.blocks.last().map(|block| block.recovered_block().parent_hash()), + ) + .field("num_blocks", &self.inputs.blocks.len()) + .field("cached_anchors", &self.inner.len()) + .finish() + } +} + +impl LazyOverlay { + /// Create a new lazy overlay from in-memory blocks. + /// + /// # Arguments + /// + /// * `blocks` - Executed blocks in reverse chain order (newest to oldest) + pub fn new(blocks: Vec>) -> Self { + debug_assert!( + blocks.windows(2).all(|window| { + window[0].recovered_block().parent_hash() == window[1].recovered_block().hash() + }), + "LazyOverlay blocks must be ordered newest to oldest along a single chain" + ); + + Self { inner: Default::default(), inputs: LazyOverlayInputs { blocks } } + } + + /// Returns the number of in-memory blocks this overlay covers. 
+ pub const fn num_blocks(&self) -> usize { + self.inputs.blocks.len() + } + + /// Returns the oldest anchor hash this overlay can serve. + /// + /// This is the parent hash of the oldest block in the stored newest-to-oldest chain segment. + pub fn anchor_hash(&self) -> Option { + self.inputs.blocks.last().map(|block| block.recovered_block().parent_hash()) + } + + /// Returns true if there are no blocks in the overlay, or if one of the blocks has the given + /// hash as a parent hash. + pub fn has_anchor_hash(&self, hash: B256) -> bool { + self.inputs.blocks.is_empty() || + self.inputs.blocks.iter().any(|b| b.recovered_block().parent_hash() == hash) + } + + #[cfg(test)] + /// Returns true if the overlay has already been computed for the requested anchor. + pub fn is_computed(&self, anchor_hash: B256) -> bool { + self.inner.contains_key(&anchor_hash) + } + + /// Returns the computed trie input for the requested anchor, computing it if necessary. + /// + /// The first call triggers computation (which may block waiting for deferred data). + /// Subsequent calls for the same anchor return the cached result immediately. + pub fn get(&self, anchor_hash: B256) -> Arc { + match self.inner.entry(anchor_hash) { + dashmap::Entry::Occupied(entry) => { + debug!( + target: "chain_state::lazy_overlay", + %anchor_hash, + num_blocks = self.inputs.blocks.len(), + "Using cached lazy overlay result" + ); + Arc::clone(entry.get()) + } + dashmap::Entry::Vacant(entry) => { + let input = self.compute(anchor_hash); + entry.insert(Arc::clone(&input)); + input + } + } + } + + /// Returns the overlay as (nodes, state) tuple for use with `OverlayStateProviderFactory`. + pub fn as_overlay( + &self, + anchor_hash: B256, + ) -> (Arc, Arc) { + let input = self.get(anchor_hash); + (Arc::clone(&input.nodes), Arc::clone(&input.state)) + } + + /// Compute the trie input overlay. 
+ fn compute(&self, anchor_hash: B256) -> Arc { + let blocks = &self.inputs.blocks; + if blocks.is_empty() { + return Default::default() + } + + let Some(last_index) = + blocks.iter().position(|block| block.recovered_block().parent_hash() == anchor_hash) + else { + panic!( + "LazyOverlay does not contain a block whose parent hash matches requested anchor {anchor_hash}" + ); + }; + let blocks = &blocks[..=last_index]; + + // Fast path: Check if tip block's overlay is ready and anchor matches. + // The tip block (first in list) has the cumulative overlay from all ancestors up to the + // requested anchor. + if let Some(tip) = blocks.first() { + let data = tip.trie_data(); + if let Some(anchored) = &data.anchored_trie_input { + if anchored.anchor_hash == anchor_hash { + return Arc::clone(&anchored.trie_input); + } + debug!( + target: "chain_state::lazy_overlay", + computed_anchor = %anchored.anchor_hash, + %anchor_hash, + "Anchor mismatch, falling back to merge" + ); + } + } + + // Slow path: Merge the prefix of blocks from the tip back to the requested anchor. + Arc::new(Self::merge_blocks(blocks)) + } + + /// Merge all blocks' trie data into a single [`TrieInputSorted`]. + /// + /// Blocks are ordered newest to oldest. 
+ fn merge_blocks(blocks: &[ExecutedBlock]) -> TrieInputSorted { + if blocks.is_empty() { + return TrieInputSorted::default(); + } + + let state = HashedPostStateSorted::merge_batch( + blocks.iter().map(|block| block.trie_data().hashed_state), + ); + let nodes = TrieUpdatesSorted::merge_batch( + blocks.iter().map(|block| block.trie_data().trie_updates), + ); + + TrieInputSorted { state, nodes, prefix_sets: Default::default() } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{test_utils::TestBlockBuilder, ComputedTrieData, EthPrimitives, ExecutedBlock}; + use alloy_primitives::U256; + use reth_primitives_traits::Account; + use reth_trie::{updates::TrieUpdatesSorted, HashedPostState, HashedStorage}; + use std::sync::Arc; + + fn with_unique_state( + block: &ExecutedBlock, + id: u8, + ) -> ExecutedBlock { + let hashed_address = B256::with_last_byte(id); + let hashed_slot = B256::with_last_byte(id.saturating_add(32)); + let hashed_state = HashedPostState::default() + .with_accounts([(hashed_address, Some(Account::default()))]) + .with_storages([( + hashed_address, + HashedStorage::from_iter(false, [(hashed_slot, U256::from(id))]), + )]) + .into_sorted(); + + ExecutedBlock::new( + Arc::clone(&block.recovered_block), + Arc::clone(&block.execution_output), + ComputedTrieData::without_trie_input( + Arc::new(hashed_state), + Arc::new(TrieUpdatesSorted::default()), + ), + ) + } + + fn test_blocks() -> Vec> { + TestBlockBuilder::eth() + .get_executed_blocks(1..4) + .collect::>() + .into_iter() + .rev() + .enumerate() + .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) + .collect() + } + + #[test] + fn single_block_uses_data_directly() { + let block = TestBlockBuilder::eth().get_executed_block_with_number(1, B256::random()); + let anchor_hash = block.recovered_block().parent_hash(); + let overlay = LazyOverlay::new(vec![block]); + + assert!(!overlay.is_computed(anchor_hash)); + let _ = overlay.get(anchor_hash); + 
assert!(overlay.is_computed(anchor_hash)); + } + + #[test] + fn caches_results_per_anchor() { + let blocks = test_blocks(); + let prefix_anchor = blocks[2].recovered_block().hash(); + let full_anchor = blocks[2].recovered_block().parent_hash(); + let overlay = LazyOverlay::new(blocks); + + let prefix = overlay.get(prefix_anchor); + let full = overlay.get(full_anchor); + + assert!(overlay.is_computed(prefix_anchor)); + assert!(overlay.is_computed(full_anchor)); + assert!(!Arc::ptr_eq(&prefix, &full)); + assert!(Arc::ptr_eq(&prefix, &overlay.get(prefix_anchor))); + assert!(Arc::ptr_eq(&full, &overlay.get(full_anchor))); + } + + #[test] + fn requested_anchor_limits_the_merged_prefix() { + let blocks = test_blocks(); + let prefix_anchor = blocks[2].recovered_block().hash(); + let expected = LazyOverlay::merge_blocks(&blocks[..2]); + let overlay = LazyOverlay::new(blocks); + let actual = overlay.get(prefix_anchor); + + assert_eq!(actual.nodes.as_ref(), expected.nodes.as_ref()); + assert_eq!(actual.state.as_ref(), expected.state.as_ref()); + } + + #[test] + fn anchor_hash_returns_oldest_served_anchor() { + let blocks = test_blocks(); + let expected_anchor = blocks.last().unwrap().recovered_block().parent_hash(); + let overlay = LazyOverlay::new(blocks); + + assert_eq!(overlay.anchor_hash(), Some(expected_anchor)); + } + + #[test] + fn reuses_tip_overlay_when_anchor_matches() { + let mut blocks = test_blocks(); + let prefix_anchor = blocks[2].recovered_block().hash(); + let tip_overlay = Arc::new(LazyOverlay::merge_blocks(&blocks[..2])); + let tip_data = blocks[0].trie_data(); + + blocks[0] = ExecutedBlock::new( + Arc::clone(&blocks[0].recovered_block), + Arc::clone(&blocks[0].execution_output), + ComputedTrieData::with_trie_input( + tip_data.hashed_state, + tip_data.trie_updates, + prefix_anchor, + Arc::clone(&tip_overlay), + ), + ); + + let overlay = LazyOverlay::new(blocks); + let actual = overlay.get(prefix_anchor); + + assert!(Arc::ptr_eq(&actual, &tip_overlay)); + } 
+ + #[test] + #[should_panic( + expected = "LazyOverlay does not contain a block whose parent hash matches requested anchor" + )] + fn missing_anchor_panics() { + let blocks = test_blocks(); + let missing_anchor = blocks[0].recovered_block().hash(); + let overlay = LazyOverlay::new(blocks); + + let _ = overlay.get(missing_anchor); + } + + #[test] + #[should_panic( + expected = "LazyOverlay blocks must be ordered newest to oldest along a single chain" + )] + fn misordered_blocks_panic() { + let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..3).collect(); + let _ = LazyOverlay::new(blocks); + } +} diff --git a/crates/chain-state/src/lib.rs b/crates/chain-state/src/lib.rs index 6912fee626b..4bea662dfcd 100644 --- a/crates/chain-state/src/lib.rs +++ b/crates/chain-state/src/lib.rs @@ -17,8 +17,8 @@ pub use in_memory::*; mod deferred_trie; pub use deferred_trie::*; -mod state_trie_overlay; -pub use state_trie_overlay::*; +mod lazy_overlay; +pub use lazy_overlay::*; mod noop; diff --git a/crates/chain-state/src/state_trie_overlay.rs b/crates/chain-state/src/state_trie_overlay.rs deleted file mode 100644 index d7baef361fd..00000000000 --- a/crates/chain-state/src/state_trie_overlay.rs +++ /dev/null @@ -1,682 +0,0 @@ -//! Flattened state trie overlays for in-memory blocks. -//! -//! Payload validation needs a view of the state trie as of an in-memory parent block even when that -//! parent has not been persisted yet. [`StateTrieOverlayManager`] tracks those in-memory blocks and -//! builds reusable flattened state trie overlays on demand. 
- -use crate::{EthPrimitives, ExecutedBlock}; -use alloy_primitives::B256; -use reth_metrics::{ - metrics::{Counter, Histogram}, - Metrics, -}; -use reth_primitives_traits::{ - dashmap::{mapref::entry::Entry, DashMap}, - AlloyBlockHeader, NodePrimitives, -}; -#[cfg(feature = "rayon")] -use reth_tasks::WorkerPool; -use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; -use std::{fmt, sync::Arc, time::Instant}; -use tracing::{debug, trace}; - -/// Manages flattened state trie overlays for in-memory blocks. -/// -/// The manager owns the in-memory block graph and a cache of flattened state trie overlays keyed by -/// `(anchor_hash, tip_hash)`. -#[derive(Clone)] -pub struct StateTrieOverlayManager { - blocks: Arc>>, - overlays: Arc>>, - #[cfg(feature = "rayon")] - worker_pool: Option>, - metrics: StateTrieOverlayMetrics, -} - -/// Metrics for state trie overlay management. -#[derive(Clone, Metrics)] -#[metrics(scope = "sync.block_validation.state_trie_overlay")] -struct StateTrieOverlayMetrics { - /// Duration of overlay computation in seconds. - overlay_computation_duration_seconds: Histogram, - /// Number of requests satisfied by an existing overlay cache entry. - overlay_cache_reuses: Counter, - /// Number of overlay cache entries populated by computing an overlay. - overlay_cache_fills: Counter, -} - -impl Default for StateTrieOverlayManager { - fn default() -> Self { - Self { - blocks: Default::default(), - overlays: Default::default(), - #[cfg(feature = "rayon")] - worker_pool: None, - metrics: Default::default(), - } - } -} - -impl std::fmt::Debug for StateTrieOverlayManager { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("StateTrieOverlayManager") - .field("blocks", &self.blocks.len()) - .field("overlays", &self.overlays.len()) - .finish() - } -} - -impl StateTrieOverlayManager { - /// Create a new [`StateTrieOverlayManager`] backed by the given worker pool. 
- #[cfg(feature = "rayon")] - pub fn new(worker_pool: Arc) -> Self { - Self { - blocks: Default::default(), - overlays: Default::default(), - worker_pool: Some(worker_pool), - metrics: Default::default(), - } - } - - /// Inserts an executed in-memory block into the state trie overlay manager. - #[tracing::instrument( - level = "trace", - target = "chain_state::state_trie_overlay", - skip_all, - fields( - block_hash = %block.recovered_block().hash(), - parent_hash = %block.recovered_block().parent_hash(), - duplicate = false, - ) - )] - pub fn insert_block(&self, block: ExecutedBlock) { - let hash = block.recovered_block().hash(); - let parent_hash = block.recovered_block().parent_hash(); - let span = tracing::Span::current(); - - // First add the block to the live graph; duplicate inserts do not need cache work. - match self.blocks.entry(hash) { - Entry::Occupied(_) => { - span.record("duplicate", true); - debug!( - target: "chain_state::state_trie_overlay", - %hash, - %parent_hash, - "state trie overlay block already inserted" - ); - return - } - Entry::Vacant(entry) => { - entry.insert(block); - } - } - - // Snapshot matching parent overlays before spawning so DashMap iteration guards are - // dropped. 
- let cached_parent_overlays = self - .overlays - .iter() - .filter_map(|entry| { - let key = *entry.key(); - (key.tip_hash == parent_hash).then_some(key.anchor_hash) - }) - .collect::>(); - - debug!( - target: "chain_state::state_trie_overlay", - %hash, - %parent_hash, - "inserted block into state trie overlay manager" - ); - if cached_parent_overlays.is_empty() { - return - } - - #[cfg(feature = "rayon")] - let Some(worker_pool) = self.worker_pool.clone() else { - return - }; - - #[cfg(not(feature = "rayon"))] - let _ = cached_parent_overlays; - - #[cfg(feature = "rayon")] - { - let parent_span = span; - for anchor_hash in cached_parent_overlays { - let manager = ::clone(self); - let parent_span = parent_span.clone(); - worker_pool.spawn(move || { - let _span = tracing::trace_span!( - target: "chain_state::state_trie_overlay", - parent: parent_span, - "precompute_state_trie_overlay", - tip_hash = %hash, - anchor_hash = %anchor_hash, - ) - .entered(); - let _ = manager.get_overlay(hash, anchor_hash); - }); - } - } - } - - /// Removes blocks from the live block graph and prunes cached overlays that can no longer be - /// built from the remaining blocks. - #[tracing::instrument( - level = "trace", - target = "chain_state::state_trie_overlay", - skip_all, - fields( - block_count = tracing::field::Empty, - removed_blocks = tracing::field::Empty, - pruned_overlays = tracing::field::Empty, - ) - )] - pub fn remove_blocks(&self, hashes: impl IntoIterator) { - let span = tracing::Span::current(); - - // Remove blocks first, then prune overlays against the remaining block graph. 
- let mut block_count = 0usize; - let mut removed_blocks = 0usize; - let mut pruned_overlays = 0usize; - for hash in hashes { - block_count += 1; - removed_blocks += self.blocks.remove(&hash).is_some() as usize; - } - span.record("block_count", block_count); - span.record("removed_blocks", removed_blocks); - - if removed_blocks > 0 { - let overlays_before = self.overlays.len(); - let blocks = Arc::clone(&self.blocks); - self.overlays.retain(|key, _| { - key.tip_hash != key.anchor_hash && - Self::anchor_for_parent_in(blocks.as_ref(), key.tip_hash, key.anchor_hash) == - Some(key.anchor_hash) - }); - pruned_overlays = overlays_before.saturating_sub(self.overlays.len()); - span.record("pruned_overlays", pruned_overlays); - } - debug!( - target: "chain_state::state_trie_overlay", - block_count, - removed_blocks, - pruned_overlays, - "removed blocks from state trie overlay manager" - ); - } - - /// Returns the flattened overlay from `anchor_hash` to `parent_hash`. - #[tracing::instrument( - level = "trace", - target = "chain_state::state_trie_overlay", - skip_all, - fields(tip_hash = %parent_hash, anchor_hash = %anchor_hash) - )] - pub fn overlay_for_parent( - &self, - parent_hash: B256, - anchor_hash: B256, - ) -> Result<(Arc, Arc), StateTrieOverlayError> { - debug!( - target: "chain_state::state_trie_overlay", - tip_hash = %parent_hash, - %anchor_hash, - "loading state trie overlay for parent" - ); - let input = self.get_overlay(parent_hash, anchor_hash)?; - Ok((Arc::clone(&input.nodes), Arc::clone(&input.state))) - } - - #[tracing::instrument( - level = "trace", - target = "chain_state::state_trie_overlay", - skip_all, - fields( - tip_hash = %tip_hash, - anchor_hash = %anchor_hash, - cache_reused = tracing::field::Empty, - block_count = tracing::field::Empty, - parent_overlay_reused = tracing::field::Empty, - ) - )] - fn get_overlay( - &self, - tip_hash: B256, - anchor_hash: B256, - ) -> Result, StateTrieOverlayError> { - let key = OverlayCacheKey { anchor_hash, 
tip_hash }; - let span = tracing::Span::current(); - - if let Some(input) = self.overlays.get(&key).map(|entry| Arc::clone(entry.value())) { - self.metrics.overlay_cache_reuses.increment(1); - span.record("cache_reused", true); - return Ok(input) - } - span.record("cache_reused", false); - - // Resolve the block path and any cached parent overlay before locking the child entry. - let mut hash = tip_hash; - let mut blocks = Vec::new(); - loop { - let block = - self.blocks.get(&hash).ok_or(StateTrieOverlayError { tip_hash, anchor_hash })?; - let parent_hash = block.recovered_block().parent_hash(); - blocks.push(block.clone()); - - if parent_hash == anchor_hash { - break - } - hash = parent_hash; - } - span.record("block_count", blocks.len()); - let parent_input = blocks.first().and_then(|block| { - let parent_hash = block.recovered_block().parent_hash(); - (parent_hash != anchor_hash) - .then(|| { - self.overlays - .get(&OverlayCacheKey { anchor_hash, tip_hash: parent_hash }) - .map(|entry| Arc::clone(entry.value())) - }) - .flatten() - }); - span.record("parent_overlay_reused", parent_input.is_some()); - let compute_input = match parent_input { - Some(parent_input) => { - ComputeOverlayInput::ExtendCached { block: blocks.swap_remove(0), parent_input } - } - None => ComputeOverlayInput::MergeBlocks(blocks), - }; - - // The vacant entry is the cache-fill gate: racing callers block instead of recomputing. 
- let input = match self.overlays.entry(key) { - Entry::Occupied(entry) => { - self.metrics.overlay_cache_reuses.increment(1); - span.record("cache_reused", true); - return Ok(Arc::clone(entry.get())) - } - Entry::Vacant(entry) => { - self.metrics.overlay_cache_fills.increment(1); - let input = { - #[cfg(feature = "rayon")] - { - if let Some(worker_pool) = &self.worker_pool { - let compute_span = span; - let metrics = self.metrics.clone(); - Arc::new(worker_pool.install_fn(move || { - let _guard = compute_span.enter(); - compute_overlay(compute_input, anchor_hash, &metrics) - })) - } else { - Arc::new(compute_overlay(compute_input, anchor_hash, &self.metrics)) - } - } - - #[cfg(not(feature = "rayon"))] - { - Arc::new(compute_overlay(compute_input, anchor_hash, &self.metrics)) - } - }; - - entry.insert(Arc::clone(&input)); - input - } - }; - - Ok(input) - } - - /// Returns `preferred_anchor` if it is on the parent chain, otherwise the first missing parent. - /// - /// Returns `None` if `parent_hash` is not `preferred_anchor` and the manager does not contain a - /// block for `parent_hash`, meaning there is no in-memory parent chain to inspect. - pub fn anchor_for_parent(&self, parent_hash: B256, preferred_anchor: B256) -> Option { - Self::anchor_for_parent_in(self.blocks.as_ref(), parent_hash, preferred_anchor) - } - - fn anchor_for_parent_in( - blocks: &DashMap>, - parent_hash: B256, - preferred_anchor: B256, - ) -> Option { - if parent_hash == preferred_anchor { - return Some(preferred_anchor) - } - - let mut hash = parent_hash; - - loop { - let block_parent_hash = blocks.get(&hash)?.recovered_block().parent_hash(); - if block_parent_hash == preferred_anchor { - return Some(block_parent_hash) - } - if !blocks.contains_key(&block_parent_hash) { - return Some(block_parent_hash) - } - hash = block_parent_hash; - } - } -} - -/// Error returned when a state trie overlay cannot be built from the manager's current block set. 
-#[derive(Debug)] -pub struct StateTrieOverlayError { - /// Requested in-memory tip hash. - tip_hash: B256, - /// Requested anchor hash. - anchor_hash: B256, -} - -impl fmt::Display for StateTrieOverlayError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "state trie overlay for tip {} cannot be anchored to {} with current blocks", - self.tip_hash, self.anchor_hash - ) - } -} - -impl std::error::Error for StateTrieOverlayError {} - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -struct OverlayCacheKey { - anchor_hash: B256, - tip_hash: B256, -} - -enum ComputeOverlayInput { - ExtendCached { block: ExecutedBlock, parent_input: Arc }, - MergeBlocks(Vec>), -} - -#[tracing::instrument( - level = "trace", - target = "chain_state::state_trie_overlay", - skip_all, - fields( - anchor_hash = %anchor_hash, - block_count = tracing::field::Empty, - parent_overlay = tracing::field::Empty, - elapsed_us = tracing::field::Empty, - ) -)] -fn compute_overlay( - input: ComputeOverlayInput, - anchor_hash: B256, - metrics: &StateTrieOverlayMetrics, -) -> TrieInputSorted { - let started_at = Instant::now(); - let block_count = match &input { - ComputeOverlayInput::ExtendCached { .. } => 1, - ComputeOverlayInput::MergeBlocks(blocks) => blocks.len(), - }; - let parent_overlay = matches!(&input, ComputeOverlayInput::ExtendCached { .. 
}); - tracing::Span::current().record("block_count", block_count); - tracing::Span::current().record("parent_overlay", parent_overlay); - - let overlay = match input { - ComputeOverlayInput::ExtendCached { block, parent_input } => { - let trie_data = block.trie_data(); - - trace!( - target: "chain_state::state_trie_overlay", - %anchor_hash, - head = %block.recovered_block().hash(), - "extending cached parent state trie overlay" - ); - - let mut overlay = parent_input.as_ref().clone(); - extend_overlay(&mut overlay, &trie_data.hashed_state, &trie_data.trie_updates); - overlay - } - ComputeOverlayInput::MergeBlocks(blocks) => merge_blocks(blocks), - }; - - let elapsed = started_at.elapsed(); - metrics.overlay_computation_duration_seconds.record(elapsed.as_secs_f64()); - tracing::Span::current().record("elapsed_us", elapsed.as_micros() as u64); - debug!( - target: "chain_state::state_trie_overlay", - %anchor_hash, - block_count, - parent_overlay, - ?elapsed, - "computed state trie overlay" - ); - - overlay -} - -fn merge_blocks(blocks: Vec>) -> TrieInputSorted { - let trie_data = blocks.iter().map(ExecutedBlock::trie_data).collect::>(); - - #[cfg(feature = "rayon")] - let (nodes, state) = rayon::join( - || { - TrieUpdatesSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.trie_updates)), - ) - }, - || { - HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), - ) - }, - ); - - #[cfg(not(feature = "rayon"))] - let (nodes, state) = ( - TrieUpdatesSorted::merge_batch(trie_data.iter().map(|data| Arc::clone(&data.trie_updates))), - HashedPostStateSorted::merge_batch( - trie_data.iter().map(|data| Arc::clone(&data.hashed_state)), - ), - ); - - TrieInputSorted::new(nodes, state, Default::default()) -} - -fn extend_overlay( - overlay: &mut TrieInputSorted, - hashed_state: &HashedPostStateSorted, - trie_updates: &TrieUpdatesSorted, -) { - #[cfg(feature = "rayon")] - { - rayon::join( - || { - if 
!hashed_state.is_empty() { - Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); - } - }, - || { - if !trie_updates.is_empty() { - Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); - } - }, - ); - } - - #[cfg(not(feature = "rayon"))] - { - if !hashed_state.is_empty() { - Arc::make_mut(&mut overlay.state).extend_ref_and_sort(hashed_state); - } - if !trie_updates.is_empty() { - Arc::make_mut(&mut overlay.nodes).extend_ref_and_sort(trie_updates); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{test_utils::TestBlockBuilder, ComputedTrieData, EthPrimitives, ExecutedBlock}; - use alloy_primitives::U256; - use reth_primitives_traits::Account; - use reth_trie::{updates::TrieUpdatesSorted, HashedPostState, HashedStorage}; - use std::sync::Arc; - #[cfg(feature = "rayon")] - use std::{ - thread, - time::{Duration, Instant}, - }; - - fn with_unique_state( - block: &ExecutedBlock, - id: u8, - ) -> ExecutedBlock { - let hashed_address = B256::with_last_byte(id); - let hashed_slot = B256::with_last_byte(id.saturating_add(32)); - let hashed_state = HashedPostState::default() - .with_accounts([(hashed_address, Some(Account::default()))]) - .with_storages([( - hashed_address, - HashedStorage::from_iter(false, [(hashed_slot, U256::from(id))]), - )]) - .into_sorted(); - - ExecutedBlock::new( - Arc::clone(&block.recovered_block), - Arc::clone(&block.execution_output), - ComputedTrieData::new(Arc::new(hashed_state), Arc::new(TrieUpdatesSorted::default())), - ) - } - - fn test_blocks() -> Vec> { - TestBlockBuilder::eth() - .get_executed_blocks(1..4) - .enumerate() - .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) - .collect() - } - - #[test] - fn errors_for_unknown_parent() { - let manager = StateTrieOverlayManager::::default(); - let parent = B256::random(); - let anchor = B256::random(); - - let err = manager.overlay_for_parent(parent, anchor).unwrap_err(); - - assert_eq!(err.tip_hash, parent); - 
assert_eq!(err.anchor_hash, anchor); - } - - #[test] - fn builds_managed_overlay_for_inserted_blocks() { - let manager = StateTrieOverlayManager::default(); - let blocks = test_blocks(); - for block in &blocks { - manager.insert_block(block.clone()); - } - - let anchor_hash = blocks[0].recovered_block().parent_hash(); - - let (_, state) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 3); - - let short_anchor = blocks[1].recovered_block().hash(); - let (_, short) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - assert_eq!(short.accounts.len(), 1); - let (_, cached_short) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), short_anchor).unwrap(); - assert!(Arc::ptr_eq(&short, &cached_short)); - } - - #[test] - fn returns_anchor_for_in_memory_parent() { - let manager = StateTrieOverlayManager::default(); - let blocks = test_blocks(); - for block in &blocks { - manager.insert_block(block.clone()); - } - - assert_eq!( - manager.anchor_for_parent(blocks[2].recovered_block().hash(), B256::random()), - Some(blocks[0].recovered_block().parent_hash()) - ); - - manager.remove_blocks([blocks[0].recovered_block().hash()]); - assert_eq!( - manager.anchor_for_parent( - blocks[2].recovered_block().hash(), - blocks[0].recovered_block().hash() - ), - Some(blocks[0].recovered_block().hash()) - ); - } - - #[test] - fn prefers_anchor_in_parent_chain() { - let manager = StateTrieOverlayManager::default(); - let blocks = test_blocks(); - for block in &blocks { - manager.insert_block(block.clone()); - } - - let db_tip_hash = blocks[1].recovered_block().hash(); - assert_eq!( - manager.anchor_for_parent(blocks[2].recovered_block().hash(), db_tip_hash), - Some(db_tip_hash) - ); - } - - #[cfg(feature = "rayon")] - #[test] - fn insert_block_prepares_child_overlay_from_cached_parent() { - let manager = StateTrieOverlayManager::new(Arc::new(WorkerPool::new(2, 
"test-ovly"))); - let blocks = test_blocks(); - - manager.insert_block(blocks[0].clone()); - - let anchor_hash = blocks[0].recovered_block().parent_hash(); - let parent_hash = blocks[0].recovered_block().hash(); - manager.overlay_for_parent(parent_hash, anchor_hash).unwrap(); - - let child_hash = blocks[1].recovered_block().hash(); - manager.insert_block(blocks[1].clone()); - - let child_key = OverlayCacheKey { anchor_hash, tip_hash: child_hash }; - let deadline = Instant::now() + Duration::from_secs(5); - while !manager.overlays.contains_key(&child_key) { - assert!( - Instant::now() < deadline, - "timed out waiting for optimistically prepared child overlay" - ); - thread::sleep(Duration::from_millis(10)); - } - - let (_, state) = manager.overlay_for_parent(child_hash, anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 2); - } - - #[test] - fn prunes_cached_overlays_after_removing_blocks() { - let manager = StateTrieOverlayManager::default(); - let blocks = test_blocks(); - for block in &blocks { - manager.insert_block(block.clone()); - } - - let original_anchor = blocks[0].recovered_block().parent_hash(); - manager.overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor).unwrap(); - - manager.remove_blocks([ - blocks[0].recovered_block().hash(), - blocks[1].recovered_block().hash(), - ]); - - let anchor_hash = blocks[1].recovered_block().hash(); - assert!(manager - .overlay_for_parent(blocks[2].recovered_block().hash(), original_anchor) - .is_err()); - - let (_, state) = - manager.overlay_for_parent(blocks[2].recovered_block().hash(), anchor_hash).unwrap(); - assert_eq!(state.accounts.len(), 1); - } -} diff --git a/crates/e2e-test-utils/tests/e2e-testsuite/main.rs b/crates/e2e-test-utils/tests/e2e-testsuite/main.rs index eebddef2ff8..84c21baea79 100644 --- a/crates/e2e-test-utils/tests/e2e-testsuite/main.rs +++ b/crates/e2e-test-utils/tests/e2e-testsuite/main.rs @@ -374,7 +374,7 @@ async fn test_setup_builder_with_custom_tree_config() -> 
Result<()> { PayloadAttributes::default() }) .with_tree_config_modifier(|config| { - config.with_persistence_threshold(0).with_memory_block_buffer_target(5) + config.with_persistence_threshold(6).with_memory_block_buffer_target(5) }) .build() .await?; diff --git a/crates/e2e-test-utils/tests/rocksdb/main.rs b/crates/e2e-test-utils/tests/rocksdb/main.rs index 3a6bce7fe7e..d4659b70e2b 100644 --- a/crates/e2e-test-utils/tests/rocksdb/main.rs +++ b/crates/e2e-test-utils/tests/rocksdb/main.rs @@ -189,7 +189,7 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -200,7 +200,7 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { let signer = wallets[0].clone(); let client = nodes[0].rpc_client().expect("RPC client should be available"); - let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer).await; + let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer.clone()).await; let tx_hash = nodes[0].rpc.inject_tx(raw_tx).await?; // Wait for tx to enter pending pool before mining @@ -209,6 +209,14 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { let payload = nodes[0].advance_block().await?; assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 1).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Query each transaction by hash let tx: Option = client.request("eth_getTransactionByHash", [tx_hash]).await?; let tx = tx.expect("Transaction should be found"); @@ -256,7 +264,7 @@ async fn test_rocksdb_multi_tx_same_block() -> Result<()> 
{ test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -283,6 +291,14 @@ async fn test_rocksdb_multi_tx_same_block() -> Result<()> { let payload = nodes[0].advance_block().await?; assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 3).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Verify block contains all 3 txs let block: Option = client.request("eth_getBlockByNumber", ("0x1", true)).await?; @@ -324,7 +340,7 @@ async fn test_rocksdb_txs_across_blocks() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -409,7 +425,7 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -417,7 +433,7 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { let signer = wallets[0].clone(); // Inject tx but do NOT mine - let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer).await; + let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer.clone()).await; let tx_hash = nodes[0].rpc.inject_tx(raw_tx).await?; // Verify tx is in pending pool via RPC @@ -442,6 +458,14 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { let payload = nodes[0].advance_block().await?; 
assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 1).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Poll until tx appears in RocksDB let tx_number = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash).await; assert_eq!(tx_number, 0, "First tx should have tx_number 0"); @@ -473,7 +497,7 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -495,10 +519,6 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let block1_hash = payload1.block().hash(); assert_eq!(payload1.block().number(), 1); - // Poll until tx1 appears in RocksDB (ensures persistence happened) - let tx_number1 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; - assert_eq!(tx_number1, 0, "First tx should have tx_number 0"); - // Mine block 2 with transaction from signer1 (nonce 1) let raw_tx2 = TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer1.clone(), 1).await; @@ -508,6 +528,10 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let payload2 = nodes[0].advance_block().await?; assert_eq!(payload2.block().number(), 2); + // The second block triggers the first persistence cycle, which flushes both block 1 and 2. 
+ let tx_number1 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; + assert_eq!(tx_number1, 0, "First tx should have tx_number 0"); + // Poll until tx2 appears in RocksDB let tx_number2 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash2).await; assert_eq!(tx_number2, 1, "Second tx should have tx_number 1"); @@ -521,6 +545,14 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let payload3 = nodes[0].advance_block().await?; assert_eq!(payload3.block().number(), 3); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer1.clone(), 3).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 4); + // Poll until tx3 appears in RocksDB let tx_number3 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; assert_eq!(tx_number3, 2, "Third tx should have tx_number 2"); @@ -532,7 +564,7 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let alt_tx_hash = nodes[0].rpc.inject_tx(raw_alt_tx).await?; wait_for_pending_tx(&client, alt_tx_hash).await; - // Build an alternate payload (this builds on top of the current head, i.e., block 3) + // Build an alternate payload on top of the current flushed head. // But we want to reorg back to block 1, so we'll use the payload and then FCU to it let alt_payload = nodes[0].new_payload().await?; let alt_block_hash = nodes[0].submit_payload(alt_payload.clone()).await?; @@ -550,8 +582,8 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let latest: Option = client.request("eth_getBlockByNumber", ("latest", false)).await?; let latest = latest.expect("Latest block should exist"); - // The alt block is at height 4 (on top of block 3) - assert!(latest.header.number >= 3, "Should be at height >= 3 after operation"); + // The alt block is built on top of the flushed canonical head. 
+ assert!(latest.header.number >= 4, "Should be at height >= 4 after operation"); // tx1 from block 1 should still be there let tx1: Option = client.request("eth_getTransactionByHash", [tx_hash1]).await?; @@ -596,7 +628,7 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -621,8 +653,6 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload1 = nodes[0].advance_block().await?; assert_eq!(payload1.block().number(), 1); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; - // Record state after block 1 let balance_at_1: U256 = client.request("eth_getBalance", (sender, "0x1")).await?; let nonce_at_1: U256 = client.request("eth_getTransactionCount", (sender, "0x1")).await?; @@ -637,8 +667,6 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload2 = nodes[0].advance_block().await?; assert_eq!(payload2.block().number(), 2); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash2).await; - let balance_at_2: U256 = client.request("eth_getBalance", (sender, "0x2")).await?; let nonce_at_2: U256 = client.request("eth_getTransactionCount", (sender, "0x2")).await?; assert!(balance_at_2 < balance_at_1, "Balance should decrease further after second tx"); @@ -652,18 +680,14 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload3 = nodes[0].advance_block().await?; assert_eq!(payload3.block().number(), 3); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; - let balance_at_3: U256 = client.request("eth_getBalance", (sender, "0x3")).await?; let nonce_at_3: U256 = client.request("eth_getTransactionCount", (sender, "0x3")).await?; assert!(balance_at_3 < balance_at_2, "Balance should decrease further after third tx"); assert_eq!(nonce_at_3, 
U256::from(3), "Nonce should be 3 after third tx"); // Mine additional blocks to push blocks 1-3 out of the in-memory overlay. - // With persistence_threshold=0 and memory_block_buffer_target=0, each new block - // triggers persistence up to `head` followed by in-memory eviction. Mining several - // more blocks ensures the engine loop has completed at least one full - // persist-then-evict cycle covering blocks 1-3. + // With a persistence threshold of 1, every second block triggers a flush, so a few extra + // blocks are enough to durably persist and evict the earlier history we want to query. // Each block needs a transaction because the payload builder requires non-empty payloads. for nonce in 3..8u64 { let raw_tx = @@ -673,6 +697,7 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { wait_for_pending_tx(&client, tx_hash).await; nodes[0].advance_block().await?; } + poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; // Allow the engine loop to process the persistence completions tokio::time::sleep(Duration::from_millis(500)).await; @@ -743,7 +768,7 @@ async fn test_rocksdb_account_history_pruning() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .with_node_config_modifier(|mut config| { config.pruning.account_history_distance = Some(PRUNE_DISTANCE); config.pruning.minimum_distance = Some(PRUNE_DISTANCE); @@ -840,7 +865,7 @@ async fn test_rocksdb_storage_history_pruning() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .with_node_config_modifier(|mut config| { config.pruning.storage_history_distance = Some(PRUNE_DISTANCE); config.pruning.minimum_distance = Some(PRUNE_DISTANCE); @@ -912,10 +937,6 @@ async fn 
test_rocksdb_storage_history_pruning() -> Result<()> { let payload1 = nodes[0].advance_block().await?; assert_eq!(payload1.block().number(), 1); - poll_tx_in_rocksdb(&nodes[0].inner.provider, deploy_hash).await; - - // Let the persistence cycle complete before the next block (same cadence as the loop below) - tokio::time::sleep(Duration::from_millis(300)).await; // Get the deployed contract address from the receipt let receipt: Option = @@ -965,6 +986,10 @@ async fn test_rocksdb_storage_history_pruning() -> Result<()> { assert_eq!(payload.block().number(), block_num); last_tx_hash = tx_hash; + if nonce == 1 { + poll_tx_in_rocksdb(&nodes[0].inner.provider, deploy_hash).await; + } + // Let the persistence cycle complete before the next block tokio::time::sleep(Duration::from_millis(300)).await; } diff --git a/crates/engine/primitives/Cargo.toml b/crates/engine/primitives/Cargo.toml index dd14824fb43..a7198e184c3 100644 --- a/crates/engine/primitives/Cargo.toml +++ b/crates/engine/primitives/Cargo.toml @@ -37,6 +37,9 @@ auto_impl.workspace = true serde.workspace = true thiserror.workspace = true +[dev-dependencies] +alloy-primitives = { workspace = true, features = ["getrandom"] } + [features] default = ["std"] trie-debug = [] diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index dd1b97804eb..c2bf942b2ef 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -6,12 +6,33 @@ use core::time::Duration; /// Triggers persistence when the number of canonical blocks in memory exceeds this threshold. pub const DEFAULT_PERSISTENCE_THRESHOLD: u64 = 2; -/// Maximum canonical-minus-persisted gap before engine API processing is stalled. -pub const DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD: u64 = 16; +/// Maximum number of consecutive canonical blocks whose non-trie outputs may be persisted ahead +/// of trie persistence. 
+pub const DEFAULT_NUM_STATE_MASKING_BLOCKS: u64 = 0; /// How close to the canonical head we persist blocks. pub const DEFAULT_MEMORY_BLOCK_BUFFER_TARGET: u64 = 0; +/// Derives the default canonical-minus-persisted gap that triggers backpressure. +pub const fn default_persistence_backpressure_threshold( + persistence_threshold: u64, + memory_block_buffer_target: u64, +) -> u64 { + let threshold = 2 * (persistence_threshold + memory_block_buffer_target); + if threshold < 16 { + 16 + } else { + threshold + } +} + +/// Maximum canonical-minus-persisted gap before engine API processing is stalled. +pub const DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD: u64 = + default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ); + /// The size of proof targets chunk to spawn in one multiproof calculation. pub const DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE: usize = 5; @@ -60,6 +81,17 @@ const fn assert_backpressure_threshold_invariant( ); } +const fn assert_state_masking_invariant( + persistence_threshold: u64, + num_state_masking_blocks: u64, + memory_block_buffer_target: u64, +) { + debug_assert!( + num_state_masking_blocks + memory_block_buffer_target < persistence_threshold, + "num_state_masking_blocks + memory_block_buffer_target must be less than persistence_threshold", + ); +} + const fn default_cross_block_cache_size() -> usize { if cfg!(test) { 1024 * 1024 // 1 MB in tests @@ -93,6 +125,9 @@ pub struct TreeConfig { /// Maximum number of blocks to be kept only in memory without triggering /// persistence. persistence_threshold: u64, + /// Number of persisted blocks whose state/trie writes are masked instead of being durably + /// written in the current cycle. + num_state_masking_blocks: u64, /// How close to the canonical head we persist blocks. Represents the ideal /// number of most recent blocks to keep in memory for quick access and reorgs. 
/// @@ -203,14 +238,24 @@ pub struct TreeConfig { impl Default for TreeConfig { fn default() -> Self { + let persistence_backpressure_threshold = default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ); assert_backpressure_threshold_invariant( DEFAULT_PERSISTENCE_THRESHOLD, - DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold, + ); + assert_state_masking_invariant( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_NUM_STATE_MASKING_BLOCKS, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, ); Self { persistence_threshold: DEFAULT_PERSISTENCE_THRESHOLD, + num_state_masking_blocks: DEFAULT_NUM_STATE_MASKING_BLOCKS, memory_block_buffer_target: DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, - persistence_backpressure_threshold: DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold, block_buffer_limit: DEFAULT_BLOCK_BUFFER_LIMIT, max_invalid_header_cache_length: DEFAULT_MAX_INVALID_HEADER_CACHE_LENGTH, invalid_header_hit_eviction_threshold: DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, @@ -252,6 +297,7 @@ impl TreeConfig { #[expect(clippy::too_many_arguments)] pub const fn new( persistence_threshold: u64, + num_state_masking_blocks: u64, memory_block_buffer_target: u64, persistence_backpressure_threshold: u64, block_buffer_limit: u32, @@ -284,8 +330,14 @@ impl TreeConfig { persistence_threshold, persistence_backpressure_threshold, ); + assert_state_masking_invariant( + persistence_threshold, + num_state_masking_blocks, + memory_block_buffer_target, + ); Self { persistence_threshold, + num_state_masking_blocks, memory_block_buffer_target, persistence_backpressure_threshold, block_buffer_limit, @@ -328,6 +380,11 @@ impl TreeConfig { self.persistence_threshold } + /// Return the number of persisted blocks whose state/trie writes are masked. + pub const fn num_state_masking_blocks(&self) -> u64 { + self.num_state_masking_blocks + } + /// Return the memory block buffer target. 
pub const fn memory_block_buffer_target(&self) -> u64 { self.memory_block_buffer_target @@ -446,6 +503,22 @@ impl TreeConfig { self.persistence_threshold, self.persistence_backpressure_threshold, ); + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); + self + } + + /// Setter for the number of persisted blocks whose state/trie writes are masked. + pub const fn with_num_state_masking_blocks(mut self, num_state_masking_blocks: u64) -> Self { + self.num_state_masking_blocks = num_state_masking_blocks; + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); self } @@ -455,6 +528,11 @@ impl TreeConfig { memory_block_buffer_target: u64, ) -> Self { self.memory_block_buffer_target = memory_block_buffer_target; + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); self } @@ -764,7 +842,26 @@ impl TreeConfig { #[cfg(test)] mod tests { - use super::TreeConfig; + use super::{ + default_persistence_backpressure_threshold, TreeConfig, DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + DEFAULT_NUM_STATE_MASKING_BLOCKS, DEFAULT_PERSISTENCE_THRESHOLD, + }; + + #[test] + fn default_thresholds_use_derived_backpressure_threshold() { + let config = TreeConfig::default(); + + assert_eq!(config.persistence_threshold(), DEFAULT_PERSISTENCE_THRESHOLD); + assert_eq!(config.num_state_masking_blocks(), DEFAULT_NUM_STATE_MASKING_BLOCKS); + assert_eq!(config.memory_block_buffer_target(), DEFAULT_MEMORY_BLOCK_BUFFER_TARGET); + assert_eq!( + config.persistence_backpressure_threshold(), + default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ) + ); + } #[test] #[should_panic( @@ -775,4 +872,15 @@ mod tests { .with_persistence_threshold(4) .with_persistence_backpressure_threshold(4); } + + #[test] + 
#[should_panic( + expected = "num_state_masking_blocks + memory_block_buffer_target must be less than persistence_threshold" + )] + fn rejects_state_masking_window_at_or_above_persistence_threshold() { + let _ = TreeConfig::default() + .with_persistence_threshold(4) + .with_num_state_masking_blocks(2) + .with_memory_block_buffer_target(2); + } } diff --git a/crates/engine/tree/src/persistence.rs b/crates/engine/tree/src/persistence.rs index 1087a3e8291..b3ee42006cb 100644 --- a/crates/engine/tree/src/persistence.rs +++ b/crates/engine/tree/src/persistence.rs @@ -1,16 +1,16 @@ use crate::metrics::PersistenceMetrics; use alloy_eips::BlockNumHash; use crossbeam_channel::Sender as CrossbeamSender; -use reth_chain_state::ExecutedBlock; use reth_errors::ProviderError; use reth_ethereum_primitives::EthPrimitives; use reth_primitives_traits::{FastInstant as Instant, NodePrimitives}; use reth_provider::{ providers::ProviderNodeTypes, BalProvider, BlockExecutionWriter, BlockHashReader, ChainStateBlockWriter, DBProvider, DatabaseProviderFactory, ProviderFactory, SaveBlocksMode, + SaveBlocksPlan, StageCheckpointReader, }; use reth_prune::{PrunerError, PrunerWithFactory}; -use reth_stages_api::{MetricEvent, MetricEventsSender}; +use reth_stages_api::{MetricEvent, MetricEventsSender, StageId}; use reth_tasks::spawn_os_thread; use std::{ sync::{ @@ -26,8 +26,13 @@ use tracing::{debug, error, instrument, warn}; /// Unified result of any persistence operation. #[derive(Debug)] pub struct PersistenceResult { - /// The last block that was persisted, if any. + /// The highest block whose non-state/trie outputs are persisted, if any. pub last_block: Option, + /// The highest block whose state/trie data is fully persisted, if known. + /// + /// When this lags behind [`Self::last_block`], callers must retain the suffix + /// above it in memory so trie-backed operations can still unwind from that point. 
+ pub last_state_trie_block: Option, /// The commit duration, only available for save-blocks operations. pub commit_duration: Option, } @@ -96,14 +101,14 @@ where while let Ok(action) = self.incoming.recv() { match action { PersistenceAction::RemoveBlocksAbove(new_tip_num, sender) => { - let last_block = self.on_remove_blocks_above(new_tip_num)?; + let result = self.on_remove_blocks_above(new_tip_num)?; // send new sync metrics based on removed blocks let _ = self.sync_metrics_tx.send(MetricEvent::SyncHeight { height: new_tip_num }); - let _ = sender.send(PersistenceResult { last_block, commit_duration: None }); + let _ = sender.send(result); } - PersistenceAction::SaveBlocks(blocks, sender) => { - let result = self.on_save_blocks(blocks)?; + PersistenceAction::SaveBlocks(plan, sender) => { + let result = self.on_save_blocks(plan)?; let result_number = result.last_block.map(|b| b.number); let _ = sender.send(result); @@ -131,28 +136,41 @@ where fn on_remove_blocks_above( &self, new_tip_num: u64, - ) -> Result, PersistenceError> { + ) -> Result { debug!(target: "engine::persistence", ?new_tip_num, "Removing blocks"); let start_time = Instant::now(); let provider_rw = self.provider.database_provider_rw()?; let new_tip_hash = provider_rw.block_hash(new_tip_num)?; + provider_rw.remove_block_and_execution_above(new_tip_num)?; + let last_state_trie_block = + provider_rw.get_stage_checkpoint(StageId::Finish)?.map(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number) + }); provider_rw.commit()?; debug!(target: "engine::persistence", ?new_tip_num, ?new_tip_hash, "Removed blocks from disk"); self.metrics.remove_blocks_above_duration_seconds.record(start_time.elapsed()); - Ok(new_tip_hash.map(|hash| BlockNumHash { hash, number: new_tip_num })) + Ok(PersistenceResult { + last_block: new_tip_hash.map(|hash| BlockNumHash { hash, number: new_tip_num }), + last_state_trie_block, + 
commit_duration: None, + }) } - #[instrument(level = "debug", target = "engine::persistence", skip_all, fields(block_count = blocks.len()))] + #[instrument(level = "debug", target = "engine::persistence", skip_all, fields(block_count = plan.blocks.len()))] fn on_save_blocks( &mut self, - blocks: Vec>, + plan: SaveBlocksPlan, ) -> Result { - let first_block = blocks.first().map(|b| b.recovered_block.num_hash()); - let last_block = blocks.last().map(|b| b.recovered_block.num_hash()); - let block_count = blocks.len(); + let first_block = plan.blocks.first().map(|block| block.recovered_block().num_hash()); + let last_block = plan.last_block(); + let block_count = plan.blocks.len(); + let mut last_state_trie_block = None; let pending_finalized = self.pending_finalized_block.take(); let pending_safe = self.pending_safe_block.take(); @@ -161,19 +179,27 @@ where let start_time = Instant::now(); - if let Some(last) = last_block { + if let Some(last_block) = last_block { let provider_rw = self.provider.database_provider_rw()?; - provider_rw.save_blocks(blocks, SaveBlocksMode::Full)?; + provider_rw.save_blocks(&plan, SaveBlocksMode::Full)?; + last_state_trie_block = provider_rw + .get_stage_checkpoint(StageId::Finish)? 
+ .and_then(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + }) + .or(Some(last_block.number)); if let Some(finalized) = pending_finalized { - provider_rw.save_finalized_block_number(finalized.min(last.number))?; - if finalized > last.number { + provider_rw.save_finalized_block_number(finalized.min(last_block.number))?; + if finalized > last_block.number { self.pending_finalized_block = Some(finalized); } } if let Some(safe) = pending_safe { - provider_rw.save_safe_block_number(safe.min(last.number))?; - if safe > last.number { + provider_rw.save_safe_block_number(safe.min(last_block.number))?; + if safe > last_block.number { self.pending_safe_block = Some(safe); } } @@ -189,7 +215,7 @@ where self.metrics.save_blocks_batch_size.record(block_count as f64); self.metrics.save_blocks_duration_seconds.record(elapsed); - Ok(PersistenceResult { last_block, commit_duration: Some(elapsed) }) + Ok(PersistenceResult { last_block, last_state_trie_block, commit_duration: Some(elapsed) }) } fn maybe_run_pruner(&mut self, block_number: u64) -> Result<(), PersistenceError> { @@ -235,9 +261,10 @@ pub enum PersistenceAction { /// The section of tree state that should be persisted. These blocks are expected in order of /// increasing block number. /// - /// First, header, transaction, and receipt-related data should be written to static files. - /// Then the execution history-related data will be written to the database. - SaveBlocks(Vec>, CrossbeamSender), + /// First, header, transaction, and receipt-related data should be written to static files for + /// the deferred trie region. Then the execution history-related data will be written to the + /// database, while trie catchup is persisted for the prefix. + SaveBlocks(SaveBlocksPlan, CrossbeamSender), /// Removes block data above the given block number from the database. 
/// @@ -321,10 +348,10 @@ impl PersistenceHandle { /// If there are no blocks to persist, then `None` is sent in the sender. pub fn save_blocks( &self, - blocks: Vec>, + plan: SaveBlocksPlan, tx: CrossbeamSender, ) -> Result<(), SendError>> { - self.send_action(PersistenceAction::SaveBlocks(blocks, tx)) + self.send_action(PersistenceAction::SaveBlocks(plan, tx)) } /// Queues the finalized block number to be persisted on disk. @@ -389,14 +416,14 @@ mod tests { use super::*; use alloy_eips::NumHash; use alloy_primitives::{keccak256, BlockHash, BlockNumber, Bytes, Sealed, B256, U256}; - use reth_chain_state::test_utils::TestBlockBuilder; + use reth_chain_state::{test_utils::TestBlockBuilder, ExecutedBlock}; use reth_exex_types::FinishedExExHeight; use reth_provider::{ providers::{ProviderFactoryBuilder, ReadOnlyConfig}, test_utils::{create_test_provider_factory, MockNodeTypes}, AccountReader, BalConfig, BalNotificationStream, BalStore, BalStoreHandle, ChainSpecProvider, HeaderProvider, InMemoryBalStore, ProviderError, ProviderResult, - SealedBal, StorageSettingsCache, TryIntoHistoricalStateProvider, + SaveBlocksPlanStep, SealedBal, StorageSettingsCache, TryIntoHistoricalStateProvider, }; use reth_prune::Pruner; use reth_prune_types::PruneMode; @@ -405,6 +432,13 @@ mod tests { fn default_persistence_handle() -> PersistenceHandle { let provider = create_test_provider_factory(); + persistence_handle(provider) + } + + fn persistence_handle(provider: ProviderFactory) -> PersistenceHandle + where + N: ProviderNodeTypes, + { let (_finished_exex_height_tx, finished_exex_height_rx) = tokio::sync::watch::channel(FinishedExExHeight::NoExExs); @@ -415,6 +449,18 @@ mod tests { PersistenceHandle::::spawn_service(provider, pruner, sync_metrics_tx) } + fn full_save_plan(blocks: Vec>) -> SaveBlocksPlan { + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + 
} + #[test] fn test_pruner_prunes_bal_store() { reth_tracing::init_test_tracing(); @@ -500,13 +546,14 @@ mod tests { reth_tracing::init_test_tracing(); let handle = default_persistence_handle(); - let blocks = vec![]; + let blocks = full_save_plan(vec![]); let (tx, rx) = crossbeam_channel::bounded(1); handle.save_blocks(blocks, tx).unwrap(); let result = rx.recv().unwrap(); assert!(result.last_block.is_none()); + assert!(result.last_state_trie_block.is_none()); } #[test] @@ -519,14 +566,16 @@ mod tests { test_block_builder.get_executed_block_with_number(block_number, B256::random()); let block_hash = executed.recovered_block().hash(); - let blocks = vec![executed]; + let blocks = full_save_plan(vec![executed]); let (tx, rx) = crossbeam_channel::bounded(1); handle.save_blocks(blocks, tx).unwrap(); let result = rx.recv_timeout(std::time::Duration::from_secs(10)).expect("test timed out"); - assert_eq!(block_hash, result.last_block.unwrap().hash); + let last_block = result.last_block.unwrap(); + assert_eq!(block_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } #[test] @@ -539,9 +588,11 @@ mod tests { let last_hash = blocks.last().unwrap().recovered_block().hash(); let (tx, rx) = crossbeam_channel::bounded(1); - handle.save_blocks(blocks, tx).unwrap(); + handle.save_blocks(full_save_plan(blocks), tx).unwrap(); let result = rx.recv().unwrap(); - assert_eq!(last_hash, result.last_block.unwrap().hash); + let last_block = result.last_block.unwrap(); + assert_eq!(last_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } #[test] @@ -556,10 +607,12 @@ mod tests { let last_hash = blocks.last().unwrap().recovered_block().hash(); let (tx, rx) = crossbeam_channel::bounded(1); - handle.save_blocks(blocks, tx).unwrap(); + handle.save_blocks(full_save_plan(blocks), tx).unwrap(); let result = rx.recv().unwrap(); - assert_eq!(last_hash, result.last_block.unwrap().hash); + let last_block = 
result.last_block.unwrap(); + assert_eq!(last_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } } @@ -651,7 +704,7 @@ mod tests { { let provider_rw = provider_factory.database_provider_rw().unwrap(); - provider_rw.save_blocks(blocks_a, SaveBlocksMode::Full).unwrap(); + provider_rw.save_blocks(&full_save_plan(blocks_a), SaveBlocksMode::Full).unwrap(); provider_rw.commit().unwrap(); } @@ -708,7 +761,12 @@ mod tests { provider_rw.commit().unwrap(); let provider_rw = pf.database_provider_rw().unwrap(); - provider_rw.save_blocks(vec![block_b2], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&block_b2).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); }); diff --git a/crates/engine/tree/src/tree/metrics.rs b/crates/engine/tree/src/tree/metrics.rs index 018f422e22e..f8a96e3c6bb 100644 --- a/crates/engine/tree/src/tree/metrics.rs +++ b/crates/engine/tree/src/tree/metrics.rs @@ -537,6 +537,10 @@ pub struct BlockValidationMetrics { pub hashed_post_state_size: Histogram, /// Size of `TrieUpdatesSorted` (`total_len`) pub trie_updates_sorted_size: Histogram, + /// Size of `AnchoredTrieInput` overlay `TrieUpdatesSorted` (`total_len`) + pub anchored_overlay_trie_updates_size: Histogram, + /// Size of `AnchoredTrieInput` overlay `HashedPostStateSorted` (`total_len`) + pub anchored_overlay_hashed_state_size: Histogram, } impl BlockValidationMetrics { diff --git a/crates/engine/tree/src/tree/mod.rs b/crates/engine/tree/src/tree/mod.rs index a42fc60523e..d2ccf9f2260 100644 --- a/crates/engine/tree/src/tree/mod.rs +++ b/crates/engine/tree/src/tree/mod.rs @@ -14,7 +14,7 @@ use alloy_rpc_types_engine::{ use error::{InsertBlockError, InsertBlockFatalError, InsertBlockValidationError}; use reth_chain_state::{ CanonicalInMemoryState, ComputedTrieData, ExecutedBlock, ExecutionTimingStats, - MemoryOverlayStateProvider, NewCanonicalChain, StateTrieOverlayManager, 
+ MemoryOverlayStateProvider, NewCanonicalChain, }; use reth_consensus::{Consensus, FullConsensus}; use reth_engine_primitives::{ @@ -30,13 +30,13 @@ use reth_primitives_traits::{ }; use reth_provider::{ BlockExecutionOutput, BlockExecutionResult, BlockReader, ChangeSetReader, - DatabaseProviderFactory, HashedPostStateProvider, ProviderError, StageCheckpointReader, - StateProviderBox, StateProviderFactory, StateReader, StorageChangeSetReader, - StorageSettingsCache, TransactionVariant, + DatabaseProviderFactory, HashedPostStateProvider, ProviderError, SaveBlocksPlan, + SaveBlocksPlanStep, StageCheckpointReader, StateProviderBox, StateProviderFactory, StateReader, + StorageChangeSetReader, StorageSettingsCache, TransactionVariant, }; use reth_revm::database::StateProviderDatabase; use reth_stages_api::ControlFlow; -use reth_tasks::{spawn_os_thread, utils::increase_thread_priority, WorkerPool}; +use reth_tasks::{spawn_os_thread, utils::increase_thread_priority}; use reth_trie_db::ChangesetCache; use revm::interpreter::debug_unreachable; use state::TreeState; @@ -156,7 +156,6 @@ impl EngineApiTreeState { invalid_header_hit_eviction_threshold: u8, canonical_block: BlockNumHash, engine_kind: EngineApiKind, - state_trie_overlay_worker_pool: Arc, ) -> Self { Self { invalid_headers: InvalidHeaderCache::new( @@ -164,11 +163,7 @@ impl EngineApiTreeState { invalid_header_hit_eviction_threshold, ), buffer: BlockBuffer::new(block_buffer_limit), - tree_state: TreeState::new( - canonical_block, - engine_kind, - StateTrieOverlayManager::new(state_trie_overlay_worker_pool), - ), + tree_state: TreeState::new(canonical_block, engine_kind), forkchoice_state_tracker: ForkchoiceStateTracker::default(), } } @@ -440,6 +435,7 @@ where let persistence_state = PersistenceState { last_persisted_block: BlockNumHash::new(best_block_number, header.hash()), + last_state_trie_persisted_block: BlockNumHash::new(best_block_number, header.hash()), rx: None, }; @@ -450,7 +446,6 @@ where 
config.invalid_header_hit_eviction_threshold(), header.num_hash(), kind, - runtime.state_trie_overlay_worker_pool(), ); let task = Self::new( @@ -1369,24 +1364,17 @@ where /// Helper method to save blocks and set the persistence state. This ensures we keep track of /// the current persistence action while we're saving blocks. - fn persist_blocks(&mut self, blocks_to_persist: Vec>) { - if blocks_to_persist.is_empty() { + fn persist_blocks(&mut self, plan: SaveBlocksPlan) { + if plan.is_empty() { debug!(target: "engine::tree", "Returned empty set of blocks to persist"); return } - // NOTE: checked non-empty above - let highest_num_hash = blocks_to_persist - .iter() - .max_by_key(|block| block.recovered_block().number()) - .map(|b| b.recovered_block().num_hash()) - .expect("Checked non-empty persisting blocks"); - - debug!(target: "engine::tree", count=blocks_to_persist.len(), blocks = ?blocks_to_persist.iter().map(|block| block.recovered_block().num_hash()).collect::>(), "Persisting blocks"); + let last_block = plan.last_block().expect("checked non-empty persisting blocks"); let (tx, rx) = crossbeam_channel::bounded(1); - let _ = self.persistence.save_blocks(blocks_to_persist, tx); + let _ = self.persistence.save_blocks(plan, tx); - self.persistence_state.start_save(highest_num_hash, rx); + self.persistence_state.start_save(last_block, rx); } /// Triggers new persistence actions if no persistence task is currently in progress. @@ -1398,9 +1386,8 @@ where if let Some(new_tip_num) = self.find_disk_reorg()? 
{ self.remove_blocks(new_tip_num) } else if self.should_persist() { - let blocks_to_persist = - self.get_canonical_blocks_to_persist(PersistTarget::Threshold)?; - self.persist_blocks(blocks_to_persist); + let plan = self.get_save_blocks_plan(PersistTarget::Threshold)?; + self.persist_blocks(plan); } } @@ -1431,15 +1418,15 @@ where self.on_persistence_complete(result, start_time)?; } - let blocks_to_persist = self.get_canonical_blocks_to_persist(PersistTarget::Head)?; + let plan = self.get_save_blocks_plan(PersistTarget::Head)?; - if blocks_to_persist.is_empty() { + if plan.is_empty() { debug!(target: "engine::tree", "persistence complete, signaling termination"); return Ok(()) } - debug!(target: "engine::tree", count = blocks_to_persist.len(), "persisting remaining blocks before shutdown"); - self.persist_blocks(blocks_to_persist); + debug!(target: "engine::tree", count = plan.blocks.len(), "persisting remaining blocks before shutdown"); + self.persist_blocks(plan); } } @@ -1475,25 +1462,25 @@ where ) -> Result<(), AdvancePersistenceError> { self.metrics.engine.persistence_duration.record(start_time.elapsed()); - let commit_duration = result.commit_duration; - let Some(BlockNumHash { - hash: last_persisted_block_hash, - number: last_persisted_block_number, - }) = result.last_block + let PersistenceResult { last_block, last_state_trie_block, commit_duration } = result; + let Some(BlockNumHash { hash: last_block_hash, number: last_block_number }) = last_block else { // if this happened, then we persisted no blocks because we sent an empty vec of blocks warn!(target: "engine::tree", "Persistence task completed but did not persist any blocks"); return Ok(()) }; - debug!(target: "engine::tree", ?last_persisted_block_hash, ?last_persisted_block_number, elapsed=?start_time.elapsed(), "Finished persisting, calling finish"); - self.persistence_state.finish(last_persisted_block_hash, last_persisted_block_number); + let last_block = BlockNumHash::new(last_block_number, 
last_block_hash); + let last_state_trie_persisted_block = + self.last_state_trie_persisted_block(last_block, last_state_trie_block)?; + + debug!(target: "engine::tree", ?last_block_hash, ?last_block_number, last_state_trie_persisted_block = last_state_trie_persisted_block.number, elapsed=?start_time.elapsed(), "Finished persisting, calling finish"); + self.persistence_state.finish(last_block, last_state_trie_persisted_block); // Evict trie changesets for blocks below the eviction threshold. // Keep at least CHANGESET_CACHE_RETENTION_BLOCKS from the persisted tip, and also respect // the finalized block if set. - let min_threshold = - last_persisted_block_number.saturating_sub(CHANGESET_CACHE_RETENTION_BLOCKS); + let min_threshold = last_block_number.saturating_sub(CHANGESET_CACHE_RETENTION_BLOCKS); let eviction_threshold = if let Some(finalized) = self.canonical_in_memory_state.get_finalized_num_hash() { // Use the minimum of finalized block and retention threshold to be conservative @@ -1504,20 +1491,60 @@ where }; debug!( target: "engine::tree", - last_persisted = last_persisted_block_number, + last_persisted_block = last_block_number, finalized_number = ?self.canonical_in_memory_state.get_finalized_num_hash().map(|f| f.number), eviction_threshold, "Evicting changesets below threshold" ); self.changeset_cache.evict(eviction_threshold); - self.on_new_persisted_block()?; + // Invalidate cached overlay since the anchor has changed. + self.state.tree_state.invalidate_cached_overlay(); + + self.on_new_persisted_block(last_state_trie_persisted_block)?; + + // Re-prepare overlay for the current canonical head with the new anchor. + // Spawn a background task to trigger computation so it's ready when the next payload + // arrives. 
+ if let Some(prepared) = self.state.tree_state.prepare_canonical_overlay() { + self.runtime.spawn_blocking_named("prepare-overlay", move || { + let _ = prepared.overlay.get(prepared.anchor_hash); + }); + } - self.purge_timing_stats(last_persisted_block_number, commit_duration); + self.purge_timing_stats(last_block_number, commit_duration); Ok(()) } + /// Returns the highest block that can be dropped from memory after persistence completes. + fn last_state_trie_persisted_block( + &self, + last_block: BlockNumHash, + last_state_trie_block: Option, + ) -> ProviderResult { + let Some(last_state_trie_block) = last_state_trie_block else { return Ok(last_block) }; + debug_assert!( + last_state_trie_block <= last_block.number, + "state/trie frontier cannot exceed the last persisted block" + ); + if last_state_trie_block >= last_block.number { + return Ok(last_block) + } + + let hash = self + .canonical_in_memory_state + .hash_by_number(last_state_trie_block) + .map(Ok) + .unwrap_or_else(|| { + self.provider + .block_hash(last_state_trie_block)? + .ok_or_else(|| ProviderError::HeaderNotFound(last_state_trie_block.into())) + })?; + + Ok(BlockNumHash::new(last_state_trie_block, hash)) + } + /// Handles a message from the engine. /// /// Returns `ControlFlow::Break(())` if the engine should terminate. @@ -1836,7 +1863,7 @@ where // update the tracked chain height, after backfill sync both the canonical height and // persisted height are the same self.state.tree_state.set_canonical_head(new_head.num_hash()); - self.persistence_state.finish(new_head.hash(), new_head.number()); + self.persistence_state.finish(new_head.num_hash(), new_head.num_hash()); // update the tracked canonical head self.canonical_in_memory_state.set_canonical_head(new_head); @@ -2072,62 +2099,98 @@ where self.config.persistence_threshold() } - /// Returns a batch of consecutive canonical blocks to persist in the range - /// `(last_persisted_number .. target]`. The expected order is oldest -> newest. 
- fn get_canonical_blocks_to_persist( + /// Returns the save plan for the next persistence cycle. + fn get_save_blocks_plan( &self, target: PersistTarget, - ) -> Result>, AdvancePersistenceError> { + ) -> Result, AdvancePersistenceError> { // We will calculate the state root using the database, so we need to be sure there are no // changes debug_assert!(!self.persistence_state.in_progress()); - let mut blocks_to_persist = Vec::new(); + let mut blocks = Vec::new(); let mut current_hash = self.state.tree_state.canonical_block_hash(); - let last_persisted_number = self.persistence_state.last_persisted_block.number; + let last_state_trie_persisted_block_number = + self.persistence_state.last_state_trie_persisted_block.number; + let last_persisted_block_number = self.persistence_state.last_persisted_block.number; let canonical_head_number = self.state.tree_state.canonical_block_number(); - - let target_number = match target { - PersistTarget::Head => canonical_head_number, + let last_block_target_number = match target { PersistTarget::Threshold => { - canonical_head_number.saturating_sub(self.config.memory_block_buffer_target()) + canonical_head_number.saturating_sub(self.config.memory_block_buffer_target()).min( + last_persisted_block_number.saturating_add(self.config.persistence_threshold()), + ) } + PersistTarget::Head => canonical_head_number, }; debug!( target: "engine::tree", ?current_hash, - ?last_persisted_number, + ?last_state_trie_persisted_block_number, + ?last_persisted_block_number, ?canonical_head_number, - ?target_number, - "Returning canonical blocks to persist" + target = ?target, + "Returning save plan" ); while let Some(block) = self.state.tree_state.blocks_by_hash.get(¤t_hash) { - if block.recovered_block().number() <= last_persisted_number { + if block.recovered_block().number() <= last_state_trie_persisted_block_number { break; } - if block.recovered_block().number() <= target_number { - blocks_to_persist.push(block.clone()); + if 
block.recovered_block().number() <= last_block_target_number { + blocks.push(block.clone()); } current_hash = block.recovered_block().parent_hash(); } // Reverse the order so that the oldest block comes first - blocks_to_persist.reverse(); + blocks.reverse(); - Ok(blocks_to_persist) + let trie_catchup_block_count = last_persisted_block_number + .saturating_sub(last_state_trie_persisted_block_number) + .min(blocks.len() as u64) as usize; + let persist_rest_block_count = blocks.len().saturating_sub(trie_catchup_block_count); + let state_masking_block_count = + persist_rest_block_count.min(self.config.num_state_masking_blocks() as usize); + let full_persist_block_count = persist_rest_block_count - state_masking_block_count; + let full_persist_start = trie_catchup_block_count; + let state_masking_start = full_persist_start + full_persist_block_count; + let state_masking_range = state_masking_start..blocks.len(); + let mut steps = Vec::new(); + + if trie_catchup_block_count > 0 { + steps.push(SaveBlocksPlanStep::new( + 0..trie_catchup_block_count, + Some(state_masking_range.clone()), + false, + )); + } + if full_persist_block_count > 0 { + steps.push(SaveBlocksPlanStep::new( + full_persist_start..state_masking_start, + Some(state_masking_range.clone()), + true, + )); + } + if state_masking_block_count > 0 { + steps.push(SaveBlocksPlanStep::new(state_masking_range, None, true)); + } + + Ok(SaveBlocksPlan::new(blocks, steps)) } - /// This clears the blocks from the in-memory tree state that have been persisted to the - /// database. + /// This clears the blocks from the in-memory tree state that no longer need to stay resident + /// after persistence completes. /// - /// This also updates the canonical in-memory state to reflect the newest persisted block - /// height. + /// This also updates the canonical in-memory state to reflect the newest persisted block tip, + /// even if trie persistence only advanced through an earlier block. 
/// /// Assumes that `finish` has been called on the `persistence_state` at least once - fn on_new_persisted_block(&mut self) -> ProviderResult<()> { + fn on_new_persisted_block( + &mut self, + in_memory_persisted_block: BlockNumHash, + ) -> ProviderResult<()> { // If we have an on-disk reorg, we need to handle it first before touching the in-memory // state. if let Some(remove_above) = self.find_disk_reorg()? { @@ -2136,11 +2199,11 @@ where } let finalized = self.state.forkchoice_state_tracker.last_valid_finalized(); - self.remove_before(self.persistence_state.last_persisted_block, finalized)?; - self.canonical_in_memory_state.remove_persisted_blocks(BlockNumHash { - number: self.persistence_state.last_persisted_block.number, - hash: self.persistence_state.last_persisted_block.hash, - }); + self.remove_before(in_memory_persisted_block, finalized)?; + self.canonical_in_memory_state.remove_persisted_blocks_until( + self.persistence_state.last_persisted_block, + in_memory_persisted_block.number, + ); Ok(()) } @@ -2183,7 +2246,9 @@ where let sorted_hashed_state = Arc::new(hashed_state.into_sorted()); let sorted_trie_updates = Arc::new(trie_updates); - let trie_data = ComputedTrieData::new(sorted_hashed_state, sorted_trie_updates); + // Skip building trie input and anchor for DB-loaded blocks. 
+ let trie_data = + ComputedTrieData::without_trie_input(sorted_hashed_state, sorted_trie_updates); let execution_output = Arc::new(BlockExecutionOutput { state: execution_output.bundle, diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index 2645404b696..25be99de217 100644 --- a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -59,7 +59,7 @@ use reth_trie_sparse::debug_recorder::TrieDebugRecorder; use crate::tree::payload_processor::receipt_root_task::{IndexedReceipt, ReceiptRootTaskHandle}; use reth_chain_state::{ - CanonicalInMemoryState, DeferredTrieData, ExecutedBlock, ExecutionTimingStats, + CanonicalInMemoryState, DeferredTrieData, ExecutedBlock, ExecutionTimingStats, LazyOverlay, }; use reth_consensus::{ConsensusError, FullConsensus, ReceiptRootBloom}; use reth_engine_primitives::{ @@ -482,14 +482,15 @@ where // Get an iterator over the transactions in the payload let txs = self.tx_iterator_for(&input)?; + // Create lazy overlay from ancestors. This doesn't block, allowing execution to start + // before the trie data is ready. The overlay will be computed on first access. + let (lazy_overlay, anchor_hash) = Self::get_parent_lazy_overlay(parent_hash, ctx.state()); + // Create overlay factory for payload processor (StateRootTask path needs it for - // multiproofs) + // multiproofs). 
let provider_factory = self.provider.clone(); - let overlay_builder = Self::overlay_builder_for_parent( - parent_hash, - ctx.state(), - self.changeset_cache.clone(), - ); + let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) + .with_lazy_overlay(lazy_overlay); let overlay_factory = OverlayStateProviderFactory::new(provider_factory.clone(), overlay_builder.clone()); let changeset_provider = self.spawn_changeset_provider_task(overlay_factory.clone()); @@ -688,10 +689,11 @@ where if state_root == block.header().state_root() { maybe_state_root = Some((state_root, trie_updates, elapsed)) } else { + let block_state_root = block.header().state_root(); warn!( target: "engine::tree::payload_validator", ?state_root, - block_state_root = ?block.header().state_root(), + ?block_state_root, "State root task returned incorrect state root" ); #[cfg(feature = "trie-debug")] @@ -699,7 +701,7 @@ where block.header().number(), &trie_debug_recorders, ); - state_root_task_failed = true; + std::process::abort(); } maybe_new_hashed_state @@ -853,6 +855,7 @@ where let executed_block = self.spawn_deferred_trie_task( Arc::new(block), output, + ctx.state(), hashed_state, trie_output, changeset_provider, @@ -1753,14 +1756,45 @@ where self.invalid_block_hook.on_invalid_block(parent_header, block, output, trie_updates); } - /// Returns an overlay builder configured for a payload parent. - fn overlay_builder_for_parent( + /// Creates a [`LazyOverlay`] for the parent block without blocking. + /// + /// Returns a lazy overlay that will compute the trie input on first access, and the anchor + /// block hash (the highest persisted ancestor). This allows execution to start immediately + /// while the trie input computation is deferred until the overlay is actually needed. + /// + /// If parent is on disk (no in-memory blocks), returns `None` for the lazy overlay. + /// + /// Uses a cached overlay if available for the canonical head (the common case). 
+ fn get_parent_lazy_overlay( parent_hash: B256, state: &EngineApiTreeState, - changeset_cache: ChangesetCache, - ) -> OverlayBuilder { - OverlayBuilder::new(parent_hash, changeset_cache) - .with_state_trie_overlay_manager(state.tree_state.state_trie_overlays.clone()) + ) -> (Option>, B256) { + let (anchor_hash, blocks) = + state.tree_state.blocks_by_hash(parent_hash).unwrap_or_else(|| (parent_hash, vec![])); + + if blocks.is_empty() { + debug!(target: "engine::tree::payload_validator", "Parent found on disk, no lazy overlay needed"); + return (None, anchor_hash) + } + + if let Some(cached) = state.tree_state.get_cached_overlay(parent_hash, anchor_hash) { + debug!( + target: "engine::tree::payload_validator", + %parent_hash, + %anchor_hash, + "Using cached canonical overlay" + ); + return (Some(cached.overlay.clone()), cached.anchor_hash) + } + + debug!( + target: "engine::tree::payload_validator", + %anchor_hash, + num_blocks = blocks.len(), + "Creating lazy overlay for in-memory blocks" + ); + + (Some(LazyOverlay::new(blocks)), anchor_hash) } /// Spawns a background task to compute and sort trie data for the executed block. @@ -1780,10 +1814,22 @@ where &self, block: Arc>, execution_outcome: Arc>, + state: &EngineApiTreeState, hashed_state: LazyHashedPostState, trie_output: Arc, changeset_provider: impl TrieCursorFactory + Send + 'static, ) -> ExecutedBlock { + // Capture parent hash and ancestor overlays for deferred trie input construction. + let (anchor_hash, overlay_blocks) = state + .tree_state + .blocks_by_hash(block.parent_hash()) + .unwrap_or_else(|| (block.parent_hash(), Vec::new())); + + // Collect lightweight ancestor trie data handles. We don't call trie_data() here; + // the merge and any fallback sorting happens in the compute_trie_input_task. + let ancestors: Vec = + overlay_blocks.iter().rev().map(|b| b.trie_data_handle()).collect(); + // Create deferred handle with fallback inputs in case the background task hasn't completed. 
// Resolve the lazy handle into Arc. By this point the hashed state has // already been computed and used for state root verification, so .get() returns instantly. @@ -1791,7 +1837,8 @@ where Ok(state) => state, Err(handle) => handle.get().clone(), }; - let deferred_trie_data = DeferredTrieData::pending(hashed_state, trie_output); + let deferred_trie_data = + DeferredTrieData::pending(hashed_state, trie_output, anchor_hash, ancestors); let deferred_handle_task = deferred_trie_data.clone(); let block_validation_metrics = self.metrics.block_validation.clone(); @@ -1828,6 +1875,15 @@ where block_validation_metrics .trie_updates_sorted_size .record(computed.trie_updates.total_len() as f64); + if let Some(anchored) = &computed.anchored_trie_input { + block_validation_metrics + .anchored_overlay_trie_updates_size + .record(anchored.trie_input.nodes.total_len() as f64); + block_validation_metrics + .anchored_overlay_hashed_state_size + .record(anchored.trie_input.state.total_len() as f64); + } + // Compute and cache changesets using the computed trie_updates. // Use the pre-created provider to avoid races with changeset cache // eviction that can happen between task spawn and execution. 
@@ -2160,19 +2216,18 @@ where &block.execution_output.state, ); - let overlay_factory = OverlayStateProviderFactory::new( - self.provider.clone(), - Self::overlay_builder_for_parent( - block.recovered_block.parent_hash(), - state, - self.changeset_cache.clone(), - ), - ); + let (lazy_overlay, anchor_hash) = + Self::get_parent_lazy_overlay(block.recovered_block.parent_hash(), state); + let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) + .with_lazy_overlay(lazy_overlay); + let overlay_factory = + OverlayStateProviderFactory::new(self.provider.clone(), overlay_builder); let changeset_provider = overlay_factory.database_provider_ro()?; Ok(self.spawn_deferred_trie_task( block.recovered_block, block.execution_output, + state, LazyHashedPostState::ready(block.hashed_state), block.trie_updates, changeset_provider, @@ -2189,10 +2244,11 @@ where parent_state_root: B256, state: &EngineApiTreeState, ) -> Option { - let overlay_factory = OverlayStateProviderFactory::new( - self.provider.clone(), - Self::overlay_builder_for_parent(parent_hash, state, self.changeset_cache.clone()), - ); + let (lazy_overlay, anchor_hash) = Self::get_parent_lazy_overlay(parent_hash, state); + let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) + .with_lazy_overlay(lazy_overlay); + let overlay_factory = + OverlayStateProviderFactory::new(self.provider.clone(), overlay_builder); Some(self.payload_processor.spawn_state_root( overlay_factory, diff --git a/crates/engine/tree/src/tree/persistence_state.rs b/crates/engine/tree/src/tree/persistence_state.rs index c3ab00dbece..e4e0590fc56 100644 --- a/crates/engine/tree/src/tree/persistence_state.rs +++ b/crates/engine/tree/src/tree/persistence_state.rs @@ -22,7 +22,6 @@ use crate::persistence::PersistenceResult; use alloy_eips::BlockNumHash; -use alloy_primitives::B256; use crossbeam_channel::Receiver as CrossbeamReceiver; use reth_primitives_traits::FastInstant as Instant; use 
tracing::trace; @@ -30,10 +29,12 @@ use tracing::trace; /// The state of the persistence task. #[derive(Debug)] pub struct PersistenceState { - /// Hash and number of the last block persisted. + /// Hash and number of the highest block whose non-state/trie outputs are persisted. /// - /// This tracks the chain height that is persisted on disk + /// This tracks the highest canonical block with durable block/static-file/plain-state data. pub(crate) last_persisted_block: BlockNumHash, + /// Hash and number of the highest block whose state/trie outputs are persisted. + pub(crate) last_state_trie_persisted_block: BlockNumHash, /// Receiver end of channel where the result of the persistence task will be /// sent when done. A None value means there's no persistence task in progress. pub(crate) rx: @@ -76,13 +77,18 @@ impl PersistenceState { /// Sets state for a finished persistence task. pub(crate) fn finish( &mut self, - last_persisted_block_hash: B256, - last_persisted_block_number: u64, + last_persisted_block: BlockNumHash, + last_state_trie_persisted_block: BlockNumHash, ) { - trace!(target: "engine::tree", block= %last_persisted_block_number, hash=%last_persisted_block_hash, "updating persistence state"); + trace!( + target: "engine::tree", + last_persisted_block = %last_persisted_block.number, + last_state_trie_persisted_block = %last_state_trie_persisted_block.number, + "updating persistence state" + ); self.rx = None; - self.last_persisted_block = - BlockNumHash::new(last_persisted_block_number, last_persisted_block_hash); + self.last_persisted_block = last_persisted_block; + self.last_state_trie_persisted_block = last_state_trie_persisted_block; } } diff --git a/crates/engine/tree/src/tree/state.rs b/crates/engine/tree/src/tree/state.rs index b8a33adb8b0..a5b3e40d0b0 100644 --- a/crates/engine/tree/src/tree/state.rs +++ b/crates/engine/tree/src/tree/state.rs @@ -6,7 +6,7 @@ use alloy_primitives::{ map::{B256Map, B256Set}, BlockNumber, B256, }; -use 
reth_chain_state::{EthPrimitives, ExecutedBlock, StateTrieOverlayManager}; +use reth_chain_state::{EthPrimitives, ExecutedBlock, LazyOverlay}; use reth_primitives_traits::{AlloyBlockHeader, NodePrimitives, SealedHeader}; use std::{ collections::{btree_map, hash_map, BTreeMap, VecDeque}, @@ -38,39 +38,30 @@ pub struct TreeState { pub(crate) current_canonical_head: BlockNumHash, /// The engine API variant of this handler pub(crate) engine_kind: EngineApiKind, - /// Flattened state trie overlays for in-memory blocks. - pub(crate) state_trie_overlays: StateTrieOverlayManager, + /// Pre-computed lazy overlay for the canonical head. + /// + /// This is optimistically prepared after the canonical head changes, so that + /// the next payload building on the canonical head can use it immediately + /// without recomputing. + pub(crate) cached_canonical_overlay: Option>, } impl TreeState { /// Returns a new, empty tree state that points to the given canonical head. - pub fn new( - current_canonical_head: BlockNumHash, - engine_kind: EngineApiKind, - state_trie_overlays: StateTrieOverlayManager, - ) -> Self { + pub fn new(current_canonical_head: BlockNumHash, engine_kind: EngineApiKind) -> Self { Self { blocks_by_hash: B256Map::default(), blocks_by_number: BTreeMap::new(), current_canonical_head, parent_to_child: B256Map::default(), engine_kind, - state_trie_overlays, + cached_canonical_overlay: None, } } /// Resets the state and points to the given canonical head. 
pub fn reset(&mut self, current_canonical_head: BlockNumHash) { - let engine_kind = self.engine_kind; - let removed_hashes = self.blocks_by_hash.keys().copied().collect::>(); - if !removed_hashes.is_empty() { - self.state_trie_overlays.remove_blocks(removed_hashes); - } - self.blocks_by_hash.clear(); - self.blocks_by_number.clear(); - self.parent_to_child.clear(); - self.current_canonical_head = current_canonical_head; - self.engine_kind = engine_kind; + *self = Self::new(current_canonical_head, self.engine_kind); } /// Returns the number of executed blocks stored. @@ -110,6 +101,64 @@ impl TreeState { Some((parent_hash, blocks)) } + /// Prepares a cached lazy overlay for the current canonical head. + /// + /// This should be called after the canonical head changes to optimistically + /// prepare the overlay for the next payload that will likely build on it. + /// + /// Returns a clone of the prepared overlay so the caller can spawn a background + /// task to trigger computation via [`LazyOverlay::get`] for the cached anchor. + /// This ensures the overlay is actually computed before the next payload arrives. + pub(crate) fn prepare_canonical_overlay(&mut self) -> Option> { + let canonical_hash = self.current_canonical_head.hash; + + // Get blocks leading to the canonical head + let Some((anchor_hash, blocks)) = self.blocks_by_hash(canonical_hash) else { + // Canonical head not in memory (persisted), no overlay needed + self.cached_canonical_overlay = None; + return None; + }; + + let num_blocks = blocks.len(); + let prepared = PreparedCanonicalOverlay { + parent_hash: canonical_hash, + overlay: LazyOverlay::new(blocks), + anchor_hash, + }; + self.cached_canonical_overlay = Some(prepared.clone()); + + debug!( + target: "engine::tree", + %canonical_hash, + %anchor_hash, + num_blocks, + "Prepared cached canonical overlay" + ); + + Some(prepared) + } + + /// Returns the cached overlay if it matches the requested parent hash and anchor. 
+ /// + /// Both parent hash and anchor hash must match to ensure the overlay is valid. + /// This prevents using a stale overlay after persistence has advanced the anchor. + pub fn get_cached_overlay( + &self, + parent_hash: B256, + expected_anchor: B256, + ) -> Option<&PreparedCanonicalOverlay> { + self.cached_canonical_overlay.as_ref().filter(|cached| { + cached.parent_hash == parent_hash && cached.anchor_hash == expected_anchor + }) + } + + /// Invalidates the cached overlay. + /// + /// Should be called when the anchor changes (e.g., after persistence). + pub(crate) fn invalidate_cached_overlay(&mut self) { + self.cached_canonical_overlay = None; + } + /// Insert executed block into the state. pub fn insert_executed(&mut self, executed: ExecutedBlock) { let hash = executed.recovered_block().hash(); @@ -120,13 +169,11 @@ impl TreeState { return; } - let overlay_block = executed.clone(); self.blocks_by_hash.insert(hash, executed.clone()); self.blocks_by_number.entry(block_number).or_default().push(executed); self.parent_to_child.entry(parent_hash).or_default().insert(hash); - self.state_trie_overlays.insert_block(overlay_block); } /// Remove single executed block by its hash. @@ -187,12 +234,7 @@ impl TreeState { /// Removes canonical blocks below the upper bound, only if the last persisted hash is /// part of the canonical chain. 
- fn remove_canonical_until( - &mut self, - upper_bound: BlockNumber, - last_persisted_hash: B256, - removed_hashes: &mut Vec, - ) { + pub fn remove_canonical_until(&mut self, upper_bound: BlockNumber, last_persisted_hash: B256) { debug!(target: "engine::tree", ?upper_bound, ?last_persisted_hash, "Removing canonical blocks from the tree"); // If the last persisted hash is not canonical, then we don't want to remove any canonical @@ -207,12 +249,9 @@ impl TreeState { while let Some(executed) = self.blocks_by_hash.get(¤t_block) { current_block = executed.recovered_block().parent_hash(); if executed.recovered_block().number() <= upper_bound { - let hash = executed.recovered_block().hash(); let num_hash = executed.recovered_block().num_hash(); debug!(target: "engine::tree", ?num_hash, "Attempting to remove block walking back from the head"); - if self.remove_by_hash(hash).is_some() { - removed_hashes.push(hash); - } + self.remove_by_hash(executed.recovered_block().hash()); } } debug!(target: "engine::tree", ?upper_bound, ?last_persisted_hash, "Removed canonical blocks from the tree"); @@ -220,11 +259,7 @@ impl TreeState { /// Removes all blocks that are below the finalized block, as well as removing non-canonical /// sidechains that fork from below the finalized block. 
- fn prune_finalized_sidechains( - &mut self, - finalized_num_hash: BlockNumHash, - removed_hashes: &mut Vec, - ) { + pub fn prune_finalized_sidechains(&mut self, finalized_num_hash: BlockNumHash) { let BlockNumHash { number: finalized_num, hash: finalized_hash } = finalized_num_hash; // We remove disconnected sidechains in three steps: @@ -243,7 +278,6 @@ impl TreeState { for hash in blocks_to_remove { if let Some((removed, _)) = self.remove_by_hash(hash) { debug!(target: "engine::tree", num_hash=?removed.recovered_block().num_hash(), "Removed finalized sidechain block"); - removed_hashes.push(hash); } } @@ -270,7 +304,6 @@ impl TreeState { while let Some(block) = blocks_to_remove.pop_front() { if let Some((removed, children)) = self.remove_by_hash(block) { debug!(target: "engine::tree", num_hash=?removed.recovered_block().num_hash(), "Removed finalized sidechain child block"); - removed_hashes.push(block); blocks_to_remove.extend(children); } } @@ -311,18 +344,16 @@ impl TreeState { // * remove all canonical blocks below the upper bound // * fetch the number of the finalized hash, removing any sidechains that are __below__ the // finalized block - let mut removed_hashes = Vec::new(); - self.remove_canonical_until(upper_bound.number, last_persisted_hash, &mut removed_hashes); + self.remove_canonical_until(upper_bound.number, last_persisted_hash); // Now, we have removed canonical blocks (assuming the upper bound is above the finalized // block) and only have sidechains below the finalized block. if let Some(finalized_num_hash) = finalized_num_hash { - self.prune_finalized_sidechains(finalized_num_hash, &mut removed_hashes); + self.prune_finalized_sidechains(finalized_num_hash); } - if !removed_hashes.is_empty() { - self.state_trie_overlays.remove_blocks(removed_hashes); - } + // Invalidate the cached overlay since blocks were removed and the anchor may have changed + self.invalidate_cached_overlay(); } /// Updates the canonical head to the given block. 
@@ -390,6 +421,39 @@ impl TreeState { } } +/// Pre-computed lazy overlay for the canonical head block. +/// +/// This is prepared **optimistically** when the canonical head changes, allowing +/// the next payload (which typically builds on the canonical head) to reuse +/// the pre-computed overlay immediately without re-traversing in-memory blocks. +/// +/// The overlay captures executed blocks from all in-memory blocks +/// between the canonical head and the persisted anchor. When a new payload +/// arrives building on the canonical head, this cached overlay can be used +/// directly instead of calling `blocks_by_hash` again. +/// +/// # Invalidation +/// +/// The cached overlay is invalidated when: +/// - Persistence completes (anchor changes) +/// - The canonical head changes to a different block +#[derive(Debug, Clone)] +pub struct PreparedCanonicalOverlay { + /// The block hash for which this overlay is prepared as a parent. + /// + /// When a payload arrives with this parent hash, the overlay can be reused. + pub parent_hash: B256, + /// The pre-computed lazy overlay containing executed blocks for the canonical segment. + /// + /// This is computed optimistically after `set_canonical_head` so subsequent payloads don't + /// need to walk the in-memory chain again. + pub overlay: LazyOverlay, + /// The anchor hash (persisted ancestor) this overlay is based on. + /// + /// Used to verify the overlay is still valid (anchor hasn't changed due to persistence). 
+ pub anchor_hash: B256, +} + #[cfg(test)] mod tests { use super::*; @@ -397,11 +461,7 @@ mod tests { #[test] fn test_tree_state_normal_descendant() { - let mut tree_state = TreeState::new( - BlockNumHash::default(), - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(BlockNumHash::default(), EngineApiKind::Ethereum); let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..4).collect(); tree_state.insert_executed(blocks[0].clone()); @@ -424,11 +484,7 @@ mod tests { #[tokio::test] async fn test_tree_state_insert_executed() { - let mut tree_state = TreeState::new( - BlockNumHash::default(), - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(BlockNumHash::default(), EngineApiKind::Ethereum); let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..4).collect(); tree_state.insert_executed(blocks[0].clone()); @@ -454,11 +510,7 @@ mod tests { #[tokio::test] async fn test_tree_state_insert_executed_with_reorg() { - let mut tree_state = TreeState::new( - BlockNumHash::default(), - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(BlockNumHash::default(), EngineApiKind::Ethereum); let mut test_block_builder = TestBlockBuilder::eth(); let blocks: Vec<_> = test_block_builder.get_executed_blocks(1..6).collect(); @@ -498,11 +550,7 @@ mod tests { #[tokio::test] async fn test_tree_state_remove_before() { let start_num_hash = BlockNumHash::default(); - let mut tree_state = TreeState::new( - start_num_hash, - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(start_num_hash, EngineApiKind::Ethereum); let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..6).collect(); for block in &blocks { @@ -552,11 +600,7 @@ mod tests { #[tokio::test] async fn test_tree_state_remove_before_finalized() { let start_num_hash = 
BlockNumHash::default(); - let mut tree_state = TreeState::new( - start_num_hash, - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(start_num_hash, EngineApiKind::Ethereum); let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..6).collect(); for block in &blocks { @@ -606,11 +650,7 @@ mod tests { #[tokio::test] async fn test_tree_state_remove_before_lower_finalized() { let start_num_hash = BlockNumHash::default(); - let mut tree_state = TreeState::new( - start_num_hash, - EngineApiKind::Ethereum, - StateTrieOverlayManager::default(), - ); + let mut tree_state = TreeState::new(start_num_hash, EngineApiKind::Ethereum); let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..6).collect(); for block in &blocks { diff --git a/crates/engine/tree/src/tree/tests.rs b/crates/engine/tree/src/tree/tests.rs index 4d70ef50634..2160c3214b1 100644 --- a/crates/engine/tree/src/tree/tests.rs +++ b/crates/engine/tree/src/tree/tests.rs @@ -19,9 +19,7 @@ use alloy_rpc_types_engine::{ ExecutionData, ExecutionPayloadSidecar, ExecutionPayloadV1, ForkchoiceState, }; use assert_matches::assert_matches; -use reth_chain_state::{ - test_utils::TestBlockBuilder, BlockState, ComputedTrieData, StateTrieOverlayManager, -}; +use reth_chain_state::{test_utils::TestBlockBuilder, BlockState, ComputedTrieData}; use reth_chainspec::{ChainSpec, HOLESKY, MAINNET}; use reth_engine_primitives::{EngineApiValidator, ForkchoiceStatus, NoopInvalidBlockHook}; use reth_ethereum_consensus::EthBeaconConsensus; @@ -188,7 +186,6 @@ impl TestHarness { let (from_tree_tx, from_tree_rx) = unbounded_channel(); let tree_config = TreeConfig::default().with_legacy_state_root(false).with_has_enough_parallelism(true); - let runtime = reth_tasks::Runtime::test(); let header = chain_spec.genesis_header().clone(); let header = SealedHeader::seal_slow(header); @@ -198,7 +195,6 @@ impl TestHarness { 
tree_config.invalid_header_hit_eviction_threshold(), header.num_hash(), EngineApiKind::Ethereum, - runtime.state_trie_overlay_worker_pool(), ); let canonical_in_memory_state = CanonicalInMemoryState::with_head(header, None, None); @@ -215,7 +211,7 @@ impl TestHarness { TreeConfig::default(), Box::new(NoopInvalidBlockHook::default()), changeset_cache.clone(), - runtime.clone(), + reth_tasks::Runtime::test(), ); let tree = EngineApiTreeHandler::new( @@ -226,13 +222,17 @@ impl TestHarness { engine_api_tree_state, canonical_in_memory_state, persistence_handle, - PersistenceState { last_persisted_block: BlockNumHash::default(), rx: None }, + PersistenceState { + last_persisted_block: BlockNumHash::default(), + last_state_trie_persisted_block: BlockNumHash::default(), + rx: None, + }, payload_builder, tree_config, EngineApiKind::Ethereum, evm_config, changeset_cache, - runtime, + reth_tasks::Runtime::test(), ); let block_builder = TestBlockBuilder::default().with_chain_spec((*chain_spec).clone()); @@ -267,18 +267,13 @@ impl TestHarness { parent_hash = hash; } - let state_trie_overlays = StateTrieOverlayManager::default(); - for block in &blocks { - state_trie_overlays.insert_block(block.clone()); - } - self.tree.state.tree_state = TreeState { blocks_by_hash, blocks_by_number, current_canonical_head: blocks.last().unwrap().recovered_block().num_hash(), parent_to_child, engine_kind: EngineApiKind::Ethereum, - state_trie_overlays, + cached_canonical_overlay: None, }; let last_executed_block = blocks.last().unwrap().clone(); @@ -369,6 +364,17 @@ impl TestHarness { } } +type ExpectedPlanStep = (std::ops::Range, Option>, bool); + +fn assert_plan_steps(plan: &SaveBlocksPlan, expected: &[ExpectedPlanStep]) { + assert_eq!(plan.steps.len(), expected.len()); + for (step, (block_range, masking_range, persist_rest)) in plan.steps.iter().zip(expected) { + assert_eq!(&step.block_range, block_range); + assert_eq!(&step.state_trie_masking_range, masking_range); + 
assert_eq!(step.persist_rest, *persist_rest); + } +} + /// Simplified test metrics for validation calls #[derive(Debug, Default)] struct TestMetrics { @@ -563,12 +569,16 @@ async fn test_tree_persist_blocks() { let received_action = test_harness.action_rx.recv().expect("Failed to receive save blocks action"); - if let PersistenceAction::SaveBlocks(saved_blocks, _) = received_action { + if let PersistenceAction::SaveBlocks(plan, _) = received_action { // only blocks.len() - tree_config.memory_block_buffer_target() will be // persisted let expected_persist_len = blocks.len() - tree_config.memory_block_buffer_target() as usize; - assert_eq!(saved_blocks.len(), expected_persist_len); - assert_eq!(saved_blocks, blocks[..expected_persist_len]); + assert_eq!(plan.blocks.len(), expected_persist_len); + assert_eq!(plan.blocks, blocks[..expected_persist_len]); + assert_plan_steps( + &plan, + &[(0..expected_persist_len, Some(expected_persist_len..expected_persist_len), true)], + ); } else { panic!("unexpected action received {received_action:?}"); } @@ -713,8 +723,8 @@ fn test_backpressure_waits_for_persistence_before_reading_incoming() { test_harness.tree.config = test_harness .tree .config - .with_persistence_threshold(0) - .with_persistence_backpressure_threshold(1); + .with_persistence_threshold(1) + .with_persistence_backpressure_threshold(2); let (persist_tx, persist_rx) = crossbeam_channel::bounded(1); let persisted = blocks.last().unwrap().recovered_block().num_hash(); @@ -745,6 +755,7 @@ fn test_backpressure_waits_for_persistence_before_reading_incoming() { persist_tx .send(PersistenceResult { last_block: Some(persisted), + last_state_trie_block: Some(persisted.number), commit_duration: Some(Duration::ZERO), }) .unwrap(); @@ -779,10 +790,10 @@ async fn test_tree_state_on_new_head_reorg() { reth_tracing::init_test_tracing(); let chain_spec = MAINNET.clone(); - // Set persistence_threshold to 1 + // Keep a single block in memory while still leaving room for the 
persistence threshold. let mut test_harness = TestHarness::new(chain_spec); test_harness.tree.config = - test_harness.tree.config.with_persistence_threshold(1).with_memory_block_buffer_target(1); + test_harness.tree.config.with_persistence_threshold(2).with_memory_block_buffer_target(1); let mut test_block_builder = TestBlockBuilder::eth(); let blocks: Vec<_> = test_block_builder.get_executed_blocks(1..6).collect(); @@ -833,15 +844,16 @@ async fn test_tree_state_on_new_head_reorg() { // get rid of the prev action let received_action = test_harness.action_rx.recv().unwrap(); - let PersistenceAction::SaveBlocks(saved_blocks, sender) = received_action else { + let PersistenceAction::SaveBlocks(plan, sender) = received_action else { panic!("received wrong action"); }; - assert_eq!(saved_blocks, vec![blocks[0].clone(), blocks[1].clone()]); + assert_eq!(plan.blocks, vec![blocks[0].clone(), blocks[1].clone()]); // send the response so we can advance again sender .send(PersistenceResult { last_block: Some(blocks[1].recovered_block().num_hash()), + last_state_trie_block: Some(blocks[1].recovered_block().number()), commit_duration: Some(Duration::ZERO), }) .unwrap(); @@ -977,8 +989,10 @@ async fn test_get_canonical_blocks_to_persist() { test_harness = test_harness.with_blocks(blocks.clone()); let last_persisted_block_number = 3; - test_harness.tree.persistence_state.last_persisted_block = + let last_persisted_block = blocks[last_persisted_block_number as usize].recovered_block.num_hash(); + test_harness.tree.persistence_state.last_persisted_block = last_persisted_block; + test_harness.tree.persistence_state.last_state_trie_persisted_block = last_persisted_block; let persistence_threshold = 4; let memory_block_buffer_target = 3; @@ -986,16 +1000,15 @@ async fn test_get_canonical_blocks_to_persist() { .with_persistence_threshold(persistence_threshold) .with_memory_block_buffer_target(memory_block_buffer_target); - let blocks_to_persist = - 
test_harness.tree.get_canonical_blocks_to_persist(PersistTarget::Threshold).unwrap(); + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); let expected_blocks_to_persist_length: usize = (canonical_head_number - memory_block_buffer_target - last_persisted_block_number) .try_into() .unwrap(); - assert_eq!(blocks_to_persist.len(), expected_blocks_to_persist_length); - for (i, item) in blocks_to_persist.iter().enumerate().take(expected_blocks_to_persist_length) { + assert_eq!(plan.blocks.len(), expected_blocks_to_persist_length); + for (i, item) in plan.blocks.iter().enumerate().take(expected_blocks_to_persist_length) { assert_eq!(item.recovered_block().number, last_persisted_block_number + i as u64 + 1); } @@ -1006,15 +1019,14 @@ async fn test_get_canonical_blocks_to_persist() { assert!(test_harness.tree.state.tree_state.sealed_header_by_hash(&fork_block_hash).is_some()); - let blocks_to_persist = - test_harness.tree.get_canonical_blocks_to_persist(PersistTarget::Threshold).unwrap(); - assert_eq!(blocks_to_persist.len(), expected_blocks_to_persist_length); + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + assert_eq!(plan.blocks.len(), expected_blocks_to_persist_length); // check that the fork block is not included in the blocks to persist - assert!(!blocks_to_persist.iter().any(|b| b.recovered_block().hash() == fork_block_hash)); + assert!(!plan.blocks.iter().any(|b| b.recovered_block().hash() == fork_block_hash)); // check that the original block 4 is still included - assert!(blocks_to_persist.iter().any(|b| b.recovered_block().number == 4 && + assert!(plan.blocks.iter().any(|b| b.recovered_block().number == 4 && b.recovered_block().hash() == blocks[4].recovered_block().hash())); // check that if we advance persistence, the persistence action is the correct value @@ -1022,11 +1034,252 @@ async fn test_get_canonical_blocks_to_persist() { assert_eq!( 
test_harness.tree.persistence_state.current_action().cloned(), Some(CurrentPersistenceAction::SavingBlocks { - highest: blocks_to_persist.last().unwrap().recovered_block().num_hash() + highest: plan.blocks.last().unwrap().recovered_block().num_hash() }) ); } +#[test] +fn test_get_save_blocks_plan_with_state_masking_blocks() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[3].recovered_block().num_hash(); + test_harness.tree.config = TreeConfig::default() + .with_persistence_threshold(4) + .with_memory_block_buffer_target(1) + .with_num_state_masking_blocks(2); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps(&plan, &[(0..2, Some(2..4), false), (2..4, None, true)]); + assert_eq!(plan.blocks.len(), 4); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + vec![2, 3, 4, 5] + ); + assert_eq!(plan.last_block(), Some(blocks[5].recovered_block().num_hash())); +} + +#[test] +fn test_get_save_blocks_plan_limits_partial_persistence_to_threshold() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..31).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[12].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[15].recovered_block().num_hash(); + 
test_harness.tree.config = TreeConfig::default() + .with_persistence_threshold(5) + .with_memory_block_buffer_target(2) + .with_num_state_masking_blocks(2); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps( + &plan, + &[(0..3, Some(6..8), false), (3..6, Some(6..8), true), (6..8, None, true)], + ); + assert_eq!(plan.blocks.len(), 8); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + (13..=20).collect::>() + ); + assert_eq!(plan.last_block(), Some(blocks[20].recovered_block().num_hash())); +} + +#[test] +fn test_get_save_blocks_plan_state_masking_does_not_reduce_persist_rest_threshold() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..20).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[0].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[3].recovered_block().num_hash(); + test_harness.tree.config = + TreeConfig::default().with_persistence_threshold(13).with_num_state_masking_blocks(10); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps( + &plan, + &[(0..3, Some(6..16), false), (3..6, Some(6..16), true), (6..16, None, true)], + ); + assert_eq!(plan.blocks.len(), 16); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + (1..=16).collect::>() + ); + assert_eq!(plan.last_block(), Some(blocks[16].recovered_block().num_hash())); +} + +#[test] +fn test_get_save_blocks_plan_steady_state_masking_has_catchup_overlap_and_masked_tail() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut 
test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..30).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[5].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[11].recovered_block().num_hash(); + test_harness.tree.config = + TreeConfig::default().with_persistence_threshold(11).with_num_state_masking_blocks(6); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps( + &plan, + &[(0..6, Some(11..17), false), (6..11, Some(11..17), true), (11..17, None, true)], + ); + assert_eq!(plan.blocks.len(), 17); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + (6..=22).collect::>() + ); + assert_eq!(plan.last_block(), Some(blocks[22].recovered_block().num_hash())); + assert_eq!(plan.last_state_trie_block(), Some(blocks[16].recovered_block().num_hash())); +} + +#[test] +fn test_on_persistence_complete_retains_blocks_above_partial_state_trie() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + + let persisted_tip = blocks[5].recovered_block().num_hash(); + let last_state_trie_block = blocks[3].recovered_block().number(); + + test_harness + .tree + .on_persistence_complete( + PersistenceResult { + last_block: Some(persisted_tip), + last_state_trie_block: Some(last_state_trie_block), + commit_duration: Some(Duration::ZERO), 
+ }, + Instant::now(), + ) + .unwrap(); + + assert_eq!(test_harness.tree.persistence_state.last_persisted_block, persisted_tip); + assert_eq!( + test_harness.tree.persistence_state.last_state_trie_persisted_block, + blocks[3].recovered_block().num_hash() + ); + assert_eq!( + test_harness.tree.canonical_in_memory_state.get_persisted_num_hash(), + Some(persisted_tip) + ); + + for block in &blocks[..=last_state_trie_block as usize] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_none()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_none()); + } + + for block in &blocks[last_state_trie_block as usize + 1..] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_some()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_some()); + } +} + +#[test] +fn test_on_persistence_complete_without_partial_state_trie_prunes_through_tip() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + + let persisted_tip = blocks[5].recovered_block().num_hash(); + + test_harness + .tree + .on_persistence_complete( + PersistenceResult { + last_block: Some(persisted_tip), + last_state_trie_block: None, + commit_duration: Some(Duration::ZERO), + }, + Instant::now(), + ) + .unwrap(); + + for block in &blocks[..=persisted_tip.number as usize] { + assert!(test_harness + .tree + 
.state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_none()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_none()); + } + + for block in &blocks[persisted_tip.number as usize + 1..] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_some()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_some()); + } +} + #[tokio::test] async fn test_engine_tree_fcu_missing_head() { let chain_spec = MAINNET.clone(); @@ -2121,15 +2374,18 @@ mod forkchoice_updated_tests { break; } - if let Ok(PersistenceAction::SaveBlocks(saved_blocks, sender)) = + if let Ok(PersistenceAction::SaveBlocks(plan, sender)) = action_rx.recv_timeout(std::time::Duration::from_millis(100)) { - if let Some(last) = saved_blocks.last() { + if let Some(last) = plan.last_block() { + last_persisted_number = last.number; + } else if let Some(last) = plan.blocks.last() { last_persisted_number = last.recovered_block().number; } sender .send(PersistenceResult { - last_block: saved_blocks.last().map(|b| b.recovered_block().num_hash()), + last_block: plan.last_block(), + last_state_trie_block: plan.last_block().map(|tip| tip.number), commit_duration: Some(Duration::ZERO), }) .unwrap(); diff --git a/crates/node/builder/src/launch/common.rs b/crates/node/builder/src/launch/common.rs index dca47987689..c2d4d7550ed 100644 --- a/crates/node/builder/src/launch/common.rs +++ b/crates/node/builder/src/launch/common.rs @@ -69,16 +69,17 @@ use reth_node_metrics::{ }; use reth_provider::{ providers::{NodeTypesForProvider, ProviderNodeTypes, RocksDBProvider, StaticFileProvider}, - BalConfig, BalStoreHandle, BlockHashReader, BlockNumReader, InMemoryBalStore, ProviderError, - ProviderFactory, ProviderResult, RocksDBProviderFactory, StageCheckpointReader, - StaticFileProviderBuilder, 
StaticFileProviderFactory, StorageSettingsCache, + BalConfig, BalStoreHandle, BlockHashReader, BlockNumReader, DatabaseProviderFactory, + InMemoryBalStore, ProviderError, ProviderFactory, ProviderResult, RocksDBProviderFactory, + StageCheckpointReader, StaticFileProviderBuilder, StaticFileProviderFactory, + StorageSettingsCache, }; use reth_prune::{PruneModes, PrunerBuilder}; use reth_rpc_builder::config::RethRpcServerConfig; use reth_rpc_layer::JwtSecret; use reth_stages::{ sets::DefaultStages, stages::EraImportSource, MetricEvent, PipelineBuilder, PipelineTarget, - StageId, StageSet, + StageCheckpoint, StageId, StageSet, }; use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; @@ -531,19 +532,26 @@ where // the unwind targets for each storage layer if inconsistencies are // found. let (rocksdb_unwind, static_file_unwind) = factory.check_consistency()?; + let partial_trie_unwind = partial_trie_unwind_target( + factory.database_provider_ro()?.get_stage_checkpoint(StageId::Finish)?, + ); // Take the minimum block number to ensure all storage layers are consistent. - let unwind_target = [rocksdb_unwind, static_file_unwind].into_iter().flatten().min(); + let unwind_target = + [rocksdb_unwind, static_file_unwind, partial_trie_unwind].into_iter().flatten().min(); if let Some(unwind_block) = unwind_target { + let inconsistency_source = [ + rocksdb_unwind.map(|_| "RocksDB"), + static_file_unwind.map(|_| "static file"), + partial_trie_unwind.map(|_| "partial state trie"), + ] + .into_iter() + .flatten() + .collect::>() + .join(" and "); // Highly unlikely to happen, and given its destructive nature, it's better to panic // instead. Unwinding to 0 would leave MDBX with a huge free list size. 
- let inconsistency_source = match (rocksdb_unwind, static_file_unwind) { - (Some(_), Some(_)) => "RocksDB and static file", - (Some(_), None) => "RocksDB", - (None, Some(_)) => "static file", - (None, None) => unreachable!(), - }; assert_ne!( unwind_block, 0, "A {} inconsistency was found that would trigger an unwind to block 0", @@ -1323,11 +1331,19 @@ pub fn metrics_hooks(provider_factory: &ProviderFactory) .build() } +fn partial_trie_unwind_target(finish_checkpoint: Option) -> Option { + let finish_checkpoint = finish_checkpoint?; + let partial_state_trie = finish_checkpoint.finish_stage_checkpoint()?.partial_state_trie?; + + (partial_state_trie != finish_checkpoint.block_number).then_some(partial_state_trie) +} + #[cfg(test)] mod tests { - use super::{LaunchContext, NodeConfig}; + use super::{partial_trie_unwind_target, LaunchContext, NodeConfig}; use reth_config::Config; use reth_node_core::args::PruningArgs; + use reth_stages::{FinishCheckpoint, StageCheckpoint}; const EXTENSION: &str = "toml"; @@ -1379,4 +1395,24 @@ mod tests { assert_eq!(reth_config, loaded_config); }) } + + #[test] + fn partial_trie_unwind_target_uses_partial_finish_checkpoint() { + let finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(21) }); + + assert_eq!(partial_trie_unwind_target(Some(finish_checkpoint)), Some(21)); + } + + #[test] + fn partial_trie_unwind_target_ignores_matching_or_missing_partial_checkpoint() { + let matching_finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(42) }); + let missing_partial_finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: None }); + + assert_eq!(partial_trie_unwind_target(Some(matching_finish_checkpoint)), None); + assert_eq!(partial_trie_unwind_target(Some(missing_partial_finish_checkpoint)), None); + assert_eq!(partial_trie_unwind_target(None), 
None); + } } diff --git a/crates/node/core/src/args/engine.rs b/crates/node/core/src/args/engine.rs index a2f1b06b474..bd3112c78f0 100644 --- a/crates/node/core/src/args/engine.rs +++ b/crates/node/core/src/args/engine.rs @@ -7,8 +7,9 @@ use clap::{ use eyre::ensure; use reth_cli_util::{parse_duration_from_secs_or_ms, parsers::format_duration_as_secs_or_ms}; use reth_engine_primitives::{ - TreeConfig, DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, - DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, DEFAULT_SPARSE_TRIE_MAX_HOT_ACCOUNTS, + default_persistence_backpressure_threshold, TreeConfig, + DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, + DEFAULT_NUM_STATE_MASKING_BLOCKS, DEFAULT_SPARSE_TRIE_MAX_HOT_ACCOUNTS, DEFAULT_SPARSE_TRIE_MAX_HOT_SLOTS, }; use std::{sync::OnceLock, time::Duration}; @@ -27,7 +28,8 @@ static ENGINE_DEFAULTS: OnceLock = OnceLock::new(); #[derive(Debug, Clone)] pub struct DefaultEngineValues { persistence_threshold: u64, - persistence_backpressure_threshold: u64, + persistence_backpressure_threshold: Option, + num_state_masking_blocks: u64, memory_block_buffer_target: u64, invalid_header_hit_eviction_threshold: u8, legacy_state_root_task_enabled: bool, @@ -76,9 +78,26 @@ impl DefaultEngineValues { self } + /// Get the default persistence backpressure threshold. + pub const fn persistence_backpressure_threshold(&self) -> u64 { + match self.persistence_backpressure_threshold { + Some(v) => v, + None => default_persistence_backpressure_threshold( + self.persistence_threshold, + self.memory_block_buffer_target, + ), + } + } + /// Set the default persistence backpressure threshold pub const fn with_persistence_backpressure_threshold(mut self, v: u64) -> Self { - self.persistence_backpressure_threshold = v; + self.persistence_backpressure_threshold = Some(v); + self + } + + /// Set the default number of state masking blocks. 
+ pub const fn with_num_state_masking_blocks(mut self, v: u64) -> Self { + self.num_state_masking_blocks = v; self } @@ -264,7 +283,8 @@ impl Default for DefaultEngineValues { fn default() -> Self { Self { persistence_threshold: DEFAULT_PERSISTENCE_THRESHOLD, - persistence_backpressure_threshold: DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold: None, + num_state_masking_blocks: DEFAULT_NUM_STATE_MASKING_BLOCKS, memory_block_buffer_target: DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, invalid_header_hit_eviction_threshold: DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, legacy_state_root_task_enabled: false, @@ -298,12 +318,6 @@ impl Default for DefaultEngineValues { } } -fn default_persistence_backpressure_threshold(persistence_threshold: u64) -> u64 { - DefaultEngineValues::get_global() - .persistence_backpressure_threshold - .max(persistence_threshold.saturating_mul(2)) -} - /// Parameters for configuring the engine driver. #[derive(Debug, Clone, Args, PartialEq, Eq)] #[command(next_help_heading = "Engine")] @@ -319,13 +333,19 @@ pub struct EngineArgs { /// Configure the maximum canonical-minus-persisted gap before engine API processing stalls. /// - /// If omitted, this defaults to the larger of the default backpressure threshold and twice - /// `--engine.persistence-threshold`. + /// If omitted, this is derived from `--engine.persistence-threshold` and + /// `--engine.memory-block-buffer-target`, unless the process configured an explicit global + /// default. /// /// This value must be greater than `--engine.persistence-threshold`. #[arg(long = "engine.persistence-backpressure-threshold")] pub persistence_backpressure_threshold: Option, + /// Configure how many of the blocks being persisted should only mask state/trie writes instead + /// of durably persisting their state/trie updates in the current cycle. 
+ #[arg(long = "engine.num-state-masking-blocks", default_value_t = DefaultEngineValues::get_global().num_state_masking_blocks)] + pub num_state_masking_blocks: u64, + /// Configure the target number of blocks to keep in memory. #[arg(long = "engine.memory-block-buffer-target", default_value_t = DefaultEngineValues::get_global().memory_block_buffer_target)] pub memory_block_buffer_target: u64, @@ -556,6 +576,7 @@ impl Default for EngineArgs { fn default() -> Self { let DefaultEngineValues { persistence_threshold, + num_state_masking_blocks, persistence_backpressure_threshold: _, memory_block_buffer_target, invalid_header_hit_eviction_threshold, @@ -590,6 +611,7 @@ impl Default for EngineArgs { Self { persistence_threshold, persistence_backpressure_threshold: None, + num_state_masking_blocks, memory_block_buffer_target, invalid_header_hit_eviction_threshold, legacy_state_root_task_enabled, @@ -635,9 +657,14 @@ impl Default for EngineArgs { impl EngineArgs { /// Returns the effective persistence backpressure threshold. pub fn persistence_backpressure_threshold(&self) -> u64 { - self.persistence_backpressure_threshold.unwrap_or_else(|| { - default_persistence_backpressure_threshold(self.persistence_threshold) - }) + self.persistence_backpressure_threshold + .or(DefaultEngineValues::get_global().persistence_backpressure_threshold) + .unwrap_or_else(|| { + default_persistence_backpressure_threshold( + self.persistence_threshold, + self.memory_block_buffer_target, + ) + }) } /// Validates cross-field engine arguments. 
@@ -649,6 +676,13 @@ impl EngineArgs { persistence_backpressure_threshold, self.persistence_threshold ); + ensure!( + self.num_state_masking_blocks + self.memory_block_buffer_target < self.persistence_threshold, + "--engine.num-state-masking-blocks ({}) + --engine.memory-block-buffer-target ({}) must be less than --engine.persistence-threshold ({})", + self.num_state_masking_blocks, + self.memory_block_buffer_target, + self.persistence_threshold, + ); ensure!( self.bal_parallel_execution_disabled || !self.bal_parallel_state_root_disabled, "--engine.disable-bal-parallel-state-root requires --engine.disable-bal-parallel-execution because BAL parallel execution depends on BAL prewarm state-root updates" @@ -661,6 +695,7 @@ impl EngineArgs { let config = TreeConfig::default() .with_persistence_backpressure_threshold(self.persistence_backpressure_threshold()) .with_persistence_threshold(self.persistence_threshold) + .with_num_state_masking_blocks(self.num_state_masking_blocks) .with_memory_block_buffer_target(self.memory_block_buffer_target) .with_invalid_header_hit_eviction_threshold(self.invalid_header_hit_eviction_threshold) .with_legacy_state_root(self.legacy_state_root_task_enabled) @@ -718,7 +753,7 @@ mod tests { assert_eq!(args, default_args); assert_eq!( args.persistence_backpressure_threshold(), - DefaultEngineValues::get_global().persistence_backpressure_threshold + DefaultEngineValues::get_global().persistence_backpressure_threshold() ); } @@ -733,12 +768,12 @@ mod tests { ]) .args; - assert_eq!(args.persistence_backpressure_threshold(), 200); + assert_eq!(args.persistence_backpressure_threshold(), 300); let tree_config = args.tree_config(); assert_eq!(tree_config.persistence_threshold(), 100); assert_eq!(tree_config.memory_block_buffer_target(), 50); - assert_eq!(tree_config.persistence_backpressure_threshold(), 200); + assert_eq!(tree_config.persistence_backpressure_threshold(), 300); } #[test] @@ -752,7 +787,7 @@ mod tests { assert_eq!( 
args.persistence_backpressure_threshold(), - DefaultEngineValues::get_global().persistence_backpressure_threshold + DefaultEngineValues::get_global().persistence_backpressure_threshold() ); } @@ -772,12 +807,49 @@ mod tests { assert_eq!(args.persistence_backpressure_threshold(), 101); } + #[test] + fn default_engine_values_derive_backpressure_threshold() { + let defaults = DefaultEngineValues::default() + .with_persistence_threshold(10) + .with_memory_block_buffer_target(3); + + assert_eq!(defaults.persistence_backpressure_threshold(), 26); + } + + #[test] + fn explicit_backpressure_default_override_is_preserved() { + let defaults = DefaultEngineValues::default() + .with_persistence_backpressure_threshold(99) + .with_persistence_threshold(10) + .with_memory_block_buffer_target(3); + + assert_eq!(defaults.persistence_backpressure_threshold(), 99); + } + + #[test] + fn engine_args_default_thresholds_match_expected_defaults() { + let args = EngineArgs::default(); + + assert_eq!(args.persistence_threshold, DEFAULT_PERSISTENCE_THRESHOLD); + assert_eq!(args.num_state_masking_blocks, DEFAULT_NUM_STATE_MASKING_BLOCKS); + assert_eq!(args.memory_block_buffer_target, DEFAULT_MEMORY_BLOCK_BUFFER_TARGET); + assert_eq!(args.persistence_backpressure_threshold, None); + assert_eq!( + args.persistence_backpressure_threshold(), + default_persistence_backpressure_threshold( + args.persistence_threshold, + args.memory_block_buffer_target, + ) + ); + } + #[test] #[allow(deprecated)] fn engine_args() { let args = EngineArgs { persistence_threshold: 100, persistence_backpressure_threshold: Some(101), + num_state_masking_blocks: 25, memory_block_buffer_target: 50, invalid_header_hit_eviction_threshold: 7, legacy_state_root_task_enabled: true, @@ -822,6 +894,8 @@ mod tests { "100", "--engine.persistence-backpressure-threshold", "101", + "--engine.num-state-masking-blocks", + "25", "--engine.memory-block-buffer-target", "50", "--engine.invalid-header-cache-hit-eviction-threshold", @@ -865,6 
+939,21 @@ mod tests { assert_eq!(parsed_args, args); } + #[test] + fn test_parse_num_state_masking_blocks() { + let args = CommandParser::::parse_from([ + "reth", + "--engine.persistence-threshold", + "8", + "--engine.num-state-masking-blocks", + "7", + ]) + .args; + + assert_eq!(args.num_state_masking_blocks, 7); + assert_eq!(args.tree_config().num_state_masking_blocks(), 7); + } + #[test] fn validate_rejects_invalid_backpressure_threshold() { let args = EngineArgs { @@ -889,6 +978,21 @@ mod tests { assert!(result.is_err()); } + #[test] + fn validate_rejects_state_masking_window_at_or_above_threshold() { + let args = EngineArgs { + persistence_threshold: 4, + num_state_masking_blocks: 2, + memory_block_buffer_target: 2, + ..EngineArgs::default() + }; + + let err = args.validate().unwrap_err().to_string(); + assert!(err.contains("engine.num-state-masking-blocks")); + assert!(err.contains("engine.memory-block-buffer-target")); + assert!(err.contains("engine.persistence-threshold")); + } + #[test] fn validate_rejects_bal_parallel_execution_without_bal_parallel_state_root() { let args = EngineArgs { diff --git a/crates/stages/stages/src/stages/bodies.rs b/crates/stages/stages/src/stages/bodies.rs index 649b48b86e5..9e863f1b806 100644 --- a/crates/stages/stages/src/stages/bodies.rs +++ b/crates/stages/stages/src/stages/bodies.rs @@ -295,7 +295,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, // 1 seeded block body + batch size total // seeded headers - })) + })), + .. }, done: false }) if block_number < 200 && processed == batch_size + 1 && total == previous_stage + 1 ); @@ -333,7 +334,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. 
}, done: true }) if processed + 1 == total && total == previous_stage + 1 @@ -370,7 +372,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: false }) if block_number >= 10 && processed - 1 == batch_size && total == previous_stage + 1 ); @@ -391,7 +394,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number > first_run_checkpoint.block_number && processed + 1 == total && total == previous_stage + 1 ); @@ -432,7 +436,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed + 1 == total && total == previous_stage + 1 ); @@ -460,7 +465,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed: 1, total - })) + })), + .. }}) if total == previous_stage + 1 ); diff --git a/crates/stages/stages/src/stages/era.rs b/crates/stages/stages/src/stages/era.rs index 6e81054ed68..862b63a3880 100644 --- a/crates/stages/stages/src/stages/era.rs +++ b/crates/stages/stages/src/stages/era.rs @@ -298,7 +298,7 @@ mod tests { assert_matches!( output, Ok(ExecOutput { - checkpoint: StageCheckpoint { block_number, stage_checkpoint: None }, + checkpoint: StageCheckpoint { block_number, stage_checkpoint: None, .. }, done: false }) if block_number == era_cap ); @@ -318,7 +318,7 @@ mod tests { assert_matches!( output, Ok(ExecOutput { - checkpoint: StageCheckpoint { block_number, stage_checkpoint: None }, + checkpoint: StageCheckpoint { block_number, stage_checkpoint: None, .. 
}, done: true }) if block_number == target ); diff --git a/crates/stages/stages/src/stages/execution/mod.rs b/crates/stages/stages/src/stages/execution/mod.rs index a2154fe54a7..da128c3ab28 100644 --- a/crates/stages/stages/src/stages/execution/mod.rs +++ b/crates/stages/stages/src/stages/execution/mod.rs @@ -1017,7 +1017,8 @@ mod tests { processed, total } - })) + })), + .. }, done: true } if processed == total && total == block.gas_used); @@ -1172,7 +1173,8 @@ mod tests { processed: 0, total } - })) + })), + .. } } if total == block.gas_used); diff --git a/crates/stages/stages/src/stages/hashing_account.rs b/crates/stages/stages/src/stages/hashing_account.rs index 2410e8131fe..ddf26b41b1e 100644 --- a/crates/stages/stages/src/stages/hashing_account.rs +++ b/crates/stages/stages/src/stages/hashing_account.rs @@ -397,6 +397,7 @@ mod tests { }, .. })), + .. }, done: true, }) if block_number == previous_stage && diff --git a/crates/stages/stages/src/stages/headers.rs b/crates/stages/stages/src/stages/headers.rs index f9ca2a86f3a..6f719d0e542 100644 --- a/crates/stages/stages/src/stages/headers.rs +++ b/crates/stages/stages/src/stages/headers.rs @@ -594,7 +594,8 @@ mod tests { processed, total, } - })) + })), + .. }, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head @@ -666,7 +667,8 @@ mod tests { processed, total, } - })) + })), + .. 
}, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head diff --git a/crates/stages/stages/src/stages/merkle.rs b/crates/stages/stages/src/stages/merkle.rs index bb1e44b6668..ea65ae51feb 100644 --- a/crates/stages/stages/src/stages/merkle.rs +++ b/crates/stages/stages/src/stages/merkle.rs @@ -402,7 +402,11 @@ where info!(target: "sync::stages::merkle::unwind", "Nothing to unwind"); } else { let (block_root, updates) = reth_trie_db::with_adapter!(provider, |A| { - DbStateRoot::<_, A>::incremental_root_with_updates(provider, range) + DbStateRoot::<_, A>::incremental_root_calculator(provider, range).and_then( + |calculator| { + calculator.with_walk_all_changed_branch_children(true).root_with_updates() + }, + ) }) .map_err(|e| StageError::Fatal(Box::new(e)))?; @@ -502,7 +506,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && @@ -542,7 +547,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && @@ -584,7 +590,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && diff --git a/crates/stages/stages/src/stages/sender_recovery.rs b/crates/stages/stages/src/stages/sender_recovery.rs index 1d44de77271..7487099d6bb 100644 --- a/crates/stages/stages/src/stages/sender_recovery.rs +++ b/crates/stages/stages/src/stages/sender_recovery.rs @@ -527,7 +527,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed: 1, total: 1 - })) + })), + .. 
}, done: true }) if block_number == previous_stage ); diff --git a/crates/stages/stages/src/stages/tx_lookup.rs b/crates/stages/stages/src/stages/tx_lookup.rs index 9eae2084e3d..ebefdb9d507 100644 --- a/crates/stages/stages/src/stages/tx_lookup.rs +++ b/crates/stages/stages/src/stages/tx_lookup.rs @@ -337,12 +337,12 @@ mod tests { result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && + block_number, + stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { + processed, + total + })) + }, done: true }) if block_number == previous_stage && processed == total && total == runner.db.count_entries::().unwrap() as u64 ); @@ -383,12 +383,12 @@ mod tests { result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && + block_number, + stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { + processed, + total + })) + }, done: true }) if block_number == previous_stage && processed == total && total == runner.db.count_entries::().unwrap() as u64 ); diff --git a/crates/stages/types/src/checkpoints.rs b/crates/stages/types/src/checkpoints.rs index 6486dce31be..9aff8ac726a 100644 --- a/crates/stages/types/src/checkpoints.rs +++ b/crates/stages/types/src/checkpoints.rs @@ -379,6 +379,9 @@ pub struct StageCheckpoint { pub stage_checkpoint: Option, } +#[cfg(any(test, feature = "reth-codec"))] +reth_codecs::impl_compression_for_compact!(StageCheckpoint); + impl StageCheckpoint { /// Creates a new [`StageCheckpoint`] with only `block_number` set. pub fn new(block_number: BlockNumber) -> Self { @@ -431,13 +434,21 @@ impl StageCheckpoint { progress: entities, .. 
}) => Some(entities), - StageUnitCheckpoint::MerkleChangeSets(_) => None, + StageUnitCheckpoint::MerkleChangeSets(_) | StageUnitCheckpoint::Finish(_) => None, } } } -#[cfg(any(test, feature = "reth-codec"))] -reth_codecs::impl_compression_for_compact!(StageCheckpoint); +/// Saves the progress of the Finish stage. +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "test-utils"), derive(arbitrary::Arbitrary))] +#[cfg_attr(any(test, feature = "reth-codec"), derive(reth_codecs::Compact))] +#[cfg_attr(any(test, feature = "reth-codec"), reth_codecs::add_arbitrary_tests(compact))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct FinishCheckpoint { + /// The highest block with a partially persisted state and trie. + pub partial_state_trie: Option, +} // TODO(alexey): add a merkle checkpoint. Currently it's hard because [`MerkleCheckpoint`] // is not a Copy type. @@ -465,6 +476,8 @@ pub enum StageUnitCheckpoint { /// Note: This variant is only kept for backward compatibility with the Compact codec. /// The `MerkleChangeSets` stage has been removed. MerkleChangeSets(MerkleChangeSetsCheckpoint), + /// Saves the progress of the Finish stage. + Finish(FinishCheckpoint), } impl StageUnitCheckpoint { @@ -573,6 +586,15 @@ stage_unit_checkpoints!( index_history_stage_checkpoint, /// Sets the stage checkpoint to index history. with_index_history_stage_checkpoint + ), + ( + 6, + Finish, + FinishCheckpoint, + /// Returns the finish stage checkpoint, if any. + finish_stage_checkpoint, + /// Sets the stage checkpoint to finish. 
+ with_finish_stage_checkpoint ) ); @@ -664,4 +686,15 @@ mod tests { let (decoded, _) = MerkleCheckpoint::from_compact(&buf, encoded); assert_eq!(decoded, checkpoint); } + + #[test] + fn finish_checkpoint_roundtrip() { + let checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(21) }); + + let mut buf = Vec::new(); + let encoded = checkpoint.to_compact(&mut buf); + let (decoded, _) = StageCheckpoint::from_compact(&buf, encoded); + assert_eq!(decoded, checkpoint); + } } diff --git a/crates/stages/types/src/lib.rs b/crates/stages/types/src/lib.rs index 4e30ce27cd7..70c5de17fe5 100644 --- a/crates/stages/types/src/lib.rs +++ b/crates/stages/types/src/lib.rs @@ -18,7 +18,7 @@ pub use id::StageId; mod checkpoints; pub use checkpoints::{ AccountHashingCheckpoint, CheckpointBlockRange, EntitiesCheckpoint, ExecutionCheckpoint, - HeadersCheckpoint, IndexHistoryCheckpoint, MerkleCheckpoint, StageCheckpoint, + FinishCheckpoint, HeadersCheckpoint, IndexHistoryCheckpoint, MerkleCheckpoint, StageCheckpoint, StageUnitCheckpoint, StorageHashingCheckpoint, StorageRootMerkleCheckpoint, }; diff --git a/crates/storage/provider/src/lib.rs b/crates/storage/provider/src/lib.rs index 909a9c24b38..26b7c7ad91f 100644 --- a/crates/storage/provider/src/lib.rs +++ b/crates/storage/provider/src/lib.rs @@ -24,8 +24,8 @@ pub mod providers; pub use providers::{ DatabaseProvider, DatabaseProviderRO, DatabaseProviderRW, HistoricalStateProvider, HistoricalStateProviderRef, LatestStateProvider, LatestStateProviderRef, ProviderFactory, - PruneShardOutcome, PrunedIndices, SaveBlocksMode, StaticFileAccess, StaticFileProviderBuilder, - StaticFileWriteCtx, StaticFileWriter, + PruneShardOutcome, PrunedIndices, SaveBlocksMode, SaveBlocksPlan, SaveBlocksPlanStep, + StaticFileAccess, StaticFileProviderBuilder, StaticFileWriteCtx, StaticFileWriter, }; pub mod changeset_walker; diff --git a/crates/storage/provider/src/providers/blockchain_provider.rs 
b/crates/storage/provider/src/providers/blockchain_provider.rs index 676311d321e..31fea4541c8 100644 --- a/crates/storage/provider/src/providers/blockchain_provider.rs +++ b/crates/storage/provider/src/providers/blockchain_provider.rs @@ -792,7 +792,8 @@ mod tests { create_test_provider_factory, create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB, }, - BlockWriter, CanonChainTracker, ProviderFactory, SaveBlocksMode, + BlockWriter, CanonChainTracker, ProviderFactory, SaveBlocksMode, SaveBlocksPlan, + SaveBlocksPlanStep, }; use alloy_eips::{BlockHashOrNumber, BlockNumHash, BlockNumberOrTag}; use alloy_primitives::{BlockNumber, TxNumber, B256}; @@ -1009,7 +1010,15 @@ mod tests { // Push to disk let provider_rw = hook_provider.database_provider_rw().unwrap(); - provider_rw.save_blocks(vec![lowest_memory_block], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &SaveBlocksPlan::new( + vec![lowest_memory_block], + vec![SaveBlocksPlanStep::new(0..1, Some(1..1), true)], + ), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); // Remove from memory diff --git a/crates/storage/provider/src/providers/database/mod.rs b/crates/storage/provider/src/providers/database/mod.rs index fea5267e35b..38a517729fe 100644 --- a/crates/storage/provider/src/providers/database/mod.rs +++ b/crates/storage/provider/src/providers/database/mod.rs @@ -51,6 +51,9 @@ pub use provider::{ CommitOrder, DatabaseProvider, DatabaseProviderRO, DatabaseProviderRW, SaveBlocksMode, }; +mod save_blocks; +pub use save_blocks::{SaveBlocksPlan, SaveBlocksPlanStep}; + use super::ProviderNodeTypes; use reth_trie::KeccakKeyHasher; diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index c9807eaf8f2..56ebee73286 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1,3 +1,4 @@ +use super::SaveBlocksPlan; 
use crate::{ changesets_utils::StorageRevertsIter, providers::{ @@ -57,7 +58,7 @@ use reth_primitives_traits::{ use reth_prune_types::{ PruneCheckpoint, PruneMode, PruneModes, PruneSegment, MINIMUM_UNWIND_SAFE_DISTANCE, }; -use reth_stages_types::{StageCheckpoint, StageId}; +use reth_stages_types::{FinishCheckpoint, StageCheckpoint, StageId}; use reth_static_file_types::StaticFileSegment; use reth_storage_api::{ BlockBodyIndicesProvider, BlockBodyReader, MetadataProvider, MetadataWriter, @@ -568,38 +569,60 @@ impl DatabaseProvider>, + plan: &SaveBlocksPlan, save_mode: SaveBlocksMode, ) -> ProviderResult<()> { + let blocks = &plan.blocks; if blocks.is_empty() { debug!(target: "providers::db", "Attempted to write empty block range"); return Ok(()) } + let persist_rest_range = plan.persist_rest_range(); + let persist_rest_blocks = + persist_rest_range.as_ref().map(|range| &blocks[range.clone()]).unwrap_or(&[]); + let total_start = Instant::now(); let block_count = blocks.len() as u64; let first_number = blocks.first().unwrap().recovered_block().number(); - let last_block_number = blocks.last().unwrap().recovered_block().number(); + let last_block_number = plan.last_block().expect("checked non-empty block range").number; debug!(target: "providers::db", block_count, "Writing blocks and execution data to storage"); + let tx_nums: SmallVec<[TxNumber; 4]> = if persist_rest_blocks.is_empty() { + SmallVec::new() + } else { + let first_tx_num = self + .tx + .cursor_read::()? + .last()? + .map(|(n, _)| n + 1) + .unwrap_or_default(); - // Compute tx_nums upfront (both threads need these) - let first_tx_num = self - .tx - .cursor_read::()? - .last()? 
- .map(|(n, _)| n + 1) - .unwrap_or_default(); - - let tx_nums: SmallVec<[TxNumber; 4]> = { - let mut nums = SmallVec::with_capacity(blocks.len()); + let mut nums = SmallVec::with_capacity(persist_rest_blocks.len()); let mut current = first_tx_num; - for block in &blocks { + for block in persist_rest_blocks { nums.push(current); current += block.recovered_block().body().transaction_count() as u64; } @@ -609,12 +632,32 @@ impl DatabaseProvider DatabaseProvider DatabaseProvider>(); + let masking_trie_data = blocks[masking_range.clone()] + .iter() + .map(|block| block.trie_data()) + .collect::>(); let start = Instant::now(); - let merged_hashed_state = HashedPostStateSorted::merge_batch( - blocks.iter().rev().map(|b| b.trie_data().hashed_state), + let merged_hashed_state = HashedPostStateSorted::disjointed_merge_batch( + step_trie_data.iter().map(|data| data.hashed_state.as_ref()).collect(), + masking_trie_data.iter().map(|data| data.hashed_state.as_ref()).collect(), ); if !merged_hashed_state.is_empty() { self.write_hashed_state(&merged_hashed_state)?; @@ -730,24 +812,49 @@ impl DatabaseProvider DatabaseProvider(()) })?; - // Collect results from spawned tasks - timings.sf = sf_result.ok_or(StaticFileWriterError::ThreadPanic("static file"))??; + // Collect results from spawned tasks. 
+ if has_persist_rest_blocks { + timings.sf = sf_result.ok_or(StaticFileWriterError::ThreadPanic("static file"))??; - if rocksdb_enabled { - timings.rocksdb = rocksdb_result.ok_or_else(|| { - ProviderError::Database(reth_db_api::DatabaseError::Other( - "RocksDB thread panicked".into(), - )) - })??; + if rocksdb_enabled { + timings.rocksdb = rocksdb_result.ok_or_else(|| { + ProviderError::Database(reth_db_api::DatabaseError::Other( + "RocksDB thread panicked".into(), + )) + })??; + } } timings.total = total_start.elapsed(); self.metrics.record_save_blocks(&timings); - debug!(target: "providers::db", range = ?first_number..=last_block_number, "Appended block data"); + debug!( + target: "providers::db", + range = ?first_number..=last_block_number, + "Appended block data" + ); Ok(()) } @@ -3464,8 +3577,9 @@ impl BlockExecutionWriter // that is why it is deleted afterwards. self.remove_blocks_above(block)?; - // Update pipeline progress - self.update_pipeline_stages(block, true)?; + // Keep the finish checkpoint's trie frontier aligned with the highest trie data that is + // still durably materialized after truncation. + self.update_finish_checkpoint_after_remove(block)?; Ok(Chain::new(blocks, execution_state, BTreeMap::new())) } @@ -3480,8 +3594,35 @@ impl BlockExecutionWriter // that is why it is deleted afterwards. self.remove_blocks_above(block)?; - // Update pipeline progress + // Keep the finish checkpoint's trie frontier aligned with the highest trie data that is + // still durably materialized after truncation. 
+ self.update_finish_checkpoint_after_remove(block)?; + + Ok(()) + } +} + +impl DatabaseProvider { + fn trie_persisted_tip_block_number(&self) -> ProviderResult> { + Ok(self.get_stage_checkpoint(StageId::Finish)?.map(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number) + })) + } + + fn update_finish_checkpoint_after_remove(&self, block: BlockNumber) -> ProviderResult<()> { + let partial_state_trie = self + .trie_persisted_tip_block_number()? + .map(|trie_persisted_tip| trie_persisted_tip.min(block)); + self.update_pipeline_stages(block, true)?; + self.save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(block) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie }), + )?; Ok(()) } @@ -3519,7 +3660,13 @@ impl BlockWriter ); // Delegate to save_blocks with BlocksOnly mode (skips receipts/state/trie) - self.save_blocks(vec![executed_block], SaveBlocksMode::BlocksOnly)?; + self.save_blocks( + &SaveBlocksPlan::new( + vec![executed_block], + vec![super::SaveBlocksPlanStep::new(0..1, None, true)], + ), + SaveBlocksMode::BlocksOnly, + )?; // Return the body indices self.block_body_indices(block_number)? 
@@ -3933,6 +4080,7 @@ impl StoragePath for DatabaseProvider { mod tests { use super::*; use crate::{ + providers::database::SaveBlocksPlanStep, test_utils::{blocks::BlockchainTestData, create_test_provider_factory}, BlockWriter, }; @@ -3941,9 +4089,9 @@ mod tests { map::{AddressMap, B256Map}, U256, }; - use reth_chain_state::ExecutedBlock; + use reth_chain_state::{test_utils::TestBlockBuilder, ComputedTrieData, ExecutedBlock}; use reth_db_api::models::StorageSettings; - use reth_ethereum_primitives::Receipt; + use reth_ethereum_primitives::{EthPrimitives, Receipt}; use reth_execution_types::{AccountRevertInit, BlockExecutionOutput, BlockExecutionResult}; use reth_primitives_traits::SealedBlock; use reth_storage_api::MetadataWriter; @@ -3953,7 +4101,32 @@ mod tests { }; use revm_database::BundleState; use revm_state::AccountInfo; - use std::{sync::mpsc, time::Duration}; + use std::{ + sync::{mpsc, Arc}, + time::Duration, + }; + + fn full_save_plan( + blocks: impl IntoIterator>, + ) -> SaveBlocksPlan { + let blocks = blocks.into_iter().collect::>(); + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + } + + fn partial_save_plan( + blocks: impl IntoIterator>, + steps: Vec, + ) -> SaveBlocksPlan { + SaveBlocksPlan::new(blocks.into_iter().collect(), steps) + } #[test] fn test_receipts_by_block_range_empty_range() { @@ -4442,6 +4615,394 @@ mod tests { provider_rw.commit().unwrap(); } + #[test] + fn test_save_blocks_only_masks_trie_with_deferred_blocks() { + use reth_trie::{ + updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}, + BranchNodeCompact, HashedPostStateSorted, HashedStorageSorted, + }; + + fn empty_execution_output() -> BlockExecutionOutput { + BlockExecutionOutput { + result: BlockExecutionResult { + receipts: vec![], + requests: Default::default(), + gas_used: 0, + blob_gas_used: 0, + }, + state: Default::default(), + } + } + + 
fn branch(mask: u16) -> BranchNodeCompact { + BranchNodeCompact::new(mask, 0, 0, vec![], None) + } + + let factory = create_test_provider_factory(); + factory.set_storage_settings_cache(StorageSettings::v1()); + + let genesis = SealedBlock::::from_sealed_parts( + SealedHeader::new( + Header { number: 0, difficulty: U256::from(1), ..Default::default() }, + B256::ZERO, + ), + Default::default(), + ); + let genesis_executed = ExecutedBlock::new( + Arc::new(genesis.try_recover().unwrap()), + Arc::new(empty_execution_output()), + ComputedTrieData::default(), + ); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis_executed).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let kept_account = B256::with_last_byte(0x11); + let deferred_masked_account = B256::with_last_byte(0x12); + let in_memory_overlap_account = B256::with_last_byte(0x13); + let in_memory_only_account = B256::with_last_byte(0x14); + let kept_storage = B256::with_last_byte(0x21); + let deferred_masked_storage = B256::with_last_byte(0x22); + let in_memory_overlap_storage = B256::with_last_byte(0x23); + let in_memory_only_storage = B256::with_last_byte(0x24); + let kept_slot = B256::with_last_byte(0x31); + let deferred_masked_slot = B256::with_last_byte(0x32); + let in_memory_overlap_slot = B256::with_last_byte(0x33); + let in_memory_only_slot = B256::with_last_byte(0x34); + let kept_account_node = Nibbles::from_nibbles([0x1, 0x2]); + let deferred_masked_account_node = Nibbles::from_nibbles([0x1, 0x3]); + let in_memory_overlap_account_node = Nibbles::from_nibbles([0x1, 0x4]); + let in_memory_only_account_node = Nibbles::from_nibbles([0x1, 0x5]); + let kept_storage_node = Nibbles::from_nibbles([0x2, 0x1]); + let deferred_masked_storage_node = Nibbles::from_nibbles([0x2, 0x2]); + let in_memory_overlap_storage_node = Nibbles::from_nibbles([0x2, 0x3]); + let in_memory_only_storage_node = 
Nibbles::from_nibbles([0x2, 0x4]); + let blocks: Vec<_> = + TestBlockBuilder::eth().with_state().get_executed_blocks(1..4).collect(); + let full_persist_base = &blocks[0]; + let deferred_trie_base = &blocks[1]; + let in_memory_only_base = &blocks[2]; + + let full_persist_hashed_state = HashedPostStateSorted::new( + vec![ + (kept_account, Some(Account::default())), + (deferred_masked_account, Some(Account { nonce: 1, ..Default::default() })), + (in_memory_overlap_account, Some(Account { nonce: 2, ..Default::default() })), + ], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(kept_slot, U256::from(1))], + }, + ), + ( + deferred_masked_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(deferred_masked_slot, U256::from(2))], + }, + ), + ( + in_memory_overlap_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_overlap_slot, U256::from(3))], + }, + ), + ]), + ); + let full_persist_trie_updates = TrieUpdatesSorted::new( + vec![ + (kept_account_node, Some(branch(0b0000_1111_0000_1111))), + (deferred_masked_account_node, Some(branch(0b1111_0000_1111_0000))), + (in_memory_overlap_account_node, Some(branch(0b1010_1010_1010_1010))), + ], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(kept_storage_node, Some(branch(0b1010)))], + }, + ), + ( + deferred_masked_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(deferred_masked_storage_node, Some(branch(0b0101)))], + }, + ), + ( + in_memory_overlap_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_overlap_storage_node, Some(branch(0b0110)))], + }, + ), + ]), + ); + + let full_persist_block = ExecutedBlock::new( + Arc::clone(&full_persist_base.recovered_block), + Arc::clone(&full_persist_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(full_persist_hashed_state), + 
trie_updates: Arc::new(full_persist_trie_updates), + ..Default::default() + }, + ); + + let deferred_trie_hashed_state = HashedPostStateSorted::new( + vec![(deferred_masked_account, Some(Account { nonce: 3, ..Default::default() }))], + B256Map::from_iter([( + deferred_masked_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(deferred_masked_slot, U256::from(4))], + }, + )]), + ); + let deferred_trie_updates = TrieUpdatesSorted::new( + vec![(deferred_masked_account_node, Some(branch(0b0011_0011)))], + B256Map::from_iter([( + deferred_masked_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(deferred_masked_storage_node, Some(branch(0b1100)))], + }, + )]), + ); + let deferred_trie_block = ExecutedBlock::new( + Arc::clone(&deferred_trie_base.recovered_block), + Arc::clone(&deferred_trie_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(deferred_trie_hashed_state), + trie_updates: Arc::new(deferred_trie_updates), + ..Default::default() + }, + ); + + let in_memory_only_hashed_state = HashedPostStateSorted::new( + vec![ + (in_memory_overlap_account, Some(Account { nonce: 4, ..Default::default() })), + (in_memory_only_account, Some(Account { nonce: 5, ..Default::default() })), + ], + B256Map::from_iter([ + ( + in_memory_overlap_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_overlap_slot, U256::from(5))], + }, + ), + ( + in_memory_only_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_only_slot, U256::from(6))], + }, + ), + ]), + ); + let in_memory_only_trie_updates = TrieUpdatesSorted::new( + vec![ + (in_memory_overlap_account_node, Some(branch(0b0101_0101))), + (in_memory_only_account_node, Some(branch(0b1111_0000))), + ], + B256Map::from_iter([ + ( + in_memory_overlap_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_overlap_storage_node, Some(branch(0b1001)))], + }, + ), + ( + 
in_memory_only_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_only_storage_node, Some(branch(0b1111)))], + }, + ), + ]), + ); + let in_memory_only_block = ExecutedBlock::new( + Arc::clone(&in_memory_only_base.recovered_block), + Arc::clone(&in_memory_only_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(in_memory_only_hashed_state), + trie_updates: Arc::new(in_memory_only_trie_updates), + ..Default::default() + }, + ); + + let provider_rw = factory.provider_rw().unwrap(); + let blocks = vec![full_persist_block, deferred_trie_block, in_memory_only_block]; + provider_rw + .save_blocks( + &partial_save_plan( + blocks, + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..2), true), + SaveBlocksPlanStep::new(1..2, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let tx = provider.tx_ref(); + let finish_checkpoint = provider.get_stage_checkpoint(StageId::Finish).unwrap().unwrap(); + assert_eq!(finish_checkpoint.block_number, 2); + assert_eq!( + finish_checkpoint.finish_stage_checkpoint().unwrap().partial_state_trie, + Some(1) + ); + assert!(provider.block_hash(2).unwrap().is_some()); + assert!(provider.block_hash(3).unwrap().is_none()); + + let mut hashed_accounts = tx.cursor_read::().unwrap(); + assert!(hashed_accounts.seek_exact(kept_account).unwrap().is_some()); + assert!(hashed_accounts.seek_exact(deferred_masked_account).unwrap().is_none()); + assert!(hashed_accounts.seek_exact(in_memory_overlap_account).unwrap().is_some()); + assert!(hashed_accounts.seek_exact(in_memory_only_account).unwrap().is_none()); + + let mut hashed_storages = tx.cursor_dup_read::().unwrap(); + assert!(hashed_storages.seek_by_key_subkey(kept_storage, kept_slot).unwrap().is_some()); + assert!(hashed_storages + .walk_dup(Some(deferred_masked_storage), None) + .unwrap() + .next() + .transpose() + .unwrap() + .is_none()); + 
assert!(hashed_storages + .seek_by_key_subkey(in_memory_overlap_storage, in_memory_overlap_slot) + .unwrap() + .is_some()); + assert!(hashed_storages + .walk_dup(Some(in_memory_only_storage), None) + .unwrap() + .next() + .transpose() + .unwrap() + .is_none()); + + let mut account_trie = tx.cursor_read::().unwrap(); + assert!(account_trie.seek_exact(StoredNibbles(kept_account_node)).unwrap().is_some()); + assert!(account_trie + .seek_exact(StoredNibbles(deferred_masked_account_node)) + .unwrap() + .is_none()); + assert!(account_trie + .seek_exact(StoredNibbles(in_memory_overlap_account_node)) + .unwrap() + .is_some()); + assert!(account_trie + .seek_exact(StoredNibbles(in_memory_only_account_node)) + .unwrap() + .is_none()); + + let mut storage_trie = tx.cursor_dup_read::().unwrap(); + let kept_entries: Vec<_> = storage_trie + .walk_dup(Some(kept_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert_eq!(kept_entries.len(), 1); + assert_eq!(kept_entries[0].1.nibbles.0, kept_storage_node); + + let deferred_masked_entries: Vec<_> = storage_trie + .walk_dup(Some(deferred_masked_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert!(deferred_masked_entries.is_empty()); + + let in_memory_overlap_entries: Vec<_> = storage_trie + .walk_dup(Some(in_memory_overlap_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert_eq!(in_memory_overlap_entries.len(), 1); + assert_eq!(in_memory_overlap_entries[0].1.nibbles.0, in_memory_overlap_storage_node); + + let in_memory_entries: Vec<_> = storage_trie + .walk_dup(Some(in_memory_only_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert!(in_memory_entries.is_empty()); + } + + #[test] + fn test_save_blocks_partial_cycles_do_not_duplicate_static_file_writes() { + let factory = create_test_provider_factory(); + let mut test_block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = test_block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks: Vec<_> 
= test_block_builder.get_executed_blocks(1..5).collect(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks(&full_save_plan(blocks[..2].to_vec()), SaveBlocksMode::Full) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks, + vec![ + SaveBlocksPlanStep::new(0..2, Some(2..4), false), + SaveBlocksPlanStep::new(2..4, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let finish_checkpoint = provider.get_stage_checkpoint(StageId::Finish).unwrap().unwrap(); + assert_eq!(finish_checkpoint.block_number, 4); + assert_eq!( + finish_checkpoint.finish_stage_checkpoint().unwrap().partial_state_trie, + Some(2) + ); + + let static_files = factory.static_file_provider(); + assert_eq!(static_files.get_highest_static_file_block(StaticFileSegment::Headers), Some(4)); + assert_eq!( + static_files.get_highest_static_file_block(StaticFileSegment::Transactions), + Some(4) + ); + assert_eq!( + static_files.get_highest_static_file_block(StaticFileSegment::Receipts), + Some(4) + ); + } + #[test] fn test_prunable_receipts_logic() { let insert_blocks = @@ -5027,7 +5588,12 @@ mod tests { ComputedTrieData::default(), ); let provider_rw = factory.provider_rw().unwrap(); - provider_rw.save_blocks(vec![genesis_executed], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis_executed).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); let mut blocks: Vec = Vec::new(); @@ -5099,7 +5665,7 @@ mod tests { } let provider_rw = factory.provider_rw().unwrap(); - 
provider_rw.save_blocks(blocks, SaveBlocksMode::Full).unwrap(); + provider_rw.save_blocks(&full_save_plan(blocks), SaveBlocksMode::Full).unwrap(); provider_rw.commit().unwrap(); let provider = factory.provider().unwrap(); diff --git a/crates/storage/provider/src/providers/database/save_blocks.rs b/crates/storage/provider/src/providers/database/save_blocks.rs new file mode 100644 index 00000000000..185136cedff --- /dev/null +++ b/crates/storage/provider/src/providers/database/save_blocks.rs @@ -0,0 +1,90 @@ +use alloy_eips::BlockNumHash; +use reth_chain_state::ExecutedBlock; +use reth_ethereum_primitives::EthPrimitives; +use reth_primitives_traits::NodePrimitives; +use std::ops::Range; + +/// A single persistence step over a contiguous region of [`SaveBlocksPlan::blocks`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SaveBlocksPlanStep { + /// Range of [`SaveBlocksPlan::blocks`] covered by this step. + pub block_range: Range, + /// Optional range of blocks whose state/trie updates should be used to mask this step's + /// durable state/trie writes. + /// + /// `Some(empty_range)` means persist state/trie without any masking. `None` means skip + /// durable state/trie persistence for this step. + pub state_trie_masking_range: Option>, + /// Whether to persist non-state/trie data for this step. + pub persist_rest: bool, +} + +impl SaveBlocksPlanStep { + /// Creates a new persistence step. + pub const fn new( + block_range: Range, + state_trie_masking_range: Option>, + persist_rest: bool, + ) -> Self { + Self { block_range, state_trie_masking_range, persist_rest } + } + + /// Returns `true` if this step persists state/trie data. + pub const fn persists_state_trie(&self) -> bool { + self.state_trie_masking_range.is_some() + } +} + +/// Plan for a single `save_blocks` persistence cycle. +#[derive(Debug, Clone)] +pub struct SaveBlocksPlan { + /// Canonical blocks covered by this plan. + pub blocks: Vec>, + /// Ordered persistence steps over [`Self::blocks`]. 
+ pub steps: Vec, +} + +impl SaveBlocksPlan { + /// Creates a new save plan. + pub const fn new(blocks: Vec>, steps: Vec) -> Self { + Self { blocks, steps } + } + + /// Returns `true` if the plan contains no blocks to persist. + pub fn is_empty(&self) -> bool { + self.last_block().is_none() + } + + /// Returns the highest block covered by this plan. + pub fn last_block(&self) -> Option { + let last_index = + self.steps.iter().rev().find_map(|step| step.block_range.end.checked_sub(1))?; + self.blocks.get(last_index).map(|block| block.recovered_block().num_hash()) + } + + /// Returns the highest block whose state/trie data is durably persisted by this plan. + pub fn last_state_trie_block(&self) -> Option { + let last_index = self + .steps + .iter() + .rev() + .find(|step| step.persists_state_trie())? + .block_range + .end + .checked_sub(1)?; + self.blocks.get(last_index).map(|block| block.recovered_block().num_hash()) + } + + /// Returns the contiguous range of blocks whose non-state/trie outputs are persisted. 
+ pub fn persist_rest_range(&self) -> Option> { + let mut ranges = + self.steps.iter().filter(|step| step.persist_rest).map(|step| &step.block_range); + let first = ranges.next()?.clone(); + let merged = ranges.fold(first, |mut merged, range| { + debug_assert_eq!(merged.end, range.start, "persist_rest steps must be contiguous"); + merged.end = range.end; + merged + }); + Some(merged) + } +} diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 1d45351f0a8..e278a46e187 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -1,7 +1,7 @@ use alloy_eips::BlockNumHash; use alloy_primitives::{BlockHash, BlockNumber, B256}; use metrics::{Counter, Histogram}; -use reth_chain_state::{EthPrimitives, StateTrieOverlayManager}; +use reth_chain_state::{EthPrimitives, LazyOverlay}; use reth_db_api::{tables, transaction::DbTx, DatabaseError}; use reth_errors::{ProviderError, ProviderResult}; use reth_metrics::Metrics; @@ -61,39 +61,38 @@ pub(super) struct Overlay { pub(super) hashed_post_state: Arc, } +#[derive(Debug)] +struct OverlayRevertPlan { + revert_blocks: Option>, + overlay_anchor_hash: BlockHash, +} + /// Source of overlay data for [`OverlayStateProviderFactory`]. +/// +/// Either provides immediate pre-computed overlay data, or a lazy overlay that computes +/// on first access. #[derive(Debug, Clone)] pub(super) enum OverlaySource { /// Immediate overlay with already-computed data. Immediate { /// Trie updates overlay. - /// - /// This can be non-empty when a caller starts with an explicit `TrieInputSorted`, such - /// as historical providers. trie: Arc, /// Hashed state overlay. state: Arc, }, - /// Manager-backed overlay for in-memory state, with optional immediate overlay data. - Managed { - /// Manager used to resolve in-memory parent state if the parent is not persisted. 
- manager: StateTrieOverlayManager, - /// Immediate hashed state overlay applied on top of any manager-produced overlay. - /// - /// This is populated by the `with_hashed_state_overlay` methods. - state: Arc, - }, + /// Lazy overlay computed on first access. + Lazy(LazyOverlay), } /// Builder for calculating trie and hashed-state overlays. /// -/// This stores the overlay configuration and the logic for resolving overlays and collecting -/// reverts. It is intentionally independent from any provider factory or overlay cache. +/// This stores the overlay configuration and the logic for resolving immediate/lazy overlays and +/// collecting reverts. It is intentionally independent from any provider factory or overlay cache. #[derive(Debug, Clone)] pub struct OverlayBuilder { - /// Parent hash requested by the caller. - parent_hash: B256, - /// Optional overlay source. + /// Anchor hash to revert the DB state to before applying overlays. + anchor_hash: B256, + /// Optional overlay source (lazy or immediate). overlay_source: Option>, /// Changeset cache handle for retrieving trie changesets changeset_cache: ChangesetCache, @@ -103,32 +102,59 @@ pub struct OverlayBuilder { impl OverlayBuilder { /// Create a new overlay builder. - pub fn new(parent_hash: B256, changeset_cache: ChangesetCache) -> Self { + pub fn new(anchor_hash: B256, changeset_cache: ChangesetCache) -> Self { Self { - parent_hash, + anchor_hash, overlay_source: None, changeset_cache, metrics: OverlayStateProviderMetrics::default(), } } - /// Set the overlay source. + /// Set the overlay source (lazy or immediate). /// - /// This overlay will be applied on top of any reverts. + /// This overlay will be applied on top of any reverts applied via `anchor_hash`. 
pub(super) fn with_overlay_source(mut self, source: Option>) -> Self { + if let Some(OverlaySource::Lazy(lazy_overlay)) = source.as_ref() { + self.assert_lazy_overlay_anchor(lazy_overlay); + } + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + source = overlay_source_kind(source.as_ref()), + source_anchor = ?source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?source.as_ref().and_then(overlay_source_num_blocks), + "Configuring overlay source" + ); self.overlay_source = source; self } - /// Set the state trie overlay manager used to resolve in-memory parent state. - pub fn with_state_trie_overlay_manager( - mut self, - state_trie_overlay_manager: StateTrieOverlayManager, - ) -> Self { - self.overlay_source = Some(OverlaySource::Managed { - manager: state_trie_overlay_manager, - state: Arc::new(HashedPostStateSorted::default()), - }); + fn assert_lazy_overlay_anchor(&self, lazy_overlay: &LazyOverlay) { + let Some(lazy_overlay_anchor) = lazy_overlay.anchor_hash() else { return }; + assert!( + lazy_overlay_anchor == self.anchor_hash, + "LazyOverlay's anchor ({}) != OverlayBuilder's anchor ({})", + lazy_overlay_anchor, + self.anchor_hash, + ); + } + + /// Set a lazy overlay that will be computed on first access. + /// + /// Panics if the [`LazyOverlay`]'s anchor hash does not match [`Self`]'s `anchor_hash`. 
+ pub fn with_lazy_overlay(mut self, lazy_overlay: Option>) -> Self { + if let Some(lazy_overlay) = lazy_overlay.as_ref() { + self.assert_lazy_overlay_anchor(lazy_overlay); + } + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + lazy_anchor = ?lazy_overlay.as_ref().and_then(LazyOverlay::anchor_hash), + lazy_blocks = ?lazy_overlay.as_ref().map(LazyOverlay::num_blocks), + "Configuring lazy overlay" + ); + self.overlay_source = lazy_overlay.map(OverlaySource::Lazy); self } @@ -138,29 +164,35 @@ impl OverlayBuilder { hashed_state_overlay: Option>, ) -> Self { if let Some(state) = hashed_state_overlay { - match &mut self.overlay_source { - Some(OverlaySource::Managed { state: managed_state, .. }) => { - *managed_state = state; - } - _ => { - self.overlay_source = Some(OverlaySource::Immediate { - trie: Arc::new(TrieUpdatesSorted::default()), - state, - }); - } - } + self.overlay_source = Some(OverlaySource::Immediate { + trie: Arc::new(TrieUpdatesSorted::default()), + state, + }); + } else { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + "Clearing hashed-state overlay" + ); } self } /// Extends the existing hashed state overlay with the given [`HashedPostStateSorted`]. /// - /// If no overlay exists, creates an immediate overlay with the given state. + /// If no overlay exists, creates a new immediate overlay with the given state. + /// If a lazy overlay exists, it is resolved first then extended. pub fn with_extended_hashed_state_overlay(mut self, other: HashedPostStateSorted) -> Self { match &mut self.overlay_source { - Some(OverlaySource::Immediate { state, .. } | OverlaySource::Managed { state, .. }) => { + Some(OverlaySource::Immediate { state, .. 
}) => { Arc::make_mut(state).extend_ref_and_sort(&other); } + Some(OverlaySource::Lazy(overlay)) => { + // Resolve lazy overlay and convert to immediate with extension + let (trie, mut state) = overlay.as_overlay(self.anchor_hash); + Arc::make_mut(&mut state).extend_ref_and_sort(&other); + self.overlay_source = Some(OverlaySource::Immediate { trie, state }); + } None => { self.overlay_source = Some(OverlaySource::Immediate { trie: Arc::new(TrieUpdatesSorted::default()), @@ -172,89 +204,159 @@ impl OverlayBuilder { } /// Resolves the effective overlay (trie updates, hashed state). + /// + /// If an overlay source is set, it is resolved (blocking if lazy). + /// Otherwise, returns empty defaults. fn resolve_overlays( &self, anchor_hash: BlockHash, ) -> ProviderResult<(Arc, Arc)> { - match &self.overlay_source { - Some(OverlaySource::Managed { manager, state }) => { - let (trie, mut overlay_state) = if anchor_hash == self.parent_hash { - ( - Arc::new(TrieUpdatesSorted::default()), - Arc::new(HashedPostStateSorted::default()), - ) - } else { - manager - .overlay_for_parent(self.parent_hash, anchor_hash) - .map_err(ProviderError::other)? 
- }; - - if overlay_state.is_empty() { - overlay_state = Arc::clone(state); - } else if !state.is_empty() { - Arc::make_mut(&mut overlay_state).extend_ref_and_sort(state); - } - - Ok((trie, overlay_state)) - } + let result = match &self.overlay_source { + Some(OverlaySource::Lazy(lazy_overlay)) => lazy_overlay.as_overlay(anchor_hash), Some(OverlaySource::Immediate { trie, state }) => { - if anchor_hash != self.parent_hash { + if anchor_hash != self.anchor_hash { return Err(ProviderError::other(std::io::Error::other(format!( - "anchor_hash {anchor_hash} doesn't match OverlayBuilder's configured parent ({})", - self.parent_hash - )))) + "anchor_hash {anchor_hash} doesn't match OverlayBuilder's configured anchor ({})", + self.anchor_hash + )))); } - Ok((Arc::clone(trie), Arc::clone(state))) + (Arc::clone(trie), Arc::clone(state)) } - None => Ok(( - Arc::new(TrieUpdatesSorted::default()), - Arc::new(HashedPostStateSorted::default()), - )), - } + None => { + (Arc::new(TrieUpdatesSorted::default()), Arc::new(HashedPostStateSorted::default())) + } + }; + + Ok(result) } - /// Returns the block which is at the tip of the DB, i.e. the block which the state tables of - /// the DB are currently synced to. - fn get_db_tip_block(&self, provider: &Provider) -> ProviderResult + /// Returns the block number for [`Self`]'s `anchor_hash` field. + fn get_block_number(&self, provider: &Provider) -> ProviderResult + where + Provider: BlockNumReader, + { + provider + .convert_hash_or_number(self.anchor_hash.into())? + .ok_or(ProviderError::BlockHashNotFound(self.anchor_hash)) + } + + /// Returns the highest blocks whose state/trie data and non-state/trie data are durably + /// available in the database. + fn get_db_tip_blocks( + &self, + provider: &Provider, + ) -> ProviderResult<(BlockNumHash, BlockNumHash)> where Provider: StageCheckpointReader + BlockNumReader, { - let block_number = provider - .get_stage_checkpoint(StageId::Finish)? 
- .as_ref() - .map(|chk| chk.block_number) - .ok_or_else(|| ProviderError::InsufficientChangesets { - requested: 0, - available: 0..=0, - })?; - let hash = provider + let checkpoint = provider.get_stage_checkpoint(StageId::Finish)?.ok_or_else(|| { + ProviderError::InsufficientChangesets { requested: 0, available: 0..=0 } + })?; + let block_number = checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number); + let state_trie_tip_hash = provider .convert_number(block_number.into())? .ok_or_else(|| ProviderError::HeaderNotFound(block_number.into()))?; - Ok(BlockNumHash::new(block_number, hash)) + let finish_tip_number = checkpoint.block_number; + let finish_tip_hash = provider + .convert_number(finish_tip_number.into())? + .ok_or_else(|| ProviderError::HeaderNotFound(finish_tip_number.into()))?; + debug!( + target: "providers::state::overlay", + state_trie_tip_number = block_number, + state_trie_tip_hash = ?state_trie_tip_hash, + finish_tip_number, + finish_tip_hash = ?finish_tip_hash, + anchor_hash = ?self.anchor_hash, + "Loaded database overlay frontiers" + ); + Ok(( + BlockNumHash::new(block_number, state_trie_tip_hash), + BlockNumHash::new(finish_tip_number, finish_tip_hash), + )) } - /// Returns whether or not it is required to collect reverts, and validates that there are - /// sufficient changesets to revert to the requested block number if so. + /// Returns the revert plan required to expose the requested overlay base state, and validates + /// that there are sufficient changesets to revert to the requested block number if so. /// /// Takes into account both the stage checkpoint and the prune checkpoint to determine the /// available data range. 
- fn reverts_required( + fn revert_plan( &self, provider: &Provider, - db_tip_block: BlockNumHash, - anchor_hash: B256, - ) -> ProviderResult>> + state_trie_tip_block: BlockNumHash, + finish_tip_block: BlockNumHash, + ) -> ProviderResult where Provider: BlockNumReader + PruneCheckpointReader, { - // If the anchor is the DB tip then there won't be any reverts necessary. - if db_tip_block.hash == anchor_hash { - return Ok(None) + let anchor_number = self.get_block_number(provider)?; + let anchor_hash_at_number = provider + .convert_number(anchor_number.into())? + .ok_or_else(|| ProviderError::HeaderNotFound(anchor_number.into()))?; + if anchor_hash_at_number != self.anchor_hash { + return Err(ProviderError::other(std::io::Error::other(format!( + "anchor hash {} is not on the durable finish chain at block {} (found {})", + self.anchor_hash, anchor_number, anchor_hash_at_number, + )))); } - let anchor_number = provider - .convert_hash_or_number(anchor_hash.into())? - .ok_or(ProviderError::BlockHashNotFound(anchor_hash))?; + // If the requested anchor is the current durable Finish frontier, the database already + // exposes a consistent logical state for the overlay base. 
+ if state_trie_tip_block.hash == finish_tip_block.hash && + finish_tip_block.hash == self.anchor_hash + { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + overlay_anchor_hash = ?finish_tip_block.hash, + "Overlay anchor matches durable finish frontier; no reverts required" + ); + return Ok(OverlayRevertPlan { + revert_blocks: None, + overlay_anchor_hash: finish_tip_block.hash, + }); + } + + if let Some(OverlaySource::Lazy(lazy)) = self.overlay_source.as_ref() { + let lazy_covers_state_trie_tip = lazy.has_anchor_hash(state_trie_tip_block.hash); + let lazy_covers_finish_gap = state_trie_tip_block.hash == finish_tip_block.hash || + lazy.has_anchor_hash(finish_tip_block.hash); + + if lazy_covers_state_trie_tip && lazy_covers_finish_gap { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + overlay_anchor_hash = ?state_trie_tip_block.hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_num_blocks), + "Lazy overlay covers partial state trie frontier; no reverts required" + ); + return Ok(OverlayRevertPlan { + revert_blocks: None, + overlay_anchor_hash: state_trie_tip_block.hash, + }) + } + } + + if anchor_number > state_trie_tip_block.number { + return Err(ProviderError::other(std::io::Error::other(format!( + "overlay anchor #{} ({}) is after partial state trie frontier #{} ({}); missing trie updates for blocks #{}..=#{}", + anchor_number, + self.anchor_hash, + state_trie_tip_block.number, + state_trie_tip_block.hash, + state_trie_tip_block.number + 1, + anchor_number, + )))); + } // Check account history prune checkpoint to determine the lower bound of available data. 
// The prune checkpoint's block_number is the highest pruned block, so data is available @@ -265,7 +367,19 @@ impl OverlayBuilder { .map(|block_number| block_number + 1) .unwrap_or_default(); - let available_range = lower_bound..=db_tip_block.number; + let available_range = lower_bound..=finish_tip_block.number; + + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + anchor_number, + ?state_trie_tip_block, + ?finish_tip_block, + prune_lower_bound = lower_bound, + available_start = *available_range.start(), + available_end = *available_range.end(), + "Checking overlay revert requirements" + ); // Check if the requested block is within the available range if !available_range.contains(&anchor_number) { @@ -275,20 +389,36 @@ impl OverlayBuilder { }); } - Ok(Some(anchor_number + 1..=db_tip_block.number)) + let revert_range = anchor_number + 1..=finish_tip_block.number; + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + anchor_number, + revert_start = *revert_range.start(), + revert_end = *revert_range.end(), + overlay_anchor_hash = ?self.anchor_hash, + "Overlay reverts required" + ); + + Ok(OverlayRevertPlan { + revert_blocks: Some(revert_range), + overlay_anchor_hash: self.anchor_hash, + }) } - /// Calculates a new [`Overlay`] given a transaction and the current db tip. + /// Calculates a new [`Overlay`] given a transaction and the current durable state/trie + /// frontier. 
#[instrument( level = "debug", target = "providers::state::overlay", skip_all, - fields(?db_tip_block, parent_hash = ?self.parent_hash) + fields(?state_trie_tip_block, ?finish_tip_block, anchor_hash = ?self.anchor_hash) )] fn calculate_overlay( &self, provider: &Provider, - db_tip_block: BlockNumHash, + state_trie_tip_block: BlockNumHash, + finish_tip_block: BlockNumHash, ) -> ProviderResult where Provider: ChangeSetReader @@ -306,29 +436,20 @@ impl OverlayBuilder { let retrieve_hashed_state_reverts_duration; let trie_updates_total_len; let hashed_state_updates_total_len; - let anchor_hash = match &self.overlay_source { - Some(OverlaySource::Managed { manager, .. }) => { - let parent_is_persisted = provider - .convert_hash_or_number(self.parent_hash.into())? - .is_some_and(|parent_number| parent_number <= db_tip_block.number); - if parent_is_persisted { - self.parent_hash - } else { - manager - .anchor_for_parent(self.parent_hash, db_tip_block.hash) - .ok_or(ProviderError::BlockHashNotFound(self.parent_hash))? - } - } - _ => self.parent_hash, - }; - // Collect any reverts which are required to bring the DB view back to the anchor hash. - let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = - self.reverts_required(provider, db_tip_block, anchor_hash)? - { + let OverlayRevertPlan { revert_blocks, overlay_anchor_hash } = + self.revert_plan(provider, state_trie_tip_block, finish_tip_block)?; + + // Collect any reverts which are required to bring the DB view back to the overlay anchor + // hash. 
+ let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = revert_blocks { debug!( target: "providers::state::overlay", ?revert_blocks, + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_num_blocks), "Collecting trie reverts for overlay state provider" ); @@ -359,9 +480,9 @@ impl OverlayBuilder { res }; - // Resolve overlays and extend reverts with them. + // Resolve overlays (lazy or immediate) and extend reverts with them. // If reverts are empty, use overlays directly to avoid cloning. - let (overlay_trie, overlay_state) = self.resolve_overlays(anchor_hash)?; + let (overlay_trie, overlay_state) = self.resolve_overlays(overlay_anchor_hash)?; let trie_updates = if trie_reverts.is_empty() { overlay_trie @@ -388,19 +509,31 @@ impl OverlayBuilder { target: "providers::state::overlay", num_trie_updates = ?trie_updates_total_len, num_state_updates = ?hashed_state_updates_total_len, - "Reverted to anchor block", + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + "Built overlay after reverting to anchor", ); (trie_updates, hashed_state_updates) } else { - // If no reverts are needed then the db tip is the anchor hash. Use overlays directly. - let (trie_updates, hashed_state) = self.resolve_overlays(db_tip_block.hash)?; + // If no reverts are needed then the overlay can be resolved directly from the durable + // logical frontier selected by the revert plan. 
+ let (trie_updates, hashed_state) = self.resolve_overlays(overlay_anchor_hash)?; retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; trie_updates_total_len = trie_updates.total_len(); hashed_state_updates_total_len = hashed_state.total_len(); + debug!( + target: "providers::state::overlay", + num_trie_updates = trie_updates_total_len, + num_state_updates = hashed_state_updates_total_len, + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + "Built overlay directly from durable frontier" + ); + (trie_updates, hashed_state) }; @@ -429,8 +562,40 @@ impl OverlayBuilder { + BlockNumReader + StorageSettingsCache, { - let db_tip_block = self.get_db_tip_block(provider)?; - self.calculate_overlay(provider, db_tip_block) + let (state_trie_tip_block, finish_tip_block) = self.get_db_tip_blocks(provider)?; + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_num_blocks), + "Building overlay" + ); + self.calculate_overlay(provider, state_trie_tip_block, finish_tip_block) + } +} + +fn overlay_source_kind(source: Option<&OverlaySource>) -> &'static str { + match source { + Some(OverlaySource::Immediate { .. }) => "immediate", + Some(OverlaySource::Lazy(_)) => "lazy", + None => "none", + } +} + +fn overlay_source_anchor(source: &OverlaySource) -> Option { + match source { + OverlaySource::Immediate { .. } => None, + OverlaySource::Lazy(lazy) => lazy.anchor_hash(), + } +} + +fn overlay_source_num_blocks(source: &OverlaySource) -> Option { + match source { + OverlaySource::Immediate { .. 
} => None, + OverlaySource::Lazy(lazy) => Some(lazy.num_blocks()), } } @@ -444,9 +609,11 @@ pub struct OverlayStateProviderFactory { factory: F, /// Overlay builder containing the configuration and overlay calculation logic. overlay_builder: OverlayBuilder, - /// A cache which maps `db_tip -> Overlay`. If the db tip changes during usage of the factory - /// then a new entry will get added to this, but in most cases only one entry is present. - overlay_cache: Arc>, + /// A cache which maps `(state_trie_tip_hash, finish_tip_hash) -> Overlay`. + /// + /// Under partial persistence the overlay depends on both the durable trie frontier and the + /// fully durable Finish frontier, so both hashes are part of the cache key. + overlay_cache: Arc>, } impl OverlayStateProviderFactory { @@ -455,6 +622,13 @@ impl OverlayStateProviderFactory { Self { factory, overlay_builder, overlay_cache: Default::default() } } + /// Set a lazy overlay that will be computed on first access. + pub fn with_lazy_overlay(mut self, lazy_overlay: Option>) -> Self { + self.overlay_builder = self.overlay_builder.with_lazy_overlay(lazy_overlay); + self.overlay_cache = Default::default(); + self + } + /// Set the hashed state overlay. pub fn with_hashed_state_overlay( mut self, @@ -472,8 +646,8 @@ impl OverlayStateProviderFactory { self } - /// Fetches an [`Overlay`] from the cache based on the current db tip block. If there is no - /// cached value then this calculates the [`Overlay`] and populates the cache. + /// Fetches an [`Overlay`] from the cache based on the current durable frontiers. If there is + /// no cached value then this calculates the [`Overlay`] and populates the cache. 
#[instrument(level = "debug", target = "providers::state::overlay", skip_all)] fn get_overlay(&self, provider: &Provider) -> ProviderResult where @@ -485,12 +659,36 @@ impl OverlayStateProviderFactory { + BlockNumReader + StorageSettingsCache, { - let db_tip_block = self.overlay_builder.get_db_tip_block(provider)?; + let (state_trie_tip_block, finish_tip_block) = + self.overlay_builder.get_db_tip_blocks(provider)?; - let overlay = match self.overlay_cache.entry(db_tip_block.hash) { - dashmap::Entry::Occupied(entry) => entry.get().clone(), + let overlay = match self + .overlay_cache + .entry((state_trie_tip_block.hash, finish_tip_block.hash)) + { + dashmap::Entry::Occupied(entry) => { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.overlay_builder.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_builder.overlay_source.as_ref()), + "Using cached overlay" + ); + entry.get().clone() + } dashmap::Entry::Vacant(entry) => { self.overlay_builder.metrics.overlay_cache_misses.increment(1); + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.overlay_builder.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_builder.overlay_source.as_ref()), + source_anchor = ?self.overlay_builder.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_builder.overlay_source.as_ref().and_then(overlay_source_num_blocks), + "Overlay cache miss" + ); let overlay = self.overlay_builder.build_overlay(provider)?; entry.insert(overlay.clone()); overlay @@ -657,45 +855,235 @@ where #[cfg(test)] mod tests { use super::*; + use crate::{ + test_utils::create_test_provider_factory, BlockWriter, SaveBlocksMode, SaveBlocksPlan, + SaveBlocksPlanStep, + }; + use alloy_primitives::{B256, U256}; + use reth_chain_state::{test_utils::TestBlockBuilder, ComputedTrieData, ExecutedBlock}; use reth_primitives_traits::Account; - use 
reth_trie::HashedPostState; + use reth_stages_types::{FinishCheckpoint, StageCheckpoint}; + use reth_storage_api::StageCheckpointWriter; + use reth_trie::{updates::TrieUpdatesSorted, HashedPostState, HashedStorage}; + use std::sync::Arc; + + fn full_save_plan( + blocks: impl IntoIterator>, + ) -> SaveBlocksPlan { + let blocks = blocks.into_iter().collect::>(); + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + } - #[test] - fn managed_overlay_skips_manager_for_persisted_parent() { - let parent_hash = B256::with_last_byte(1); - let builder = OverlayBuilder::::new(parent_hash, ChangesetCache::default()) - .with_state_trie_overlay_manager(StateTrieOverlayManager::default()); + fn partial_save_plan( + blocks: impl IntoIterator>, + steps: Vec, + ) -> SaveBlocksPlan { + SaveBlocksPlan::new(blocks.into_iter().collect(), steps) + } + + fn with_unique_state( + block: &ExecutedBlock, + id: u8, + ) -> ExecutedBlock { + let hashed_address = B256::with_last_byte(id); + let hashed_slot = B256::with_last_byte(id.saturating_add(32)); + let hashed_state = HashedPostState::default() + .with_accounts([(hashed_address, Some(Account::default()))]) + .with_storages([( + hashed_address, + HashedStorage::from_iter(false, [(hashed_slot, U256::from(id))]), + )]) + .into_sorted(); - let (trie, state) = builder.resolve_overlays(parent_hash).unwrap(); - assert!(trie.is_empty()); - assert!(state.is_empty()); + ExecutedBlock::new( + Arc::clone(&block.recovered_block), + Arc::clone(&block.execution_output), + ComputedTrieData::without_trie_input( + Arc::new(hashed_state), + Arc::new(TrieUpdatesSorted::default()), + ), + ) } #[test] - fn managed_overlay_errors_if_parent_is_not_persisted_or_managed() { - let parent_hash = B256::with_last_byte(1); - let anchor_hash = B256::with_last_byte(2); - let builder = OverlayBuilder::::new(parent_hash, ChangesetCache::default()) - 
.with_state_trie_overlay_manager(StateTrieOverlayManager::default()); - - let err = builder.resolve_overlays(anchor_hash).unwrap_err(); + fn build_overlay_reverts_when_finish_frontier_is_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth(); + let blocks = block_builder + .get_executed_blocks(0..5) + .enumerate() + .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) + .collect::>(); + + let state_trie_tip = &blocks[1]; + let finish_tip = &blocks[3]; + let lazy_overlay_blocks = vec![blocks[4].clone(), blocks[3].clone(), blocks[2].clone()]; + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw.insert_block(blocks[0].recovered_block()).unwrap(); + provider_rw.insert_block(state_trie_tip.recovered_block()).unwrap(); + provider_rw.insert_block(blocks[2].recovered_block()).unwrap(); + provider_rw.insert_block(finish_tip.recovered_block()).unwrap(); + provider_rw + .save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(finish_tip.block_number()).with_finish_stage_checkpoint( + FinishCheckpoint { partial_state_trie: Some(state_trie_tip.block_number()) }, + ), + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let overlay = OverlayBuilder::::new( + state_trie_tip.recovered_block().hash(), + ChangesetCache::new(), + ) + .with_lazy_overlay(Some(LazyOverlay::new(lazy_overlay_blocks))) + .build_overlay(&provider) + .unwrap(); + + assert_eq!(overlay.hashed_post_state.accounts.len(), 3); + } - assert!(err.to_string().contains("cannot be anchored")); + #[test] + fn build_overlay_errors_for_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks = block_builder.get_executed_blocks(1..4).collect::>(); + + let provider_rw = 
factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks.clone(), + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..3), true), + SaveBlocksPlanStep::new(1..3, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let anchor = blocks[1].recovered_block().hash(); + let error = OverlayBuilder::::new(anchor, ChangesetCache::new()) + .with_lazy_overlay(Some(LazyOverlay::new(vec![blocks[2].clone()]))) + .build_overlay(&provider) + .unwrap_err(); + + assert!( + error.to_string().contains("is after partial state trie frontier"), + "unexpected error: {error}" + ); } #[test] - fn extending_hashed_state_keeps_managed_overlay_source() { - let parent_hash = B256::with_last_byte(1); - let hashed_state = HashedPostState::default() - .with_accounts([(B256::with_last_byte(2), Some(Account::default()))]) - .into_sorted(); - let builder = OverlayBuilder::::new(parent_hash, ChangesetCache::default()) - .with_state_trie_overlay_manager(StateTrieOverlayManager::default()) - .with_extended_hashed_state_overlay(hashed_state); + fn build_overlay_uses_lazy_superset_for_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth(); + let blocks = block_builder + .get_executed_blocks(0..5) + .enumerate() + .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) + .collect::>(); + + let state_trie_tip = &blocks[1]; + let finish_tip = &blocks[3]; + let lazy_overlay_blocks = + vec![blocks[4].clone(), blocks[3].clone(), blocks[2].clone(), blocks[1].clone()]; + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw.insert_block(blocks[0].recovered_block()).unwrap(); 
+ provider_rw.insert_block(state_trie_tip.recovered_block()).unwrap(); + provider_rw.insert_block(blocks[2].recovered_block()).unwrap(); + provider_rw.insert_block(finish_tip.recovered_block()).unwrap(); + provider_rw + .save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(finish_tip.block_number()).with_finish_stage_checkpoint( + FinishCheckpoint { partial_state_trie: Some(state_trie_tip.block_number()) }, + ), + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let overlay = OverlayBuilder::::new( + blocks[0].recovered_block().hash(), + ChangesetCache::new(), + ) + .with_lazy_overlay(Some(LazyOverlay::new(lazy_overlay_blocks))) + .build_overlay(&provider) + .unwrap(); + + assert_eq!(overlay.hashed_post_state.accounts.len(), 3); + } - let Some(OverlaySource::Managed { state, .. }) = builder.overlay_source else { - panic!("expected managed overlay source") - }; - assert_eq!(state.total_len(), 1); + #[test] + fn build_overlay_errors_for_finish_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks = block_builder.get_executed_blocks(1..4).collect::>(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks.clone(), + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..3), true), + SaveBlocksPlanStep::new(1..3, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let finish_anchor = blocks[2].recovered_block().hash(); + + let error = OverlayBuilder::::new(finish_anchor, 
ChangesetCache::new()) + .with_lazy_overlay(None) + .build_overlay(&provider) + .unwrap_err(); + + assert!( + error.to_string().contains("is after partial state trie frontier"), + "unexpected error: {error}" + ); } } diff --git a/crates/trie/common/src/hashed_state.rs b/crates/trie/common/src/hashed_state.rs index 9addbfeafd0..854f486e2b7 100644 --- a/crates/trie/common/src/hashed_state.rs +++ b/crates/trie/common/src/hashed_state.rs @@ -3,7 +3,7 @@ use core::ops::Not; use crate::{ added_removed_keys::MultiAddedRemovedKeys, prefix_set::{PrefixSetMut, TriePrefixSetsMut}, - utils::{extend_sorted_vec, kway_merge_sorted}, + utils::{extend_sorted_vec, kway_merge_disjoint_sorted, kway_merge_sorted}, KeyHasher, MultiProofTargets, Nibbles, }; use alloc::{borrow::Cow, vec::Vec}; @@ -691,6 +691,100 @@ impl HashedPostStateSorted { Self { accounts, storages } } + /// Merges the batch and removes any overlapping keys present in the mask. + /// + /// Account keys are masked at the top level, while storage entries are only masked at the slot + /// level unless the mask wipes the entire storage. For duplicate keys in the batch, later + /// items take precedence over earlier ones. The order of the mask does not matter. 
+ pub fn disjointed_merge_batch<'a>(batch: Vec<&'a Self>, mask: Vec<&'a Self>) -> Self { + let accounts = kway_merge_disjoint_sorted( + batch.iter().map(|item| item.accounts.len()).sum(), + batch.iter().rev().map(|item| item.accounts.as_slice()), + mask.iter().map(|item| item.accounts.as_slice()), + ); + + struct StorageAcc<'a> { + wiped: bool, + sealed: bool, + slot_count: usize, + slices: Vec<&'a [(B256, U256)]>, + } + + #[derive(Default)] + struct StorageMaskAcc<'a> { + wiped: bool, + slices: Vec<&'a [(B256, U256)]>, + } + + let mut storages = B256Map::with_capacity_and_hasher( + batch.iter().map(|item| item.storages.len()).sum(), + Default::default(), + ); + + for item in batch.iter().rev() { + for (hashed_address, storage) in &item.storages { + let entry = storages.entry(*hashed_address).or_insert_with(|| StorageAcc { + wiped: false, + sealed: false, + slot_count: 0, + slices: Vec::new(), + }); + + if entry.sealed { + continue; + } + + entry.slices.push(storage.storage_slots.as_slice()); + entry.slot_count += storage.storage_slots.len(); + if storage.wiped { + entry.wiped = true; + entry.sealed = true; + } + } + } + + let mut storage_masks: B256Map> = B256Map::with_capacity_and_hasher( + mask.iter().map(|item| item.storages.len()).sum(), + Default::default(), + ); + for item in mask { + for (hashed_address, storage) in &item.storages { + let entry = storage_masks.entry(*hashed_address).or_default(); + if entry.wiped { + continue; + } + if storage.wiped { + entry.wiped = true; + entry.slices.clear(); + } else { + entry.slices.push(storage.storage_slots.as_slice()); + } + } + } + + let storages = storages + .into_iter() + .filter_map(|(hashed_address, entry)| { + let storage_slots = match storage_masks.get(&hashed_address) { + Some(mask_entry) if mask_entry.wiped => return None, + Some(mask_entry) => kway_merge_disjoint_sorted( + entry.slot_count, + entry.slices, + mask_entry.slices.iter().copied(), + ), + None => kway_merge_sorted(entry.slices), + }; + + 
(!storage_slots.is_empty() || entry.wiped).then_some(( + hashed_address, + HashedStorageSorted { wiped: entry.wiped, storage_slots }, + )) + }) + .collect(); + + Self { accounts, storages } + } + /// Clears all accounts and storage data. pub fn clear(&mut self) { self.accounts.clear(); @@ -1534,6 +1628,152 @@ mod tests { assert_eq!(state.accounts.get(&addr1), Some(&None)); } + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch() { + fn account(nonce: u64) -> Account { + Account { nonce, balance: U256::ZERO, bytecode_hash: None } + } + + let kept_account = B256::with_last_byte(1); + let removed_account = B256::with_last_byte(2); + let kept_storage = B256::with_last_byte(3); + let removed_storage = B256::with_last_byte(4); + let slot1 = B256::with_last_byte(11); + let slot2 = B256::with_last_byte(12); + + let older = HashedPostStateSorted::new( + vec![(kept_account, Some(account(1))), (removed_account, Some(account(10)))], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(1))], + }, + ), + ( + removed_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(2))], + }, + ), + ]), + ); + + let newer = HashedPostStateSorted::new( + vec![(kept_account, Some(account(2)))], + B256Map::from_iter([( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(3)), (slot2, U256::from(4))], + }, + )]), + ); + + let remove_a = HashedPostStateSorted::new( + vec![(removed_account, None)], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot2, U256::ZERO)] }, + ), + (removed_storage, HashedStorageSorted { wiped: true, storage_slots: vec![] }), + ]), + ); + + let remove_b = HashedPostStateSorted::new( + vec![(B256::with_last_byte(255), Some(account(99)))], + B256Map::default(), + ); + + let result = HashedPostStateSorted::disjointed_merge_batch( + vec![&older, &newer], + 
vec![&remove_b, &remove_a], + ); + + assert_eq!(result.accounts, vec![(kept_account, Some(account(2)))]); + assert_eq!(result.storages.len(), 1); + assert_eq!( + result.storages.get(&kept_storage), + Some(&HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(3))], + }) + ); + assert!(!result.storages.contains_key(&removed_storage)); + } + + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch_removes_overlapping_batch_key() { + fn account(nonce: u64) -> Account { + Account { nonce, balance: U256::ZERO, bytecode_hash: None } + } + + let overlapping_account = B256::with_last_byte(21); + let overlapping_storage = B256::with_last_byte(22); + let slot = B256::with_last_byte(23); + + let older = HashedPostStateSorted::new( + vec![(overlapping_account, Some(account(1)))], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }, + )]), + ); + + let newer = HashedPostStateSorted::new( + vec![(overlapping_account, Some(account(2)))], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(2))] }, + )]), + ); + + let remove = HashedPostStateSorted::new( + vec![(overlapping_account, None)], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: true, storage_slots: vec![] }, + )]), + ); + + let result = + HashedPostStateSorted::disjointed_merge_batch(vec![&older, &newer], vec![&remove]); + + assert!(result.accounts.is_empty()); + assert!(result.storages.is_empty()); + } + + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch_ignores_empty_storage_mask() { + let storage = B256::with_last_byte(31); + let slot = B256::with_last_byte(32); + + let batch = HashedPostStateSorted::new( + vec![], + B256Map::from_iter([( + storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }, + )]), + ); + let mask = HashedPostStateSorted::new( + 
vec![], + B256Map::from_iter([( + storage, + HashedStorageSorted { wiped: false, storage_slots: vec![] }, + )]), + ); + + let result = HashedPostStateSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!( + result.storages.get(&storage), + Some(&HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }) + ); + } + /// Test non-wiped storage merges both zero and non-zero valued slots #[test] fn test_hashed_storage_extend_from_sorted_non_wiped() { diff --git a/crates/trie/common/src/updates.rs b/crates/trie/common/src/updates.rs index d73b2c4d460..698b065b02b 100644 --- a/crates/trie/common/src/updates.rs +++ b/crates/trie/common/src/updates.rs @@ -1,5 +1,5 @@ use crate::{ - utils::{extend_sorted_vec, kway_merge_sorted}, + utils::{extend_sorted_vec, kway_merge_disjoint_sorted, kway_merge_sorted}, BranchNodeCompact, HashBuilder, Nibbles, }; use alloc::{ @@ -710,6 +710,101 @@ impl TrieUpdatesSorted { Self { account_nodes, storage_tries } } + + /// Merges the batch and removes any overlapping keys present in the mask. + /// + /// Account trie nodes are masked at the top level, while storage trie entries are only masked + /// at the node level unless the mask deletes the entire storage trie. For duplicate keys in + /// the batch, later items take precedence over earlier ones. The order of the mask does not + /// matter. 
+ pub fn disjointed_merge_batch<'a>(batch: Vec<&'a Self>, mask: Vec<&'a Self>) -> Self { + let account_nodes = kway_merge_disjoint_sorted( + batch.iter().map(|item| item.account_nodes.len()).sum(), + batch.iter().rev().map(|item| item.account_nodes.as_slice()), + mask.iter().map(|item| item.account_nodes.as_slice()), + ); + + struct StorageAcc<'a> { + is_deleted: bool, + sealed: bool, + node_count: usize, + slices: Vec<&'a [(Nibbles, Option)]>, + } + + #[derive(Default)] + struct StorageMaskAcc<'a> { + is_deleted: bool, + slices: Vec<&'a [(Nibbles, Option)]>, + } + + let mut storage_tries = B256Map::with_capacity_and_hasher( + batch.iter().map(|item| item.storage_tries.len()).sum(), + Default::default(), + ); + + for item in batch.iter().rev() { + for (hashed_address, storage_trie) in &item.storage_tries { + let entry = storage_tries.entry(*hashed_address).or_insert_with(|| StorageAcc { + is_deleted: false, + sealed: false, + node_count: 0, + slices: Vec::new(), + }); + + if entry.sealed { + continue; + } + + entry.slices.push(storage_trie.storage_nodes.as_slice()); + entry.node_count += storage_trie.storage_nodes.len(); + if storage_trie.is_deleted { + entry.is_deleted = true; + entry.sealed = true; + } + } + } + + let mut storage_masks: B256Map> = B256Map::with_capacity_and_hasher( + mask.iter().map(|item| item.storage_tries.len()).sum(), + Default::default(), + ); + for item in mask { + for (hashed_address, storage_trie) in &item.storage_tries { + let entry = storage_masks.entry(*hashed_address).or_default(); + if entry.is_deleted { + continue; + } + if storage_trie.is_deleted { + entry.is_deleted = true; + entry.slices.clear(); + } else { + entry.slices.push(storage_trie.storage_nodes.as_slice()); + } + } + } + + let storage_tries = storage_tries + .into_iter() + .filter_map(|(hashed_address, entry)| { + let storage_nodes = match storage_masks.get(&hashed_address) { + Some(mask_entry) if mask_entry.is_deleted => return None, + Some(mask_entry) => 
kway_merge_disjoint_sorted( + entry.node_count, + entry.slices, + mask_entry.slices.iter().copied(), + ), + None => kway_merge_sorted(entry.slices), + }; + + (!storage_nodes.is_empty() || entry.is_deleted).then_some(( + hashed_address, + StorageTrieUpdatesSorted { is_deleted: entry.is_deleted, storage_nodes }, + )) + }) + .collect(); + + Self::new(account_nodes, storage_tries) + } } impl AsRef for TrieUpdatesSorted { @@ -977,6 +1072,212 @@ mod tests { assert_eq!(storage3.storage_nodes[1].0, Nibbles::from_nibbles_unchecked([0x07])); } + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch() { + let kept_node = Nibbles::from_nibbles_unchecked([0x01]); + let removed_node = Nibbles::from_nibbles_unchecked([0x02]); + let kept_storage = B256::from([3; 32]); + let removed_storage = B256::from([4; 32]); + let slot1 = Nibbles::from_nibbles_unchecked([0x0a]); + let slot2 = Nibbles::from_nibbles_unchecked([0x0b]); + + let older = TrieUpdatesSorted::new( + vec![(kept_node, Some(BranchNodeCompact::default())), (removed_node, None)], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, None)], + }, + ), + ( + removed_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default()))], + }, + ), + ]), + ); + + let newer = TrieUpdatesSorted::new( + vec![(kept_node, None)], + B256Map::from_iter([( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default())), (slot2, None)], + }, + )]), + ); + + let remove_a = TrieUpdatesSorted::new( + vec![(removed_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot2, Some(BranchNodeCompact::default()))], + }, + ), + ( + removed_storage, + StorageTrieUpdatesSorted { is_deleted: true, storage_nodes: vec![] }, + ), + ]), + ); + + let 
remove_b = TrieUpdatesSorted::new( + vec![(Nibbles::from_nibbles_unchecked([0x0f]), Some(BranchNodeCompact::default()))], + B256Map::default(), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch( + vec![&older, &newer], + vec![&remove_b, &remove_a], + ); + + assert_eq!(result.account_nodes, vec![(kept_node, None)]); + assert_eq!(result.storage_tries.len(), 1); + assert_eq!( + result.storage_tries.get(&kept_storage), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default()))], + }) + ); + assert!(!result.storage_tries.contains_key(&removed_storage)); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_removes_overlapping_batch_key() { + let overlapping_node = Nibbles::from_nibbles_unchecked([0x03]); + let overlapping_storage = B256::from([5; 32]); + let slot = Nibbles::from_nibbles_unchecked([0x0c]); + + let older = TrieUpdatesSorted::new( + vec![(overlapping_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }, + )]), + ); + + let newer = TrieUpdatesSorted::new( + vec![(overlapping_node, None)], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { is_deleted: false, storage_nodes: vec![(slot, None)] }, + )]), + ); + + let remove = TrieUpdatesSorted::new( + vec![(overlapping_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { is_deleted: true, storage_nodes: vec![] }, + )]), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch(vec![&older, &newer], vec![&remove]); + + assert!(result.account_nodes.is_empty()); + assert!(result.storage_tries.is_empty()); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_uses_exact_key_masking() { + let hashed_address = B256::from([7; 32]); + let grandparent = 
Nibbles::from_nibbles_unchecked([0x05]); + let parent = Nibbles::from_nibbles_unchecked([0x05, 0x04]); + let child = Nibbles::from_nibbles_unchecked([0x05, 0x04, 0x03]); + + let batch = TrieUpdatesSorted::new( + vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + (child, Some(BranchNodeCompact::default())), + ], + B256Map::from_iter([( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + (child, Some(BranchNodeCompact::default())), + ], + }, + )]), + ); + let mask = TrieUpdatesSorted::new( + vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + ], + B256Map::from_iter([( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + ], + }, + )]), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!(result.account_nodes, vec![(child, Some(BranchNodeCompact::default()))]); + assert_eq!( + result.storage_tries.get(&hashed_address), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(child, Some(BranchNodeCompact::default()))], + }) + ); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_ignores_empty_storage_mask() { + let storage = B256::from([6; 32]); + let slot = Nibbles::from_nibbles_unchecked([0x0d]); + + let batch = TrieUpdatesSorted::new( + vec![], + B256Map::from_iter([( + storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }, + )]), + ); + let mask = TrieUpdatesSorted::new( + vec![], + B256Map::from_iter([( + storage, + StorageTrieUpdatesSorted { is_deleted: false, storage_nodes: vec![] }, + )]), + ); + + let 
result = TrieUpdatesSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!( + result.storage_tries.get(&storage), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }) + ); + } + /// Test extending with storage tries adds both nodes and removed nodes correctly #[test] fn test_trie_updates_extend_from_sorted_with_storage_tries() { diff --git a/crates/trie/common/src/utils.rs b/crates/trie/common/src/utils.rs index 6d6f134a3ac..ae6b9f3e236 100644 --- a/crates/trie/common/src/utils.rs +++ b/crates/trie/common/src/utils.rs @@ -26,6 +26,51 @@ where .collect() } +/// Merge sorted left slices into a sorted `Vec`, excluding keys present in any right slice. +/// +/// Callers pass left slices in priority order (index 0 = highest priority), so the first +/// left slice's value for a key takes precedence over later slices. Right slice order is ignored; +/// the right-hand side only contributes keys to exclude. 
+pub(crate) fn kway_merge_disjoint_sorted<'a, K, V>( + capacity: usize, + left_slices: impl IntoIterator, + right_slices: impl IntoIterator, +) -> Vec<(K, V)> +where + K: Ord + Clone + 'a, + V: Clone + 'a, +{ + let mut right_keys = right_slices + .into_iter() + .filter(|s| !s.is_empty()) + .map(|s| s.iter().map(|(k, _)| k)) + .kmerge() + .dedup() + .peekable(); + + let mut out = Vec::with_capacity(capacity); + for (_, key, value) in left_slices + .into_iter() + .filter(|s| !s.is_empty()) + .enumerate() + .map(|(i, s)| s.iter().map(move |(k, v)| (i, k, v))) + .kmerge_by(|(i1, k1, _), (i2, k2, _)| (k1, i1) < (k2, i2)) + .dedup_by(|(_, k1, _), (_, k2, _)| *k1 == *k2) + { + while right_keys.peek().is_some_and(|right_key| *right_key < key) { + right_keys.next(); + } + + if right_keys.peek().is_some_and(|right_key| *right_key == key) { + continue; + } + + out.push((key.clone(), value.clone())); + } + + out +} + /// Extend a sorted vector with another sorted vector using 2 pointer merge. /// Values from `other` take precedence for duplicate keys. 
pub(crate) fn extend_sorted_vec(target: &mut Vec<(K, V)>, other: &[(K, V)]) @@ -183,4 +228,20 @@ mod tests { let result: Vec<(i32, &str)> = kway_merge_sorted(Vec::<&[(i32, &str)]>::new()); assert!(result.is_empty()); } + + #[test] + fn test_kway_merge_disjoint_sorted() { + let left_old = vec![(1, "old"), (2, "drop"), (4, "keep")]; + let left_new = vec![(1, "new"), (3, "new_only")]; + let right_a = vec![(2, "ignored"), (5, "ignored")]; + let right_b = vec![(3, "ignored")]; + + let result = kway_merge_disjoint_sorted( + left_old.len() + left_new.len(), + [left_new.as_slice(), left_old.as_slice()], + [right_a.as_slice(), right_b.as_slice()], + ); + + assert_eq!(result, vec![(1, "new"), (4, "keep")]); + } } diff --git a/crates/trie/trie/src/trie.rs b/crates/trie/trie/src/trie.rs index 9ea219f73ab..e8bcb440ed5 100644 --- a/crates/trie/trie/src/trie.rs +++ b/crates/trie/trie/src/trie.rs @@ -36,6 +36,8 @@ pub struct StateRoot { pub hashed_cursor_factory: H, /// A set of prefix sets that have changed. pub prefix_sets: TriePrefixSets, + /// Whether every child under a branch whose path matches the prefix set should be walked. + walk_all_changed_branch_children: bool, /// Previous intermediate state. previous_state: Option, /// The number of updates after which the intermediate progress should be returned. @@ -56,6 +58,7 @@ impl StateRoot { trie_cursor_factory, hashed_cursor_factory, prefix_sets: TriePrefixSets::default(), + walk_all_changed_branch_children: false, previous_state: None, threshold: DEFAULT_INTERMEDIATE_THRESHOLD, #[cfg(feature = "metrics")] @@ -69,6 +72,12 @@ impl StateRoot { self } + /// Configures the state root walker to visit all children of changed branch paths. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Set the threshold. 
pub const fn with_threshold(mut self, threshold: u64) -> Self { self.threshold = threshold; @@ -93,6 +102,7 @@ impl StateRoot { trie_cursor_factory: self.trie_cursor_factory, hashed_cursor_factory, prefix_sets: self.prefix_sets, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, threshold: self.threshold, previous_state: self.previous_state, #[cfg(feature = "metrics")] @@ -106,6 +116,7 @@ impl StateRoot { trie_cursor_factory, hashed_cursor_factory: self.hashed_cursor_factory, prefix_sets: self.prefix_sets, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, threshold: self.threshold, previous_state: self.previous_state, #[cfg(feature = "metrics")] @@ -178,6 +189,7 @@ where account_root_state.walker_stack, self.prefix_sets.account_prefix_set, ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let account_node_iter = TrieNodeIter::state_trie(walker, hashed_account_cursor) .with_last_hashed_key(account_root_state.last_hashed_key); @@ -213,6 +225,7 @@ where self.metrics.storage_trie.clone(), ) .with_intermediate_state(Some(storage_state.state)) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_threshold(remaining_threshold); let storage_result = storage_root_calculator.calculate(retain_updates)?; @@ -239,6 +252,7 @@ where // calculation let hash_builder = HashBuilder::default().with_updates(retain_updates); let walker = TrieWalker::state_trie(trie_cursor, self.prefix_sets.account_prefix_set) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::state_trie(walker, hashed_account_cursor); (hash_builder, node_iter) @@ -272,6 +286,7 @@ where #[cfg(feature = "metrics")] self.metrics.storage_trie.clone(), ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_threshold(remaining_threshold); 
let storage_result = storage_root_calculator.calculate(retain_updates)?; @@ -465,6 +480,8 @@ pub struct StorageRoot { pub hashed_address: B256, /// The set of storage slot prefixes that have changed. pub prefix_set: PrefixSet, + /// Whether every child under a branch whose path matches the prefix set should be walked. + walk_all_changed_branch_children: bool, /// Previous intermediate state. previous_state: Option, /// The number of updates after which the intermediate progress should be returned. @@ -506,6 +523,7 @@ impl StorageRoot { hashed_cursor_factory, hashed_address, prefix_set, + walk_all_changed_branch_children: false, previous_state: None, threshold: DEFAULT_INTERMEDIATE_THRESHOLD, #[cfg(feature = "metrics")] @@ -519,6 +537,12 @@ impl StorageRoot { self } + /// Configures the storage root walker to visit all children of changed branch paths. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Set the threshold. 
pub const fn with_threshold(mut self, threshold: u64) -> Self { self.threshold = threshold; @@ -544,6 +568,7 @@ impl StorageRoot { hashed_cursor_factory, hashed_address: self.hashed_address, prefix_set: self.prefix_set, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, previous_state: self.previous_state, threshold: self.threshold, #[cfg(feature = "metrics")] @@ -558,6 +583,7 @@ impl StorageRoot { hashed_cursor_factory: self.hashed_cursor_factory, hashed_address: self.hashed_address, prefix_set: self.prefix_set, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, previous_state: self.previous_state, threshold: self.threshold, #[cfg(feature = "metrics")] @@ -641,6 +667,7 @@ where state.walker_stack, self.prefix_set, ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::storage_trie(walker, hashed_storage_cursor) .with_last_hashed_key(state.last_hashed_key); @@ -649,6 +676,7 @@ where None => { let hash_builder = HashBuilder::default().with_updates(retain_updates); let walker = TrieWalker::storage_trie(trie_cursor, self.prefix_set) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::storage_trie(walker, hashed_storage_cursor); (hash_builder, node_iter) diff --git a/crates/trie/trie/src/walker.rs b/crates/trie/trie/src/walker.rs index f12bf46f748..7936663f3a1 100644 --- a/crates/trie/trie/src/walker.rs +++ b/crates/trie/trie/src/walker.rs @@ -8,6 +8,18 @@ use alloy_trie::proof::AddedRemovedKeys; use reth_storage_errors::db::DatabaseError; use tracing::{instrument, trace}; +#[cfg(test)] +use crate::trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}; + +#[cfg(test)] +use alloy_primitives::map::B256Map; + +#[cfg(test)] +use alloy_trie::TrieMask; + +#[cfg(test)] +use std::collections::BTreeMap; + #[cfg(feature = 
"metrics")] use crate::metrics::WalkerMetrics; @@ -26,6 +38,9 @@ pub struct TrieWalker { pub can_skip_current_node: bool, /// A `PrefixSet` representing the changes to be applied to the trie. pub changes: PrefixSet, + /// When enabled, all children of a branch become unskippable if the branch path itself + /// matches the prefix set, even if a given child path does not. + walk_all_changed_branch_children: bool, /// The retained trie node keys that need to be removed. removed_keys: Option>, /// Provided when it's necessary not to skip certain nodes during proof generation. @@ -76,6 +91,7 @@ impl> TrieWalker { changes, stack, can_skip_current_node: false, + walk_all_changed_branch_children: false, removed_keys: None, added_removed_keys: None, #[cfg(feature = "metrics")] @@ -101,6 +117,7 @@ impl> TrieWalker { stack: self.stack, can_skip_current_node: self.can_skip_current_node, changes: self.changes, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, removed_keys: self.removed_keys, added_removed_keys, #[cfg(feature = "metrics")] @@ -108,6 +125,12 @@ impl> TrieWalker { } } + /// Configures the walker to treat every child of a matching branch path as unskippable. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Split the walker into stack and trie updates. 
pub fn split(mut self) -> (Vec, HashSet) { let keys = self.take_removed_keys(); @@ -188,7 +211,14 @@ impl> TrieWalker { "Checked for only non-removed child", ); + let branch_path_matches_prefix_set = self + .walk_all_changed_branch_children + .then(|| node.position().is_child()) + .unwrap_or(false) && + self.changes.contains(&node.key); + !self.changes.contains(node.full_key()) && + !branch_path_matches_prefix_set && node.hash_flag() && !key_is_only_nonremoved_child }); @@ -233,6 +263,7 @@ impl> TrieWalker { changes, stack: vec![CursorSubNode::default()], can_skip_current_node: false, + walk_all_changed_branch_children: false, removed_keys: None, added_removed_keys: Default::default(), #[cfg(feature = "metrics")] @@ -387,3 +418,83 @@ impl> TrieWalker { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::prefix_set::PrefixSetMut; + use alloy_primitives::B256; + + fn branch_node(state_mask: u16, tree_mask: u16, hash_mask: u16) -> BranchNodeCompact { + let hash_count = hash_mask.count_ones() as usize; + BranchNodeCompact::new( + TrieMask::new(state_mask), + TrieMask::new(tree_mask), + TrieMask::new(hash_mask), + vec![B256::ZERO; hash_count], + None, + ) + } + + fn root_branch_node(state_mask: u16, tree_mask: u16, hash_mask: u16) -> BranchNodeCompact { + let hash_count = hash_mask.count_ones() as usize; + BranchNodeCompact::new( + TrieMask::new(state_mask), + TrieMask::new(tree_mask), + TrieMask::new(hash_mask), + vec![B256::ZERO; hash_count], + Some(B256::ZERO), + ) + } + + fn walker_for_matching_branch_children_test( + walk_all_changed_branch_children: bool, + ) -> TrieWalker { + let trie_nodes = BTreeMap::from([ + (Nibbles::default(), root_branch_node(1 << 2, 1 << 2, 1 << 2)), + ( + Nibbles::from_nibbles([0x2]), + branch_node((1 << 3) | (1 << 4), 0, (1 << 3) | (1 << 4)), + ), + ]); + let factory = MockTrieCursorFactory::new(trie_nodes, B256Map::default()); + + let mut prefix_set = PrefixSetMut::default(); + 
prefix_set.insert(Nibbles::from_nibbles([0x2, 0x3, 0x1])); + + TrieWalker::state_trie(factory.account_trie_cursor().unwrap(), prefix_set.freeze()) + .with_walk_all_changed_branch_children(walk_all_changed_branch_children) + } + + #[test] + fn branch_siblings_remain_skippable_by_default() { + let mut walker = walker_for_matching_branch_children_test(false); + + assert_eq!(walker.key().copied(), Some(Nibbles::default())); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2]))); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x3]))); + assert_eq!(walker.stack.last().unwrap().position(), SubNodePosition::Child(0x3)); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x4]))); + assert!(walker.can_skip_current_node); + } + + #[test] + fn matching_branch_path_can_make_all_children_unskippable() { + let mut walker = walker_for_matching_branch_children_test(true); + + walker.advance().unwrap(); + walker.advance().unwrap(); + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x4]))); + assert!(!walker.can_skip_current_node); + } +} diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx index e25376d38a5..423ed34c33f 100644 --- a/docs/vocs/docs/pages/cli/reth/node.mdx +++ b/docs/vocs/docs/pages/cli/reth/node.mdx @@ -971,10 +971,15 @@ Engine: --engine.persistence-backpressure-threshold Configure the maximum canonical-minus-persisted gap before engine API processing stalls. - If omitted, this defaults to the larger of the default backpressure threshold and twice `--engine.persistence-threshold`. 
+ If omitted, this is derived from `--engine.persistence-threshold` and `--engine.memory-block-buffer-target`, unless the process configured an explicit global default. This value must be greater than `--engine.persistence-threshold`. + --engine.num-state-masking-blocks + Configure how many of the blocks being persisted should only mask state/trie writes instead of durably persisting their state/trie updates in the current cycle + + [default: 0] + --engine.memory-block-buffer-target Configure the target number of blocks to keep in memory