diff --git a/Cargo.lock b/Cargo.lock index 4f2f22ecdd9..e5a529eea09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3701,8 +3701,14 @@ name = "example-db-access" version = "0.0.0" dependencies = [ "alloy-primitives", + "clap", "eyre", + "reth-chainspec", + "reth-db-api", "reth-ethereum", + "reth-storage-api", + "reth-trie", + "reth-trie-db", ] [[package]] diff --git a/bin/reth-bench/README.md b/bin/reth-bench/README.md index f44245e82b0..be3ee00d094 100644 --- a/bin/reth-bench/README.md +++ b/bin/reth-bench/README.md @@ -39,7 +39,7 @@ Both `new-payload-fcu` and `new-payload-only` support `--rpc-block-fetch-retries to control how many times block fetches are retried after an RPC failure. The default is `10`. Use `--rpc-block-fetch-retries forever` to keep retrying indefinitely. -When using `--wait-for-persistence`, the benchmark waits after every `(threshold + 1)` blocks, where the threshold defaults to the engine's persistence threshold (2). This can be customized with `--persistence-threshold <N>`. +When using `--wait-for-persistence`, the benchmark waits after every `(threshold + 1)` blocks, where the threshold defaults to the engine's persistence threshold. This can be customized with `--persistence-threshold <N>`. By default, the WebSocket URL for persistence subscriptions is derived from `--engine-rpc-url` (converting to ws:// on port 8546). Use `--ws-rpc-url` to override this. 
diff --git a/bin/reth-bench/src/bench/new_payload_fcu.rs b/bin/reth-bench/src/bench/new_payload_fcu.rs index d44f9b8f132..1146871675f 100644 --- a/bin/reth-bench/src/bench/new_payload_fcu.rs +++ b/bin/reth-bench/src/bench/new_payload_fcu.rs @@ -25,9 +25,7 @@ use alloy_provider::{ network::{AnyNetwork, AnyRpcBlock}, Provider, RootProvider, }; -use alloy_rpc_types_engine::{ - ExecutionData, ExecutionPayloadEnvelopeV5, ForkchoiceState, PayloadAttributes, -}; +use alloy_rpc_types_engine::{ExecutionData, ForkchoiceState, PayloadAttributes}; use clap::Parser; use eyre::{bail, ensure, Context, OptionExt}; use futures::{stream, Stream, StreamExt, TryStreamExt}; @@ -35,8 +33,9 @@ use reth_cli_runner::CliContext; use reth_engine_primitives::config::DEFAULT_PERSISTENCE_THRESHOLD; use reth_node_api::{EngineApiMessageVersion, ExecutionPayload}; use reth_node_core::args::{BenchmarkArgs, WaitForPersistence}; -use reth_rpc_api::{RethNewPayloadInput, TestingBuildBlockRequestV1}; +use reth_rpc_api::{RethNewPayloadInput, TestingBuildBlockRequestV1, TestingBuildBlockResponseV1}; use std::{ + path::Path, pin::Pin, time::{Duration, Instant}, }; @@ -189,6 +188,7 @@ impl Command { if let Some(depth) = self.reorg { info!(target: "reth-bench", depth, "Using testing_buildBlockV1 reorg mode"); } + let output_dir = self.benchmark.output.clone(); let BenchContext { benchmark_mode, @@ -213,6 +213,7 @@ impl Command { let buffer_size = self.rpc_block_buffer_size; let provider = block_provider.clone(); let bench_mode = benchmark_mode.clone(); + let artifact_output_dir = output_dir.clone(); let mut blocks: Pin> + Send>> = Box::pin( stream::iter((next_block..) 
.take_while(move |next_block| { @@ -220,6 +221,7 @@ impl Command { })) .map(move |next_block| { let block_provider = provider.clone(); + let artifact_output_dir = artifact_output_dir.clone(); async move { let block_res = block_provider .get_block_by_number(next_block.into()) @@ -238,10 +240,47 @@ impl Command { }; - let bal = if !rlp_blocks && - (block.header.block_access_list_hash.is_some() || self.enable_bal) - { - Some(fetch_block_access_list(&block_provider, block.header.number).await?) + let fetched_bal = if !rlp_blocks { + match fetch_block_access_list(&block_provider, block.header.number).await { + Ok(bal) => { + write_bal_artifact( + artifact_output_dir.as_deref(), + "real", + block.header.number, + block.header.hash, + Some(&bal), + )?; + Some(bal) + } + Err(err) => { + warn!( + target: "reth-bench", + block_number = block.header.number, + block_hash = %block.header.hash, + %err, + "Failed to fetch real block BAL artifact" + ); + if is_unsupported_bal_rpc_error(&err) { + warn!( + target: "reth-bench", + "Remote RPC does not support BAL fetching; writing null real-block BAL artifact" + ); + } + write_bal_artifact( + artifact_output_dir.as_deref(), + "real", + block.header.number, + block.header.hash, + None, + )?; + None + } + } + } else { + None + }; + let bal = if block.header.block_access_list_hash.is_some() || self.enable_bal { + fetched_bal } else { None }; @@ -404,6 +443,7 @@ impl Command { .ok_or_eyre("missing deferred fork block for reorg branch start")?, canonical_parent_hash, no_wait_for_caches, + output_dir.as_deref(), ) .await?, }); @@ -448,6 +488,7 @@ impl Command { next_fork_block_number, Some(prepared.block_hash), no_wait_for_caches, + output_dir.as_deref(), ) .await?; } else { @@ -534,12 +575,13 @@ async fn prepare_built_block( block: &AnyRpcBlock, parent_block_hash: B256, no_wait_for_caches: bool, + output_dir: Option<&Path>, ) -> eyre::Result { const MAX_BUILD_ATTEMPTS: usize = 10; const BUILD_RETRY_INTERVAL: Duration = 
Duration::from_millis(100); let request = build_block_request(block, parent_block_hash)?; - let built_payload: ExecutionPayloadEnvelopeV5 = { + let built_response: TestingBuildBlockResponseV1 = { let mut attempts_remaining = MAX_BUILD_ATTEMPTS; loop { @@ -569,8 +611,16 @@ async fn prepare_built_block( } }; + let built_payload = built_response.execution_payload_envelope; let payload = &built_payload.execution_payload.payload_inner.payload_inner; let block_hash = payload.block_hash; + write_bal_artifact( + output_dir, + "fork", + payload.block_number, + block_hash, + built_response.block_access_list.as_ref(), + )?; let (payload, sidecar) = built_payload .into_payload_and_sidecar(block.header.parent_beacon_block_root.unwrap_or_default()); // Fork payloads are built immediately before the next `testing_buildBlockV1` call. Leaving @@ -593,9 +643,10 @@ async fn queue_fork_block( block_number: u64, parent_block_hash: Option, no_wait_for_caches: bool, + output_dir: Option<&Path>, ) -> eyre::Result> { if !benchmark_mode.contains(block_number) { - return Ok(None) + return Ok(None); } let future_block = block_provider @@ -613,17 +664,50 @@ async fn queue_fork_block( &future_block, parent_block_hash, no_wait_for_caches, + output_dir, ) .await?, })) } +fn write_bal_artifact( + output_dir: Option<&Path>, + kind: &str, + block_number: u64, + block_hash: B256, + block_access_list: Option<&BlockAccessList>, +) -> eyre::Result<()> { + let Some(output_dir) = output_dir else { return Ok(()) }; + + let bal_dir = output_dir.join("block-access-lists"); + std::fs::create_dir_all(&bal_dir)?; + let path = bal_dir.join(format!("bal-{kind}-{block_number}-{block_hash}.json")); + let value = serde_json::json!({ + "kind": kind, + "blockNumber": block_number, + "blockHash": block_hash, + "blockAccessList": block_access_list, + }); + let file = std::fs::File::create(&path)?; + serde_json::to_writer_pretty(file, &value)?; + debug!(target: "reth-bench", %kind, block_number, %block_hash, path = 
%path.display(), "Wrote BAL artifact"); + Ok(()) +} + fn is_retryable_build_block_error(err: &alloy_transport::TransportError) -> bool { let message = err.to_string(); message.contains("block not found: hash") || message.contains("block hash not found for block number") } +fn is_unsupported_bal_rpc_error(err: &eyre::Report) -> bool { + let message = err.to_string(); + message.contains("method ignored") || + message.contains("Method not found") || + message.contains("method not found") || + message.contains("-32601") +} + fn build_block_request( block: &AnyRpcBlock, parent_block_hash: B256, diff --git a/bin/reth-bench/src/bench/replay_payloads.rs b/bin/reth-bench/src/bench/replay_payloads.rs index 7dfeb6d8fcc..e47f54a9ead 100644 --- a/bin/reth-bench/src/bench/replay_payloads.rs +++ b/bin/reth-bench/src/bench/replay_payloads.rs @@ -29,6 +29,7 @@ use reth_node_api::EngineApiMessageVersion; use reth_node_core::args::WaitForPersistence; use reth_rpc_api::RethNewPayloadInput; use std::{ + collections::HashMap, path::PathBuf, time::{Duration, Instant}, }; @@ -228,7 +229,7 @@ impl Command { ); } - let mut parent_hash = initial_parent_hash; + let mut replayed_hashes = HashMap::from([(initial_parent_hash, initial_parent_hash)]); let mut results = Vec::new(); let total_benchmark_duration = Instant::now(); @@ -236,6 +237,7 @@ impl Command { for (i, payload) in payloads.iter().enumerate() { let execution_data = &payload.execution_data; let mut block_hash = payload.block_hash; + let original_block_hash = block_hash; let v1 = execution_data.payload.as_v1(); let gas_used = v1.gas_used; @@ -274,11 +276,16 @@ impl Command { .unwrap_or(WaitForPersistence::Never) .rpc_value(block_number); - // Inject sidecar BAL into the inline V4 payload field when --bal is set. - // If the payload is not already V4 we upgrade it (V3→V4) so the BAL - // can be carried inline. This changes the block hash, so we recompute - // it and patch parent_hash to maintain the chain. 
let mut execution_data = execution_data.clone(); + let original_parent_hash = execution_data.payload.as_v1().parent_hash; + let mut payload_modified = false; + if let Some(remapped_parent_hash) = replayed_hashes.get(&original_parent_hash) { + if *remapped_parent_hash != original_parent_hash { + execution_data.payload.as_v1_mut().parent_hash = *remapped_parent_hash; + payload_modified = true; + } + } + if self.bal && let Some(bal) = &payload.block_access_list { @@ -292,12 +299,10 @@ impl Command { execution_data.payload.as_v4_mut().unwrap().block_access_list = encoded_bal; } - // Patch parent_hash so this block chains off the (possibly - // rehashed) previous block. - execution_data.payload.as_v1_mut().parent_hash = parent_hash; + payload_modified = true; + } - // Recompute block hash after payload modification and update - // the hash stored in the payload itself. + if payload_modified { block_hash = compute_payload_block_hash(&execution_data)?; execution_data.payload.as_v1_mut().block_hash = block_hash; } @@ -349,8 +354,8 @@ impl Command { let fcu_state = ForkchoiceState { head_block_hash: block_hash, - safe_block_hash: parent_hash, - finalized_block_hash: parent_hash, + safe_block_hash: initial_parent_hash, + finalized_block_hash: initial_parent_hash, }; let fcu_start = Instant::now(); @@ -390,7 +395,7 @@ impl Command { TotalGasRow { block_number, transaction_count, gas_used, time: current_duration }; results.push((gas_row, combined_result)); - parent_hash = block_hash; + replayed_hashes.insert(original_block_hash, block_hash); } let (gas_output_results, combined_results): (Vec, Vec) = diff --git a/crates/chain-state/src/in_memory.rs b/crates/chain-state/src/in_memory.rs index ecdece9a337..702031adfbb 100644 --- a/crates/chain-state/src/in_memory.rs +++ b/crates/chain-state/src/in_memory.rs @@ -320,6 +320,19 @@ impl CanonicalInMemoryState { /// This will update the links between blocks and remove all blocks that are [.. /// `persisted_height`]. 
pub fn remove_persisted_blocks(&self, persisted_num_hash: BlockNumHash) { + self.remove_persisted_blocks_until(persisted_num_hash, persisted_num_hash.number); + } + + /// Removes blocks from the in-memory state through `remove_until` while still reporting the + /// provided block as the persisted tip. + /// + /// This is used when block bodies/plain state have been persisted further than trie data, so a + /// suffix still needs to remain in memory for trie-backed operations. + pub fn remove_persisted_blocks_until( + &self, + persisted_num_hash: BlockNumHash, + remove_until: BlockNumber, + ) { self.set_persisted(persisted_num_hash); // if the persisted hash is not in the canonical in memory state, do nothing, because it // means canonical blocks were not actually persisted. @@ -337,16 +350,15 @@ impl CanonicalInMemoryState { let mut numbers = self.inner.in_memory_state.numbers.write(); let mut blocks = self.inner.in_memory_state.blocks.write(); - let BlockNumHash { number: persisted_height, hash: _ } = persisted_num_hash; + let remove_until = remove_until.min(persisted_num_hash.number); // clear all numbers numbers.clear(); - // drain all blocks and only keep the ones that are not persisted (below the persisted - // height) + // Drain all blocks and keep only the suffix that still has to stay in memory. 
let mut old_blocks = blocks .drain() - .filter(|(_, b)| b.block_ref().recovered_block().number() > persisted_height) + .filter(|(_, b)| b.block_ref().recovered_block().number() > remove_until) .map(|(_, b)| b.block.clone()) .collect::>(); diff --git a/crates/chain-state/src/lazy_overlay.rs b/crates/chain-state/src/lazy_overlay.rs index ecf9a9c92ac..132b2e7a995 100644 --- a/crates/chain-state/src/lazy_overlay.rs +++ b/crates/chain-state/src/lazy_overlay.rs @@ -12,7 +12,7 @@ use reth_primitives_traits::{ }; use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; use std::sync::Arc; -use tracing::{debug, trace}; +use tracing::debug; /// Inputs captured for lazy overlay computation. #[derive(Clone)] @@ -71,6 +71,18 @@ impl LazyOverlay { "LazyOverlay blocks must be ordered newest to oldest along a single chain" ); + if tracing::enabled!(target: "chain_state::lazy_overlay", tracing::Level::DEBUG) { + debug!( + target: "chain_state::lazy_overlay", + num_blocks = blocks.len(), + tip = ?blocks.first().map(block_summary), + oldest = ?blocks.last().map(block_summary), + anchor_hash = ?blocks.last().map(|block| block.recovered_block().parent_hash()), + blocks = ?blocks.iter().map(block_summary).collect::>(), + "Creating lazy overlay" + ); + } + Self { inner: Default::default(), inputs: LazyOverlayInputs { blocks } } } @@ -79,6 +91,11 @@ impl LazyOverlay { self.inputs.blocks.len() } + /// Returns a compact summary of the blocks captured by this overlay. + pub fn block_summaries(&self) -> Vec { + self.inputs.blocks.iter().map(block_summary).collect() + } + /// Returns the oldest anchor hash this overlay can serve. /// /// This is the parent hash of the oldest block in the stored newest-to-oldest chain segment. @@ -105,7 +122,15 @@ impl LazyOverlay { /// Subsequent calls for the same anchor return the cached result immediately. 
pub fn get(&self, anchor_hash: B256) -> Arc { match self.inner.entry(anchor_hash) { - dashmap::Entry::Occupied(entry) => Arc::clone(entry.get()), + dashmap::Entry::Occupied(entry) => { + debug!( + target: "chain_state::lazy_overlay", + %anchor_hash, + num_blocks = self.inputs.blocks.len(), + "Using cached lazy overlay result" + ); + Arc::clone(entry.get()) + } dashmap::Entry::Vacant(entry) => { let input = self.compute(anchor_hash); entry.insert(Arc::clone(&input)); @@ -133,12 +158,28 @@ impl LazyOverlay { let Some(last_index) = blocks.iter().position(|block| block.recovered_block().parent_hash() == anchor_hash) else { + debug!( + target: "chain_state::lazy_overlay", + %anchor_hash, + available_blocks = ?blocks.iter().map(block_summary).collect::>(), + "Lazy overlay requested missing anchor" + ); panic!( "LazyOverlay does not contain a block whose parent hash matches requested anchor {anchor_hash}" ); }; let blocks = &blocks[..=last_index]; + if tracing::enabled!(target: "chain_state::lazy_overlay", tracing::Level::DEBUG) { + debug!( + target: "chain_state::lazy_overlay", + %anchor_hash, + num_selected_blocks = blocks.len(), + selected_blocks = ?blocks.iter().map(block_summary).collect::>(), + "Computing lazy overlay for anchor" + ); + } + // Fast path: Check if tip block's overlay is ready and anchor matches. // The tip block (first in list) has the cumulative overlay from all ancestors up to the // requested anchor. 
@@ -146,7 +187,14 @@ impl LazyOverlay { let data = tip.trie_data(); if let Some(anchored) = &data.anchored_trie_input { if anchored.anchor_hash == anchor_hash { - trace!(target: "chain_state::lazy_overlay", %anchor_hash, "Reusing tip block's cached overlay (fast path)"); + debug!( + target: "chain_state::lazy_overlay", + %anchor_hash, + tip = ?block_summary(tip), + trie_updates = anchored.trie_input.nodes.total_len(), + hashed_state = anchored.trie_input.state.total_len(), + "Reusing tip block's cached overlay (fast path)" + ); return Arc::clone(&anchored.trie_input); } debug!( @@ -163,6 +211,7 @@ impl LazyOverlay { target: "chain_state::lazy_overlay", %anchor_hash, num_blocks = blocks.len(), + blocks = ?blocks.iter().map(block_summary).collect::>(), "Merging blocks (slow path)" ); Arc::new(Self::merge_blocks(blocks)) @@ -187,6 +236,11 @@ impl LazyOverlay { } } +fn block_summary(block: &ExecutedBlock) -> String { + let recovered = block.recovered_block(); + format!("#{} hash={} parent={}", recovered.number(), recovered.hash(), recovered.parent_hash()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/chain-state/src/memory_overlay.rs b/crates/chain-state/src/memory_overlay.rs index 7e31ec06fee..8cb5435387a 100644 --- a/crates/chain-state/src/memory_overlay.rs +++ b/crates/chain-state/src/memory_overlay.rs @@ -13,6 +13,7 @@ use reth_trie::{ }; use revm_database::BundleState; use std::{borrow::Cow, sync::OnceLock}; +use tracing::debug; /// A state provider that stores references to in-memory blocks along with their state as well as a /// reference of the historical state provider for fallback lookups. @@ -38,6 +39,12 @@ impl<'a, N: NodePrimitives> MemoryOverlayStateProviderRef<'a, N> { /// - `historical` - a historical state provider for the latest ancestor block stored in the /// database. 
pub fn new(historical: Box, in_memory: Vec>) -> Self { + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&in_memory), + num_in_memory_blocks = in_memory.len(), + "Creating borrowed memory overlay state provider" + ); Self { historical, in_memory: Cow::Owned(in_memory), trie_input: OnceLock::new() } } @@ -50,12 +57,24 @@ impl<'a, N: NodePrimitives> MemoryOverlayStateProviderRef<'a, N> { fn trie_input(&self) -> &TrieInput { self.trie_input.get_or_init(|| { let mut input = TrieInput::default(); + let mut trie_updates = 0; + let mut hashed_state = 0; // Iterate from oldest to newest for block in self.in_memory.iter().rev() { let data = block.trie_data(); + trie_updates += data.trie_updates.total_len(); + hashed_state += data.hashed_state.total_len(); input.nodes.extend_from_sorted(&data.trie_updates); input.state.extend_from_sorted(&data.hashed_state); } + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&self.in_memory), + num_in_memory_blocks = self.in_memory.len(), + trie_updates, + hashed_state, + "Built memory overlay trie input" + ); input }) } @@ -127,6 +146,15 @@ impl StateRootProvider for MemoryOverlayStateProviderRef<'_, } fn state_root_from_nodes(&self, mut input: TrieInput) -> ProviderResult { + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&self.in_memory), + num_in_memory_blocks = self.in_memory.len(), + prefix_account_updates = input.prefix_sets.account_prefix_set.len(), + prefix_storage_tries = input.prefix_sets.storage_prefix_sets.len(), + prefix_destroyed_accounts = input.prefix_sets.destroyed_accounts.len(), + "Calculating state root through memory overlay provider" + ); input.prepend_self(self.trie_input().clone()); self.historical.state_root_from_nodes(input) } @@ -142,6 +170,15 @@ impl StateRootProvider for MemoryOverlayStateProviderRef<'_, &self, mut input: TrieInput, ) -> ProviderResult<(B256, TrieUpdates)> { + debug!( + target: 
"chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&self.in_memory), + num_in_memory_blocks = self.in_memory.len(), + prefix_account_updates = input.prefix_sets.account_prefix_set.len(), + prefix_storage_tries = input.prefix_sets.storage_prefix_sets.len(), + prefix_destroyed_accounts = input.prefix_sets.destroyed_accounts.len(), + "Calculating state root with updates through memory overlay provider" + ); input.prepend_self(self.trie_input().clone()); self.historical.state_root_from_nodes_with_updates(input) } @@ -184,6 +221,17 @@ impl StateProofProvider for MemoryOverlayStateProviderRef<'_, address: Address, slots: &[B256], ) -> ProviderResult { + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&self.in_memory), + num_in_memory_blocks = self.in_memory.len(), + %address, + num_slots = slots.len(), + prefix_account_updates = input.prefix_sets.account_prefix_set.len(), + prefix_storage_tries = input.prefix_sets.storage_prefix_sets.len(), + prefix_destroyed_accounts = input.prefix_sets.destroyed_accounts.len(), + "Generating proof through memory overlay provider" + ); input.prepend_self(self.trie_input().clone()); self.historical.proof(input, address, slots) } @@ -193,6 +241,15 @@ impl StateProofProvider for MemoryOverlayStateProviderRef<'_, mut input: TrieInput, targets: MultiProofTargets, ) -> ProviderResult { + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&self.in_memory), + num_in_memory_blocks = self.in_memory.len(), + prefix_account_updates = input.prefix_sets.account_prefix_set.len(), + prefix_storage_tries = input.prefix_sets.storage_prefix_sets.len(), + prefix_destroyed_accounts = input.prefix_sets.destroyed_accounts.len(), + "Generating multiproof through memory overlay provider" + ); input.prepend_self(self.trie_input().clone()); self.historical.multiproof(input, targets) } @@ -263,6 +320,12 @@ impl MemoryOverlayStateProvider { /// - `historical` - a historical 
state provider for the latest ancestor block stored in the /// database. pub fn new(historical: StateProviderBox, in_memory: Vec>) -> Self { + debug!( + target: "chain_state::memory_overlay", + in_memory_blocks = ?block_summaries(&in_memory), + num_in_memory_blocks = in_memory.len(), + "Creating owned memory overlay state provider" + ); Self { historical, in_memory, trie_input: OnceLock::new() } } @@ -284,3 +347,18 @@ impl MemoryOverlayStateProvider { // Delegates all provider impls to [`MemoryOverlayStateProviderRef`] reth_storage_api::macros::delegate_provider_impls!(MemoryOverlayStateProvider where [N: NodePrimitives]); + +fn block_summaries(blocks: &[ExecutedBlock]) -> Vec { + blocks + .iter() + .map(|block| { + let recovered = block.recovered_block(); + format!( + "#{} hash={} parent={}", + recovered.number(), + recovered.hash(), + recovered.parent_hash() + ) + }) + .collect() +} diff --git a/crates/e2e-test-utils/tests/e2e-testsuite/main.rs b/crates/e2e-test-utils/tests/e2e-testsuite/main.rs index eebddef2ff8..84c21baea79 100644 --- a/crates/e2e-test-utils/tests/e2e-testsuite/main.rs +++ b/crates/e2e-test-utils/tests/e2e-testsuite/main.rs @@ -374,7 +374,7 @@ async fn test_setup_builder_with_custom_tree_config() -> Result<()> { PayloadAttributes::default() }) .with_tree_config_modifier(|config| { - config.with_persistence_threshold(0).with_memory_block_buffer_target(5) + config.with_persistence_threshold(6).with_memory_block_buffer_target(5) }) .build() .await?; diff --git a/crates/e2e-test-utils/tests/rocksdb/main.rs b/crates/e2e-test-utils/tests/rocksdb/main.rs index 3a6bce7fe7e..d4659b70e2b 100644 --- a/crates/e2e-test-utils/tests/rocksdb/main.rs +++ b/crates/e2e-test-utils/tests/rocksdb/main.rs @@ -189,7 +189,7 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| 
config.with_persistence_threshold(1)) .build() .await?; @@ -200,7 +200,7 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { let signer = wallets[0].clone(); let client = nodes[0].rpc_client().expect("RPC client should be available"); - let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer).await; + let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer.clone()).await; let tx_hash = nodes[0].rpc.inject_tx(raw_tx).await?; // Wait for tx to enter pending pool before mining @@ -209,6 +209,14 @@ async fn test_rocksdb_transaction_queries() -> Result<()> { let payload = nodes[0].advance_block().await?; assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 1).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Query each transaction by hash let tx: Option = client.request("eth_getTransactionByHash", [tx_hash]).await?; let tx = tx.expect("Transaction should be found"); @@ -256,7 +264,7 @@ async fn test_rocksdb_multi_tx_same_block() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -283,6 +291,14 @@ async fn test_rocksdb_multi_tx_same_block() -> Result<()> { let payload = nodes[0].advance_block().await?; assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 3).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Verify 
block contains all 3 txs let block: Option = client.request("eth_getBlockByNumber", ("0x1", true)).await?; @@ -324,7 +340,7 @@ async fn test_rocksdb_txs_across_blocks() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -409,7 +425,7 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -417,7 +433,7 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { let signer = wallets[0].clone(); // Inject tx but do NOT mine - let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer).await; + let raw_tx = TransactionTestContext::transfer_tx_bytes(chain_id, signer.clone()).await; let tx_hash = nodes[0].rpc.inject_tx(raw_tx).await?; // Verify tx is in pending pool via RPC @@ -442,6 +458,14 @@ async fn test_rocksdb_pending_tx_not_in_storage() -> Result<()> { let payload = nodes[0].advance_block().await?; assert_eq!(payload.block().number(), 1); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer.clone(), 1).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 2); + // Poll until tx appears in RocksDB let tx_number = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash).await; assert_eq!(tx_number, 0, "First tx should have tx_number 0"); @@ -473,7 +497,7 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) 
+ .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -495,10 +519,6 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let block1_hash = payload1.block().hash(); assert_eq!(payload1.block().number(), 1); - // Poll until tx1 appears in RocksDB (ensures persistence happened) - let tx_number1 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; - assert_eq!(tx_number1, 0, "First tx should have tx_number 0"); - // Mine block 2 with transaction from signer1 (nonce 1) let raw_tx2 = TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer1.clone(), 1).await; @@ -508,6 +528,10 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let payload2 = nodes[0].advance_block().await?; assert_eq!(payload2.block().number(), 2); + // The second block triggers the first persistence cycle, which flushes both block 1 and 2. + let tx_number1 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; + assert_eq!(tx_number1, 0, "First tx should have tx_number 0"); + // Poll until tx2 appears in RocksDB let tx_number2 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash2).await; assert_eq!(tx_number2, 1, "Second tx should have tx_number 1"); @@ -521,6 +545,14 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let payload3 = nodes[0].advance_block().await?; assert_eq!(payload3.block().number(), 3); + let flush_tx = + TransactionTestContext::transfer_tx_bytes_with_nonce(chain_id, signer1.clone(), 3).await; + let flush_tx_hash = nodes[0].rpc.inject_tx(flush_tx).await?; + wait_for_pending_tx(&client, flush_tx_hash).await; + + let flush_payload = nodes[0].advance_block().await?; + assert_eq!(flush_payload.block().number(), 4); + // Poll until tx3 appears in RocksDB let tx_number3 = poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; assert_eq!(tx_number3, 2, "Third tx should have tx_number 2"); @@ -532,7 +564,7 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let alt_tx_hash = 
nodes[0].rpc.inject_tx(raw_alt_tx).await?; wait_for_pending_tx(&client, alt_tx_hash).await; - // Build an alternate payload (this builds on top of the current head, i.e., block 3) + // Build an alternate payload on top of the current flushed head. // But we want to reorg back to block 1, so we'll use the payload and then FCU to it let alt_payload = nodes[0].new_payload().await?; let alt_block_hash = nodes[0].submit_payload(alt_payload.clone()).await?; @@ -550,8 +582,8 @@ async fn test_rocksdb_reorg_unwind() -> Result<()> { let latest: Option = client.request("eth_getBlockByNumber", ("latest", false)).await?; let latest = latest.expect("Latest block should exist"); - // The alt block is at height 4 (on top of block 3) - assert!(latest.header.number >= 3, "Should be at height >= 3 after operation"); + // The alt block is built on top of the flushed canonical head. + assert!(latest.header.number >= 4, "Should be at height >= 4 after operation"); // tx1 from block 1 should still be there let tx1: Option = client.request("eth_getTransactionByHash", [tx_hash1]).await?; @@ -596,7 +628,7 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .build() .await?; @@ -621,8 +653,6 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload1 = nodes[0].advance_block().await?; assert_eq!(payload1.block().number(), 1); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash1).await; - // Record state after block 1 let balance_at_1: U256 = client.request("eth_getBalance", (sender, "0x1")).await?; let nonce_at_1: U256 = client.request("eth_getTransactionCount", (sender, "0x1")).await?; @@ -637,8 +667,6 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload2 = nodes[0].advance_block().await?; 
assert_eq!(payload2.block().number(), 2); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash2).await; - let balance_at_2: U256 = client.request("eth_getBalance", (sender, "0x2")).await?; let nonce_at_2: U256 = client.request("eth_getTransactionCount", (sender, "0x2")).await?; assert!(balance_at_2 < balance_at_1, "Balance should decrease further after second tx"); @@ -652,18 +680,14 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { let payload3 = nodes[0].advance_block().await?; assert_eq!(payload3.block().number(), 3); - poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; - let balance_at_3: U256 = client.request("eth_getBalance", (sender, "0x3")).await?; let nonce_at_3: U256 = client.request("eth_getTransactionCount", (sender, "0x3")).await?; assert!(balance_at_3 < balance_at_2, "Balance should decrease further after third tx"); assert_eq!(nonce_at_3, U256::from(3), "Nonce should be 3 after third tx"); // Mine additional blocks to push blocks 1-3 out of the in-memory overlay. - // With persistence_threshold=0 and memory_block_buffer_target=0, each new block - // triggers persistence up to `head` followed by in-memory eviction. Mining several - // more blocks ensures the engine loop has completed at least one full - // persist-then-evict cycle covering blocks 1-3. + // With a persistence threshold of 1, every second block triggers a flush, so a few extra + // blocks are enough to durably persist and evict the earlier history we want to query. // Each block needs a transaction because the payload builder requires non-empty payloads. 
for nonce in 3..8u64 { let raw_tx = @@ -673,6 +697,7 @@ async fn test_rocksdb_historical_account_queries() -> Result<()> { wait_for_pending_tx(&client, tx_hash).await; nodes[0].advance_block().await?; } + poll_tx_in_rocksdb(&nodes[0].inner.provider, tx_hash3).await; // Allow the engine loop to process the persistence completions tokio::time::sleep(Duration::from_millis(500)).await; @@ -743,7 +768,7 @@ async fn test_rocksdb_account_history_pruning() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .with_node_config_modifier(|mut config| { config.pruning.account_history_distance = Some(PRUNE_DISTANCE); config.pruning.minimum_distance = Some(PRUNE_DISTANCE); @@ -840,7 +865,7 @@ async fn test_rocksdb_storage_history_pruning() -> Result<()> { test_attributes_generator, ) .with_storage_v2() - .with_tree_config_modifier(|config| config.with_persistence_threshold(0)) + .with_tree_config_modifier(|config| config.with_persistence_threshold(1)) .with_node_config_modifier(|mut config| { config.pruning.storage_history_distance = Some(PRUNE_DISTANCE); config.pruning.minimum_distance = Some(PRUNE_DISTANCE); @@ -912,10 +937,6 @@ async fn test_rocksdb_storage_history_pruning() -> Result<()> { let payload1 = nodes[0].advance_block().await?; assert_eq!(payload1.block().number(), 1); - poll_tx_in_rocksdb(&nodes[0].inner.provider, deploy_hash).await; - - // Let the persistence cycle complete before the next block (same cadence as the loop below) - tokio::time::sleep(Duration::from_millis(300)).await; // Get the deployed contract address from the receipt let receipt: Option = @@ -965,6 +986,10 @@ async fn test_rocksdb_storage_history_pruning() -> Result<()> { assert_eq!(payload.block().number(), block_num); last_tx_hash = tx_hash; + if nonce == 1 { + poll_tx_in_rocksdb(&nodes[0].inner.provider, deploy_hash).await; + } + // 
Let the persistence cycle complete before the next block tokio::time::sleep(Duration::from_millis(300)).await; } diff --git a/crates/engine/primitives/Cargo.toml b/crates/engine/primitives/Cargo.toml index aca33f4aff6..128f777cb38 100644 --- a/crates/engine/primitives/Cargo.toml +++ b/crates/engine/primitives/Cargo.toml @@ -37,6 +37,9 @@ auto_impl.workspace = true serde.workspace = true thiserror.workspace = true +[dev-dependencies] +alloy-primitives = { workspace = true, features = ["getrandom"] } + [features] default = ["std"] trie-debug = [] diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index 2607d62d205..38c2daf5e91 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -6,12 +6,33 @@ use core::time::Duration; /// Triggers persistence when the number of canonical blocks in memory exceeds this threshold. pub const DEFAULT_PERSISTENCE_THRESHOLD: u64 = 2; -/// Maximum canonical-minus-persisted gap before engine API processing is stalled. -pub const DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD: u64 = 16; +/// Maximum number of consecutive canonical blocks whose non-trie outputs may be persisted ahead +/// of trie persistence. +pub const DEFAULT_DEFERRED_TRIE_BLOCKS: u64 = 0; /// How close to the canonical head we persist blocks. pub const DEFAULT_MEMORY_BLOCK_BUFFER_TARGET: u64 = 0; +/// Derives the default canonical-minus-persisted gap that triggers backpressure. +pub const fn default_persistence_backpressure_threshold( + persistence_threshold: u64, + memory_block_buffer_target: u64, +) -> u64 { + let threshold = 2 * (persistence_threshold + memory_block_buffer_target); + if threshold < 16 { + 16 + } else { + threshold + } +} + +/// Maximum canonical-minus-persisted gap before engine API processing is stalled. 
+pub const DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD: u64 = + default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ); + /// The size of proof targets chunk to spawn in one multiproof calculation. pub const DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE: usize = 5; @@ -60,6 +81,17 @@ const fn assert_backpressure_threshold_invariant( ); } +const fn assert_state_masking_invariant( + persistence_threshold: u64, + num_state_masking_blocks: u64, + memory_block_buffer_target: u64, +) { + debug_assert!( + num_state_masking_blocks + memory_block_buffer_target < persistence_threshold, + "num_state_masking_blocks + memory_block_buffer_target must be less than persistence_threshold", + ); +} + const fn default_cross_block_cache_size() -> usize { if cfg!(test) { 1024 * 1024 // 1 MB in tests @@ -93,6 +125,9 @@ pub struct TreeConfig { /// Maximum number of blocks to be kept only in memory without triggering /// persistence. persistence_threshold: u64, + /// Number of persisted blocks whose state/trie writes are masked instead of being durably + /// written in the current cycle. + num_state_masking_blocks: u64, /// How close to the canonical head we persist blocks. Represents the ideal /// number of most recent blocks to keep in memory for quick access and reorgs. 
/// @@ -204,14 +239,24 @@ pub struct TreeConfig { impl Default for TreeConfig { fn default() -> Self { + let persistence_backpressure_threshold = default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ); assert_backpressure_threshold_invariant( DEFAULT_PERSISTENCE_THRESHOLD, - DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold, + ); + assert_state_masking_invariant( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_DEFERRED_TRIE_BLOCKS, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, ); Self { persistence_threshold: DEFAULT_PERSISTENCE_THRESHOLD, + num_state_masking_blocks: DEFAULT_DEFERRED_TRIE_BLOCKS, memory_block_buffer_target: DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, - persistence_backpressure_threshold: DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold, block_buffer_limit: DEFAULT_BLOCK_BUFFER_LIMIT, max_invalid_header_cache_length: DEFAULT_MAX_INVALID_HEADER_CACHE_LENGTH, invalid_header_hit_eviction_threshold: DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, @@ -253,6 +298,7 @@ impl TreeConfig { #[expect(clippy::too_many_arguments)] pub const fn new( persistence_threshold: u64, + num_state_masking_blocks: u64, memory_block_buffer_target: u64, persistence_backpressure_threshold: u64, block_buffer_limit: u32, @@ -285,8 +331,14 @@ impl TreeConfig { persistence_threshold, persistence_backpressure_threshold, ); + assert_state_masking_invariant( + persistence_threshold, + num_state_masking_blocks, + memory_block_buffer_target, + ); Self { persistence_threshold, + num_state_masking_blocks, memory_block_buffer_target, persistence_backpressure_threshold, block_buffer_limit, @@ -329,6 +381,11 @@ impl TreeConfig { self.persistence_threshold } + /// Return the number of persisted blocks whose state/trie writes are masked. + pub const fn num_state_masking_blocks(&self) -> u64 { + self.num_state_masking_blocks + } + /// Return the memory block buffer target. 
pub const fn memory_block_buffer_target(&self) -> u64 { self.memory_block_buffer_target @@ -447,6 +504,22 @@ impl TreeConfig { self.persistence_threshold, self.persistence_backpressure_threshold, ); + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); + self + } + + /// Setter for the number of persisted blocks whose state/trie writes are masked. + pub const fn with_num_state_masking_blocks(mut self, num_state_masking_blocks: u64) -> Self { + self.num_state_masking_blocks = num_state_masking_blocks; + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); self } @@ -456,6 +529,11 @@ impl TreeConfig { memory_block_buffer_target: u64, ) -> Self { self.memory_block_buffer_target = memory_block_buffer_target; + assert_state_masking_invariant( + self.persistence_threshold, + self.num_state_masking_blocks, + self.memory_block_buffer_target, + ); self } @@ -765,7 +843,26 @@ impl TreeConfig { #[cfg(test)] mod tests { - use super::TreeConfig; + use super::{ + default_persistence_backpressure_threshold, TreeConfig, DEFAULT_DEFERRED_TRIE_BLOCKS, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, DEFAULT_PERSISTENCE_THRESHOLD, + }; + + #[test] + fn default_thresholds_use_derived_backpressure_threshold() { + let config = TreeConfig::default(); + + assert_eq!(config.persistence_threshold(), DEFAULT_PERSISTENCE_THRESHOLD); + assert_eq!(config.num_state_masking_blocks(), DEFAULT_DEFERRED_TRIE_BLOCKS); + assert_eq!(config.memory_block_buffer_target(), DEFAULT_MEMORY_BLOCK_BUFFER_TARGET); + assert_eq!( + config.persistence_backpressure_threshold(), + default_persistence_backpressure_threshold( + DEFAULT_PERSISTENCE_THRESHOLD, + DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, + ) + ); + } #[test] #[should_panic( @@ -776,4 +873,15 @@ mod tests { .with_persistence_threshold(4) .with_persistence_backpressure_threshold(4); } + + #[test] + 
#[should_panic( + expected = "num_state_masking_blocks + memory_block_buffer_target must be less than persistence_threshold" + )] + fn rejects_state_masking_window_at_or_above_persistence_threshold() { + let _ = TreeConfig::default() + .with_persistence_threshold(4) + .with_num_state_masking_blocks(2) + .with_memory_block_buffer_target(2); + } } diff --git a/crates/engine/tree/src/persistence.rs b/crates/engine/tree/src/persistence.rs index e526a41c7ee..0bbddf36f78 100644 --- a/crates/engine/tree/src/persistence.rs +++ b/crates/engine/tree/src/persistence.rs @@ -1,4 +1,5 @@ use crate::metrics::PersistenceMetrics; +use alloy_consensus::BlockHeader; use alloy_eips::BlockNumHash; use crossbeam_channel::Sender as CrossbeamSender; use reth_chain_state::ExecutedBlock; @@ -7,10 +8,13 @@ use reth_ethereum_primitives::EthPrimitives; use reth_primitives_traits::{FastInstant as Instant, NodePrimitives}; use reth_provider::{ providers::ProviderNodeTypes, BlockExecutionWriter, BlockHashReader, ChainStateBlockWriter, - DBProvider, DatabaseProviderFactory, ProviderFactory, SaveBlocksMode, + DBProvider, DatabaseProviderFactory, ProviderFactory, SaveBlocksMode, SaveBlocksPlan, + SaveBlocksPlanStep, StageCheckpointReader, StageCheckpointWriter, }; use reth_prune::{PrunerError, PrunerWithFactory}; -use reth_stages_api::{MetricEvent, MetricEventsSender}; +use reth_stages_api::{ + FinishCheckpoint, MetricEvent, MetricEventsSender, StageCheckpoint, StageId, +}; use reth_tasks::spawn_os_thread; use std::{ sync::{ @@ -26,8 +30,13 @@ use tracing::{debug, error, instrument}; /// Unified result of any persistence operation. #[derive(Debug)] pub struct PersistenceResult { - /// The last block that was persisted, if any. + /// The highest block whose non-state/trie outputs are persisted, if any. pub last_block: Option, + /// The highest block whose state/trie data is fully persisted, if known. 
+ /// + /// When this lags behind [`Self::last_block`], callers must retain the suffix + /// above it in memory so trie-backed operations can still unwind from that point. + pub last_state_trie_block: Option, /// The commit duration, only available for save-blocks operations. pub commit_duration: Option, } @@ -95,15 +104,15 @@ where // If the receiver errors then senders have disconnected, so the loop should then end. while let Ok(action) = self.incoming.recv() { match action { - PersistenceAction::RemoveBlocksAbove(new_tip_num, sender) => { - let last_block = self.on_remove_blocks_above(new_tip_num)?; + PersistenceAction::RemoveBlocksAbove(new_tip_num, trie_state_blocks, sender) => { + let result = self.on_remove_blocks_above(new_tip_num, trie_state_blocks)?; // send new sync metrics based on removed blocks let _ = self.sync_metrics_tx.send(MetricEvent::SyncHeight { height: new_tip_num }); - let _ = sender.send(PersistenceResult { last_block, commit_duration: None }); + let _ = sender.send(result); } - PersistenceAction::SaveBlocks(blocks, sender) => { - let result = self.on_save_blocks(blocks)?; + PersistenceAction::SaveBlocks(plan, sender) => { + let result = self.on_save_blocks(plan)?; let result_number = result.last_block.map(|b| b.number); let _ = sender.send(result); @@ -131,28 +140,115 @@ where fn on_remove_blocks_above( &self, new_tip_num: u64, - ) -> Result, PersistenceError> { + trie_state_blocks: Vec>, + ) -> Result { debug!(target: "engine::persistence", ?new_tip_num, "Removing blocks"); let start_time = Instant::now(); let provider_rw = self.provider.database_provider_rw()?; let new_tip_hash = provider_rw.block_hash(new_tip_num)?; + + let finish_checkpoint = provider_rw.get_stage_checkpoint(StageId::Finish)?; + if let Some(checkpoint) = finish_checkpoint.as_ref() { + let partial_state_trie = checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number); + + if new_tip_num > 
partial_state_trie { + let expected_start = partial_state_trie + 1; + let expected_len = (new_tip_num - partial_state_trie) as usize; + if trie_state_blocks.len() != expected_len { + return Err(ProviderError::HeaderNotFound(expected_start.into()).into()) + } + + for (index, block) in trie_state_blocks.iter().enumerate() { + let expected_number = expected_start + index as u64; + let num_hash = block.recovered_block().num_hash(); + if num_hash.number != expected_number { + return Err(ProviderError::HeaderNotFound(expected_number.into()).into()) + } + + let expected_hash = provider_rw + .block_hash(expected_number)? + .ok_or_else(|| ProviderError::HeaderNotFound(expected_number.into()))?; + if num_hash.hash != expected_hash { + return Err(ProviderError::BlockHashNotFound(expected_hash).into()) + } + + if index == 0 { + let expected_parent = + provider_rw.block_hash(partial_state_trie)?.ok_or_else(|| { + ProviderError::HeaderNotFound(partial_state_trie.into()) + })?; + if block.recovered_block().parent_num_hash().hash != expected_parent { + return Err(ProviderError::BlockHashNotFound(expected_parent).into()) + } + } else if block.recovered_block().parent_num_hash().hash != + trie_state_blocks[index - 1].recovered_block().num_hash().hash + { + return Err(ProviderError::HeaderNotFound(expected_number.into()).into()) + } + } + + let new_tip_hash = new_tip_hash + .ok_or_else(|| ProviderError::HeaderNotFound(new_tip_num.into()))?; + if trie_state_blocks + .last() + .is_none_or(|block| block.recovered_block().hash() != new_tip_hash) + { + return Err(ProviderError::BlockHashNotFound(new_tip_hash).into()) + } + + let catchup_len = trie_state_blocks.len(); + provider_rw.save_blocks( + &SaveBlocksPlan::new( + trie_state_blocks, + vec![SaveBlocksPlanStep::new( + 0..catchup_len, + Some(catchup_len..catchup_len), + false, + )], + ), + SaveBlocksMode::Full, + )?; + provider_rw.save_stage_checkpoint( + StageId::Finish, + 
StageCheckpoint::new(checkpoint.block_number).with_finish_stage_checkpoint( + FinishCheckpoint { partial_state_trie: Some(new_tip_num) }, + ), + )?; + } + } + provider_rw.remove_block_and_execution_above(new_tip_num)?; + let last_state_trie_block = + provider_rw.get_stage_checkpoint(StageId::Finish)?.map(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number) + }); provider_rw.commit()?; debug!(target: "engine::persistence", ?new_tip_num, ?new_tip_hash, "Removed blocks from disk"); self.metrics.remove_blocks_above_duration_seconds.record(start_time.elapsed()); - Ok(new_tip_hash.map(|hash| BlockNumHash { hash, number: new_tip_num })) + Ok(PersistenceResult { + last_block: new_tip_hash.map(|hash| BlockNumHash { hash, number: new_tip_num }), + last_state_trie_block, + commit_duration: None, + }) } - #[instrument(level = "debug", target = "engine::persistence", skip_all, fields(block_count = blocks.len()))] + #[instrument(level = "debug", target = "engine::persistence", skip_all, fields(block_count = plan.blocks.len()))] fn on_save_blocks( &mut self, - blocks: Vec>, + plan: SaveBlocksPlan, ) -> Result { - let first_block = blocks.first().map(|b| b.recovered_block.num_hash()); - let last_block = blocks.last().map(|b| b.recovered_block.num_hash()); - let block_count = blocks.len(); + let first_block = plan.blocks.first().map(|block| block.recovered_block().num_hash()); + let last_block = plan.last_block(); + let block_count = plan.blocks.len(); + let mut last_state_trie_block = None; let pending_finalized = self.pending_finalized_block.take(); let pending_safe = self.pending_safe_block.take(); @@ -161,19 +257,27 @@ where let start_time = Instant::now(); - if let Some(last) = last_block { + if let Some(last_block) = last_block { let provider_rw = self.provider.database_provider_rw()?; - provider_rw.save_blocks(blocks, SaveBlocksMode::Full)?; + provider_rw.save_blocks(&plan, 
SaveBlocksMode::Full)?; + last_state_trie_block = provider_rw + .get_stage_checkpoint(StageId::Finish)? + .and_then(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + }) + .or(Some(last_block.number)); if let Some(finalized) = pending_finalized { - provider_rw.save_finalized_block_number(finalized.min(last.number))?; - if finalized > last.number { + provider_rw.save_finalized_block_number(finalized.min(last_block.number))?; + if finalized > last_block.number { self.pending_finalized_block = Some(finalized); } } if let Some(safe) = pending_safe { - provider_rw.save_safe_block_number(safe.min(last.number))?; - if safe > last.number { + provider_rw.save_safe_block_number(safe.min(last_block.number))?; + if safe > last_block.number { self.pending_safe_block = Some(safe); } } @@ -186,7 +290,7 @@ where self.metrics.save_blocks_batch_size.record(block_count as f64); self.metrics.save_blocks_duration_seconds.record(elapsed); - Ok(PersistenceResult { last_block, commit_duration: Some(elapsed) }) + Ok(PersistenceResult { last_block, last_state_trie_block, commit_duration: Some(elapsed) }) } fn maybe_run_pruner(&mut self, block_number: u64) -> Result<(), PersistenceError> { @@ -224,15 +328,19 @@ pub enum PersistenceAction { /// The section of tree state that should be persisted. These blocks are expected in order of /// increasing block number. /// - /// First, header, transaction, and receipt-related data should be written to static files. - /// Then the execution history-related data will be written to the database. - SaveBlocks(Vec>, CrossbeamSender), + /// First, header, transaction, and receipt-related data should be written to static files for + /// the deferred trie region. Then the execution history-related data will be written to the + /// database, while trie catchup is persisted for the prefix. + SaveBlocks(SaveBlocksPlan, CrossbeamSender), /// Removes block data above the given block number from the database. 
/// + /// If the durable trie frontier is below the new tip, the supplied blocks are first used to + /// catch trie/state persistence up to the new tip before the unwind removes the old suffix. + /// /// This will first update checkpoints from the database, then remove actual block data from /// static files. - RemoveBlocksAbove(u64, CrossbeamSender), + RemoveBlocksAbove(u64, Vec>, CrossbeamSender), /// Update the persisted finalized block on disk SaveFinalizedBlock(u64), @@ -310,10 +418,10 @@ impl PersistenceHandle { /// If there are no blocks to persist, then `None` is sent in the sender. pub fn save_blocks( &self, - blocks: Vec>, + plan: SaveBlocksPlan, tx: CrossbeamSender, ) -> Result<(), SendError>> { - self.send_action(PersistenceAction::SaveBlocks(blocks, tx)) + self.send_action(PersistenceAction::SaveBlocks(plan, tx)) } /// Queues the finalized block number to be persisted on disk. @@ -341,14 +449,18 @@ impl PersistenceHandle { /// Tells the persistence service to remove blocks above a certain block number. The removed /// blocks are returned by the service. /// + /// `trie_state_blocks` must contain canonical in-memory blocks from the current trie frontier + + /// 1 through `block_num`, if that frontier is below `block_num`. + /// /// When the operation completes, the new tip hash is returned in the receiver end of the sender /// argument. 
pub fn remove_blocks_above( &self, block_num: u64, + trie_state_blocks: Vec>, tx: CrossbeamSender, ) -> Result<(), SendError>> { - self.send_action(PersistenceAction::RemoveBlocksAbove(block_num, tx)) + self.send_action(PersistenceAction::RemoveBlocksAbove(block_num, trie_state_blocks, tx)) } } @@ -377,12 +489,12 @@ impl Drop for ServiceGuard { mod tests { use super::*; use alloy_primitives::{B256, U256}; - use reth_chain_state::test_utils::TestBlockBuilder; + use reth_chain_state::{test_utils::TestBlockBuilder, ExecutedBlock}; use reth_exex_types::FinishedExExHeight; use reth_provider::{ providers::{ProviderFactoryBuilder, ReadOnlyConfig}, test_utils::{create_test_provider_factory, MockNodeTypes}, - AccountReader, ChainSpecProvider, HeaderProvider, StorageSettingsCache, + AccountReader, ChainSpecProvider, HeaderProvider, SaveBlocksPlanStep, StorageSettingsCache, TryIntoHistoricalStateProvider, }; use reth_prune::Pruner; @@ -391,6 +503,13 @@ mod tests { fn default_persistence_handle() -> PersistenceHandle { let provider = create_test_provider_factory(); + persistence_handle(provider) + } + + fn persistence_handle(provider: ProviderFactory) -> PersistenceHandle + where + N: ProviderNodeTypes, + { let (_finished_exex_height_tx, finished_exex_height_rx) = tokio::sync::watch::channel(FinishedExExHeight::NoExExs); @@ -401,18 +520,31 @@ mod tests { PersistenceHandle::::spawn_service(provider, pruner, sync_metrics_tx) } + fn full_save_plan(blocks: Vec>) -> SaveBlocksPlan { + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + } + #[test] fn test_save_blocks_empty() { reth_tracing::init_test_tracing(); let handle = default_persistence_handle(); - let blocks = vec![]; + let blocks = full_save_plan(vec![]); let (tx, rx) = crossbeam_channel::bounded(1); handle.save_blocks(blocks, tx).unwrap(); let result = rx.recv().unwrap(); 
assert!(result.last_block.is_none()); + assert!(result.last_state_trie_block.is_none()); } #[test] @@ -425,14 +557,16 @@ mod tests { test_block_builder.get_executed_block_with_number(block_number, B256::random()); let block_hash = executed.recovered_block().hash(); - let blocks = vec![executed]; + let blocks = full_save_plan(vec![executed]); let (tx, rx) = crossbeam_channel::bounded(1); handle.save_blocks(blocks, tx).unwrap(); let result = rx.recv_timeout(std::time::Duration::from_secs(10)).expect("test timed out"); - assert_eq!(block_hash, result.last_block.unwrap().hash); + let last_block = result.last_block.unwrap(); + assert_eq!(block_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } #[test] @@ -445,9 +579,11 @@ mod tests { let last_hash = blocks.last().unwrap().recovered_block().hash(); let (tx, rx) = crossbeam_channel::bounded(1); - handle.save_blocks(blocks, tx).unwrap(); + handle.save_blocks(full_save_plan(blocks), tx).unwrap(); let result = rx.recv().unwrap(); - assert_eq!(last_hash, result.last_block.unwrap().hash); + let last_block = result.last_block.unwrap(); + assert_eq!(last_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } #[test] @@ -462,13 +598,58 @@ mod tests { let last_hash = blocks.last().unwrap().recovered_block().hash(); let (tx, rx) = crossbeam_channel::bounded(1); - handle.save_blocks(blocks, tx).unwrap(); + handle.save_blocks(full_save_plan(blocks), tx).unwrap(); let result = rx.recv().unwrap(); - assert_eq!(last_hash, result.last_block.unwrap().hash); + let last_block = result.last_block.unwrap(); + assert_eq!(last_hash, last_block.hash); + assert_eq!(result.last_state_trie_block, Some(last_block.number)); } } + #[test] + fn test_remove_blocks_above_catches_up_partial_state_trie() { + reth_tracing::init_test_tracing(); + + let provider = create_test_provider_factory(); + let mut test_block_builder = TestBlockBuilder::eth().with_state(); + let blocks 
= test_block_builder.get_executed_blocks(0..4).collect::>(); + let trie_state_blocks = vec![blocks[2].clone()]; + + let provider_rw = provider.database_provider_rw().unwrap(); + provider_rw + .save_blocks( + &SaveBlocksPlan::new( + blocks, + vec![ + SaveBlocksPlanStep::new(0..2, Some(2..4), true), + SaveBlocksPlanStep::new(2..4, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let handle = persistence_handle(provider.clone()); + let (tx, rx) = crossbeam_channel::bounded(1); + + handle.remove_blocks_above(2, trie_state_blocks, tx).unwrap(); + + let result = rx.recv_timeout(std::time::Duration::from_secs(10)).expect("test timed out"); + let last_block = result.last_block.unwrap(); + assert_eq!(last_block.number, 2); + assert_eq!(result.last_state_trie_block, Some(2)); + + let finish_checkpoint = + provider.provider().unwrap().get_stage_checkpoint(StageId::Finish).unwrap().unwrap(); + assert_eq!(finish_checkpoint.block_number, 2); + assert_eq!( + finish_checkpoint.finish_stage_checkpoint().unwrap().partial_state_trie, + Some(2) + ); + } + /// Verifies that committing `save_blocks` history before running the pruner /// prevents the pruner from overwriting new entries. 
/// @@ -557,7 +738,7 @@ mod tests { { let provider_rw = provider_factory.database_provider_rw().unwrap(); - provider_rw.save_blocks(blocks_a, SaveBlocksMode::Full).unwrap(); + provider_rw.save_blocks(&full_save_plan(blocks_a), SaveBlocksMode::Full).unwrap(); provider_rw.commit().unwrap(); } @@ -614,7 +795,12 @@ mod tests { provider_rw.commit().unwrap(); let provider_rw = pf.database_provider_rw().unwrap(); - provider_rw.save_blocks(vec![block_b2], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&block_b2).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); }); diff --git a/crates/engine/tree/src/tree/mod.rs b/crates/engine/tree/src/tree/mod.rs index c39b20262e9..dffc7989ea9 100644 --- a/crates/engine/tree/src/tree/mod.rs +++ b/crates/engine/tree/src/tree/mod.rs @@ -30,9 +30,9 @@ use reth_primitives_traits::{ }; use reth_provider::{ BlockExecutionOutput, BlockExecutionResult, BlockReader, ChangeSetReader, - DatabaseProviderFactory, HashedPostStateProvider, ProviderError, StageCheckpointReader, - StateProviderBox, StateProviderFactory, StateReader, StorageChangeSetReader, - StorageSettingsCache, TransactionVariant, + DatabaseProviderFactory, HashedPostStateProvider, ProviderError, SaveBlocksPlan, + SaveBlocksPlanStep, StageCheckpointReader, StateProviderBox, StateProviderFactory, StateReader, + StorageChangeSetReader, StorageSettingsCache, TransactionVariant, }; use reth_revm::database::StateProviderDatabase; use reth_stages_api::ControlFlow; @@ -433,6 +433,7 @@ where let persistence_state = PersistenceState { last_persisted_block: BlockNumHash::new(best_block_number, header.hash()), + last_state_trie_persisted_block: BlockNumHash::new(best_block_number, header.hash()), rx: None, }; @@ -1350,35 +1351,78 @@ where /// Helper method to remove blocks and set the persistence state. This ensures we keep track of /// the current persistence action while we're removing blocks. 
fn remove_blocks(&mut self, new_tip_num: u64) { - debug!(target: "engine::tree", ?new_tip_num, last_persisted_block_number=?self.persistence_state.last_persisted_block.number, "Removing blocks using persistence task"); + debug!(target: "engine::tree", ?new_tip_num, last_persisted_block=?self.persistence_state.last_persisted_block.number, "Removing blocks using persistence task"); if new_tip_num < self.persistence_state.last_persisted_block.number { debug!(target: "engine::tree", ?new_tip_num, "Starting remove blocks job"); + let Some(trie_state_blocks) = self.remove_blocks_trie_state_catchup_blocks(new_tip_num) + else { + warn!( + target: "engine::tree", + ?new_tip_num, + last_state_trie_persisted_block = ?self.persistence_state.last_state_trie_persisted_block.number, + "Cannot remove blocks: missing in-memory block needed for trie catchup" + ); + return + }; let (tx, rx) = crossbeam_channel::bounded(1); - let _ = self.persistence.remove_blocks_above(new_tip_num, tx); + let _ = self.persistence.remove_blocks_above(new_tip_num, trie_state_blocks, tx); self.persistence_state.start_remove(new_tip_num, rx); } } + /// Returns canonical in-memory blocks whose state/trie data must be materialized before an + /// on-disk removal can unwind from the persisted block-data tip down to `new_tip_num`. 
+ fn remove_blocks_trie_state_catchup_blocks( + &self, + new_tip_num: u64, + ) -> Option>> { + let last_state_trie_persisted_block_number = + self.persistence_state.last_state_trie_persisted_block.number; + if new_tip_num <= last_state_trie_persisted_block_number { + return Some(Vec::new()) + } + + let mut blocks = + Vec::with_capacity((new_tip_num - last_state_trie_persisted_block_number) as usize); + for block_number in last_state_trie_persisted_block_number + 1..=new_tip_num { + let Some(block_state) = self.canonical_in_memory_state.state_by_number(block_number) + else { + debug!( + target: "engine::tree", + block_number, + ?new_tip_num, + ?last_state_trie_persisted_block_number, + "missing in-memory block needed for remove-blocks trie catchup" + ); + return None + }; + blocks.push(block_state.block()); + } + + Some(blocks) + } + /// Helper method to save blocks and set the persistence state. This ensures we keep track of /// the current persistence action while we're saving blocks. - fn persist_blocks(&mut self, blocks_to_persist: Vec>) { - if blocks_to_persist.is_empty() { + fn persist_blocks(&mut self, plan: SaveBlocksPlan) { + if plan.is_empty() { debug!(target: "engine::tree", "Returned empty set of blocks to persist"); return } - // NOTE: checked non-empty above - let highest_num_hash = blocks_to_persist - .iter() - .max_by_key(|block| block.recovered_block().number()) - .map(|b| b.recovered_block().num_hash()) - .expect("Checked non-empty persisting blocks"); + let last_block = plan.last_block().expect("checked non-empty persisting blocks"); - debug!(target: "engine::tree", count=blocks_to_persist.len(), blocks = ?blocks_to_persist.iter().map(|block| block.recovered_block().num_hash()).collect::>(), "Persisting blocks"); + debug!( + target: "engine::tree", + count = plan.blocks.len(), + steps = ?plan.steps, + blocks = ?plan.blocks.iter().map(|block| block.recovered_block().num_hash()).collect::>(), + "Persisting blocks" + ); let (tx, rx) = 
crossbeam_channel::bounded(1); - let _ = self.persistence.save_blocks(blocks_to_persist, tx); + let _ = self.persistence.save_blocks(plan, tx); - self.persistence_state.start_save(highest_num_hash, rx); + self.persistence_state.start_save(last_block, rx); } /// Triggers new persistence actions if no persistence task is currently in progress. @@ -1390,9 +1434,8 @@ where if let Some(new_tip_num) = self.find_disk_reorg()? { self.remove_blocks(new_tip_num) } else if self.should_persist() { - let blocks_to_persist = - self.get_canonical_blocks_to_persist(PersistTarget::Threshold)?; - self.persist_blocks(blocks_to_persist); + let plan = self.get_save_blocks_plan(PersistTarget::Threshold)?; + self.persist_blocks(plan); } } @@ -1423,15 +1466,15 @@ where self.on_persistence_complete(result, start_time)?; } - let blocks_to_persist = self.get_canonical_blocks_to_persist(PersistTarget::Head)?; + let plan = self.get_save_blocks_plan(PersistTarget::Head)?; - if blocks_to_persist.is_empty() { + if plan.is_empty() { debug!(target: "engine::tree", "persistence complete, signaling termination"); return Ok(()) } - debug!(target: "engine::tree", count = blocks_to_persist.len(), "persisting remaining blocks before shutdown"); - self.persist_blocks(blocks_to_persist); + debug!(target: "engine::tree", count = plan.blocks.len(), "persisting remaining blocks before shutdown"); + self.persist_blocks(plan); } } @@ -1467,25 +1510,25 @@ where ) -> Result<(), AdvancePersistenceError> { self.metrics.engine.persistence_duration.record(start_time.elapsed()); - let commit_duration = result.commit_duration; - let Some(BlockNumHash { - hash: last_persisted_block_hash, - number: last_persisted_block_number, - }) = result.last_block + let PersistenceResult { last_block, last_state_trie_block, commit_duration } = result; + let Some(BlockNumHash { hash: last_block_hash, number: last_block_number }) = last_block else { // if this happened, then we persisted no blocks because we sent an empty vec of blocks 
warn!(target: "engine::tree", "Persistence task completed but did not persist any blocks"); return Ok(()) }; - debug!(target: "engine::tree", ?last_persisted_block_hash, ?last_persisted_block_number, elapsed=?start_time.elapsed(), "Finished persisting, calling finish"); - self.persistence_state.finish(last_persisted_block_hash, last_persisted_block_number); + let last_block = BlockNumHash::new(last_block_number, last_block_hash); + let last_state_trie_persisted_block = + self.last_state_trie_persisted_block(last_block, last_state_trie_block)?; + + debug!(target: "engine::tree", ?last_block_hash, ?last_block_number, last_state_trie_persisted_block = last_state_trie_persisted_block.number, elapsed=?start_time.elapsed(), "Finished persisting, calling finish"); + self.persistence_state.finish(last_block, last_state_trie_persisted_block); // Evict trie changesets for blocks below the eviction threshold. // Keep at least CHANGESET_CACHE_RETENTION_BLOCKS from the persisted tip, and also respect // the finalized block if set. 
- let min_threshold = - last_persisted_block_number.saturating_sub(CHANGESET_CACHE_RETENTION_BLOCKS); + let min_threshold = last_block_number.saturating_sub(CHANGESET_CACHE_RETENTION_BLOCKS); let eviction_threshold = if let Some(finalized) = self.canonical_in_memory_state.get_finalized_num_hash() { // Use the minimum of finalized block and retention threshold to be conservative @@ -1496,7 +1539,7 @@ where }; debug!( target: "engine::tree", - last_persisted = last_persisted_block_number, + last_persisted_block = last_block_number, finalized_number = ?self.canonical_in_memory_state.get_finalized_num_hash().map(|f| f.number), eviction_threshold, "Evicting changesets below threshold" @@ -1506,7 +1549,7 @@ where // Invalidate cached overlay since the anchor has changed self.state.tree_state.invalidate_cached_overlay(); - self.on_new_persisted_block()?; + self.on_new_persisted_block(last_state_trie_persisted_block)?; // Re-prepare overlay for the current canonical head with the new anchor. // Spawn a background task to trigger computation so it's ready when the next payload @@ -1517,11 +1560,39 @@ where }); } - self.purge_timing_stats(last_persisted_block_number, commit_duration); + self.purge_timing_stats(last_block_number, commit_duration); Ok(()) } + /// Returns the highest block that can be dropped from memory after persistence completes. + fn last_state_trie_persisted_block( + &self, + last_block: BlockNumHash, + last_state_trie_block: Option, + ) -> ProviderResult { + let Some(last_state_trie_block) = last_state_trie_block else { return Ok(last_block) }; + debug_assert!( + last_state_trie_block <= last_block.number, + "state/trie frontier cannot exceed the last persisted block" + ); + if last_state_trie_block >= last_block.number { + return Ok(last_block) + } + + let hash = self + .canonical_in_memory_state + .hash_by_number(last_state_trie_block) + .map(Ok) + .unwrap_or_else(|| { + self.provider + .block_hash(last_state_trie_block)? 
+ .ok_or_else(|| ProviderError::HeaderNotFound(last_state_trie_block.into())) + })?; + + Ok(BlockNumHash::new(last_state_trie_block, hash)) + } + /// Handles a message from the engine. /// /// Returns `ControlFlow::Break(())` if the engine should terminate. @@ -1840,7 +1911,7 @@ where // update the tracked chain height, after backfill sync both the canonical height and // persisted height are the same self.state.tree_state.set_canonical_head(new_head.num_hash()); - self.persistence_state.finish(new_head.hash(), new_head.number()); + self.persistence_state.finish(new_head.num_hash(), new_head.num_hash()); // update the tracked canonical head self.canonical_in_memory_state.set_canonical_head(new_head); @@ -2076,62 +2147,96 @@ where self.config.persistence_threshold() } - /// Returns a batch of consecutive canonical blocks to persist in the range - /// `(last_persisted_number .. target]`. The expected order is oldest -> newest. - fn get_canonical_blocks_to_persist( + /// Returns the save plan for the next persistence cycle. 
+ fn get_save_blocks_plan( &self, target: PersistTarget, - ) -> Result>, AdvancePersistenceError> { + ) -> Result, AdvancePersistenceError> { // We will calculate the state root using the database, so we need to be sure there are no // changes debug_assert!(!self.persistence_state.in_progress()); - let mut blocks_to_persist = Vec::new(); + let mut blocks = Vec::new(); let mut current_hash = self.state.tree_state.canonical_block_hash(); - let last_persisted_number = self.persistence_state.last_persisted_block.number; + let last_state_trie_persisted_block_number = + self.persistence_state.last_state_trie_persisted_block.number; + let last_persisted_block_number = self.persistence_state.last_persisted_block.number; let canonical_head_number = self.state.tree_state.canonical_block_number(); - - let target_number = match target { - PersistTarget::Head => canonical_head_number, + let last_block_target_number = match target { PersistTarget::Threshold => { canonical_head_number.saturating_sub(self.config.memory_block_buffer_target()) } + PersistTarget::Head => canonical_head_number, }; debug!( target: "engine::tree", ?current_hash, - ?last_persisted_number, + ?last_state_trie_persisted_block_number, + ?last_persisted_block_number, ?canonical_head_number, - ?target_number, - "Returning canonical blocks to persist" + target = ?target, + "Returning save plan" ); while let Some(block) = self.state.tree_state.blocks_by_hash.get(¤t_hash) { - if block.recovered_block().number() <= last_persisted_number { + if block.recovered_block().number() <= last_state_trie_persisted_block_number { break; } - if block.recovered_block().number() <= target_number { - blocks_to_persist.push(block.clone()); + if block.recovered_block().number() <= last_block_target_number { + blocks.push(block.clone()); } current_hash = block.recovered_block().parent_hash(); } // Reverse the order so that the oldest block comes first - blocks_to_persist.reverse(); + blocks.reverse(); + + let 
trie_catchup_block_count = last_persisted_block_number + .saturating_sub(last_state_trie_persisted_block_number) + .min(blocks.len() as u64) as usize; + let persist_rest_block_count = blocks.len().saturating_sub(trie_catchup_block_count); + let state_masking_block_count = + persist_rest_block_count.min(self.config.num_state_masking_blocks() as usize); + let full_persist_block_count = persist_rest_block_count - state_masking_block_count; + let full_persist_start = trie_catchup_block_count; + let state_masking_start = full_persist_start + full_persist_block_count; + let state_masking_range = state_masking_start..blocks.len(); + let mut steps = Vec::new(); + + if trie_catchup_block_count > 0 { + steps.push(SaveBlocksPlanStep::new( + 0..trie_catchup_block_count, + Some(state_masking_range.clone()), + false, + )); + } + if full_persist_block_count > 0 { + steps.push(SaveBlocksPlanStep::new( + full_persist_start..state_masking_start, + Some(state_masking_range.clone()), + true, + )); + } + if state_masking_block_count > 0 { + steps.push(SaveBlocksPlanStep::new(state_masking_range, None, true)); + } - Ok(blocks_to_persist) + Ok(SaveBlocksPlan::new(blocks, steps)) } - /// This clears the blocks from the in-memory tree state that have been persisted to the - /// database. + /// This clears the blocks from the in-memory tree state that no longer need to stay resident + /// after persistence completes. /// - /// This also updates the canonical in-memory state to reflect the newest persisted block - /// height. + /// This also updates the canonical in-memory state to reflect the newest persisted block tip, + /// even if trie persistence only advanced through an earlier block. 
/// /// Assumes that `finish` has been called on the `persistence_state` at least once - fn on_new_persisted_block(&mut self) -> ProviderResult<()> { + fn on_new_persisted_block( + &mut self, + in_memory_persisted_block: BlockNumHash, + ) -> ProviderResult<()> { // If we have an on-disk reorg, we need to handle it first before touching the in-memory // state. if let Some(remove_above) = self.find_disk_reorg()? { @@ -2140,11 +2245,11 @@ where } let finalized = self.state.forkchoice_state_tracker.last_valid_finalized(); - self.remove_before(self.persistence_state.last_persisted_block, finalized)?; - self.canonical_in_memory_state.remove_persisted_blocks(BlockNumHash { - number: self.persistence_state.last_persisted_block.number, - hash: self.persistence_state.last_persisted_block.hash, - }); + self.remove_before(in_memory_persisted_block, finalized)?; + self.canonical_in_memory_state.remove_persisted_blocks_until( + self.persistence_state.last_persisted_block, + in_memory_persisted_block.number, + ); Ok(()) } @@ -3272,9 +3377,26 @@ where &self.state, ) } else { + debug!( + target: "engine::tree", + parent_hash = %state.head_block_hash, + parent_number = head.number(), + parent_state_root = %head.state_root(), + "Payload builder sparse trie sharing disabled" + ); None }; + debug!( + target: "engine::tree", + parent_hash = %state.head_block_hash, + parent_number = head.number(), + parent_state_root = %head.state_root(), + has_execution_cache = cache.is_some(), + has_sparse_trie_handle = trie_handle.is_some(), + "Sending new payload job to payload builder" + ); + // send the payload to the builder and return the receiver for the pending payload // id, initiating payload job is handled asynchronously let pending_payload_id = self.payload_builder.send_new_payload(BuildNewPayload { diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index 3fdd3b6311b..b04c3123b63 100644 --- 
a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -88,6 +88,7 @@ use reth_provider::{ StorageChangeSetReader, StorageSettingsCache, }; use reth_revm::db::{states::bundle_state::BundleRetention, BundleAccount, State}; +use reth_stages_api::StageId; use reth_trie::{trie_cursor::TrieCursorFactory, updates::TrieUpdates, HashedPostState}; use reth_trie_db::ChangesetCache; use reth_trie_parallel::root::{ParallelStateRoot, ParallelStateRootError}; @@ -715,10 +716,11 @@ where if state_root == block.header().state_root() { maybe_state_root = Some((state_root, trie_updates, elapsed)) } else { + let block_state_root = block.header().state_root(); warn!( target: "engine::tree::payload_validator", ?state_root, - block_state_root = ?block.header().state_root(), + ?block_state_root, "State root task returned incorrect state root" ); #[cfg(feature = "trie-debug")] @@ -726,7 +728,7 @@ where block.header().number(), &trie_debug_recorders, ); - state_root_task_failed = true; + std::process::abort(); } } Err(error) => { @@ -2055,6 +2057,65 @@ where state: &EngineApiTreeState, ) -> Option { let (lazy_overlay, anchor_hash) = Self::get_parent_lazy_overlay(parent_hash, state); + let lazy_anchor = lazy_overlay.as_ref().and_then(LazyOverlay::anchor_hash); + let lazy_blocks = lazy_overlay.as_ref().map(LazyOverlay::block_summaries); + let span = debug_span!( + target: "engine::tree::payload_validator", + "payload_builder_sparse_trie_overlay", + %parent_hash, + %parent_state_root, + %anchor_hash, + ?lazy_anchor, + ?lazy_blocks, + ); + let _guard = span.enter(); + if tracing::enabled!(target: "engine::tree::payload_validator", tracing::Level::DEBUG) { + match self.provider.database_provider_ro() { + Ok(provider) => match provider.get_stage_checkpoint(StageId::Finish) { + Ok(Some(checkpoint)) => { + let finish_tip_number = checkpoint.block_number; + let partial_state_trie_number = checkpoint + .finish_stage_checkpoint() + 
.and_then(|finish| finish.partial_state_trie) + .unwrap_or(finish_tip_number); + let partial_state_trie_hash = provider + .convert_number(partial_state_trie_number.into()) + .ok() + .flatten(); + let finish_tip_hash = + provider.convert_number(finish_tip_number.into()).ok().flatten(); + debug!( + target: "engine::tree::payload_validator", + partial_state_trie_number, + ?partial_state_trie_hash, + finish_tip_number, + ?finish_tip_hash, + "Preparing payload builder sparse trie overlay" + ); + } + Ok(None) => { + debug!( + target: "engine::tree::payload_validator", + "Preparing payload builder sparse trie overlay without finish checkpoint" + ); + } + Err(err) => { + debug!( + target: "engine::tree::payload_validator", + %err, + "Preparing payload builder sparse trie overlay without database frontiers" + ); + } + }, + Err(err) => { + debug!( + target: "engine::tree::payload_validator", + %err, + "Preparing payload builder sparse trie overlay without database frontiers" + ); + } + } + } let overlay_factory = OverlayStateProviderFactory::new( self.provider.clone(), OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) diff --git a/crates/engine/tree/src/tree/persistence_state.rs b/crates/engine/tree/src/tree/persistence_state.rs index c3ab00dbece..e4e0590fc56 100644 --- a/crates/engine/tree/src/tree/persistence_state.rs +++ b/crates/engine/tree/src/tree/persistence_state.rs @@ -22,7 +22,6 @@ use crate::persistence::PersistenceResult; use alloy_eips::BlockNumHash; -use alloy_primitives::B256; use crossbeam_channel::Receiver as CrossbeamReceiver; use reth_primitives_traits::FastInstant as Instant; use tracing::trace; @@ -30,10 +29,12 @@ use tracing::trace; /// The state of the persistence task. #[derive(Debug)] pub struct PersistenceState { - /// Hash and number of the last block persisted. + /// Hash and number of the highest block whose non-state/trie outputs are persisted. 
/// - /// This tracks the chain height that is persisted on disk + /// This tracks the highest canonical block with durable block/static-file/plain-state data. pub(crate) last_persisted_block: BlockNumHash, + /// Hash and number of the highest block whose state/trie outputs are persisted. + pub(crate) last_state_trie_persisted_block: BlockNumHash, /// Receiver end of channel where the result of the persistence task will be /// sent when done. A None value means there's no persistence task in progress. pub(crate) rx: @@ -76,13 +77,18 @@ impl PersistenceState { /// Sets state for a finished persistence task. pub(crate) fn finish( &mut self, - last_persisted_block_hash: B256, - last_persisted_block_number: u64, + last_persisted_block: BlockNumHash, + last_state_trie_persisted_block: BlockNumHash, ) { - trace!(target: "engine::tree", block= %last_persisted_block_number, hash=%last_persisted_block_hash, "updating persistence state"); + trace!( + target: "engine::tree", + last_persisted_block = %last_persisted_block.number, + last_state_trie_persisted_block = %last_state_trie_persisted_block.number, + "updating persistence state" + ); self.rx = None; - self.last_persisted_block = - BlockNumHash::new(last_persisted_block_number, last_persisted_block_hash); + self.last_persisted_block = last_persisted_block; + self.last_state_trie_persisted_block = last_state_trie_persisted_block; } } diff --git a/crates/engine/tree/src/tree/tests.rs b/crates/engine/tree/src/tree/tests.rs index 193200f2a32..147da2bc68b 100644 --- a/crates/engine/tree/src/tree/tests.rs +++ b/crates/engine/tree/src/tree/tests.rs @@ -222,7 +222,11 @@ impl TestHarness { engine_api_tree_state, canonical_in_memory_state, persistence_handle, - PersistenceState { last_persisted_block: BlockNumHash::default(), rx: None }, + PersistenceState { + last_persisted_block: BlockNumHash::default(), + last_state_trie_persisted_block: BlockNumHash::default(), + rx: None, + }, payload_builder, tree_config, 
EngineApiKind::Ethereum, @@ -360,6 +364,17 @@ impl TestHarness { } } +type ExpectedPlanStep = (std::ops::Range, Option>, bool); + +fn assert_plan_steps(plan: &SaveBlocksPlan, expected: &[ExpectedPlanStep]) { + assert_eq!(plan.steps.len(), expected.len()); + for (step, (block_range, masking_range, persist_rest)) in plan.steps.iter().zip(expected) { + assert_eq!(&step.block_range, block_range); + assert_eq!(&step.state_trie_masking_range, masking_range); + assert_eq!(step.persist_rest, *persist_rest); + } +} + /// Simplified test metrics for validation calls #[derive(Debug, Default)] struct TestMetrics { @@ -554,12 +569,16 @@ async fn test_tree_persist_blocks() { let received_action = test_harness.action_rx.recv().expect("Failed to receive save blocks action"); - if let PersistenceAction::SaveBlocks(saved_blocks, _) = received_action { + if let PersistenceAction::SaveBlocks(plan, _) = received_action { // only blocks.len() - tree_config.memory_block_buffer_target() will be // persisted let expected_persist_len = blocks.len() - tree_config.memory_block_buffer_target() as usize; - assert_eq!(saved_blocks.len(), expected_persist_len); - assert_eq!(saved_blocks, blocks[..expected_persist_len]); + assert_eq!(plan.blocks.len(), expected_persist_len); + assert_eq!(plan.blocks, blocks[..expected_persist_len]); + assert_plan_steps( + &plan, + &[(0..expected_persist_len, Some(expected_persist_len..expected_persist_len), true)], + ); } else { panic!("unexpected action received {received_action:?}"); } @@ -704,8 +723,8 @@ fn test_backpressure_waits_for_persistence_before_reading_incoming() { test_harness.tree.config = test_harness .tree .config - .with_persistence_threshold(0) - .with_persistence_backpressure_threshold(1); + .with_persistence_threshold(1) + .with_persistence_backpressure_threshold(2); let (persist_tx, persist_rx) = crossbeam_channel::bounded(1); let persisted = blocks.last().unwrap().recovered_block().num_hash(); @@ -736,6 +755,7 @@ fn 
test_backpressure_waits_for_persistence_before_reading_incoming() { persist_tx .send(PersistenceResult { last_block: Some(persisted), + last_state_trie_block: Some(persisted.number), commit_duration: Some(Duration::ZERO), }) .unwrap(); @@ -770,10 +790,10 @@ async fn test_tree_state_on_new_head_reorg() { reth_tracing::init_test_tracing(); let chain_spec = MAINNET.clone(); - // Set persistence_threshold to 1 + // Keep a single block in memory while still leaving room for the persistence threshold. let mut test_harness = TestHarness::new(chain_spec); test_harness.tree.config = - test_harness.tree.config.with_persistence_threshold(1).with_memory_block_buffer_target(1); + test_harness.tree.config.with_persistence_threshold(2).with_memory_block_buffer_target(1); let mut test_block_builder = TestBlockBuilder::eth(); let blocks: Vec<_> = test_block_builder.get_executed_blocks(1..6).collect(); @@ -824,15 +844,16 @@ async fn test_tree_state_on_new_head_reorg() { // get rid of the prev action let received_action = test_harness.action_rx.recv().unwrap(); - let PersistenceAction::SaveBlocks(saved_blocks, sender) = received_action else { + let PersistenceAction::SaveBlocks(plan, sender) = received_action else { panic!("received wrong action"); }; - assert_eq!(saved_blocks, vec![blocks[0].clone(), blocks[1].clone()]); + assert_eq!(plan.blocks, vec![blocks[0].clone(), blocks[1].clone()]); // send the response so we can advance again sender .send(PersistenceResult { last_block: Some(blocks[1].recovered_block().num_hash()), + last_state_trie_block: Some(blocks[1].recovered_block().number()), commit_duration: Some(Duration::ZERO), }) .unwrap(); @@ -968,8 +989,10 @@ async fn test_get_canonical_blocks_to_persist() { test_harness = test_harness.with_blocks(blocks.clone()); let last_persisted_block_number = 3; - test_harness.tree.persistence_state.last_persisted_block = + let last_persisted_block = blocks[last_persisted_block_number as usize].recovered_block.num_hash(); + 
test_harness.tree.persistence_state.last_persisted_block = last_persisted_block; + test_harness.tree.persistence_state.last_state_trie_persisted_block = last_persisted_block; let persistence_threshold = 4; let memory_block_buffer_target = 3; @@ -977,16 +1000,15 @@ async fn test_get_canonical_blocks_to_persist() { .with_persistence_threshold(persistence_threshold) .with_memory_block_buffer_target(memory_block_buffer_target); - let blocks_to_persist = - test_harness.tree.get_canonical_blocks_to_persist(PersistTarget::Threshold).unwrap(); + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); let expected_blocks_to_persist_length: usize = (canonical_head_number - memory_block_buffer_target - last_persisted_block_number) .try_into() .unwrap(); - assert_eq!(blocks_to_persist.len(), expected_blocks_to_persist_length); - for (i, item) in blocks_to_persist.iter().enumerate().take(expected_blocks_to_persist_length) { + assert_eq!(plan.blocks.len(), expected_blocks_to_persist_length); + for (i, item) in plan.blocks.iter().enumerate().take(expected_blocks_to_persist_length) { assert_eq!(item.recovered_block().number, last_persisted_block_number + i as u64 + 1); } @@ -997,15 +1019,14 @@ async fn test_get_canonical_blocks_to_persist() { assert!(test_harness.tree.state.tree_state.sealed_header_by_hash(&fork_block_hash).is_some()); - let blocks_to_persist = - test_harness.tree.get_canonical_blocks_to_persist(PersistTarget::Threshold).unwrap(); - assert_eq!(blocks_to_persist.len(), expected_blocks_to_persist_length); + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + assert_eq!(plan.blocks.len(), expected_blocks_to_persist_length); // check that the fork block is not included in the blocks to persist - assert!(!blocks_to_persist.iter().any(|b| b.recovered_block().hash() == fork_block_hash)); + assert!(!plan.blocks.iter().any(|b| b.recovered_block().hash() == fork_block_hash)); // check that the original block 
4 is still included - assert!(blocks_to_persist.iter().any(|b| b.recovered_block().number == 4 && + assert!(plan.blocks.iter().any(|b| b.recovered_block().number == 4 && b.recovered_block().hash() == blocks[4].recovered_block().hash())); // check that if we advance persistence, the persistence action is the correct value @@ -1013,11 +1034,193 @@ async fn test_get_canonical_blocks_to_persist() { assert_eq!( test_harness.tree.persistence_state.current_action().cloned(), Some(CurrentPersistenceAction::SavingBlocks { - highest: blocks_to_persist.last().unwrap().recovered_block().num_hash() + highest: plan.blocks.last().unwrap().recovered_block().num_hash() }) ); } +#[test] +fn test_get_save_blocks_plan_with_deferred_trie_blocks() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[3].recovered_block().num_hash(); + test_harness.tree.config = TreeConfig::default() + .with_persistence_threshold(4) + .with_memory_block_buffer_target(1) + .with_num_state_masking_blocks(2); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps(&plan, &[(0..2, Some(2..4), false), (2..4, None, true)]); + assert_eq!(plan.blocks.len(), 4); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + vec![2, 3, 4, 5] + ); + assert_eq!(plan.last_block(), Some(blocks[5].recovered_block().num_hash())); +} + +#[test] +fn test_get_save_blocks_plan_persists_full_region_before_deferred_tail() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut 
test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..31).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[12].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_persisted_block = + blocks[15].recovered_block().num_hash(); + test_harness.tree.config = TreeConfig::default() + .with_persistence_threshold(5) + .with_memory_block_buffer_target(2) + .with_num_state_masking_blocks(2); + + let plan = test_harness.tree.get_save_blocks_plan(PersistTarget::Threshold).unwrap(); + + assert_plan_steps( + &plan, + &[(0..3, Some(14..16), false), (3..14, Some(14..16), true), (14..16, None, true)], + ); + assert_eq!(plan.blocks.len(), 16); + assert_eq!( + plan.blocks.iter().map(|block| block.recovered_block().number()).collect::>(), + (13..=28).collect::>() + ); + assert_eq!(plan.last_block(), Some(blocks[28].recovered_block().num_hash())); +} + +#[test] +fn test_on_persistence_complete_retains_blocks_above_partial_state_trie() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + + let persisted_tip = blocks[5].recovered_block().num_hash(); + let last_state_trie_block = blocks[3].recovered_block().number(); + + test_harness + .tree + .on_persistence_complete( + PersistenceResult { + last_block: Some(persisted_tip), + last_state_trie_block: Some(last_state_trie_block), + commit_duration: Some(Duration::ZERO), + }, + Instant::now(), + ) + .unwrap(); + + 
assert_eq!(test_harness.tree.persistence_state.last_persisted_block, persisted_tip); + assert_eq!( + test_harness.tree.persistence_state.last_state_trie_persisted_block, + blocks[3].recovered_block().num_hash() + ); + assert_eq!( + test_harness.tree.canonical_in_memory_state.get_persisted_num_hash(), + Some(persisted_tip) + ); + + for block in &blocks[..=last_state_trie_block as usize] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_none()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_none()); + } + + for block in &blocks[last_state_trie_block as usize + 1..] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_some()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_some()); + } +} + +#[test] +fn test_on_persistence_complete_without_partial_state_trie_prunes_through_tip() { + let chain_spec = MAINNET.clone(); + let mut test_harness = TestHarness::new(chain_spec); + let mut test_block_builder = TestBlockBuilder::eth(); + + let blocks: Vec<_> = test_block_builder.get_executed_blocks(0..7).collect(); + test_harness = test_harness.with_blocks(blocks.clone()); + test_harness.tree.persistence_state.last_persisted_block = + blocks[1].recovered_block().num_hash(); + test_harness.tree.persistence_state.last_state_trie_persisted_block = + blocks[1].recovered_block().num_hash(); + + let persisted_tip = blocks[5].recovered_block().num_hash(); + + test_harness + .tree + .on_persistence_complete( + PersistenceResult { + last_block: Some(persisted_tip), + last_state_trie_block: None, + commit_duration: Some(Duration::ZERO), + }, + Instant::now(), + ) + .unwrap(); + + for block in &blocks[..=persisted_tip.number as usize] { + assert!(test_harness + .tree + .state + .tree_state + 
.executed_block_by_hash(block.recovered_block().hash()) + .is_none()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_none()); + } + + for block in &blocks[persisted_tip.number as usize + 1..] { + assert!(test_harness + .tree + .state + .tree_state + .executed_block_by_hash(block.recovered_block().hash()) + .is_some()); + assert!(test_harness + .tree + .canonical_in_memory_state + .state_by_number(block.recovered_block().number()) + .is_some()); + } +} + #[tokio::test] async fn test_engine_tree_fcu_missing_head() { let chain_spec = MAINNET.clone(); @@ -2112,15 +2315,18 @@ mod forkchoice_updated_tests { break; } - if let Ok(PersistenceAction::SaveBlocks(saved_blocks, sender)) = + if let Ok(PersistenceAction::SaveBlocks(plan, sender)) = action_rx.recv_timeout(std::time::Duration::from_millis(100)) { - if let Some(last) = saved_blocks.last() { + if let Some(last) = plan.last_block() { + last_persisted_number = last.number; + } else if let Some(last) = plan.blocks.last() { last_persisted_number = last.recovered_block().number; } sender .send(PersistenceResult { - last_block: saved_blocks.last().map(|b| b.recovered_block().num_hash()), + last_block: plan.last_block(), + last_state_trie_block: plan.last_block().map(|tip| tip.number), commit_duration: Some(Duration::ZERO), }) .unwrap(); diff --git a/crates/ethereum/payload/src/lib.rs b/crates/ethereum/payload/src/lib.rs index 2d6c2e2d022..82dc6061dcf 100644 --- a/crates/ethereum/payload/src/lib.rs +++ b/crates/ethereum/payload/src/lib.rs @@ -169,6 +169,16 @@ where let PayloadConfig { parent_header, attributes, payload_id } = config; let mut state_provider = client.state_by_block_hash(parent_header.hash())?; + debug!( + target: "payload_builder", + id = %payload_id, + parent_hash = %parent_header.hash(), + parent_number = parent_header.number, + parent_state_root = %parent_header.state_root, + has_execution_cache = execution_cache.is_some(), + 
has_sparse_trie_handle = trie_handle.is_some(), + "Created payload builder parent state provider" + ); if let Some(execution_cache) = execution_cache { state_provider = Box::new(CachedStateProvider::new( state_provider, @@ -221,7 +231,22 @@ where // If we have a sparse trie handle, wire a state hook that streams per-tx state diffs // to the background trie pipeline for incremental state root computation. if let Some(ref handle) = trie_handle { + debug!( + target: "payload_builder", + id = %payload_id, + parent_hash = %parent_header.hash(), + parent_number = parent_header.number, + "Using shared sparse trie handle for payload builder state root" + ); builder.executor_mut().set_state_hook(Some(Box::new(handle.state_hook()))); + } else { + debug!( + target: "payload_builder", + id = %payload_id, + parent_hash = %parent_header.hash(), + parent_number = parent_header.number, + "Payload builder will compute state root through its state provider" + ); } builder.apply_pre_execution_changes().map_err(|err| { diff --git a/crates/evm/evm/src/execute.rs b/crates/evm/evm/src/execute.rs index eb1c70c3deb..24881691e08 100644 --- a/crates/evm/evm/src/execute.rs +++ b/crates/evm/evm/src/execute.rs @@ -3,7 +3,7 @@ use crate::{ConfigureEvm, Database, OnStateHook, TxEnvFor}; use alloc::{boxed::Box, sync::Arc, vec::Vec}; use alloy_consensus::{BlockHeader, Header}; -use alloy_eips::eip2718::WithEncoded; +use alloy_eips::{eip2718::WithEncoded, eip7928::BlockAccessList}; pub use alloy_evm::block::{BlockExecutor, BlockExecutorFactory, GasOutput}; use alloy_evm::{ block::{CommitChanges, ExecutableTxParts}, @@ -295,6 +295,8 @@ pub trait BlockAssembler { pub struct BlockBuilderOutcome { /// Result of block execution. pub execution_result: BlockExecutionResult, + /// Block access list built while executing the block, if BAL collection was enabled. + pub block_access_list: Option, /// Hashed state after execution. 
pub hashed_state: HashedPostState, /// Trie updates collected during state root calculation. @@ -482,6 +484,7 @@ where // merge all transitions into bundle state db.merge_transitions(BundleRetention::Reverts); + let block_access_list = db.take_built_alloy_bal(); let hashed_state = state.hashed_post_state(&db.bundle_state); let (state_root, trie_updates) = match state_root_precomputed { @@ -507,7 +510,13 @@ where let block = RecoveredBlock::new_unhashed(block, senders); - Ok(BlockBuilderOutcome { execution_result: result, hashed_state, trie_updates, block }) + Ok(BlockBuilderOutcome { + execution_result: result, + block_access_list, + hashed_state, + trie_updates, + block, + }) } fn executor_mut(&mut self) -> &mut Self::Executor { diff --git a/crates/node/builder/src/launch/common.rs b/crates/node/builder/src/launch/common.rs index 0afef017616..027c399b28f 100644 --- a/crates/node/builder/src/launch/common.rs +++ b/crates/node/builder/src/launch/common.rs @@ -69,8 +69,8 @@ use reth_node_metrics::{ }; use reth_provider::{ providers::{NodeTypesForProvider, ProviderNodeTypes, RocksDBProvider, StaticFileProvider}, - BlockHashReader, BlockNumReader, ProviderError, ProviderFactory, ProviderResult, - RocksDBProviderFactory, StageCheckpointReader, StaticFileProviderBuilder, + BlockHashReader, BlockNumReader, DatabaseProviderFactory, ProviderError, ProviderFactory, + ProviderResult, RocksDBProviderFactory, StageCheckpointReader, StaticFileProviderBuilder, StaticFileProviderFactory, StorageSettingsCache, }; use reth_prune::{PruneModes, PrunerBuilder}; @@ -78,7 +78,7 @@ use reth_rpc_builder::config::RethRpcServerConfig; use reth_rpc_layer::JwtSecret; use reth_stages::{ sets::DefaultStages, stages::EraImportSource, MetricEvent, PipelineBuilder, PipelineTarget, - StageId, + StageCheckpoint, StageId, }; use reth_static_file::StaticFileProducer; use reth_tasks::TaskExecutor; @@ -521,19 +521,26 @@ where // the unwind targets for each storage layer if inconsistencies are // found. 
let (rocksdb_unwind, static_file_unwind) = factory.check_consistency()?; + let partial_trie_unwind = partial_trie_unwind_target( + factory.database_provider_ro()?.get_stage_checkpoint(StageId::Finish)?, + ); // Take the minimum block number to ensure all storage layers are consistent. - let unwind_target = [rocksdb_unwind, static_file_unwind].into_iter().flatten().min(); + let unwind_target = + [rocksdb_unwind, static_file_unwind, partial_trie_unwind].into_iter().flatten().min(); if let Some(unwind_block) = unwind_target { + let inconsistency_source = [ + rocksdb_unwind.map(|_| "RocksDB"), + static_file_unwind.map(|_| "static file"), + partial_trie_unwind.map(|_| "partial state trie"), + ] + .into_iter() + .flatten() + .collect::>() + .join(" and "); // Highly unlikely to happen, and given its destructive nature, it's better to panic // instead. Unwinding to 0 would leave MDBX with a huge free list size. - let inconsistency_source = match (rocksdb_unwind, static_file_unwind) { - (Some(_), Some(_)) => "RocksDB and static file", - (Some(_), None) => "RocksDB", - (None, Some(_)) => "static file", - (None, None) => unreachable!(), - }; assert_ne!( unwind_block, 0, "A {} inconsistency was found that would trigger an unwind to block 0", @@ -1300,11 +1307,19 @@ pub fn metrics_hooks(provider_factory: &ProviderFactory) .build() } +fn partial_trie_unwind_target(finish_checkpoint: Option) -> Option { + let finish_checkpoint = finish_checkpoint?; + let partial_state_trie = finish_checkpoint.finish_stage_checkpoint()?.partial_state_trie?; + + (partial_state_trie != finish_checkpoint.block_number).then_some(partial_state_trie) +} + #[cfg(test)] mod tests { - use super::{LaunchContext, NodeConfig}; + use super::{partial_trie_unwind_target, LaunchContext, NodeConfig}; use reth_config::Config; use reth_node_core::args::PruningArgs; + use reth_stages::{FinishCheckpoint, StageCheckpoint}; const EXTENSION: &str = "toml"; @@ -1356,4 +1371,24 @@ mod tests { assert_eq!(reth_config, 
loaded_config); }) } + + #[test] + fn partial_trie_unwind_target_uses_partial_finish_checkpoint() { + let finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(21) }); + + assert_eq!(partial_trie_unwind_target(Some(finish_checkpoint)), Some(21)); + } + + #[test] + fn partial_trie_unwind_target_ignores_matching_or_missing_partial_checkpoint() { + let matching_finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(42) }); + let missing_partial_finish_checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: None }); + + assert_eq!(partial_trie_unwind_target(Some(matching_finish_checkpoint)), None); + assert_eq!(partial_trie_unwind_target(Some(missing_partial_finish_checkpoint)), None); + assert_eq!(partial_trie_unwind_target(None), None); + } } diff --git a/crates/node/core/src/args/engine.rs b/crates/node/core/src/args/engine.rs index 382df972b71..ee97bd6914e 100644 --- a/crates/node/core/src/args/engine.rs +++ b/crates/node/core/src/args/engine.rs @@ -4,9 +4,9 @@ use clap::{builder::Resettable, Args}; use eyre::ensure; use reth_cli_util::{parse_duration_from_secs_or_ms, parsers::format_duration_as_secs_or_ms}; use reth_engine_primitives::{ - TreeConfig, DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, - DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, DEFAULT_SPARSE_TRIE_MAX_HOT_ACCOUNTS, - DEFAULT_SPARSE_TRIE_MAX_HOT_SLOTS, + default_persistence_backpressure_threshold, TreeConfig, DEFAULT_DEFERRED_TRIE_BLOCKS, + DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE, + DEFAULT_SPARSE_TRIE_MAX_HOT_ACCOUNTS, DEFAULT_SPARSE_TRIE_MAX_HOT_SLOTS, }; use std::{sync::OnceLock, time::Duration}; @@ -24,7 +24,8 @@ static ENGINE_DEFAULTS: OnceLock = OnceLock::new(); #[derive(Debug, Clone)] pub struct DefaultEngineValues { 
persistence_threshold: u64, - persistence_backpressure_threshold: u64, + persistence_backpressure_threshold: Option, + deferred_trie_blocks: u64, memory_block_buffer_target: u64, invalid_header_hit_eviction_threshold: u8, legacy_state_root_task_enabled: bool, @@ -73,9 +74,26 @@ impl DefaultEngineValues { self } + /// Get the default persistence backpressure threshold. + pub const fn persistence_backpressure_threshold(&self) -> u64 { + match self.persistence_backpressure_threshold { + Some(v) => v, + None => default_persistence_backpressure_threshold( + self.persistence_threshold, + self.memory_block_buffer_target, + ), + } + } + /// Set the default persistence backpressure threshold pub const fn with_persistence_backpressure_threshold(mut self, v: u64) -> Self { - self.persistence_backpressure_threshold = v; + self.persistence_backpressure_threshold = Some(v); + self + } + + /// Set the default deferred trie block target + pub const fn with_deferred_trie_blocks(mut self, v: u64) -> Self { + self.deferred_trie_blocks = v; self } @@ -261,7 +279,8 @@ impl Default for DefaultEngineValues { fn default() -> Self { Self { persistence_threshold: DEFAULT_PERSISTENCE_THRESHOLD, - persistence_backpressure_threshold: DEFAULT_PERSISTENCE_BACKPRESSURE_THRESHOLD, + persistence_backpressure_threshold: None, + deferred_trie_blocks: DEFAULT_DEFERRED_TRIE_BLOCKS, memory_block_buffer_target: DEFAULT_MEMORY_BLOCK_BUFFER_TARGET, invalid_header_hit_eviction_threshold: DEFAULT_INVALID_HEADER_HIT_EVICTION_THRESHOLD, legacy_state_root_task_enabled: false, @@ -311,9 +330,14 @@ pub struct EngineArgs { /// Configure the maximum canonical-minus-persisted gap before engine API processing stalls. /// /// This value must be greater than `--engine.persistence-threshold`. 
- #[arg(long = "engine.persistence-backpressure-threshold", default_value_t = DefaultEngineValues::get_global().persistence_backpressure_threshold)] + #[arg(long = "engine.persistence-backpressure-threshold", default_value_t = DefaultEngineValues::get_global().persistence_backpressure_threshold())] pub persistence_backpressure_threshold: u64, + /// Configure how many of the blocks being persisted should only mask state/trie writes instead + /// of durably persisting their state/trie updates in the current cycle. + #[arg(long = "engine.deferred-trie-blocks", default_value_t = DefaultEngineValues::get_global().deferred_trie_blocks)] + pub deferred_trie_blocks: u64, + /// Configure the target number of blocks to keep in memory. #[arg(long = "engine.memory-block-buffer-target", default_value_t = DefaultEngineValues::get_global().memory_block_buffer_target)] pub memory_block_buffer_target: u64, @@ -546,6 +570,7 @@ impl Default for EngineArgs { let DefaultEngineValues { persistence_threshold, persistence_backpressure_threshold, + deferred_trie_blocks, memory_block_buffer_target, invalid_header_hit_eviction_threshold, legacy_state_root_task_enabled, @@ -578,7 +603,15 @@ impl Default for EngineArgs { } = DefaultEngineValues::get_global().clone(); Self { persistence_threshold, - persistence_backpressure_threshold, + persistence_backpressure_threshold: persistence_backpressure_threshold.unwrap_or_else( + || { + default_persistence_backpressure_threshold( + persistence_threshold, + memory_block_buffer_target, + ) + }, + ), + deferred_trie_blocks, memory_block_buffer_target, invalid_header_hit_eviction_threshold, legacy_state_root_task_enabled, @@ -630,6 +663,13 @@ impl EngineArgs { self.persistence_backpressure_threshold, self.persistence_threshold ); + ensure!( + self.deferred_trie_blocks + self.memory_block_buffer_target < self.persistence_threshold, + "--engine.deferred-trie-blocks ({}) + --engine.memory-block-buffer-target ({}) must be less than 
--engine.persistence-threshold ({})", + self.deferred_trie_blocks, + self.memory_block_buffer_target, + self.persistence_threshold, + ); Ok(()) } @@ -638,6 +678,7 @@ impl EngineArgs { let config = TreeConfig::default() .with_persistence_threshold(self.persistence_threshold) .with_persistence_backpressure_threshold(self.persistence_backpressure_threshold) + .with_num_state_masking_blocks(self.deferred_trie_blocks) .with_memory_block_buffer_target(self.memory_block_buffer_target) .with_invalid_header_hit_eviction_threshold(self.invalid_header_hit_eviction_threshold) .with_legacy_state_root(self.legacy_state_root_task_enabled) @@ -695,12 +736,48 @@ mod tests { assert_eq!(args, default_args); } + #[test] + fn default_engine_values_derive_backpressure_threshold() { + let defaults = DefaultEngineValues::default() + .with_persistence_threshold(10) + .with_memory_block_buffer_target(3); + + assert_eq!(defaults.persistence_backpressure_threshold(), 26); + } + + #[test] + fn explicit_backpressure_default_override_is_preserved() { + let defaults = DefaultEngineValues::default() + .with_persistence_backpressure_threshold(99) + .with_persistence_threshold(10) + .with_memory_block_buffer_target(3); + + assert_eq!(defaults.persistence_backpressure_threshold(), 99); + } + + #[test] + fn engine_args_default_thresholds_match_expected_defaults() { + let args = EngineArgs::default(); + + assert_eq!(args.persistence_threshold, DEFAULT_PERSISTENCE_THRESHOLD); + assert_eq!(args.deferred_trie_blocks, DEFAULT_DEFERRED_TRIE_BLOCKS); + assert_eq!(args.memory_block_buffer_target, DEFAULT_MEMORY_BLOCK_BUFFER_TARGET); + assert_eq!( + args.persistence_backpressure_threshold, + default_persistence_backpressure_threshold( + args.persistence_threshold, + args.memory_block_buffer_target, + ) + ); + } + #[test] #[allow(deprecated)] fn engine_args() { let args = EngineArgs { persistence_threshold: 100, persistence_backpressure_threshold: 101, + deferred_trie_blocks: 25, memory_block_buffer_target: 50, 
invalid_header_hit_eviction_threshold: 7, legacy_state_root_task_enabled: true, @@ -745,6 +822,8 @@ mod tests { "100", "--engine.persistence-backpressure-threshold", "101", + "--engine.deferred-trie-blocks", + "25", "--engine.memory-block-buffer-target", "50", "--engine.invalid-header-cache-hit-eviction-threshold", @@ -788,6 +867,21 @@ mod tests { assert_eq!(parsed_args, args); } + #[test] + fn test_parse_deferred_trie_blocks() { + let args = CommandParser::::parse_from([ + "reth", + "--engine.persistence-threshold", + "8", + "--engine.deferred-trie-blocks", + "7", + ]) + .args; + + assert_eq!(args.deferred_trie_blocks, 7); + assert_eq!(args.tree_config().num_state_masking_blocks(), 7); + } + #[test] fn validate_rejects_invalid_backpressure_threshold() { let args = EngineArgs { @@ -801,6 +895,21 @@ mod tests { assert!(err.contains("engine.persistence-threshold")); } + #[test] + fn validate_rejects_state_masking_window_at_or_above_threshold() { + let args = EngineArgs { + persistence_threshold: 4, + deferred_trie_blocks: 2, + memory_block_buffer_target: 2, + ..EngineArgs::default() + }; + + let err = args.validate().unwrap_err().to_string(); + assert!(err.contains("engine.deferred-trie-blocks")); + assert!(err.contains("engine.memory-block-buffer-target")); + assert!(err.contains("engine.persistence-threshold")); + } + #[test] fn test_parse_slow_block_threshold() { // Test default value (None - disabled) diff --git a/crates/rpc/rpc-api/src/lib.rs b/crates/rpc/rpc-api/src/lib.rs index 43dfc065e28..a1a91cee7dc 100644 --- a/crates/rpc/rpc-api/src/lib.rs +++ b/crates/rpc/rpc-api/src/lib.rs @@ -32,7 +32,9 @@ mod txpool; mod validation; mod web3; -pub use testing::{TestingBuildBlockRequestV1, TESTING_BUILD_BLOCK_V1}; +pub use testing::{ + TestingBuildBlockRequestV1, TestingBuildBlockResponseV1, TESTING_BUILD_BLOCK_V1, +}; /// re-export of all server traits pub use servers::*; diff --git a/crates/rpc/rpc-api/src/testing.rs b/crates/rpc/rpc-api/src/testing.rs index 
e7dbeb853d4..c6bf12ba93c 100644 --- a/crates/rpc/rpc-api/src/testing.rs +++ b/crates/rpc/rpc-api/src/testing.rs @@ -5,11 +5,24 @@ //! disabled by default and never be exposed on public-facing RPC without an //! explicit operator flag. +use alloy_eips::eip7928::BlockAccessList; use alloy_rpc_types_engine::ExecutionPayloadEnvelopeV5; use jsonrpsee::proc_macros::rpc; +use serde::{Deserialize, Serialize}; pub use alloy_rpc_types_engine::{TestingBuildBlockRequestV1, TESTING_BUILD_BLOCK_V1}; +/// Temporary diagnostic response for `testing_buildBlockV1` that includes the BAL built while +/// executing the block. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TestingBuildBlockResponseV1 { + /// The execution payload envelope produced by the testing builder. + pub execution_payload_envelope: ExecutionPayloadEnvelopeV5, + /// The diagnostic block access list built while executing this payload. + pub block_access_list: Option, +} + /// Testing RPC interface for building a block in a single call. /// /// # Enabling @@ -33,5 +46,5 @@ pub trait TestingApi { async fn build_block_v1( &self, request: TestingBuildBlockRequestV1, - ) -> jsonrpsee::core::RpcResult; + ) -> jsonrpsee::core::RpcResult; } diff --git a/crates/rpc/rpc-eth-api/src/helpers/call.rs b/crates/rpc/rpc-eth-api/src/helpers/call.rs index 2ab5bf2d074..9088060e596 100644 --- a/crates/rpc/rpc-eth-api/src/helpers/call.rs +++ b/crates/rpc/rpc-eth-api/src/helpers/call.rs @@ -44,7 +44,7 @@ use revm::{ Database, DatabaseCommit, }; use revm_inspectors::{access_list::AccessListInspector, transfer::TransferInspector}; -use tracing::{trace, warn}; +use tracing::{debug, trace, warn}; /// Result type for `eth_simulateV1` RPC method. 
pub type SimulatedBlocksResult = Result>>, E>; @@ -665,7 +665,9 @@ pub trait Call: { let at = at.into(); self.spawn_blocking_io_fut(async move |this| { + debug!(target: "rpc::eth::call", ?at, "Resolving state provider for block"); let state = this.state_at_block_id(at).await?; + debug!(target: "rpc::eth::call", ?at, "Resolved state provider for block"); let db = State::builder() .with_database(StateProviderDatabase::new(StateProviderTraitObjWrapper(state))) .build(); diff --git a/crates/rpc/rpc-eth-api/src/helpers/state.rs b/crates/rpc/rpc-eth-api/src/helpers/state.rs index 322e070458c..c35597f8af9 100644 --- a/crates/rpc/rpc-eth-api/src/helpers/state.rs +++ b/crates/rpc/rpc-eth-api/src/helpers/state.rs @@ -23,6 +23,7 @@ use reth_storage_api::{ }; use reth_transaction_pool::TransactionPool; use std::collections::HashMap; +use tracing::debug; /// Helper methods for `eth_` methods relating to state (accounts). pub trait EthState: LoadState + SpawnBlocking { @@ -279,10 +280,14 @@ pub trait LoadState: if at.is_pending() && let Ok(Some(state)) = self.local_pending_state().await { + debug!(target: "rpc::eth::state", ?at, "Using local pending state provider"); return Ok(state) } - self.provider().state_by_block_id(at).map_err(Self::Error::from_eth_err) + debug!(target: "rpc::eth::state", ?at, "Loading state provider by block id"); + let state = self.provider().state_by_block_id(at).map_err(Self::Error::from_eth_err)?; + debug!(target: "rpc::eth::state", ?at, "Loaded state provider by block id"); + Ok(state) } } diff --git a/crates/rpc/rpc/src/testing.rs b/crates/rpc/rpc/src/testing.rs index e7d2e45826c..4ac75d7a6a2 100644 --- a/crates/rpc/rpc/src/testing.rs +++ b/crates/rpc/rpc/src/testing.rs @@ -15,11 +15,10 @@ //! on public-facing RPC endpoints without proper authentication. 
use alloy_consensus::{Header, Transaction}; -use alloy_eips::eip2718::Decodable2718; +use alloy_eips::{eip2718::Decodable2718, eip7928::total_bal_items}; use alloy_evm::{Evm, RecoveredTx}; use alloy_primitives::{map::HashSet, Address, U256}; use alloy_rlp::Encodable; -use alloy_rpc_types_engine::ExecutionPayloadEnvelopeV5; use async_trait::async_trait; use jsonrpsee::core::RpcResult; use reth_chainspec::{ChainSpecProvider, EthereumHardforks}; @@ -33,7 +32,7 @@ use reth_primitives_traits::{ AlloyBlockHeader as BlockTrait, TxTy, }; use reth_revm::{database::StateProviderDatabase, db::State}; -use reth_rpc_api::{TestingApiServer, TestingBuildBlockRequestV1}; +use reth_rpc_api::{TestingApiServer, TestingBuildBlockRequestV1, TestingBuildBlockResponseV1}; use reth_rpc_eth_api::{helpers::Call, FromEthApiError}; use reth_rpc_eth_types::EthApiError; use reth_storage_api::{BlockReader, HeaderProvider}; @@ -86,27 +85,44 @@ where async fn build_block_v1( &self, request: TestingBuildBlockRequestV1, - ) -> Result { + ) -> Result { let evm_config = self.evm_config.clone(); let skip_invalid_transactions = self.skip_invalid_transactions; let gas_limit_override = self.gas_limit_override; + debug!( + target: "rpc::testing", + parent_block_hash = %request.parent_block_hash, + transaction_count = request.transactions.len(), + timestamp = request.payload_attributes.timestamp, + ?gas_limit_override, + "Starting testing_buildBlockV1" + ); self.eth_api .spawn_with_state_at_block(request.parent_block_hash, move |eth_api, state| { let state = state.database.0; - let mut db = State::builder() - .with_bundle_update() - .with_database(StateProviderDatabase::new(&state)) - .build(); let parent = eth_api .provider() .sealed_header_by_hash(request.parent_block_hash)? 
.ok_or_else(|| { EthApiError::HeaderNotFound(request.parent_block_hash.into()) })?; + debug!( + target: "rpc::testing", + parent_block_hash = %request.parent_block_hash, + parent_number = parent.number(), + parent_state_root = %parent.state_root(), + transaction_count = request.transactions.len(), + "Resolved testing_buildBlockV1 parent and state provider" + ); let chain_spec = eth_api.provider().chain_spec(); let is_osaka = chain_spec.is_osaka_active_at_timestamp(request.payload_attributes.timestamp); + let mut db = State::builder() + .with_bundle_update() + .with_database(StateProviderDatabase::new(&state)) + .with_bal_builder() + .build(); let withdrawals = request.payload_attributes.withdrawals.clone(); let withdrawals_rlp_length = withdrawals.as_ref().map(|w| w.length()).unwrap_or(0); @@ -127,6 +143,7 @@ where .map_err(RethError::other) .map_err(Eth::Error::from_eth_err)?; builder.apply_pre_execution_changes().map_err(Eth::Error::from_eth_err)?; + builder.evm_mut().db_mut().bump_bal_index(); let mut total_fees = U256::ZERO; let base_fee = builder.evm_mut().block().basefee(); @@ -202,21 +219,49 @@ where return Err(Eth::Error::from_eth_err(err)); } }; + builder.evm_mut().db_mut().bump_bal_index(); block_transactions_rlp_length += tx_rlp_len; total_fees += U256::from(tip) * U256::from(gas_used); } + debug!( + target: "rpc::testing", + parent_block_hash = %request.parent_block_hash, + parent_number = parent.number(), + total_fees = %total_fees, + "Finishing testing_buildBlockV1 with state provider root" + ); let outcome = builder.finish(&state, None).map_err(Eth::Error::from_eth_err)?; + let block_access_list = outcome.block_access_list; let has_requests = outcome.block.requests_hash().is_some(); let sealed_block = Arc::new(outcome.block.into_sealed_block()); + debug!( + target: "rpc::testing", + parent_block_hash = %request.parent_block_hash, + built_block_hash = %sealed_block.hash(), + built_block_number = sealed_block.number(), + built_state_root = 
%sealed_block.state_root(), + "Finished testing_buildBlockV1" + ); let requests = has_requests.then_some(outcome.execution_result.requests); - EthBuiltPayload::new(sealed_block, total_fees, requests, None) + let execution_payload_envelope = EthBuiltPayload::new(sealed_block, total_fees, requests, None) .try_into_v5() .map_err(RethError::other) - .map_err(Eth::Error::from_eth_err) + .map_err(Eth::Error::from_eth_err)?; + + debug!( + target: "rpc::testing", + parent_block_hash = %request.parent_block_hash, + has_block_access_list = block_access_list.is_some(), + block_access_list_accounts = block_access_list.as_ref().map(|bal| bal.len()), + block_access_list_items = block_access_list.as_ref().map(|bal| total_bal_items(bal)), + "Returning testing_buildBlockV1 payload with diagnostic BAL" + ); + + Ok(TestingBuildBlockResponseV1 { execution_payload_envelope, block_access_list }) }) .await } @@ -236,7 +281,7 @@ where async fn build_block_v1( &self, request: TestingBuildBlockRequestV1, - ) -> RpcResult { + ) -> RpcResult { self.build_block_v1(request).await.map_err(Into::into) } } diff --git a/crates/stages/stages/src/stages/bodies.rs b/crates/stages/stages/src/stages/bodies.rs index 649b48b86e5..9e863f1b806 100644 --- a/crates/stages/stages/src/stages/bodies.rs +++ b/crates/stages/stages/src/stages/bodies.rs @@ -295,7 +295,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, // 1 seeded block body + batch size total // seeded headers - })) + })), + .. }, done: false }) if block_number < 200 && processed == batch_size + 1 && total == previous_stage + 1 ); @@ -333,7 +334,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if processed + 1 == total && total == previous_stage + 1 @@ -370,7 +372,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. 
}, done: false }) if block_number >= 10 && processed - 1 == batch_size && total == previous_stage + 1 ); @@ -391,7 +394,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number > first_run_checkpoint.block_number && processed + 1 == total && total == previous_stage + 1 ); @@ -432,7 +436,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed + 1 == total && total == previous_stage + 1 ); @@ -460,7 +465,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed: 1, total - })) + })), + .. }}) if total == previous_stage + 1 ); diff --git a/crates/stages/stages/src/stages/era.rs b/crates/stages/stages/src/stages/era.rs index 6e81054ed68..862b63a3880 100644 --- a/crates/stages/stages/src/stages/era.rs +++ b/crates/stages/stages/src/stages/era.rs @@ -298,7 +298,7 @@ mod tests { assert_matches!( output, Ok(ExecOutput { - checkpoint: StageCheckpoint { block_number, stage_checkpoint: None }, + checkpoint: StageCheckpoint { block_number, stage_checkpoint: None, .. }, done: false }) if block_number == era_cap ); @@ -318,7 +318,7 @@ mod tests { assert_matches!( output, Ok(ExecOutput { - checkpoint: StageCheckpoint { block_number, stage_checkpoint: None }, + checkpoint: StageCheckpoint { block_number, stage_checkpoint: None, .. }, done: true }) if block_number == target ); diff --git a/crates/stages/stages/src/stages/execution/mod.rs b/crates/stages/stages/src/stages/execution/mod.rs index a2154fe54a7..da128c3ab28 100644 --- a/crates/stages/stages/src/stages/execution/mod.rs +++ b/crates/stages/stages/src/stages/execution/mod.rs @@ -1017,7 +1017,8 @@ mod tests { processed, total } - })) + })), + .. 
}, done: true } if processed == total && total == block.gas_used); @@ -1172,7 +1173,8 @@ mod tests { processed: 0, total } - })) + })), + .. } } if total == block.gas_used); diff --git a/crates/stages/stages/src/stages/hashing_account.rs b/crates/stages/stages/src/stages/hashing_account.rs index 2410e8131fe..ddf26b41b1e 100644 --- a/crates/stages/stages/src/stages/hashing_account.rs +++ b/crates/stages/stages/src/stages/hashing_account.rs @@ -397,6 +397,7 @@ mod tests { }, .. })), + .. }, done: true, }) if block_number == previous_stage && diff --git a/crates/stages/stages/src/stages/headers.rs b/crates/stages/stages/src/stages/headers.rs index f9ca2a86f3a..6f719d0e542 100644 --- a/crates/stages/stages/src/stages/headers.rs +++ b/crates/stages/stages/src/stages/headers.rs @@ -594,7 +594,8 @@ mod tests { processed, total, } - })) + })), + .. }, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head @@ -666,7 +667,8 @@ mod tests { processed, total, } - })) + })), + .. 
}, done: true }) if block_number == tip.number && from == checkpoint && to == previous_stage && // -1 because we don't need to download the local head diff --git a/crates/stages/stages/src/stages/merkle.rs b/crates/stages/stages/src/stages/merkle.rs index 3271eeaa219..f8fa720faa5 100644 --- a/crates/stages/stages/src/stages/merkle.rs +++ b/crates/stages/stages/src/stages/merkle.rs @@ -402,7 +402,11 @@ where info!(target: "sync::stages::merkle::unwind", "Nothing to unwind"); } else { let (block_root, updates) = reth_trie_db::with_adapter!(provider, |A| { - DbStateRoot::<_, A>::incremental_root_with_updates(provider, range) + DbStateRoot::<_, A>::incremental_root_calculator(provider, range).and_then( + |calculator| { + calculator.with_walk_all_changed_branch_children(true).root_with_updates() + }, + ) }) .map_err(|e| StageError::Fatal(Box::new(e)))?; @@ -502,7 +506,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && @@ -542,7 +547,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && @@ -584,7 +590,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed, total - })) + })), + .. }, done: true }) if block_number == previous_stage && processed == total && diff --git a/crates/stages/stages/src/stages/sender_recovery.rs b/crates/stages/stages/src/stages/sender_recovery.rs index 1d44de77271..7487099d6bb 100644 --- a/crates/stages/stages/src/stages/sender_recovery.rs +++ b/crates/stages/stages/src/stages/sender_recovery.rs @@ -527,7 +527,8 @@ mod tests { stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { processed: 1, total: 1 - })) + })), + .. 
}, done: true }) if block_number == previous_stage ); diff --git a/crates/stages/stages/src/stages/tx_lookup.rs b/crates/stages/stages/src/stages/tx_lookup.rs index 6940403976d..9a5d509062a 100644 --- a/crates/stages/stages/src/stages/tx_lookup.rs +++ b/crates/stages/stages/src/stages/tx_lookup.rs @@ -337,12 +337,12 @@ mod tests { result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && + block_number, + stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { + processed, + total + })) + }, done: true }) if block_number == previous_stage && processed == total && total == runner.db.count_entries::().unwrap() as u64 ); @@ -383,12 +383,12 @@ mod tests { result, Ok(ExecOutput { checkpoint: StageCheckpoint { - block_number, - stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { - processed, - total - })) - }, done: true }) if block_number == previous_stage && processed == total && + block_number, + stage_checkpoint: Some(StageUnitCheckpoint::Entities(EntitiesCheckpoint { + processed, + total + })) + }, done: true }) if block_number == previous_stage && processed == total && total == runner.db.count_entries::().unwrap() as u64 ); diff --git a/crates/stages/types/src/checkpoints.rs b/crates/stages/types/src/checkpoints.rs index 6486dce31be..9aff8ac726a 100644 --- a/crates/stages/types/src/checkpoints.rs +++ b/crates/stages/types/src/checkpoints.rs @@ -379,6 +379,9 @@ pub struct StageCheckpoint { pub stage_checkpoint: Option, } +#[cfg(any(test, feature = "reth-codec"))] +reth_codecs::impl_compression_for_compact!(StageCheckpoint); + impl StageCheckpoint { /// Creates a new [`StageCheckpoint`] with only `block_number` set. pub fn new(block_number: BlockNumber) -> Self { @@ -431,13 +434,21 @@ impl StageCheckpoint { progress: entities, .. 
}) => Some(entities), - StageUnitCheckpoint::MerkleChangeSets(_) => None, + StageUnitCheckpoint::MerkleChangeSets(_) | StageUnitCheckpoint::Finish(_) => None, } } } -#[cfg(any(test, feature = "reth-codec"))] -reth_codecs::impl_compression_for_compact!(StageCheckpoint); +/// Saves the progress of the Finish stage. +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "test-utils"), derive(arbitrary::Arbitrary))] +#[cfg_attr(any(test, feature = "reth-codec"), derive(reth_codecs::Compact))] +#[cfg_attr(any(test, feature = "reth-codec"), reth_codecs::add_arbitrary_tests(compact))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct FinishCheckpoint { + /// The highest block with a partially persisted state and trie. + pub partial_state_trie: Option, +} // TODO(alexey): add a merkle checkpoint. Currently it's hard because [`MerkleCheckpoint`] // is not a Copy type. @@ -465,6 +476,8 @@ pub enum StageUnitCheckpoint { /// Note: This variant is only kept for backward compatibility with the Compact codec. /// The `MerkleChangeSets` stage has been removed. MerkleChangeSets(MerkleChangeSetsCheckpoint), + /// Saves the progress of the Finish stage. + Finish(FinishCheckpoint), } impl StageUnitCheckpoint { @@ -573,6 +586,15 @@ stage_unit_checkpoints!( index_history_stage_checkpoint, /// Sets the stage checkpoint to index history. with_index_history_stage_checkpoint + ), + ( + 6, + Finish, + FinishCheckpoint, + /// Returns the finish stage checkpoint, if any. + finish_stage_checkpoint, + /// Sets the stage checkpoint to finish. 
+ with_finish_stage_checkpoint ) ); @@ -664,4 +686,15 @@ mod tests { let (decoded, _) = MerkleCheckpoint::from_compact(&buf, encoded); assert_eq!(decoded, checkpoint); } + + #[test] + fn finish_checkpoint_roundtrip() { + let checkpoint = StageCheckpoint::new(42) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie: Some(21) }); + + let mut buf = Vec::new(); + let encoded = checkpoint.to_compact(&mut buf); + let (decoded, _) = StageCheckpoint::from_compact(&buf, encoded); + assert_eq!(decoded, checkpoint); + } } diff --git a/crates/stages/types/src/lib.rs b/crates/stages/types/src/lib.rs index 4e30ce27cd7..70c5de17fe5 100644 --- a/crates/stages/types/src/lib.rs +++ b/crates/stages/types/src/lib.rs @@ -18,7 +18,7 @@ pub use id::StageId; mod checkpoints; pub use checkpoints::{ AccountHashingCheckpoint, CheckpointBlockRange, EntitiesCheckpoint, ExecutionCheckpoint, - HeadersCheckpoint, IndexHistoryCheckpoint, MerkleCheckpoint, StageCheckpoint, + FinishCheckpoint, HeadersCheckpoint, IndexHistoryCheckpoint, MerkleCheckpoint, StageCheckpoint, StageUnitCheckpoint, StorageHashingCheckpoint, StorageRootMerkleCheckpoint, }; diff --git a/crates/storage/provider/src/lib.rs b/crates/storage/provider/src/lib.rs index 909a9c24b38..26b7c7ad91f 100644 --- a/crates/storage/provider/src/lib.rs +++ b/crates/storage/provider/src/lib.rs @@ -24,8 +24,8 @@ pub mod providers; pub use providers::{ DatabaseProvider, DatabaseProviderRO, DatabaseProviderRW, HistoricalStateProvider, HistoricalStateProviderRef, LatestStateProvider, LatestStateProviderRef, ProviderFactory, - PruneShardOutcome, PrunedIndices, SaveBlocksMode, StaticFileAccess, StaticFileProviderBuilder, - StaticFileWriteCtx, StaticFileWriter, + PruneShardOutcome, PrunedIndices, SaveBlocksMode, SaveBlocksPlan, SaveBlocksPlanStep, + StaticFileAccess, StaticFileProviderBuilder, StaticFileWriteCtx, StaticFileWriter, }; pub mod changeset_walker; diff --git a/crates/storage/provider/src/providers/blockchain_provider.rs 
b/crates/storage/provider/src/providers/blockchain_provider.rs index de0f6b85401..0788def9970 100644 --- a/crates/storage/provider/src/providers/blockchain_provider.rs +++ b/crates/storage/provider/src/providers/blockchain_provider.rs @@ -790,7 +790,8 @@ mod tests { create_test_provider_factory, create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB, }, - BlockWriter, CanonChainTracker, ProviderFactory, SaveBlocksMode, + BlockWriter, CanonChainTracker, ProviderFactory, SaveBlocksMode, SaveBlocksPlan, + SaveBlocksPlanStep, }; use alloy_eips::{BlockHashOrNumber, BlockNumHash, BlockNumberOrTag}; use alloy_primitives::{BlockNumber, TxNumber, B256}; @@ -1007,7 +1008,15 @@ mod tests { // Push to disk let provider_rw = hook_provider.database_provider_rw().unwrap(); - provider_rw.save_blocks(vec![lowest_memory_block], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &SaveBlocksPlan::new( + vec![lowest_memory_block], + vec![SaveBlocksPlanStep::new(0..1, Some(1..1), true)], + ), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); // Remove from memory diff --git a/crates/storage/provider/src/providers/consistent.rs b/crates/storage/provider/src/providers/consistent.rs index 9402a8734e6..f7aee11b0ef 100644 --- a/crates/storage/provider/src/providers/consistent.rs +++ b/crates/storage/provider/src/providers/consistent.rs @@ -18,7 +18,9 @@ use reth_chainspec::ChainInfo; use reth_db_api::models::{AccountBeforeTx, BlockNumberAddress, StoredBlockBodyIndices}; use reth_execution_types::ExecutionOutcome; use reth_node_types::{BlockTy, HeaderTy, ReceiptTy, TxTy}; -use reth_primitives_traits::{Account, BlockBody, RecoveredBlock, SealedHeader, StorageEntry}; +use reth_primitives_traits::{ + Account, BlockBody, NodePrimitives, RecoveredBlock, SealedHeader, StorageEntry, +}; use reth_prune_types::{PruneCheckpoint, PruneSegment}; use reth_stages_types::{StageCheckpoint, StageId}; use reth_static_file_types::StaticFileSegment; @@ -32,7 +34,7 @@ 
use std::{ ops::{Add, Bound, RangeBounds, RangeInclusive, Sub}, sync::Arc, }; -use tracing::trace; +use tracing::{debug, trace}; /// Type that interacts with a snapshot view of the blockchain (storage and in-memory) at time of /// instantiation, EXCEPT for pending, safe and finalized block which might change while holding @@ -115,12 +117,26 @@ impl ConsistentProvider { &'a self, block_hash: BlockHash, ) -> ProviderResult> { - trace!(target: "providers::blockchain", ?block_hash, "Getting history by block hash"); + debug!(target: "providers::blockchain", %block_hash, "Resolving borrowed historical state provider by block hash"); self.get_in_memory_or_storage_by_block( block_hash.into(), - |_| self.storage_provider.history_by_block_hash(block_hash), + |_| { + debug!(target: "providers::blockchain", %block_hash, "Borrowed historical state provider falling back to database"); + self.storage_provider.history_by_block_hash(block_hash) + }, |block_state| { + let anchor = block_state.anchor(); + debug!( + target: "providers::blockchain", + %block_hash, + block_number = block_state.number(), + block_hash = %block_state.hash(), + anchor_number = anchor.number, + anchor_hash = %anchor.hash, + in_memory_blocks = ?block_state_summaries(block_state), + "Borrowed historical state provider using in-memory overlay" + ); let state_provider = self.block_state_provider_ref(block_state)?; Ok(Box::new(state_provider)) }, @@ -245,9 +261,19 @@ impl ConsistentProvider { &self, state: &BlockState, ) -> ProviderResult> { - let anchor_hash = state.anchor().hash; + let anchor = state.anchor(); + let anchor_hash = anchor.hash; let latest_historical = self.history_by_block_hash_ref(anchor_hash)?; let in_memory = state.chain().map(|block_state| block_state.block()).collect(); + debug!( + target: "providers::blockchain", + block_number = state.number(), + block_hash = %state.hash(), + anchor_number = anchor.number, + anchor_hash = %anchor.hash, + in_memory_blocks = ?block_state_summaries(state), + 
"Creating borrowed memory overlay state provider from block state" + ); Ok(MemoryOverlayStateProviderRef::new(latest_historical, in_memory)) } @@ -448,22 +474,55 @@ impl ConsistentProvider { let block_number = self.block_number(block_hash)?.ok_or(ProviderError::BlockHashNotFound(block_hash))?; self.ensure_canonical_block(block_number)?; + debug!( + target: "providers::blockchain", + %block_hash, + block_number, + "Resolving owned state provider at block hash" + ); let Self { storage_provider, head_block, .. } = self; if let Some(Some(block_state)) = head_block.as_ref().map(|b| b.block_on_chain(block_hash.into())) { - let anchor_hash = block_state.anchor().hash; + let anchor = block_state.anchor(); + let anchor_hash = anchor.hash; let block_number = storage_provider .block_number(anchor_hash)? .ok_or(ProviderError::BlockHashNotFound(anchor_hash))?; + debug!( + target: "providers::blockchain", + requested_block_hash = %block_hash, + requested_block_number = block_state.number(), + anchor_number = anchor.number, + anchor_hash = %anchor.hash, + historical_block_number = block_number, + in_memory_blocks = ?block_state_summaries(block_state), + "Owned state provider using in-memory overlay" + ); let latest_historical = storage_provider.try_into_history_at_block(block_number)?; return Ok(Box::new(block_state.state_provider(latest_historical))); } + debug!( + target: "providers::blockchain", + %block_hash, + block_number, + "Owned state provider falling back to database historical provider" + ); storage_provider.try_into_history_at_block(block_number) } } +fn block_state_summaries(state: &BlockState) -> Vec { + state + .chain() + .map(|block_state| { + let block = block_state.block_ref().recovered_block(); + format!("#{} hash={} parent={}", block.number(), block.hash(), block.parent_hash()) + }) + .collect() +} + impl ConsistentProvider { /// Ensures that the given block number is canonical (synced) /// diff --git a/crates/storage/provider/src/providers/database/mod.rs 
b/crates/storage/provider/src/providers/database/mod.rs index 0cc2ced3aa6..92e62051743 100644 --- a/crates/storage/provider/src/providers/database/mod.rs +++ b/crates/storage/provider/src/providers/database/mod.rs @@ -51,6 +51,9 @@ pub use provider::{ CommitOrder, DatabaseProvider, DatabaseProviderRO, DatabaseProviderRW, SaveBlocksMode, }; +mod save_blocks; +pub use save_blocks::{SaveBlocksPlan, SaveBlocksPlanStep}; + use super::ProviderNodeTypes; use reth_trie::KeccakKeyHasher; diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index b95ce3cd221..c8e4c22f144 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1,3 +1,4 @@ +use super::SaveBlocksPlan; use crate::{ changesets_utils::StorageRevertsIter, providers::{ @@ -57,7 +58,7 @@ use reth_primitives_traits::{ use reth_prune_types::{ PruneCheckpoint, PruneMode, PruneModes, PruneSegment, MINIMUM_UNWIND_SAFE_DISTANCE, }; -use reth_stages_types::{StageCheckpoint, StageId}; +use reth_stages_types::{FinishCheckpoint, StageCheckpoint, StageId}; use reth_static_file_types::StaticFileSegment; use reth_storage_api::{ BlockBodyIndicesProvider, BlockBodyReader, MetadataProvider, MetadataWriter, @@ -67,7 +68,7 @@ use reth_storage_api::{ use reth_storage_errors::provider::{ProviderResult, StaticFileWriterError}; use reth_trie::{ updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}, - HashedPostStateSorted, + HashedPostStateSorted, Nibbles, }; use reth_trie_db::{ChangesetCache, DatabaseStorageTrieCursor, TrieTableAdapter}; use revm_database::states::{ @@ -101,6 +102,60 @@ impl CommitOrder { } } +fn format_trie_node_path(path: &Nibbles) -> String { + let mut formatted = String::from("0x"); + for nibble in path.iter() { + formatted.push(char::from_digit(nibble as u32, 16).expect("nibbles are always hex")); + } + formatted +} + +fn 
format_branch_node_compact(node: &reth_trie::BranchNodeCompact) -> String { + format!( + "state_mask={:?} tree_mask={:?} hash_mask={:?} hashes={:?} root_hash={:?}", + node.state_mask, node.tree_mask, node.hash_mask, node.hashes, node.root_hash + ) +} + +fn format_trie_node_update(node: Option<&reth_trie::BranchNodeCompact>) -> String { + match node { + Some(node) => format!("upsert {}", format_branch_node_compact(node)), + None => "remove".to_string(), + } +} + +fn collect_all_trie_nodes(trie_updates: &TrieUpdatesSorted) -> Vec { + let mut nodes = trie_updates + .account_nodes_ref() + .iter() + .map(|(path, node)| { + format!( + "account {} {}", + format_trie_node_path(path), + format_trie_node_update(node.as_ref()) + ) + }) + .collect::>(); + + for (hashed_address, storage_trie) in + trie_updates.storage_tries_ref().iter().sorted_by_key(|(hashed_address, _)| *hashed_address) + { + if storage_trie.is_deleted() { + nodes.push(format!("storage {hashed_address:#x} delete trie")); + } + + nodes.extend(storage_trie.storage_nodes_ref().iter().map(|(path, node)| { + format!( + "storage {hashed_address:#x}@{} {}", + format_trie_node_path(path), + format_trie_node_update(node.as_ref()) + ) + })); + } + + nodes +} + /// A [`DatabaseProvider`] that holds a read-only database transaction. 
pub type DatabaseProviderRO = DatabaseProvider<::TX, N>; @@ -567,38 +622,114 @@ impl DatabaseProvider>, + plan: &SaveBlocksPlan, save_mode: SaveBlocksMode, ) -> ProviderResult<()> { + let blocks = &plan.blocks; if blocks.is_empty() { debug!(target: "providers::db", "Attempted to write empty block range"); return Ok(()) } + let persist_rest_range = plan.persist_rest_range(); + let persist_rest_blocks = + persist_rest_range.as_ref().map(|range| &blocks[range.clone()]).unwrap_or(&[]); + let total_start = Instant::now(); let block_count = blocks.len() as u64; let first_number = blocks.first().unwrap().recovered_block().number(); - let last_block_number = blocks.last().unwrap().recovered_block().number(); + let last_block_number = plan.last_block().expect("checked non-empty block range").number; debug!(target: "providers::db", block_count, "Writing blocks and execution data to storage"); + if tracing::enabled!(target: "providers::db", tracing::Level::DEBUG) { + let step_plan = plan + .steps + .iter() + .enumerate() + .map(|(step_index, step)| { + let step_blocks = blocks[step.block_range.clone()] + .iter() + .map(|block| block.recovered_block().num_hash()) + .collect::>(); + let masking_blocks = step + .state_trie_masking_range + .as_ref() + .map(|range| { + blocks[range.clone()] + .iter() + .map(|block| block.recovered_block().num_hash()) + .collect::>() + }) + .unwrap_or_default(); + + ( + step_index, + step.block_range.clone(), + step.persist_rest, + step.state_trie_masking_range.clone(), + step_blocks, + masking_blocks, + ) + }) + .collect::>(); - // Compute tx_nums upfront (both threads need these) - let first_tx_num = self - .tx - .cursor_read::()? - .last()? 
- .map(|(n, _)| n + 1) - .unwrap_or_default(); + debug!(target: "providers::db", ?step_plan, "save_blocks step plan"); - let tx_nums: Vec = { - let mut nums = Vec::with_capacity(blocks.len()); + if save_mode.with_state() { + let per_block_trie_updates = blocks + .iter() + .map(|block| { + ( + block.recovered_block().number(), + collect_all_trie_nodes(block.trie_data().trie_updates.as_ref()), + ) + }) + .collect::>(); + + debug!( + target: "providers::db", + range = ?first_number..=last_block_number, + per_block_trie_updates = ?per_block_trie_updates, + "save_blocks per-block trie updates" + ); + } + } + + let tx_nums: Vec = if persist_rest_blocks.is_empty() { + Vec::new() + } else { + let first_tx_num = self + .tx + .cursor_read::()? + .last()? + .map(|(n, _)| n + 1) + .unwrap_or_default(); + + let mut nums = Vec::with_capacity(persist_rest_blocks.len()); let mut current = first_tx_num; - for block in &blocks { + for block in persist_rest_blocks { nums.push(current); current += block.recovered_block().body().transaction_count() as u64; } @@ -608,12 +739,32 @@ impl DatabaseProvider DatabaseProvider DatabaseProvider>(); + let masking_trie_data = blocks[masking_range.clone()] + .iter() + .map(|block| block.trie_data()) + .collect::>(); + let masking_trie_updates = masking_trie_data + .iter() + .map(|data| data.trie_updates.as_ref()) + .collect::>(); + let merged_masking_trie = TrieUpdatesSorted::merge_slice(&masking_trie_updates); - // Write all hashed state and trie updates in single batches. - // This reduces cursor open/close overhead from N calls to 1. - if save_mode.with_state() { - // Blocks are oldest-to-newest, merge_batch expects newest-to-oldest. 
let start = Instant::now(); - let merged_hashed_state = HashedPostStateSorted::merge_batch( - blocks.iter().rev().map(|b| b.trie_data().hashed_state), + let merged_hashed_state = HashedPostStateSorted::disjointed_merge_batch( + step_trie_data.iter().map(|data| data.hashed_state.as_ref()).collect(), + masking_trie_data.iter().map(|data| data.hashed_state.as_ref()).collect(), ); if !merged_hashed_state.is_empty() { self.write_hashed_state(&merged_hashed_state)?; @@ -729,24 +925,60 @@ impl DatabaseProvider DatabaseProvider(()) })?; - // Collect results from spawned tasks - timings.sf = sf_result.ok_or(StaticFileWriterError::ThreadPanic("static file"))??; + // Collect results from spawned tasks. + if has_persist_rest_blocks { + timings.sf = sf_result.ok_or(StaticFileWriterError::ThreadPanic("static file"))??; - if rocksdb_enabled { - timings.rocksdb = rocksdb_result.ok_or_else(|| { - ProviderError::Database(reth_db_api::DatabaseError::Other( - "RocksDB thread panicked".into(), - )) - })??; + if rocksdb_enabled { + timings.rocksdb = rocksdb_result.ok_or_else(|| { + ProviderError::Database(reth_db_api::DatabaseError::Other( + "RocksDB thread panicked".into(), + )) + })??; + } } timings.total = total_start.elapsed(); self.metrics.record_save_blocks(&timings); - debug!(target: "providers::db", range = ?first_number..=last_block_number, "Appended block data"); + debug!( + target: "providers::db", + range = ?first_number..=last_block_number, + "Appended block data" + ); Ok(()) } @@ -3463,8 +3701,9 @@ impl BlockExecutionWriter // that is why it is deleted afterwards. self.remove_blocks_above(block)?; - // Update pipeline progress - self.update_pipeline_stages(block, true)?; + // Keep the finish checkpoint's trie frontier aligned with the highest trie data that is + // still durably materialized after truncation. 
+ self.update_finish_checkpoint_after_remove(block)?; Ok(Chain::new(blocks, execution_state, BTreeMap::new())) } @@ -3479,8 +3718,35 @@ impl BlockExecutionWriter // that is why it is deleted afterwards. self.remove_blocks_above(block)?; - // Update pipeline progress + // Keep the finish checkpoint's trie frontier aligned with the highest trie data that is + // still durably materialized after truncation. + self.update_finish_checkpoint_after_remove(block)?; + + Ok(()) + } +} + +impl DatabaseProvider { + fn trie_persisted_tip_block_number(&self) -> ProviderResult> { + Ok(self.get_stage_checkpoint(StageId::Finish)?.map(|checkpoint| { + checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number) + })) + } + + fn update_finish_checkpoint_after_remove(&self, block: BlockNumber) -> ProviderResult<()> { + let partial_state_trie = self + .trie_persisted_tip_block_number()? + .map(|trie_persisted_tip| trie_persisted_tip.min(block)); + self.update_pipeline_stages(block, true)?; + self.save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(block) + .with_finish_stage_checkpoint(FinishCheckpoint { partial_state_trie }), + )?; Ok(()) } @@ -3518,7 +3784,13 @@ impl BlockWriter ); // Delegate to save_blocks with BlocksOnly mode (skips receipts/state/trie) - self.save_blocks(vec![executed_block], SaveBlocksMode::BlocksOnly)?; + self.save_blocks( + &SaveBlocksPlan::new( + vec![executed_block], + vec![super::SaveBlocksPlanStep::new(0..1, None, true)], + ), + SaveBlocksMode::BlocksOnly, + )?; // Return the body indices self.block_body_indices(block_number)? 
@@ -3932,6 +4204,7 @@ impl StoragePath for DatabaseProvider { mod tests { use super::*; use crate::{ + providers::database::SaveBlocksPlanStep, test_utils::{blocks::BlockchainTestData, create_test_provider_factory}, BlockWriter, }; @@ -3940,9 +4213,9 @@ mod tests { map::{AddressMap, B256Map}, U256, }; - use reth_chain_state::ExecutedBlock; + use reth_chain_state::{test_utils::TestBlockBuilder, ComputedTrieData, ExecutedBlock}; use reth_db_api::models::StorageSettings; - use reth_ethereum_primitives::Receipt; + use reth_ethereum_primitives::{EthPrimitives, Receipt}; use reth_execution_types::{AccountRevertInit, BlockExecutionOutput, BlockExecutionResult}; use reth_primitives_traits::SealedBlock; use reth_storage_api::MetadataWriter; @@ -3952,7 +4225,32 @@ mod tests { }; use revm_database::BundleState; use revm_state::AccountInfo; - use std::{sync::mpsc, time::Duration}; + use std::{ + sync::{mpsc, Arc}, + time::Duration, + }; + + fn full_save_plan( + blocks: impl IntoIterator>, + ) -> SaveBlocksPlan { + let blocks = blocks.into_iter().collect::>(); + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + } + + fn partial_save_plan( + blocks: impl IntoIterator>, + steps: Vec, + ) -> SaveBlocksPlan { + SaveBlocksPlan::new(blocks.into_iter().collect(), steps) + } #[test] fn test_receipts_by_block_range_empty_range() { @@ -4441,6 +4739,394 @@ mod tests { provider_rw.commit().unwrap(); } + #[test] + fn test_save_blocks_only_masks_trie_with_deferred_blocks() { + use reth_trie::{ + updates::{StorageTrieUpdatesSorted, TrieUpdatesSorted}, + BranchNodeCompact, HashedPostStateSorted, HashedStorageSorted, + }; + + fn empty_execution_output() -> BlockExecutionOutput { + BlockExecutionOutput { + result: BlockExecutionResult { + receipts: vec![], + requests: Default::default(), + gas_used: 0, + blob_gas_used: 0, + }, + state: Default::default(), + } + } + + 
fn branch(mask: u16) -> BranchNodeCompact { + BranchNodeCompact::new(mask, 0, 0, vec![], None) + } + + let factory = create_test_provider_factory(); + factory.set_storage_settings_cache(StorageSettings::v1()); + + let genesis = SealedBlock::::from_sealed_parts( + SealedHeader::new( + Header { number: 0, difficulty: U256::from(1), ..Default::default() }, + B256::ZERO, + ), + Default::default(), + ); + let genesis_executed = ExecutedBlock::new( + Arc::new(genesis.try_recover().unwrap()), + Arc::new(empty_execution_output()), + ComputedTrieData::default(), + ); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis_executed).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let kept_account = B256::with_last_byte(0x11); + let deferred_masked_account = B256::with_last_byte(0x12); + let in_memory_overlap_account = B256::with_last_byte(0x13); + let in_memory_only_account = B256::with_last_byte(0x14); + let kept_storage = B256::with_last_byte(0x21); + let deferred_masked_storage = B256::with_last_byte(0x22); + let in_memory_overlap_storage = B256::with_last_byte(0x23); + let in_memory_only_storage = B256::with_last_byte(0x24); + let kept_slot = B256::with_last_byte(0x31); + let deferred_masked_slot = B256::with_last_byte(0x32); + let in_memory_overlap_slot = B256::with_last_byte(0x33); + let in_memory_only_slot = B256::with_last_byte(0x34); + let kept_account_node = Nibbles::from_nibbles([0x1, 0x2]); + let deferred_masked_account_node = Nibbles::from_nibbles([0x1, 0x3]); + let in_memory_overlap_account_node = Nibbles::from_nibbles([0x1, 0x4]); + let in_memory_only_account_node = Nibbles::from_nibbles([0x1, 0x5]); + let kept_storage_node = Nibbles::from_nibbles([0x2, 0x1]); + let deferred_masked_storage_node = Nibbles::from_nibbles([0x2, 0x2]); + let in_memory_overlap_storage_node = Nibbles::from_nibbles([0x2, 0x3]); + let in_memory_only_storage_node = 
Nibbles::from_nibbles([0x2, 0x4]); + let blocks: Vec<_> = + TestBlockBuilder::eth().with_state().get_executed_blocks(1..4).collect(); + let full_persist_base = &blocks[0]; + let deferred_trie_base = &blocks[1]; + let in_memory_only_base = &blocks[2]; + + let full_persist_hashed_state = HashedPostStateSorted::new( + vec![ + (kept_account, Some(Account::default())), + (deferred_masked_account, Some(Account { nonce: 1, ..Default::default() })), + (in_memory_overlap_account, Some(Account { nonce: 2, ..Default::default() })), + ], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(kept_slot, U256::from(1))], + }, + ), + ( + deferred_masked_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(deferred_masked_slot, U256::from(2))], + }, + ), + ( + in_memory_overlap_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_overlap_slot, U256::from(3))], + }, + ), + ]), + ); + let full_persist_trie_updates = TrieUpdatesSorted::new( + vec![ + (kept_account_node, Some(branch(0b0000_1111_0000_1111))), + (deferred_masked_account_node, Some(branch(0b1111_0000_1111_0000))), + (in_memory_overlap_account_node, Some(branch(0b1010_1010_1010_1010))), + ], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(kept_storage_node, Some(branch(0b1010)))], + }, + ), + ( + deferred_masked_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(deferred_masked_storage_node, Some(branch(0b0101)))], + }, + ), + ( + in_memory_overlap_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_overlap_storage_node, Some(branch(0b0110)))], + }, + ), + ]), + ); + + let full_persist_block = ExecutedBlock::new( + Arc::clone(&full_persist_base.recovered_block), + Arc::clone(&full_persist_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(full_persist_hashed_state), + 
trie_updates: Arc::new(full_persist_trie_updates), + ..Default::default() + }, + ); + + let deferred_trie_hashed_state = HashedPostStateSorted::new( + vec![(deferred_masked_account, Some(Account { nonce: 3, ..Default::default() }))], + B256Map::from_iter([( + deferred_masked_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(deferred_masked_slot, U256::from(4))], + }, + )]), + ); + let deferred_trie_updates = TrieUpdatesSorted::new( + vec![(deferred_masked_account_node, Some(branch(0b0011_0011)))], + B256Map::from_iter([( + deferred_masked_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(deferred_masked_storage_node, Some(branch(0b1100)))], + }, + )]), + ); + let deferred_trie_block = ExecutedBlock::new( + Arc::clone(&deferred_trie_base.recovered_block), + Arc::clone(&deferred_trie_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(deferred_trie_hashed_state), + trie_updates: Arc::new(deferred_trie_updates), + ..Default::default() + }, + ); + + let in_memory_only_hashed_state = HashedPostStateSorted::new( + vec![ + (in_memory_overlap_account, Some(Account { nonce: 4, ..Default::default() })), + (in_memory_only_account, Some(Account { nonce: 5, ..Default::default() })), + ], + B256Map::from_iter([ + ( + in_memory_overlap_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_overlap_slot, U256::from(5))], + }, + ), + ( + in_memory_only_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(in_memory_only_slot, U256::from(6))], + }, + ), + ]), + ); + let in_memory_only_trie_updates = TrieUpdatesSorted::new( + vec![ + (in_memory_overlap_account_node, Some(branch(0b0101_0101))), + (in_memory_only_account_node, Some(branch(0b1111_0000))), + ], + B256Map::from_iter([ + ( + in_memory_overlap_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_overlap_storage_node, Some(branch(0b1001)))], + }, + ), + ( + 
in_memory_only_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(in_memory_only_storage_node, Some(branch(0b1111)))], + }, + ), + ]), + ); + let in_memory_only_block = ExecutedBlock::new( + Arc::clone(&in_memory_only_base.recovered_block), + Arc::clone(&in_memory_only_base.execution_output), + ComputedTrieData { + hashed_state: Arc::new(in_memory_only_hashed_state), + trie_updates: Arc::new(in_memory_only_trie_updates), + ..Default::default() + }, + ); + + let provider_rw = factory.provider_rw().unwrap(); + let blocks = vec![full_persist_block, deferred_trie_block, in_memory_only_block]; + provider_rw + .save_blocks( + &partial_save_plan( + blocks, + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..2), true), + SaveBlocksPlanStep::new(1..2, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let tx = provider.tx_ref(); + let finish_checkpoint = provider.get_stage_checkpoint(StageId::Finish).unwrap().unwrap(); + assert_eq!(finish_checkpoint.block_number, 2); + assert_eq!( + finish_checkpoint.finish_stage_checkpoint().unwrap().partial_state_trie, + Some(1) + ); + assert!(provider.block_hash(2).unwrap().is_some()); + assert!(provider.block_hash(3).unwrap().is_none()); + + let mut hashed_accounts = tx.cursor_read::().unwrap(); + assert!(hashed_accounts.seek_exact(kept_account).unwrap().is_some()); + assert!(hashed_accounts.seek_exact(deferred_masked_account).unwrap().is_none()); + assert!(hashed_accounts.seek_exact(in_memory_overlap_account).unwrap().is_some()); + assert!(hashed_accounts.seek_exact(in_memory_only_account).unwrap().is_none()); + + let mut hashed_storages = tx.cursor_dup_read::().unwrap(); + assert!(hashed_storages.seek_by_key_subkey(kept_storage, kept_slot).unwrap().is_some()); + assert!(hashed_storages + .walk_dup(Some(deferred_masked_storage), None) + .unwrap() + .next() + .transpose() + .unwrap() + .is_none()); + 
assert!(hashed_storages + .seek_by_key_subkey(in_memory_overlap_storage, in_memory_overlap_slot) + .unwrap() + .is_some()); + assert!(hashed_storages + .walk_dup(Some(in_memory_only_storage), None) + .unwrap() + .next() + .transpose() + .unwrap() + .is_none()); + + let mut account_trie = tx.cursor_read::().unwrap(); + assert!(account_trie.seek_exact(StoredNibbles(kept_account_node)).unwrap().is_some()); + assert!(account_trie + .seek_exact(StoredNibbles(deferred_masked_account_node)) + .unwrap() + .is_none()); + assert!(account_trie + .seek_exact(StoredNibbles(in_memory_overlap_account_node)) + .unwrap() + .is_some()); + assert!(account_trie + .seek_exact(StoredNibbles(in_memory_only_account_node)) + .unwrap() + .is_none()); + + let mut storage_trie = tx.cursor_dup_read::().unwrap(); + let kept_entries: Vec<_> = storage_trie + .walk_dup(Some(kept_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert_eq!(kept_entries.len(), 1); + assert_eq!(kept_entries[0].1.nibbles.0, kept_storage_node); + + let deferred_masked_entries: Vec<_> = storage_trie + .walk_dup(Some(deferred_masked_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert!(deferred_masked_entries.is_empty()); + + let in_memory_overlap_entries: Vec<_> = storage_trie + .walk_dup(Some(in_memory_overlap_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert_eq!(in_memory_overlap_entries.len(), 1); + assert_eq!(in_memory_overlap_entries[0].1.nibbles.0, in_memory_overlap_storage_node); + + let in_memory_entries: Vec<_> = storage_trie + .walk_dup(Some(in_memory_only_storage), None) + .unwrap() + .collect::, _>>() + .unwrap(); + assert!(in_memory_entries.is_empty()); + } + + #[test] + fn test_save_blocks_partial_cycles_do_not_duplicate_static_file_writes() { + let factory = create_test_provider_factory(); + let mut test_block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = test_block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks: Vec<_> 
= test_block_builder.get_executed_blocks(1..5).collect(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks(&full_save_plan(blocks[..2].to_vec()), SaveBlocksMode::Full) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks, + vec![ + SaveBlocksPlanStep::new(0..2, Some(2..4), false), + SaveBlocksPlanStep::new(2..4, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let finish_checkpoint = provider.get_stage_checkpoint(StageId::Finish).unwrap().unwrap(); + assert_eq!(finish_checkpoint.block_number, 4); + assert_eq!( + finish_checkpoint.finish_stage_checkpoint().unwrap().partial_state_trie, + Some(2) + ); + + let static_files = factory.static_file_provider(); + assert_eq!(static_files.get_highest_static_file_block(StaticFileSegment::Headers), Some(4)); + assert_eq!( + static_files.get_highest_static_file_block(StaticFileSegment::Transactions), + Some(4) + ); + assert_eq!( + static_files.get_highest_static_file_block(StaticFileSegment::Receipts), + Some(4) + ); + } + #[test] fn test_prunable_receipts_logic() { let insert_blocks = @@ -5026,7 +5712,12 @@ mod tests { ComputedTrieData::default(), ); let provider_rw = factory.provider_rw().unwrap(); - provider_rw.save_blocks(vec![genesis_executed], SaveBlocksMode::Full).unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis_executed).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); provider_rw.commit().unwrap(); let mut blocks: Vec = Vec::new(); @@ -5098,7 +5789,7 @@ mod tests { } let provider_rw = factory.provider_rw().unwrap(); - 
provider_rw.save_blocks(blocks, SaveBlocksMode::Full).unwrap(); + provider_rw.save_blocks(&full_save_plan(blocks), SaveBlocksMode::Full).unwrap(); provider_rw.commit().unwrap(); let provider = factory.provider().unwrap(); diff --git a/crates/storage/provider/src/providers/database/save_blocks.rs b/crates/storage/provider/src/providers/database/save_blocks.rs new file mode 100644 index 00000000000..185136cedff --- /dev/null +++ b/crates/storage/provider/src/providers/database/save_blocks.rs @@ -0,0 +1,90 @@ +use alloy_eips::BlockNumHash; +use reth_chain_state::ExecutedBlock; +use reth_ethereum_primitives::EthPrimitives; +use reth_primitives_traits::NodePrimitives; +use std::ops::Range; + +/// A single persistence step over a contiguous region of [`SaveBlocksPlan::blocks`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SaveBlocksPlanStep { + /// Range of [`SaveBlocksPlan::blocks`] covered by this step. + pub block_range: Range, + /// Optional range of blocks whose state/trie updates should be used to mask this step's + /// durable state/trie writes. + /// + /// `Some(empty_range)` means persist state/trie without any masking. `None` means skip + /// durable state/trie persistence for this step. + pub state_trie_masking_range: Option>, + /// Whether to persist non-state/trie data for this step. + pub persist_rest: bool, +} + +impl SaveBlocksPlanStep { + /// Creates a new persistence step. + pub const fn new( + block_range: Range, + state_trie_masking_range: Option>, + persist_rest: bool, + ) -> Self { + Self { block_range, state_trie_masking_range, persist_rest } + } + + /// Returns `true` if this step persists state/trie data. + pub const fn persists_state_trie(&self) -> bool { + self.state_trie_masking_range.is_some() + } +} + +/// Plan for a single `save_blocks` persistence cycle. +#[derive(Debug, Clone)] +pub struct SaveBlocksPlan { + /// Canonical blocks covered by this plan. + pub blocks: Vec>, + /// Ordered persistence steps over [`Self::blocks`]. 
+ pub steps: Vec, +} + +impl SaveBlocksPlan { + /// Creates a new save plan. + pub const fn new(blocks: Vec>, steps: Vec) -> Self { + Self { blocks, steps } + } + + /// Returns `true` if the plan contains no blocks to persist. + pub fn is_empty(&self) -> bool { + self.last_block().is_none() + } + + /// Returns the highest block covered by this plan. + pub fn last_block(&self) -> Option { + let last_index = + self.steps.iter().rev().find_map(|step| step.block_range.end.checked_sub(1))?; + self.blocks.get(last_index).map(|block| block.recovered_block().num_hash()) + } + + /// Returns the highest block whose state/trie data is durably persisted by this plan. + pub fn last_state_trie_block(&self) -> Option { + let last_index = self + .steps + .iter() + .rev() + .find(|step| step.persists_state_trie())? + .block_range + .end + .checked_sub(1)?; + self.blocks.get(last_index).map(|block| block.recovered_block().num_hash()) + } + + /// Returns the contiguous range of blocks whose non-state/trie outputs are persisted. 
+ pub fn persist_rest_range(&self) -> Option> { + let mut ranges = + self.steps.iter().filter(|step| step.persist_rest).map(|step| &step.block_range); + let first = ranges.next()?.clone(); + let merged = ranges.fold(first, |mut merged, range| { + debug_assert_eq!(merged.end, range.start, "persist_rest steps must be contiguous"); + merged.end = range.end; + merged + }); + Some(merged) + } +} diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index 7999c8795da..33792c4a229 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -13,6 +13,7 @@ use reth_db_api::{ BlockNumberList, }; use reth_primitives_traits::{Account, Bytecode, NodePrimitives}; +use reth_stages_types::StageId; use reth_storage_api::{ BlockNumReader, BytecodeReader, DBProvider, NodePrimitivesProvider, PruneCheckpointReader, StageCheckpointReader, StateProofProvider, StorageChangeSetReader, StorageRootProvider, @@ -34,6 +35,7 @@ use reth_trie_db::{ }; use std::{fmt::Debug, marker::PhantomData, sync::Arc}; +use tracing::debug; type DbStateRoot<'a, TX, A> = StateRoot< reth_trie_db::DatabaseTrieCursorFactory<&'a TX, A>, @@ -308,12 +310,79 @@ where .block_hash(target_block)? 
.ok_or_else(|| ProviderError::HeaderNotFound(target_block.into()))?; + match self.provider.get_stage_checkpoint(StageId::Finish) { + Ok(Some(checkpoint)) => { + let finish_tip_number = checkpoint.block_number; + let partial_state_trie_number = checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(finish_tip_number); + let finish_tip_hash = self.provider.block_hash(finish_tip_number)?; + let partial_state_trie_hash = + self.provider.block_hash(partial_state_trie_number)?; + debug!( + target: "providers::historical_sp", + historical_block_number = self.block_number, + target_block, + %anchor_hash, + finish_tip_number, + ?finish_tip_hash, + partial_state_trie_number, + ?partial_state_trie_hash, + "Historical state provider overlay frontiers" + ); + } + Ok(None) => { + debug!( + target: "providers::historical_sp", + historical_block_number = self.block_number, + target_block, + %anchor_hash, + "Historical state provider overlay without finish checkpoint" + ); + } + Err(err) => { + debug!( + target: "providers::historical_sp", + historical_block_number = self.block_number, + target_block, + %anchor_hash, + %err, + "Historical state provider overlay could not load finish checkpoint" + ); + } + } + let TrieInputSorted { nodes, state, prefix_sets } = input; + let input_trie_updates = nodes.total_len(); + let input_hashed_state = state.total_len(); + debug!( + target: "providers::historical_sp", + historical_block_number = self.block_number, + target_block, + %anchor_hash, + input_trie_updates, + input_hashed_state, + prefix_account_updates = prefix_sets.account_prefix_set.len(), + prefix_storage_tries = prefix_sets.storage_prefix_sets.len(), + prefix_destroyed_accounts = prefix_sets.destroyed_accounts.len(), + "Building historical state provider overlay" + ); let overlay_builder = OverlayBuilder::::new(anchor_hash, self.changeset_cache.clone()) .with_overlay_source(Some(OverlaySource::Immediate { trie: nodes, state })); let 
Overlay { trie_updates, hashed_post_state } = overlay_builder.build_overlay(self.provider)?; + debug!( + target: "providers::historical_sp", + historical_block_number = self.block_number, + target_block, + %anchor_hash, + output_trie_updates = trie_updates.total_len(), + output_hashed_state = hashed_post_state.total_len(), + "Built historical state provider overlay" + ); + Ok(TrieInputSorted::new(trie_updates, hashed_post_state, prefix_sets)) } diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 35b860fb110..328035b240b 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -61,6 +61,12 @@ pub(super) struct Overlay { pub(super) hashed_post_state: Arc, } +#[derive(Debug)] +struct OverlayRevertPlan { + revert_blocks: Option>, + overlay_anchor_hash: BlockHash, +} + /// Source of overlay data for [`OverlayStateProviderFactory`]. /// /// Either provides immediate pre-computed overlay data, or a lazy overlay that computes @@ -112,6 +118,14 @@ impl OverlayBuilder { if let Some(OverlaySource::Lazy(lazy_overlay)) = source.as_ref() { self.assert_lazy_overlay_anchor(lazy_overlay); } + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + source = overlay_source_kind(source.as_ref()), + source_anchor = ?source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?source.as_ref().and_then(overlay_source_blocks), + "Configuring overlay source" + ); self.overlay_source = source; self } @@ -133,6 +147,13 @@ impl OverlayBuilder { if let Some(lazy_overlay) = lazy_overlay.as_ref() { self.assert_lazy_overlay_anchor(lazy_overlay); } + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + lazy_anchor = ?lazy_overlay.as_ref().and_then(LazyOverlay::anchor_hash), + lazy_blocks = ?lazy_overlay.as_ref().map(LazyOverlay::block_summaries), + "Configuring lazy overlay" + ); 
self.overlay_source = lazy_overlay.map(OverlaySource::Lazy); self } @@ -143,10 +164,22 @@ impl OverlayBuilder { hashed_state_overlay: Option>, ) -> Self { if let Some(state) = hashed_state_overlay { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + hashed_state_updates = state.total_len(), + "Configuring immediate hashed-state overlay" + ); self.overlay_source = Some(OverlaySource::Immediate { trie: Arc::new(TrieUpdatesSorted::default()), state, }); + } else { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + "Clearing hashed-state overlay" + ); } self } @@ -156,6 +189,14 @@ impl OverlayBuilder { /// If no overlay exists, creates a new immediate overlay with the given state. /// If a lazy overlay exists, it is resolved first then extended. pub fn with_extended_hashed_state_overlay(mut self, other: HashedPostStateSorted) -> Self { + let other_len = other.total_len(); + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + existing_source = overlay_source_kind(self.overlay_source.as_ref()), + added_hashed_state_updates = other_len, + "Extending hashed-state overlay" + ); match &mut self.overlay_source { Some(OverlaySource::Immediate { state, .. 
}) => { Arc::make_mut(state).extend_ref_and_sort(&other); @@ -184,22 +225,35 @@ impl OverlayBuilder { &self, anchor_hash: BlockHash, ) -> ProviderResult<(Arc, Arc)> { - match &self.overlay_source { - Some(OverlaySource::Lazy(lazy_overlay)) => Ok(lazy_overlay.as_overlay(anchor_hash)), + let result = match &self.overlay_source { + Some(OverlaySource::Lazy(lazy_overlay)) => lazy_overlay.as_overlay(anchor_hash), Some(OverlaySource::Immediate { trie, state }) => { if anchor_hash != self.anchor_hash { return Err(ProviderError::other(std::io::Error::other(format!( "anchor_hash {anchor_hash} doesn't match OverlayBuilder's configured anchor ({})", self.anchor_hash - )))) + )))); } - Ok((Arc::clone(trie), Arc::clone(state))) + (Arc::clone(trie), Arc::clone(state)) } - None => Ok(( - Arc::new(TrieUpdatesSorted::default()), - Arc::new(HashedPostStateSorted::default()), - )), - } + None => { + (Arc::new(TrieUpdatesSorted::default()), Arc::new(HashedPostStateSorted::default())) + } + }; + + debug!( + target: "providers::state::overlay", + requested_anchor_hash = ?anchor_hash, + builder_anchor_hash = ?self.anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_blocks), + resolved_trie_updates = result.0.total_len(), + resolved_hashed_state = result.1.total_len(), + "Resolved overlay source" + ); + + Ok(result) } /// Returns the block number for [`Self`]'s `anchor_hash` field. @@ -212,54 +266,123 @@ impl OverlayBuilder { .ok_or(ProviderError::BlockHashNotFound(self.anchor_hash)) } - /// Returns the block which is at the tip of the DB, i.e. the block which the state tables of - /// the DB are currently synced to. - fn get_db_tip_block(&self, provider: &Provider) -> ProviderResult + /// Returns the highest blocks whose state/trie data and non-state/trie data are durably + /// available in the database. 
+ fn get_db_tip_blocks( + &self, + provider: &Provider, + ) -> ProviderResult<(BlockNumHash, BlockNumHash)> where Provider: StageCheckpointReader + BlockNumReader, { - let block_number = provider - .get_stage_checkpoint(StageId::Finish)? - .as_ref() - .map(|chk| chk.block_number) - .ok_or_else(|| ProviderError::InsufficientChangesets { - requested: 0, - available: 0..=0, - })?; - let hash = provider + let checkpoint = provider.get_stage_checkpoint(StageId::Finish)?.ok_or_else(|| { + ProviderError::InsufficientChangesets { requested: 0, available: 0..=0 } + })?; + let block_number = checkpoint + .finish_stage_checkpoint() + .and_then(|finish| finish.partial_state_trie) + .unwrap_or(checkpoint.block_number); + let state_trie_tip_hash = provider .convert_number(block_number.into())? .ok_or_else(|| ProviderError::HeaderNotFound(block_number.into()))?; - Ok(BlockNumHash::new(block_number, hash)) + let finish_tip_number = checkpoint.block_number; + let finish_tip_hash = provider + .convert_number(finish_tip_number.into())? + .ok_or_else(|| ProviderError::HeaderNotFound(finish_tip_number.into()))?; + debug!( + target: "providers::state::overlay", + state_trie_tip_number = block_number, + state_trie_tip_hash = ?state_trie_tip_hash, + finish_tip_number, + finish_tip_hash = ?finish_tip_hash, + anchor_hash = ?self.anchor_hash, + "Loaded database overlay frontiers" + ); + Ok(( + BlockNumHash::new(block_number, state_trie_tip_hash), + BlockNumHash::new(finish_tip_number, finish_tip_hash), + )) } - /// Returns whether or not it is required to collect reverts, and validates that there are - /// sufficient changesets to revert to the requested block number if so. + /// Returns the revert plan required to expose the requested overlay base state, and validates + /// that there are sufficient changesets to revert to the requested block number if so. /// /// Takes into account both the stage checkpoint and the prune checkpoint to determine the /// available data range. 
- fn reverts_required( + fn revert_plan( &self, provider: &Provider, - db_tip_block: BlockNumHash, - ) -> ProviderResult>> + state_trie_tip_block: BlockNumHash, + finish_tip_block: BlockNumHash, + ) -> ProviderResult where Provider: BlockNumReader + PruneCheckpointReader, { - // If the anchor is the DB tip then there won't be any reverts necessary. - if db_tip_block.hash == self.anchor_hash { - return Ok(None) + let anchor_number = self.get_block_number(provider)?; + let anchor_hash_at_number = provider + .convert_number(anchor_number.into())? + .ok_or_else(|| ProviderError::HeaderNotFound(anchor_number.into()))?; + if anchor_hash_at_number != self.anchor_hash { + return Err(ProviderError::other(std::io::Error::other(format!( + "anchor hash {} is not on the durable finish chain at block {} (found {})", + self.anchor_hash, anchor_number, anchor_hash_at_number, + )))); } - // If the DB tip has moved forward into the `LazyOverlay` then we still don't need to - // revert, the `LazyOverlay` will generate a new in-memory overlay using only the relevant - // blocks data. - if let Some(OverlaySource::Lazy(lazy_overlay)) = &self.overlay_source && - lazy_overlay.has_anchor_hash(db_tip_block.hash) + // If the requested anchor is the current durable Finish frontier, the database already + // exposes a consistent logical state for the overlay base. 
+ if state_trie_tip_block.hash == finish_tip_block.hash && + finish_tip_block.hash == self.anchor_hash { - return Ok(None) + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + overlay_anchor_hash = ?finish_tip_block.hash, + "Overlay anchor matches durable finish frontier; no reverts required" + ); + return Ok(OverlayRevertPlan { + revert_blocks: None, + overlay_anchor_hash: finish_tip_block.hash, + }); } - let anchor_number = self.get_block_number(provider)?; + if let Some(OverlaySource::Lazy(lazy)) = self.overlay_source.as_ref() { + let lazy_covers_state_trie_tip = lazy.has_anchor_hash(state_trie_tip_block.hash); + let lazy_covers_finish_gap = state_trie_tip_block.hash == finish_tip_block.hash || + lazy.has_anchor_hash(finish_tip_block.hash); + + if lazy_covers_state_trie_tip && lazy_covers_finish_gap { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + overlay_anchor_hash = ?state_trie_tip_block.hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_blocks), + "Lazy overlay covers partial state trie frontier; no reverts required" + ); + return Ok(OverlayRevertPlan { + revert_blocks: None, + overlay_anchor_hash: state_trie_tip_block.hash, + }) + } + } + + if anchor_number > state_trie_tip_block.number { + return Err(ProviderError::other(std::io::Error::other(format!( + "overlay anchor #{} ({}) is after partial state trie frontier #{} ({}); missing trie updates for blocks #{}..=#{}", + anchor_number, + self.anchor_hash, + state_trie_tip_block.number, + state_trie_tip_block.hash, + state_trie_tip_block.number + 1, + anchor_number, + )))); + } // Check account history prune checkpoint to determine the lower bound of available data. 
// The prune checkpoint's block_number is the highest pruned block, so data is available @@ -270,7 +393,19 @@ impl OverlayBuilder { .map(|block_number| block_number + 1) .unwrap_or_default(); - let available_range = lower_bound..=db_tip_block.number; + let available_range = lower_bound..=finish_tip_block.number; + + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + anchor_number, + ?state_trie_tip_block, + ?finish_tip_block, + prune_lower_bound = lower_bound, + available_start = *available_range.start(), + available_end = *available_range.end(), + "Checking overlay revert requirements" + ); // Check if the requested block is within the available range if !available_range.contains(&anchor_number) { @@ -280,20 +415,36 @@ impl OverlayBuilder { }); } - Ok(Some(anchor_number + 1..=db_tip_block.number)) + let revert_range = anchor_number + 1..=finish_tip_block.number; + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + anchor_number, + revert_start = *revert_range.start(), + revert_end = *revert_range.end(), + overlay_anchor_hash = ?self.anchor_hash, + "Overlay reverts required" + ); + + Ok(OverlayRevertPlan { + revert_blocks: Some(revert_range), + overlay_anchor_hash: self.anchor_hash, + }) } - /// Calculates a new [`Overlay`] given a transaction and the current db tip. + /// Calculates a new [`Overlay`] given a transaction and the current durable state/trie + /// frontier. 
#[instrument( level = "debug", target = "providers::state::overlay", skip_all, - fields(?db_tip_block, anchor_hash = ?self.anchor_hash) + fields(?state_trie_tip_block, ?finish_tip_block, anchor_hash = ?self.anchor_hash) )] fn calculate_overlay( &self, provider: &Provider, - db_tip_block: BlockNumHash, + state_trie_tip_block: BlockNumHash, + finish_tip_block: BlockNumHash, ) -> ProviderResult where Provider: ChangeSetReader @@ -312,13 +463,19 @@ impl OverlayBuilder { let trie_updates_total_len; let hashed_state_updates_total_len; - // Collect any reverts which are required to bring the DB view back to the anchor hash. - let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = - self.reverts_required(provider, db_tip_block)? - { + let OverlayRevertPlan { revert_blocks, overlay_anchor_hash } = + self.revert_plan(provider, state_trie_tip_block, finish_tip_block)?; + + // Collect any reverts which are required to bring the DB view back to the overlay anchor + // hash. + let (trie_updates, hashed_post_state) = if let Some(revert_blocks) = revert_blocks { debug!( target: "providers::state::overlay", ?revert_blocks, + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_blocks), "Collecting trie reverts for overlay state provider" ); @@ -351,7 +508,7 @@ impl OverlayBuilder { // Resolve overlays (lazy or immediate) and extend reverts with them. // If reverts are empty, use overlays directly to avoid cloning. 
- let (overlay_trie, overlay_state) = self.resolve_overlays(self.anchor_hash)?; + let (overlay_trie, overlay_state) = self.resolve_overlays(overlay_anchor_hash)?; let trie_updates = if trie_reverts.is_empty() { overlay_trie @@ -378,20 +535,31 @@ impl OverlayBuilder { target: "providers::state::overlay", num_trie_updates = ?trie_updates_total_len, num_state_updates = ?hashed_state_updates_total_len, - "Reverted to target block", + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + "Built overlay after reverting to anchor", ); (trie_updates, hashed_state_updates) } else { - // If no reverts are needed then we can assume that the db tip is the anchor hash or - // overlaps with the `LazyOverlay`. Use overlays directly. - let (trie_updates, hashed_state) = self.resolve_overlays(db_tip_block.hash)?; + // If no reverts are needed then the overlay can be resolved directly from the durable + // logical frontier selected by the revert plan. 
+ let (trie_updates, hashed_state) = self.resolve_overlays(overlay_anchor_hash)?; retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; trie_updates_total_len = trie_updates.total_len(); hashed_state_updates_total_len = hashed_state.total_len(); + debug!( + target: "providers::state::overlay", + num_trie_updates = trie_updates_total_len, + num_state_updates = hashed_state_updates_total_len, + overlay_anchor_hash = ?overlay_anchor_hash, + source = overlay_source_kind(self.overlay_source.as_ref()), + "Built overlay directly from durable frontier" + ); + (trie_updates, hashed_state) }; @@ -420,8 +588,40 @@ impl OverlayBuilder { + BlockNumReader + StorageSettingsCache, { - let db_tip_block = self.get_db_tip_block(provider)?; - self.calculate_overlay(provider, db_tip_block) + let (state_trie_tip_block, finish_tip_block) = self.get_db_tip_blocks(provider)?; + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_source.as_ref()), + source_anchor = ?self.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_source.as_ref().and_then(overlay_source_blocks), + "Building overlay" + ); + self.calculate_overlay(provider, state_trie_tip_block, finish_tip_block) + } +} + +fn overlay_source_kind(source: Option<&OverlaySource>) -> &'static str { + match source { + Some(OverlaySource::Immediate { .. }) => "immediate", + Some(OverlaySource::Lazy(_)) => "lazy", + None => "none", + } +} + +fn overlay_source_anchor(source: &OverlaySource) -> Option { + match source { + OverlaySource::Immediate { .. } => None, + OverlaySource::Lazy(lazy) => lazy.anchor_hash(), + } +} + +fn overlay_source_blocks(source: &OverlaySource) -> Option> { + match source { + OverlaySource::Immediate { .. 
} => None, + OverlaySource::Lazy(lazy) => Some(lazy.block_summaries()), } } @@ -435,9 +635,11 @@ pub struct OverlayStateProviderFactory { factory: F, /// Overlay builder containing the configuration and overlay calculation logic. overlay_builder: OverlayBuilder, - /// A cache which maps `db_tip -> Overlay`. If the db tip changes during usage of the factory - /// then a new entry will get added to this, but in most cases only one entry is present. - overlay_cache: Arc>, + /// A cache which maps `(state_trie_tip_hash, finish_tip_hash) -> Overlay`. + /// + /// Under partial persistence the overlay depends on both the durable trie frontier and the + /// fully durable Finish frontier, so both hashes are part of the cache key. + overlay_cache: Arc>, } impl OverlayStateProviderFactory { @@ -470,8 +672,8 @@ impl OverlayStateProviderFactory { self } - /// Fetches an [`Overlay`] from the cache based on the current db tip block. If there is no - /// cached value then this calculates the [`Overlay`] and populates the cache. + /// Fetches an [`Overlay`] from the cache based on the current durable frontiers. If there is + /// no cached value then this calculates the [`Overlay`] and populates the cache. 
#[instrument(level = "debug", target = "providers::state::overlay", skip_all)] fn get_overlay(&self, provider: &Provider) -> ProviderResult where @@ -483,12 +685,36 @@ impl OverlayStateProviderFactory { + BlockNumReader + StorageSettingsCache, { - let db_tip_block = self.overlay_builder.get_db_tip_block(provider)?; + let (state_trie_tip_block, finish_tip_block) = + self.overlay_builder.get_db_tip_blocks(provider)?; - let overlay = match self.overlay_cache.entry(db_tip_block.hash) { - dashmap::Entry::Occupied(entry) => entry.get().clone(), + let overlay = match self + .overlay_cache + .entry((state_trie_tip_block.hash, finish_tip_block.hash)) + { + dashmap::Entry::Occupied(entry) => { + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.overlay_builder.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_builder.overlay_source.as_ref()), + "Using cached overlay" + ); + entry.get().clone() + } dashmap::Entry::Vacant(entry) => { self.overlay_builder.metrics.overlay_cache_misses.increment(1); + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.overlay_builder.anchor_hash, + ?state_trie_tip_block, + ?finish_tip_block, + source = overlay_source_kind(self.overlay_builder.overlay_source.as_ref()), + source_anchor = ?self.overlay_builder.overlay_source.as_ref().and_then(overlay_source_anchor), + source_blocks = ?self.overlay_builder.overlay_source.as_ref().and_then(overlay_source_blocks), + "Overlay cache miss" + ); let overlay = self.overlay_builder.build_overlay(provider)?; entry.insert(overlay.clone()); overlay @@ -529,6 +755,14 @@ where let Overlay { trie_updates, hashed_post_state } = self.get_overlay(&provider)?; let is_v2 = provider.cached_storage_settings().is_v2(); + debug!( + target: "providers::state::overlay", + anchor_hash = ?self.overlay_builder.anchor_hash, + trie_updates = trie_updates.total_len(), + hashed_state = hashed_post_state.total_len(), + is_v2, + "Created overlay 
state provider" + ); self.overlay_builder.metrics.database_provider_ro_duration.record(overall_start.elapsed()); Ok(OverlayStateProvider::new(provider, trie_updates, hashed_post_state, is_v2)) } @@ -651,3 +885,239 @@ where hashed_cursor_factory.hashed_storage_cursor(hashed_address) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + test_utils::create_test_provider_factory, BlockWriter, SaveBlocksMode, SaveBlocksPlan, + SaveBlocksPlanStep, + }; + use alloy_primitives::{B256, U256}; + use reth_chain_state::{test_utils::TestBlockBuilder, ComputedTrieData, ExecutedBlock}; + use reth_primitives_traits::Account; + use reth_stages_types::{FinishCheckpoint, StageCheckpoint}; + use reth_storage_api::StageCheckpointWriter; + use reth_trie::{updates::TrieUpdatesSorted, HashedPostState, HashedStorage}; + use std::sync::Arc; + + fn full_save_plan( + blocks: impl IntoIterator>, + ) -> SaveBlocksPlan { + let blocks = blocks.into_iter().collect::>(); + let full_range = 0..blocks.len(); + SaveBlocksPlan::new( + blocks, + vec![SaveBlocksPlanStep::new( + full_range.clone(), + Some(full_range.end..full_range.end), + true, + )], + ) + } + + fn partial_save_plan( + blocks: impl IntoIterator>, + steps: Vec, + ) -> SaveBlocksPlan { + SaveBlocksPlan::new(blocks.into_iter().collect(), steps) + } + + fn with_unique_state( + block: &ExecutedBlock, + id: u8, + ) -> ExecutedBlock { + let hashed_address = B256::with_last_byte(id); + let hashed_slot = B256::with_last_byte(id.saturating_add(32)); + let hashed_state = HashedPostState::default() + .with_accounts([(hashed_address, Some(Account::default()))]) + .with_storages([( + hashed_address, + HashedStorage::from_iter(false, [(hashed_slot, U256::from(id))]), + )]) + .into_sorted(); + + ExecutedBlock::new( + Arc::clone(&block.recovered_block), + Arc::clone(&block.execution_output), + ComputedTrieData::without_trie_input( + Arc::new(hashed_state), + Arc::new(TrieUpdatesSorted::default()), + ), + ) + } + + #[test] + fn 
build_overlay_reverts_when_finish_frontier_is_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth(); + let blocks = block_builder + .get_executed_blocks(0..5) + .enumerate() + .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) + .collect::>(); + + let state_trie_tip = &blocks[1]; + let finish_tip = &blocks[3]; + let lazy_overlay_blocks = vec![blocks[4].clone(), blocks[3].clone(), blocks[2].clone()]; + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw.insert_block(blocks[0].recovered_block()).unwrap(); + provider_rw.insert_block(state_trie_tip.recovered_block()).unwrap(); + provider_rw.insert_block(blocks[2].recovered_block()).unwrap(); + provider_rw.insert_block(finish_tip.recovered_block()).unwrap(); + provider_rw + .save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(finish_tip.block_number()).with_finish_stage_checkpoint( + FinishCheckpoint { partial_state_trie: Some(state_trie_tip.block_number()) }, + ), + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let overlay = OverlayBuilder::::new( + state_trie_tip.recovered_block().hash(), + ChangesetCache::new(), + ) + .with_lazy_overlay(Some(LazyOverlay::new(lazy_overlay_blocks))) + .build_overlay(&provider) + .unwrap(); + + assert_eq!(overlay.hashed_post_state.accounts.len(), 3); + } + + #[test] + fn build_overlay_errors_for_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks = block_builder.get_executed_blocks(1..4).collect::>(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = 
factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks.clone(), + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..3), true), + SaveBlocksPlanStep::new(1..3, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let anchor = blocks[1].recovered_block().hash(); + let error = OverlayBuilder::::new(anchor, ChangesetCache::new()) + .with_lazy_overlay(Some(LazyOverlay::new(vec![blocks[2].clone()]))) + .build_overlay(&provider) + .unwrap_err(); + + assert!( + error.to_string().contains("is after partial state trie frontier"), + "unexpected error: {error}" + ); + } + + #[test] + fn build_overlay_uses_lazy_superset_for_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth(); + let blocks = block_builder + .get_executed_blocks(0..5) + .enumerate() + .map(|(index, block)| with_unique_state(&block, index as u8 + 1)) + .collect::>(); + + let state_trie_tip = &blocks[1]; + let finish_tip = &blocks[3]; + let lazy_overlay_blocks = + vec![blocks[4].clone(), blocks[3].clone(), blocks[2].clone(), blocks[1].clone()]; + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw.insert_block(blocks[0].recovered_block()).unwrap(); + provider_rw.insert_block(state_trie_tip.recovered_block()).unwrap(); + provider_rw.insert_block(blocks[2].recovered_block()).unwrap(); + provider_rw.insert_block(finish_tip.recovered_block()).unwrap(); + provider_rw + .save_stage_checkpoint( + StageId::Finish, + StageCheckpoint::new(finish_tip.block_number()).with_finish_stage_checkpoint( + FinishCheckpoint { partial_state_trie: Some(state_trie_tip.block_number()) }, + ), + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let overlay = OverlayBuilder::::new( + blocks[0].recovered_block().hash(), + ChangesetCache::new(), + ) + 
.with_lazy_overlay(Some(LazyOverlay::new(lazy_overlay_blocks))) + .build_overlay(&provider) + .unwrap(); + + assert_eq!(overlay.hashed_post_state.accounts.len(), 3); + } + + #[test] + fn build_overlay_errors_for_finish_anchor_after_state_trie_frontier() { + let factory = create_test_provider_factory(); + let mut block_builder = TestBlockBuilder::eth().with_state(); + + let genesis = block_builder.get_executed_blocks(0..1).next().unwrap(); + let blocks = block_builder.get_executed_blocks(1..4).collect::>(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &full_save_plan(std::slice::from_ref(&genesis).to_vec()), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider_rw = factory.provider_rw().unwrap(); + provider_rw + .save_blocks( + &partial_save_plan( + blocks.clone(), + vec![ + SaveBlocksPlanStep::new(0..1, Some(1..3), true), + SaveBlocksPlanStep::new(1..3, None, true), + ], + ), + SaveBlocksMode::Full, + ) + .unwrap(); + provider_rw.commit().unwrap(); + + let provider = factory.provider().unwrap(); + let finish_anchor = blocks[2].recovered_block().hash(); + + let error = OverlayBuilder::::new(finish_anchor, ChangesetCache::new()) + .with_lazy_overlay(None) + .build_overlay(&provider) + .unwrap_err(); + + assert!( + error.to_string().contains("is after partial state trie frontier"), + "unexpected error: {error}" + ); + } +} diff --git a/crates/trie/common/src/hashed_state.rs b/crates/trie/common/src/hashed_state.rs index 9addbfeafd0..854f486e2b7 100644 --- a/crates/trie/common/src/hashed_state.rs +++ b/crates/trie/common/src/hashed_state.rs @@ -3,7 +3,7 @@ use core::ops::Not; use crate::{ added_removed_keys::MultiAddedRemovedKeys, prefix_set::{PrefixSetMut, TriePrefixSetsMut}, - utils::{extend_sorted_vec, kway_merge_sorted}, + utils::{extend_sorted_vec, kway_merge_disjoint_sorted, kway_merge_sorted}, KeyHasher, MultiProofTargets, Nibbles, }; use alloc::{borrow::Cow, vec::Vec}; @@ -691,6 
+691,100 @@ impl HashedPostStateSorted { Self { accounts, storages } } + /// Merges the batch and removes any overlapping keys present in the mask. + /// + /// Account keys are masked at the top level, while storage entries are only masked at the slot + /// level unless the mask wipes the entire storage. For duplicate keys in the batch, later + /// items take precedence over earlier ones. The order of the mask does not matter. + pub fn disjointed_merge_batch<'a>(batch: Vec<&'a Self>, mask: Vec<&'a Self>) -> Self { + let accounts = kway_merge_disjoint_sorted( + batch.iter().map(|item| item.accounts.len()).sum(), + batch.iter().rev().map(|item| item.accounts.as_slice()), + mask.iter().map(|item| item.accounts.as_slice()), + ); + + struct StorageAcc<'a> { + wiped: bool, + sealed: bool, + slot_count: usize, + slices: Vec<&'a [(B256, U256)]>, + } + + #[derive(Default)] + struct StorageMaskAcc<'a> { + wiped: bool, + slices: Vec<&'a [(B256, U256)]>, + } + + let mut storages = B256Map::with_capacity_and_hasher( + batch.iter().map(|item| item.storages.len()).sum(), + Default::default(), + ); + + for item in batch.iter().rev() { + for (hashed_address, storage) in &item.storages { + let entry = storages.entry(*hashed_address).or_insert_with(|| StorageAcc { + wiped: false, + sealed: false, + slot_count: 0, + slices: Vec::new(), + }); + + if entry.sealed { + continue; + } + + entry.slices.push(storage.storage_slots.as_slice()); + entry.slot_count += storage.storage_slots.len(); + if storage.wiped { + entry.wiped = true; + entry.sealed = true; + } + } + } + + let mut storage_masks: B256Map> = B256Map::with_capacity_and_hasher( + mask.iter().map(|item| item.storages.len()).sum(), + Default::default(), + ); + for item in mask { + for (hashed_address, storage) in &item.storages { + let entry = storage_masks.entry(*hashed_address).or_default(); + if entry.wiped { + continue; + } + if storage.wiped { + entry.wiped = true; + entry.slices.clear(); + } else { + 
entry.slices.push(storage.storage_slots.as_slice()); + } + } + } + + let storages = storages + .into_iter() + .filter_map(|(hashed_address, entry)| { + let storage_slots = match storage_masks.get(&hashed_address) { + Some(mask_entry) if mask_entry.wiped => return None, + Some(mask_entry) => kway_merge_disjoint_sorted( + entry.slot_count, + entry.slices, + mask_entry.slices.iter().copied(), + ), + None => kway_merge_sorted(entry.slices), + }; + + (!storage_slots.is_empty() || entry.wiped).then_some(( + hashed_address, + HashedStorageSorted { wiped: entry.wiped, storage_slots }, + )) + }) + .collect(); + + Self { accounts, storages } + } + /// Clears all accounts and storage data. pub fn clear(&mut self) { self.accounts.clear(); @@ -1534,6 +1628,152 @@ mod tests { assert_eq!(state.accounts.get(&addr1), Some(&None)); } + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch() { + fn account(nonce: u64) -> Account { + Account { nonce, balance: U256::ZERO, bytecode_hash: None } + } + + let kept_account = B256::with_last_byte(1); + let removed_account = B256::with_last_byte(2); + let kept_storage = B256::with_last_byte(3); + let removed_storage = B256::with_last_byte(4); + let slot1 = B256::with_last_byte(11); + let slot2 = B256::with_last_byte(12); + + let older = HashedPostStateSorted::new( + vec![(kept_account, Some(account(1))), (removed_account, Some(account(10)))], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(1))], + }, + ), + ( + removed_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(2))], + }, + ), + ]), + ); + + let newer = HashedPostStateSorted::new( + vec![(kept_account, Some(account(2)))], + B256Map::from_iter([( + kept_storage, + HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(3)), (slot2, U256::from(4))], + }, + )]), + ); + + let remove_a = HashedPostStateSorted::new( + vec![(removed_account, 
None)], + B256Map::from_iter([ + ( + kept_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot2, U256::ZERO)] }, + ), + (removed_storage, HashedStorageSorted { wiped: true, storage_slots: vec![] }), + ]), + ); + + let remove_b = HashedPostStateSorted::new( + vec![(B256::with_last_byte(255), Some(account(99)))], + B256Map::default(), + ); + + let result = HashedPostStateSorted::disjointed_merge_batch( + vec![&older, &newer], + vec![&remove_b, &remove_a], + ); + + assert_eq!(result.accounts, vec![(kept_account, Some(account(2)))]); + assert_eq!(result.storages.len(), 1); + assert_eq!( + result.storages.get(&kept_storage), + Some(&HashedStorageSorted { + wiped: false, + storage_slots: vec![(slot1, U256::from(3))], + }) + ); + assert!(!result.storages.contains_key(&removed_storage)); + } + + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch_removes_overlapping_batch_key() { + fn account(nonce: u64) -> Account { + Account { nonce, balance: U256::ZERO, bytecode_hash: None } + } + + let overlapping_account = B256::with_last_byte(21); + let overlapping_storage = B256::with_last_byte(22); + let slot = B256::with_last_byte(23); + + let older = HashedPostStateSorted::new( + vec![(overlapping_account, Some(account(1)))], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }, + )]), + ); + + let newer = HashedPostStateSorted::new( + vec![(overlapping_account, Some(account(2)))], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(2))] }, + )]), + ); + + let remove = HashedPostStateSorted::new( + vec![(overlapping_account, None)], + B256Map::from_iter([( + overlapping_storage, + HashedStorageSorted { wiped: true, storage_slots: vec![] }, + )]), + ); + + let result = + HashedPostStateSorted::disjointed_merge_batch(vec![&older, &newer], vec![&remove]); + + assert!(result.accounts.is_empty()); + 
assert!(result.storages.is_empty()); + } + + #[test] + fn test_hashed_post_state_sorted_disjointed_merge_batch_ignores_empty_storage_mask() { + let storage = B256::with_last_byte(31); + let slot = B256::with_last_byte(32); + + let batch = HashedPostStateSorted::new( + vec![], + B256Map::from_iter([( + storage, + HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }, + )]), + ); + let mask = HashedPostStateSorted::new( + vec![], + B256Map::from_iter([( + storage, + HashedStorageSorted { wiped: false, storage_slots: vec![] }, + )]), + ); + + let result = HashedPostStateSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!( + result.storages.get(&storage), + Some(&HashedStorageSorted { wiped: false, storage_slots: vec![(slot, U256::from(1))] }) + ); + } + /// Test non-wiped storage merges both zero and non-zero valued slots #[test] fn test_hashed_storage_extend_from_sorted_non_wiped() { diff --git a/crates/trie/common/src/updates.rs b/crates/trie/common/src/updates.rs index d73b2c4d460..698b065b02b 100644 --- a/crates/trie/common/src/updates.rs +++ b/crates/trie/common/src/updates.rs @@ -1,5 +1,5 @@ use crate::{ - utils::{extend_sorted_vec, kway_merge_sorted}, + utils::{extend_sorted_vec, kway_merge_disjoint_sorted, kway_merge_sorted}, BranchNodeCompact, HashBuilder, Nibbles, }; use alloc::{ @@ -710,6 +710,101 @@ impl TrieUpdatesSorted { Self { account_nodes, storage_tries } } + + /// Merges the batch and removes any overlapping keys present in the mask. + /// + /// Account trie nodes are masked at the top level, while storage trie entries are only masked + /// at the node level unless the mask deletes the entire storage trie. For duplicate keys in + /// the batch, later items take precedence over earlier ones. The order of the mask does not + /// matter. 
+ pub fn disjointed_merge_batch<'a>(batch: Vec<&'a Self>, mask: Vec<&'a Self>) -> Self { + let account_nodes = kway_merge_disjoint_sorted( + batch.iter().map(|item| item.account_nodes.len()).sum(), + batch.iter().rev().map(|item| item.account_nodes.as_slice()), + mask.iter().map(|item| item.account_nodes.as_slice()), + ); + + struct StorageAcc<'a> { + is_deleted: bool, + sealed: bool, + node_count: usize, + slices: Vec<&'a [(Nibbles, Option)]>, + } + + #[derive(Default)] + struct StorageMaskAcc<'a> { + is_deleted: bool, + slices: Vec<&'a [(Nibbles, Option)]>, + } + + let mut storage_tries = B256Map::with_capacity_and_hasher( + batch.iter().map(|item| item.storage_tries.len()).sum(), + Default::default(), + ); + + for item in batch.iter().rev() { + for (hashed_address, storage_trie) in &item.storage_tries { + let entry = storage_tries.entry(*hashed_address).or_insert_with(|| StorageAcc { + is_deleted: false, + sealed: false, + node_count: 0, + slices: Vec::new(), + }); + + if entry.sealed { + continue; + } + + entry.slices.push(storage_trie.storage_nodes.as_slice()); + entry.node_count += storage_trie.storage_nodes.len(); + if storage_trie.is_deleted { + entry.is_deleted = true; + entry.sealed = true; + } + } + } + + let mut storage_masks: B256Map> = B256Map::with_capacity_and_hasher( + mask.iter().map(|item| item.storage_tries.len()).sum(), + Default::default(), + ); + for item in mask { + for (hashed_address, storage_trie) in &item.storage_tries { + let entry = storage_masks.entry(*hashed_address).or_default(); + if entry.is_deleted { + continue; + } + if storage_trie.is_deleted { + entry.is_deleted = true; + entry.slices.clear(); + } else { + entry.slices.push(storage_trie.storage_nodes.as_slice()); + } + } + } + + let storage_tries = storage_tries + .into_iter() + .filter_map(|(hashed_address, entry)| { + let storage_nodes = match storage_masks.get(&hashed_address) { + Some(mask_entry) if mask_entry.is_deleted => return None, + Some(mask_entry) => 
kway_merge_disjoint_sorted( + entry.node_count, + entry.slices, + mask_entry.slices.iter().copied(), + ), + None => kway_merge_sorted(entry.slices), + }; + + (!storage_nodes.is_empty() || entry.is_deleted).then_some(( + hashed_address, + StorageTrieUpdatesSorted { is_deleted: entry.is_deleted, storage_nodes }, + )) + }) + .collect(); + + Self::new(account_nodes, storage_tries) + } } impl AsRef for TrieUpdatesSorted { @@ -977,6 +1072,212 @@ mod tests { assert_eq!(storage3.storage_nodes[1].0, Nibbles::from_nibbles_unchecked([0x07])); } + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch() { + let kept_node = Nibbles::from_nibbles_unchecked([0x01]); + let removed_node = Nibbles::from_nibbles_unchecked([0x02]); + let kept_storage = B256::from([3; 32]); + let removed_storage = B256::from([4; 32]); + let slot1 = Nibbles::from_nibbles_unchecked([0x0a]); + let slot2 = Nibbles::from_nibbles_unchecked([0x0b]); + + let older = TrieUpdatesSorted::new( + vec![(kept_node, Some(BranchNodeCompact::default())), (removed_node, None)], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, None)], + }, + ), + ( + removed_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default()))], + }, + ), + ]), + ); + + let newer = TrieUpdatesSorted::new( + vec![(kept_node, None)], + B256Map::from_iter([( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default())), (slot2, None)], + }, + )]), + ); + + let remove_a = TrieUpdatesSorted::new( + vec![(removed_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([ + ( + kept_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot2, Some(BranchNodeCompact::default()))], + }, + ), + ( + removed_storage, + StorageTrieUpdatesSorted { is_deleted: true, storage_nodes: vec![] }, + ), + ]), + ); + + let 
remove_b = TrieUpdatesSorted::new( + vec![(Nibbles::from_nibbles_unchecked([0x0f]), Some(BranchNodeCompact::default()))], + B256Map::default(), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch( + vec![&older, &newer], + vec![&remove_b, &remove_a], + ); + + assert_eq!(result.account_nodes, vec![(kept_node, None)]); + assert_eq!(result.storage_tries.len(), 1); + assert_eq!( + result.storage_tries.get(&kept_storage), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot1, Some(BranchNodeCompact::default()))], + }) + ); + assert!(!result.storage_tries.contains_key(&removed_storage)); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_removes_overlapping_batch_key() { + let overlapping_node = Nibbles::from_nibbles_unchecked([0x03]); + let overlapping_storage = B256::from([5; 32]); + let slot = Nibbles::from_nibbles_unchecked([0x0c]); + + let older = TrieUpdatesSorted::new( + vec![(overlapping_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }, + )]), + ); + + let newer = TrieUpdatesSorted::new( + vec![(overlapping_node, None)], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { is_deleted: false, storage_nodes: vec![(slot, None)] }, + )]), + ); + + let remove = TrieUpdatesSorted::new( + vec![(overlapping_node, Some(BranchNodeCompact::default()))], + B256Map::from_iter([( + overlapping_storage, + StorageTrieUpdatesSorted { is_deleted: true, storage_nodes: vec![] }, + )]), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch(vec![&older, &newer], vec![&remove]); + + assert!(result.account_nodes.is_empty()); + assert!(result.storage_tries.is_empty()); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_uses_exact_key_masking() { + let hashed_address = B256::from([7; 32]); + let grandparent = 
Nibbles::from_nibbles_unchecked([0x05]); + let parent = Nibbles::from_nibbles_unchecked([0x05, 0x04]); + let child = Nibbles::from_nibbles_unchecked([0x05, 0x04, 0x03]); + + let batch = TrieUpdatesSorted::new( + vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + (child, Some(BranchNodeCompact::default())), + ], + B256Map::from_iter([( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + (child, Some(BranchNodeCompact::default())), + ], + }, + )]), + ); + let mask = TrieUpdatesSorted::new( + vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + ], + B256Map::from_iter([( + hashed_address, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![ + (grandparent, Some(BranchNodeCompact::default())), + (parent, Some(BranchNodeCompact::default())), + ], + }, + )]), + ); + + let result = TrieUpdatesSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!(result.account_nodes, vec![(child, Some(BranchNodeCompact::default()))]); + assert_eq!( + result.storage_tries.get(&hashed_address), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(child, Some(BranchNodeCompact::default()))], + }) + ); + } + + #[test] + fn test_trie_updates_sorted_disjointed_merge_batch_ignores_empty_storage_mask() { + let storage = B256::from([6; 32]); + let slot = Nibbles::from_nibbles_unchecked([0x0d]); + + let batch = TrieUpdatesSorted::new( + vec![], + B256Map::from_iter([( + storage, + StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }, + )]), + ); + let mask = TrieUpdatesSorted::new( + vec![], + B256Map::from_iter([( + storage, + StorageTrieUpdatesSorted { is_deleted: false, storage_nodes: vec![] }, + )]), + ); + + let 
result = TrieUpdatesSorted::disjointed_merge_batch(vec![&batch], vec![&mask]); + + assert_eq!( + result.storage_tries.get(&storage), + Some(&StorageTrieUpdatesSorted { + is_deleted: false, + storage_nodes: vec![(slot, Some(BranchNodeCompact::default()))], + }) + ); + } + /// Test extending with storage tries adds both nodes and removed nodes correctly #[test] fn test_trie_updates_extend_from_sorted_with_storage_tries() { diff --git a/crates/trie/common/src/utils.rs b/crates/trie/common/src/utils.rs index 6d6f134a3ac..ae6b9f3e236 100644 --- a/crates/trie/common/src/utils.rs +++ b/crates/trie/common/src/utils.rs @@ -26,6 +26,51 @@ where .collect() } +/// Merge sorted left slices into a sorted `Vec`, excluding keys present in any right slice. +/// +/// Callers pass left slices in priority order (index 0 = highest priority), so the first +/// left slice's value for a key takes precedence over later slices. Right slice order is ignored; +/// the right-hand side only contributes keys to exclude. 
+pub(crate) fn kway_merge_disjoint_sorted<'a, K, V>( + capacity: usize, + left_slices: impl IntoIterator, + right_slices: impl IntoIterator, +) -> Vec<(K, V)> +where + K: Ord + Clone + 'a, + V: Clone + 'a, +{ + let mut right_keys = right_slices + .into_iter() + .filter(|s| !s.is_empty()) + .map(|s| s.iter().map(|(k, _)| k)) + .kmerge() + .dedup() + .peekable(); + + let mut out = Vec::with_capacity(capacity); + for (_, key, value) in left_slices + .into_iter() + .filter(|s| !s.is_empty()) + .enumerate() + .map(|(i, s)| s.iter().map(move |(k, v)| (i, k, v))) + .kmerge_by(|(i1, k1, _), (i2, k2, _)| (k1, i1) < (k2, i2)) + .dedup_by(|(_, k1, _), (_, k2, _)| *k1 == *k2) + { + while right_keys.peek().is_some_and(|right_key| *right_key < key) { + right_keys.next(); + } + + if right_keys.peek().is_some_and(|right_key| *right_key == key) { + continue; + } + + out.push((key.clone(), value.clone())); + } + + out +} + /// Extend a sorted vector with another sorted vector using 2 pointer merge. /// Values from `other` take precedence for duplicate keys. 
pub(crate) fn extend_sorted_vec(target: &mut Vec<(K, V)>, other: &[(K, V)]) @@ -183,4 +228,20 @@ mod tests { let result: Vec<(i32, &str)> = kway_merge_sorted(Vec::<&[(i32, &str)]>::new()); assert!(result.is_empty()); } + + #[test] + fn test_kway_merge_disjoint_sorted() { + let left_old = vec![(1, "old"), (2, "drop"), (4, "keep")]; + let left_new = vec![(1, "new"), (3, "new_only")]; + let right_a = vec![(2, "ignored"), (5, "ignored")]; + let right_b = vec![(3, "ignored")]; + + let result = kway_merge_disjoint_sorted( + left_old.len() + left_new.len(), + [left_new.as_slice(), left_old.as_slice()], + [right_a.as_slice(), right_b.as_slice()], + ); + + assert_eq!(result, vec![(1, "new"), (4, "keep")]); + } } diff --git a/crates/trie/trie/src/trie.rs b/crates/trie/trie/src/trie.rs index 9ea219f73ab..e8bcb440ed5 100644 --- a/crates/trie/trie/src/trie.rs +++ b/crates/trie/trie/src/trie.rs @@ -36,6 +36,8 @@ pub struct StateRoot { pub hashed_cursor_factory: H, /// A set of prefix sets that have changed. pub prefix_sets: TriePrefixSets, + /// Whether every child under a branch whose path matches the prefix set should be walked. + walk_all_changed_branch_children: bool, /// Previous intermediate state. previous_state: Option, /// The number of updates after which the intermediate progress should be returned. @@ -56,6 +58,7 @@ impl StateRoot { trie_cursor_factory, hashed_cursor_factory, prefix_sets: TriePrefixSets::default(), + walk_all_changed_branch_children: false, previous_state: None, threshold: DEFAULT_INTERMEDIATE_THRESHOLD, #[cfg(feature = "metrics")] @@ -69,6 +72,12 @@ impl StateRoot { self } + /// Configures the state root walker to visit all children of changed branch paths. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Set the threshold. 
pub const fn with_threshold(mut self, threshold: u64) -> Self { self.threshold = threshold; @@ -93,6 +102,7 @@ impl StateRoot { trie_cursor_factory: self.trie_cursor_factory, hashed_cursor_factory, prefix_sets: self.prefix_sets, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, threshold: self.threshold, previous_state: self.previous_state, #[cfg(feature = "metrics")] @@ -106,6 +116,7 @@ impl StateRoot { trie_cursor_factory, hashed_cursor_factory: self.hashed_cursor_factory, prefix_sets: self.prefix_sets, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, threshold: self.threshold, previous_state: self.previous_state, #[cfg(feature = "metrics")] @@ -178,6 +189,7 @@ where account_root_state.walker_stack, self.prefix_sets.account_prefix_set, ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let account_node_iter = TrieNodeIter::state_trie(walker, hashed_account_cursor) .with_last_hashed_key(account_root_state.last_hashed_key); @@ -213,6 +225,7 @@ where self.metrics.storage_trie.clone(), ) .with_intermediate_state(Some(storage_state.state)) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_threshold(remaining_threshold); let storage_result = storage_root_calculator.calculate(retain_updates)?; @@ -239,6 +252,7 @@ where // calculation let hash_builder = HashBuilder::default().with_updates(retain_updates); let walker = TrieWalker::state_trie(trie_cursor, self.prefix_sets.account_prefix_set) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::state_trie(walker, hashed_account_cursor); (hash_builder, node_iter) @@ -272,6 +286,7 @@ where #[cfg(feature = "metrics")] self.metrics.storage_trie.clone(), ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_threshold(remaining_threshold); 
let storage_result = storage_root_calculator.calculate(retain_updates)?; @@ -465,6 +480,8 @@ pub struct StorageRoot { pub hashed_address: B256, /// The set of storage slot prefixes that have changed. pub prefix_set: PrefixSet, + /// Whether every child under a branch whose path matches the prefix set should be walked. + walk_all_changed_branch_children: bool, /// Previous intermediate state. previous_state: Option, /// The number of updates after which the intermediate progress should be returned. @@ -506,6 +523,7 @@ impl StorageRoot { hashed_cursor_factory, hashed_address, prefix_set, + walk_all_changed_branch_children: false, previous_state: None, threshold: DEFAULT_INTERMEDIATE_THRESHOLD, #[cfg(feature = "metrics")] @@ -519,6 +537,12 @@ impl StorageRoot { self } + /// Configures the storage root walker to visit all children of changed branch paths. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Set the threshold. 
pub const fn with_threshold(mut self, threshold: u64) -> Self { self.threshold = threshold; @@ -544,6 +568,7 @@ impl StorageRoot { hashed_cursor_factory, hashed_address: self.hashed_address, prefix_set: self.prefix_set, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, previous_state: self.previous_state, threshold: self.threshold, #[cfg(feature = "metrics")] @@ -558,6 +583,7 @@ impl StorageRoot { hashed_cursor_factory: self.hashed_cursor_factory, hashed_address: self.hashed_address, prefix_set: self.prefix_set, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, previous_state: self.previous_state, threshold: self.threshold, #[cfg(feature = "metrics")] @@ -641,6 +667,7 @@ where state.walker_stack, self.prefix_set, ) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::storage_trie(walker, hashed_storage_cursor) .with_last_hashed_key(state.last_hashed_key); @@ -649,6 +676,7 @@ where None => { let hash_builder = HashBuilder::default().with_updates(retain_updates); let walker = TrieWalker::storage_trie(trie_cursor, self.prefix_set) + .with_walk_all_changed_branch_children(self.walk_all_changed_branch_children) .with_deletions_retained(retain_updates); let node_iter = TrieNodeIter::storage_trie(walker, hashed_storage_cursor); (hash_builder, node_iter) diff --git a/crates/trie/trie/src/walker.rs b/crates/trie/trie/src/walker.rs index f12bf46f748..7936663f3a1 100644 --- a/crates/trie/trie/src/walker.rs +++ b/crates/trie/trie/src/walker.rs @@ -8,6 +8,18 @@ use alloy_trie::proof::AddedRemovedKeys; use reth_storage_errors::db::DatabaseError; use tracing::{instrument, trace}; +#[cfg(test)] +use crate::trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}; + +#[cfg(test)] +use alloy_primitives::map::B256Map; + +#[cfg(test)] +use alloy_trie::TrieMask; + +#[cfg(test)] +use std::collections::BTreeMap; + #[cfg(feature = 
"metrics")] use crate::metrics::WalkerMetrics; @@ -26,6 +38,9 @@ pub struct TrieWalker { pub can_skip_current_node: bool, /// A `PrefixSet` representing the changes to be applied to the trie. pub changes: PrefixSet, + /// When enabled, all children of a branch become unskippable if the branch path itself + /// matches the prefix set, even if a given child path does not. + walk_all_changed_branch_children: bool, /// The retained trie node keys that need to be removed. removed_keys: Option>, /// Provided when it's necessary not to skip certain nodes during proof generation. @@ -76,6 +91,7 @@ impl> TrieWalker { changes, stack, can_skip_current_node: false, + walk_all_changed_branch_children: false, removed_keys: None, added_removed_keys: None, #[cfg(feature = "metrics")] @@ -101,6 +117,7 @@ impl> TrieWalker { stack: self.stack, can_skip_current_node: self.can_skip_current_node, changes: self.changes, + walk_all_changed_branch_children: self.walk_all_changed_branch_children, removed_keys: self.removed_keys, added_removed_keys, #[cfg(feature = "metrics")] @@ -108,6 +125,12 @@ impl> TrieWalker { } } + /// Configures the walker to treat every child of a matching branch path as unskippable. + pub const fn with_walk_all_changed_branch_children(mut self, enabled: bool) -> Self { + self.walk_all_changed_branch_children = enabled; + self + } + /// Split the walker into stack and trie updates. 
pub fn split(mut self) -> (Vec, HashSet) { let keys = self.take_removed_keys(); @@ -188,7 +211,14 @@ impl> TrieWalker { "Checked for only non-removed child", ); + let branch_path_matches_prefix_set = self + .walk_all_changed_branch_children + .then(|| node.position().is_child()) + .unwrap_or(false) && + self.changes.contains(&node.key); + !self.changes.contains(node.full_key()) && + !branch_path_matches_prefix_set && node.hash_flag() && !key_is_only_nonremoved_child }); @@ -233,6 +263,7 @@ impl> TrieWalker { changes, stack: vec![CursorSubNode::default()], can_skip_current_node: false, + walk_all_changed_branch_children: false, removed_keys: None, added_removed_keys: Default::default(), #[cfg(feature = "metrics")] @@ -387,3 +418,83 @@ impl> TrieWalker { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::prefix_set::PrefixSetMut; + use alloy_primitives::B256; + + fn branch_node(state_mask: u16, tree_mask: u16, hash_mask: u16) -> BranchNodeCompact { + let hash_count = hash_mask.count_ones() as usize; + BranchNodeCompact::new( + TrieMask::new(state_mask), + TrieMask::new(tree_mask), + TrieMask::new(hash_mask), + vec![B256::ZERO; hash_count], + None, + ) + } + + fn root_branch_node(state_mask: u16, tree_mask: u16, hash_mask: u16) -> BranchNodeCompact { + let hash_count = hash_mask.count_ones() as usize; + BranchNodeCompact::new( + TrieMask::new(state_mask), + TrieMask::new(tree_mask), + TrieMask::new(hash_mask), + vec![B256::ZERO; hash_count], + Some(B256::ZERO), + ) + } + + fn walker_for_matching_branch_children_test( + walk_all_changed_branch_children: bool, + ) -> TrieWalker { + let trie_nodes = BTreeMap::from([ + (Nibbles::default(), root_branch_node(1 << 2, 1 << 2, 1 << 2)), + ( + Nibbles::from_nibbles([0x2]), + branch_node((1 << 3) | (1 << 4), 0, (1 << 3) | (1 << 4)), + ), + ]); + let factory = MockTrieCursorFactory::new(trie_nodes, B256Map::default()); + + let mut prefix_set = PrefixSetMut::default(); + 
prefix_set.insert(Nibbles::from_nibbles([0x2, 0x3, 0x1])); + + TrieWalker::state_trie(factory.account_trie_cursor().unwrap(), prefix_set.freeze()) + .with_walk_all_changed_branch_children(walk_all_changed_branch_children) + } + + #[test] + fn branch_siblings_remain_skippable_by_default() { + let mut walker = walker_for_matching_branch_children_test(false); + + assert_eq!(walker.key().copied(), Some(Nibbles::default())); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2]))); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x3]))); + assert_eq!(walker.stack.last().unwrap().position(), SubNodePosition::Child(0x3)); + assert!(!walker.can_skip_current_node); + + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x4]))); + assert!(walker.can_skip_current_node); + } + + #[test] + fn matching_branch_path_can_make_all_children_unskippable() { + let mut walker = walker_for_matching_branch_children_test(true); + + walker.advance().unwrap(); + walker.advance().unwrap(); + walker.advance().unwrap(); + assert_eq!(walker.key().copied(), Some(Nibbles::from_nibbles([0x2, 0x4]))); + assert!(!walker.can_skip_current_node); + } +} diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx index 14b9c127ce5..216a0aa0cc4 100644 --- a/docs/vocs/docs/pages/cli/reth/node.mdx +++ b/docs/vocs/docs/pages/cli/reth/node.mdx @@ -965,6 +965,11 @@ Engine: [default: 16] + --engine.deferred-trie-blocks + Configure how many of the blocks being persisted should only mask state/trie writes instead of durably persisting their state/trie updates in the current cycle + + [default: 0] + --engine.memory-block-buffer-target Configure the target number of blocks to keep in memory diff --git a/examples/db-access/Cargo.toml 
b/examples/db-access/Cargo.toml index 07687e5c465..422411345a3 100644 --- a/examples/db-access/Cargo.toml +++ b/examples/db-access/Cargo.toml @@ -7,6 +7,12 @@ license.workspace = true [dependencies] reth-ethereum = { workspace = true, features = ["node"] } +reth-chainspec.workspace = true +reth-db-api.workspace = true +reth-storage-api.workspace = true +reth-trie.workspace = true +reth-trie-db.workspace = true alloy-primitives.workspace = true +clap = { workspace = true, features = ["derive"] } eyre.workspace = true diff --git a/examples/db-access/src/bin/compare_merkle_trace_to_db.rs b/examples/db-access/src/bin/compare_merkle_trace_to_db.rs new file mode 100644 index 00000000000..2417d4ea132 --- /dev/null +++ b/examples/db-access/src/bin/compare_merkle_trace_to_db.rs @@ -0,0 +1,940 @@ +#![warn(unused_crate_dependencies)] + +use alloy_primitives::{B256, U256}; +use clap::{Parser, ValueEnum}; +use eyre::{bail, eyre, Context, Result}; +use reth_chainspec::{ChainSpec, DEV, HOLESKY, HOODI, MAINNET, SEPOLIA}; +use reth_db_api::{ + cursor::{DbCursorRO, DbDupCursorRO}, + tables, + transaction::DbTx, +}; +use reth_ethereum::{node::EthereumNode, provider::providers::ReadOnlyConfig, tasks::Runtime}; +use reth_storage_api::{DBProvider, StorageSettingsCache}; +use reth_trie::{ + trie_cursor::{TrieCursor, TrieCursorFactory}, + Nibbles, StorageRoot, +}; +use reth_trie_db::{ + DatabaseHashedCursorFactory, DatabaseStorageRoot, DatabaseTrieCursorFactory, TrieTableAdapter, +}; +use std::{ + collections::{BTreeMap, HashMap}, + fs::File, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, + sync::Arc, +}; + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum ChainArg { + Mainnet, + Sepolia, + Holesky, + Hoodi, + Dev, +} + +impl ChainArg { + const fn as_str(self) -> &'static str { + match self { + Self::Mainnet => "mainnet", + Self::Sepolia => "sepolia", + Self::Holesky => "holesky", + Self::Hoodi => "hoodi", + Self::Dev => "dev", + } + } + + fn chain_spec(self) -> Arc { + match self { + 
Self::Mainnet => MAINNET.clone(), + Self::Sepolia => SEPOLIA.clone(), + Self::Holesky => HOLESKY.clone(), + Self::Hoodi => HOODI.clone(), + Self::Dev => DEV.clone(), + } + } +} + +#[derive(Debug, Parser)] +#[command( + about = "Compare a failed Merkle unwind trace against DB-backed trie/account/storage state." +)] +struct Args { + /// Path to the failed restart trace log. + trace_file: PathBuf, + + /// Reth datadir to treat as DB ground truth. + datadir: PathBuf, + + /// Chain spec used to open the datadir. + #[arg(long, value_enum, default_value_t = ChainArg::Hoodi)] + chain: ChainArg, + + /// Cap the number of reported results per section. + #[arg(long)] + max_results: Option, +} + +#[derive(Debug, Clone)] +struct ObservedStateBranch { + path: Nibbles, + observed_hash: B256, + children_are_in_trie: bool, + first_line: usize, + occurrences: usize, +} + +#[derive(Debug, Clone)] +struct ObservedAccountLeaf { + hashed_address: B256, + nonce: u64, + balance: U256, + bytecode_hash: Option, + first_line: usize, + occurrences: usize, +} + +#[derive(Debug, Clone)] +struct ObservedStorageBranch { + hashed_address: B256, + path: Nibbles, + observed_hash: B256, + children_are_in_trie: bool, + first_line: usize, + occurrences: usize, +} + +#[derive(Debug, Clone)] +struct ObservedStorageLeaf { + hashed_address: B256, + hashed_slot: B256, + value: U256, + first_line: usize, + occurrences: usize, +} + +#[derive(Debug, Clone)] +struct ObservedStorageRoot { + hashed_address: B256, + root: B256, + first_line: usize, + occurrences: usize, +} + +#[derive(Debug, Default)] +struct TraceData { + state_branches: Vec, + account_leaves: Vec, + storage_branches: Vec, + storage_leaves: Vec, + storage_roots: Vec, +} + +#[derive(Debug, Clone)] +struct BranchCandidate { + location: String, + expected_hash: Option, + children_are_in_trie: bool, +} + +impl BranchCandidate { + fn detail(&self) -> String { + format!( + "db_candidate={} expected_hash={} expected_children_are_in_trie={}", + 
self.location, + option_b256(self.expected_hash), + self.children_are_in_trie + ) + } +} + +#[derive(Debug, Default)] +struct BranchLookup { + candidates: Vec, + notes: Vec, +} + +impl BranchLookup { + fn matches(&self, observed_hash: B256, children_are_in_trie: bool) -> bool { + self.candidates.iter().any(|candidate| { + candidate.expected_hash == Some(observed_hash) && + candidate.children_are_in_trie == children_are_in_trie + }) + } + + fn details(&self) -> Vec { + let mut details = Vec::new(); + if self.candidates.is_empty() { + details.push("db_candidates=none".to_string()); + } else { + details.extend(self.candidates.iter().map(BranchCandidate::detail)); + } + details.extend(self.notes.iter().cloned()); + details + } +} + +#[derive(Debug, Clone)] +struct Mismatch { + context: MismatchContext, + path: Nibbles, + first_line: usize, + kind_rank: u8, + headline: String, + details: Vec, + suppressed_descendants: usize, +} + +#[derive(Debug, Clone)] +struct Diagnostic { + context: MismatchContext, + first_line: usize, + headline: String, + details: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum MismatchContext { + State, + Storage(B256), +} + +impl std::fmt::Display for MismatchContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::State => f.write_str("state"), + Self::Storage(hashed_address) => write!(f, "storage:{hashed_address}"), + } + } +} + +#[derive(Debug, Default)] +struct ComparisonResults { + direct_mismatches: Vec, + diagnostics: Vec, +} + +fn main() -> Result<()> { + let args = Args::parse(); + + if !args.trace_file.is_file() { + bail!("trace file does not exist: {}", args.trace_file.display()); + } + if !args.datadir.is_dir() { + bail!("datadir does not exist: {}", args.datadir.display()); + } + + let trace = parse_trace(&args.trace_file)?; + + let runtime = Runtime::test(); + let factory = EthereumNode::provider_factory_builder().open_read_only( + 
args.chain.chain_spec(), + ReadOnlyConfig::from_datadir(args.datadir.clone()), + runtime, + )?; + let provider = factory.provider()?; + + let results = + reth_trie_db::with_adapter!(provider, |A| compare_with_adapter::<_, A>(&provider, &trace))?; + + let total_direct_mismatches = results.direct_mismatches.len(); + let mut outermost = retain_outermost_mismatches(results.direct_mismatches); + let suppressed_direct_descendants = + outermost.iter().map(|mismatch| mismatch.suppressed_descendants).sum::(); + let diagnostic_count = results.diagnostics.len(); + let max_results = args.max_results.unwrap_or(usize::MAX); + + println!("trace_file={}", args.trace_file.display()); + println!("datadir={}", args.datadir.display()); + println!("chain={}", args.chain.as_str()); + println!( + "trace_observations state_branches={} account_leaves={} storage_branches={} storage_leaves={} storage_roots={}", + trace.state_branches.len(), + trace.account_leaves.len(), + trace.storage_branches.len(), + trace.storage_leaves.len(), + trace.storage_roots.len(), + ); + println!( + "direct_mismatches total={} outermost={} suppressed_descendants={}", + total_direct_mismatches, + outermost.len(), + suppressed_direct_descendants, + ); + println!("reported_direct_mismatches={}", outermost.len().min(max_results)); + println!("storage_root_diagnostics={diagnostic_count}"); + println!("reported_storage_root_diagnostics={}", diagnostic_count.min(max_results)); + + for mismatch in outermost.drain(..).take(max_results) { + println!(); + println!("[{}] {}", mismatch.context, mismatch.headline); + for detail in mismatch.details { + println!(" {detail}"); + } + if mismatch.suppressed_descendants > 0 { + println!(" suppressed_descendants={}", mismatch.suppressed_descendants); + } + } + + for diagnostic in results.diagnostics.into_iter().take(max_results) { + println!(); + println!("[{}] {}", diagnostic.context, diagnostic.headline); + for detail in diagnostic.details { + println!(" {detail}"); + } + } + + 
Ok(()) +} + +fn compare_with_adapter(provider: &P, trace: &TraceData) -> Result +where + P: DBProvider, + A: TrieTableAdapter, +{ + let tx = provider.tx_ref(); + let trie_factory = DatabaseTrieCursorFactory::<_, A>::new(tx); + let mut state_trie_cursor = trie_factory.account_trie_cursor()?; + let mut hashed_accounts_cursor = tx.cursor_read::()?; + let mut hashed_storage_cursor = tx.cursor_dup_read::()?; + + let mut results = ComparisonResults::default(); + + for observed in &trace.state_branches { + let lookup = lookup_branch(&mut state_trie_cursor, &observed.path)?; + if lookup.matches(observed.observed_hash, observed.children_are_in_trie) { + continue; + } + + let mut details = vec![ + format!("observed_hash={}", observed.observed_hash), + format!("observed_children_are_in_trie={}", observed.children_are_in_trie), + format!("trace_occurrences={}", observed.occurrences), + ]; + details.extend(lookup.details()); + + results.direct_mismatches.push(Mismatch { + context: MismatchContext::State, + path: observed.path, + first_line: observed.first_line, + kind_rank: 0, + headline: format!( + "branch mismatch path={} line={}", + nibbles_hex(&observed.path), + observed.first_line + ), + details, + suppressed_descendants: 0, + }); + } + + for observed in &trace.account_leaves { + match hashed_accounts_cursor.seek_exact(observed.hashed_address)? 
{ + Some((_, account)) => { + let mut diffs = Vec::new(); + if observed.nonce != account.nonce { + diffs.push(format!( + "nonce observed={} expected={}", + observed.nonce, account.nonce + )); + } + if observed.balance != account.balance { + diffs.push(format!( + "balance observed={} expected={}", + observed.balance, account.balance + )); + } + if observed.bytecode_hash != account.bytecode_hash { + diffs.push(format!( + "bytecode_hash observed={} expected={}", + option_b256(observed.bytecode_hash), + option_b256(account.bytecode_hash) + )); + } + if diffs.is_empty() { + continue; + } + + diffs.push(format!("trace_occurrences={}", observed.occurrences)); + results.direct_mismatches.push(Mismatch { + context: MismatchContext::State, + path: Nibbles::unpack(observed.hashed_address), + first_line: observed.first_line, + kind_rank: 1, + headline: format!( + "account leaf mismatch hashed_address={} line={}", + observed.hashed_address, observed.first_line + ), + details: diffs, + suppressed_descendants: 0, + }); + } + None => results.direct_mismatches.push(Mismatch { + context: MismatchContext::State, + path: Nibbles::unpack(observed.hashed_address), + first_line: observed.first_line, + kind_rank: 1, + headline: format!( + "account leaf missing from DB hashed state hashed_address={} line={}", + observed.hashed_address, observed.first_line + ), + details: vec![ + format!("nonce={}", observed.nonce), + format!("balance={}", observed.balance), + format!("bytecode_hash={}", option_b256(observed.bytecode_hash)), + format!("trace_occurrences={}", observed.occurrences), + ], + suppressed_descendants: 0, + }), + } + } + + let mut storage_branches_by_address = BTreeMap::>::new(); + for observed in &trace.storage_branches { + storage_branches_by_address.entry(observed.hashed_address).or_default().push(observed); + } + + for (hashed_address, branches) in storage_branches_by_address { + let mut storage_trie_cursor = trie_factory.storage_trie_cursor(hashed_address)?; + for observed in 
branches { + let lookup = lookup_branch(&mut storage_trie_cursor, &observed.path)?; + if lookup.matches(observed.observed_hash, observed.children_are_in_trie) { + continue; + } + + let mut details = vec![ + format!("observed_hash={}", observed.observed_hash), + format!("observed_children_are_in_trie={}", observed.children_are_in_trie), + format!("trace_occurrences={}", observed.occurrences), + ]; + details.extend(lookup.details()); + + results.direct_mismatches.push(Mismatch { + context: MismatchContext::Storage(observed.hashed_address), + path: observed.path, + first_line: observed.first_line, + kind_rank: 0, + headline: format!( + "branch mismatch hashed_address={} path={} line={}", + observed.hashed_address, + nibbles_hex(&observed.path), + observed.first_line + ), + details, + suppressed_descendants: 0, + }); + } + } + + for observed in &trace.storage_leaves { + match hashed_storage_cursor.seek_by_key_subkey(observed.hashed_address, observed.hashed_slot)? { + Some(entry) if entry.key == observed.hashed_slot => { + if observed.value == entry.value { + continue; + } + + results.direct_mismatches.push(Mismatch { + context: MismatchContext::Storage(observed.hashed_address), + path: Nibbles::unpack(observed.hashed_slot), + first_line: observed.first_line, + kind_rank: 1, + headline: format!( + "storage leaf mismatch hashed_address={} hashed_slot={} line={}", + observed.hashed_address, observed.hashed_slot, observed.first_line + ), + details: vec![ + format!("observed_value={}", observed.value), + format!("expected_value={}", entry.value), + format!("trace_occurrences={}", observed.occurrences), + ], + suppressed_descendants: 0, + }); + } + _ => results.direct_mismatches.push(Mismatch { + context: MismatchContext::Storage(observed.hashed_address), + path: Nibbles::unpack(observed.hashed_slot), + first_line: observed.first_line, + kind_rank: 1, + headline: format!( + "storage leaf missing from DB hashed state hashed_address={} hashed_slot={} line={}", + 
observed.hashed_address, observed.hashed_slot, observed.first_line + ), + details: vec![ + format!("observed_value={}", observed.value), + format!("trace_occurrences={}", observed.occurrences), + ], + suppressed_descendants: 0, + }), + } + } + + let mut storage_root_cache = HashMap::::new(); + for observed in &trace.storage_roots { + let expected_root = match storage_root_cache.get(&observed.hashed_address) { + Some(root) => *root, + None => { + let root = storage_root_for_hashed_address::<_, A>(tx, observed.hashed_address)?; + storage_root_cache.insert(observed.hashed_address, root); + root + } + }; + + if observed.root == expected_root { + continue; + } + + results.diagnostics.push(Diagnostic { + context: MismatchContext::Storage(observed.hashed_address), + first_line: observed.first_line, + headline: format!( + "storage root mismatch hashed_address={} line={}", + observed.hashed_address, observed.first_line + ), + details: vec![ + format!("observed_root={}", observed.root), + format!("expected_root={expected_root}"), + format!("trace_occurrences={}", observed.occurrences), + ], + }); + } + + results.diagnostics.sort_by(|left, right| { + left.first_line.cmp(&right.first_line).then(left.context.cmp(&right.context)) + }); + + Ok(results) +} + +fn storage_root_for_hashed_address(tx: &TX, hashed_address: B256) -> Result +where + TX: DbTx, + A: TrieTableAdapter, +{ + as DatabaseStorageRoot<_>>::from_tx_hashed(tx, hashed_address) + .root() + .with_context(|| format!("compute storage root for hashed address {hashed_address}")) +} + +fn lookup_branch( + cursor: &mut C, + path: &Nibbles, +) -> Result +where + C: TrieCursor, +{ + let mut lookup = BranchLookup::default(); + + if let Some((_, node)) = cursor.seek_exact(*path)? 
{ + if let Some(root_hash) = node.root_hash { + lookup.candidates.push(BranchCandidate { + location: format!("parent_root path={}", nibbles_hex(path)), + expected_hash: Some(root_hash), + children_are_in_trie: true, + }); + } else { + lookup.notes.push(format!( + "exact_branch_node_present path={} root_hash=None", + nibbles_hex(path) + )); + } + } + + if !path.is_empty() { + let parent_path = path.slice(..path.len() - 1); + let nibble = path.get_unchecked(path.len() - 1); + match cursor.seek_exact(parent_path)? { + Some((_, node)) => { + if node.state_mask.is_bit_set(nibble) { + lookup.candidates.push(BranchCandidate { + location: format!( + "child parent_path={} nibble={}", + nibbles_hex(&parent_path), + nibble_hex(nibble) + ), + expected_hash: node + .hash_mask + .is_bit_set(nibble) + .then(|| node.hash_for_nibble(nibble)), + children_are_in_trie: node.tree_mask.is_bit_set(nibble), + }); + } else { + lookup.notes.push(format!( + "parent_branch_present path={} missing_state_nibble={}", + nibbles_hex(&parent_path), + nibble_hex(nibble) + )); + } + } + None => lookup + .notes + .push(format!("parent_branch_missing path={}", nibbles_hex(&parent_path))), + } + } + + Ok(lookup) +} + +fn retain_outermost_mismatches(mut mismatches: Vec) -> Vec { + mismatches.sort_by(|a, b| { + a.context + .cmp(&b.context) + .then(a.path.len().cmp(&b.path.len())) + .then(a.kind_rank.cmp(&b.kind_rank)) + .then(a.first_line.cmp(&b.first_line)) + }); + + let mut kept = Vec::::new(); + 'outer: for mismatch in mismatches { + for existing in &mut kept { + if existing.context == mismatch.context && mismatch.path.starts_with(&existing.path) { + existing.suppressed_descendants += 1; + continue 'outer; + } + } + kept.push(mismatch); + } + + kept.sort_by(|a, b| { + a.path + .len() + .cmp(&b.path.len()) + .then(a.kind_rank.cmp(&b.kind_rank)) + .then(a.first_line.cmp(&b.first_line)) + }); + kept +} + +fn parse_trace(path: &Path) -> Result { + let file = File::open(path).with_context(|| format!("open 
trace file {}", path.display()))?; + let reader = BufReader::new(file); + + let mut state_branches = HashMap::<(Nibbles, B256, bool), ObservedStateBranch>::new(); + let mut account_leaves = HashMap::<(B256, u64, U256, Option), ObservedAccountLeaf>::new(); + let mut storage_branches = HashMap::<(B256, Nibbles, B256, bool), ObservedStorageBranch>::new(); + let mut storage_leaves = HashMap::<(B256, B256, U256), ObservedStorageLeaf>::new(); + let mut storage_roots = HashMap::<(B256, B256), ObservedStorageRoot>::new(); + + for (index, line) in reader.lines().enumerate() { + let line_number = index + 1; + let line = line?; + + if line.contains("trie::storage_root: calculated storage root") { + let root = parse_b256( + extract_after(&line, "calculated storage root root=")?.split(' ').next().unwrap(), + )?; + let hashed_address = + parse_b256(extract_after(&line, "hashed_address=")?.split(' ').next().unwrap())?; + upsert_storage_root(&mut storage_roots, hashed_address, root, line_number); + continue; + } + + if !line.contains("trie::node_iter: return=Ok(Some(") { + continue; + } + + if line.contains("trie_type=State") && line.contains("Branch(TrieBranchNode {") { + let path = parse_trace_nibbles(extract_between(&line, "key: Nibbles(", "), value:")?)?; + let observed_hash = + parse_b256(extract_between(&line, "value: ", ", children_are_in_trie:")?)?; + let children_are_in_trie = + parse_bool(extract_between(&line, "children_are_in_trie: ", " })))")?)?; + upsert_state_branch( + &mut state_branches, + path, + observed_hash, + children_are_in_trie, + line_number, + ); + continue; + } + + if line.contains("trie_type=Storage") && line.contains("Branch(TrieBranchNode {") { + let hashed_address = parse_b256(extract_storage_address(&line)?)?; + let path = parse_trace_nibbles(extract_between(&line, "key: Nibbles(", "), value:")?)?; + let observed_hash = + parse_b256(extract_between(&line, "value: ", ", children_are_in_trie:")?)?; + let children_are_in_trie = + 
parse_bool(extract_between(&line, "children_are_in_trie: ", " })))")?)?; + upsert_storage_branch( + &mut storage_branches, + hashed_address, + path, + observed_hash, + children_are_in_trie, + line_number, + ); + continue; + } + + if line.contains("trie_type=State") && line.contains("Leaf(") && line.contains("Account {") + { + let hashed_address = parse_b256(extract_between(&line, "Leaf(", ", Account {")?)?; + let nonce = extract_between(&line, "nonce: ", ", balance:")? + .parse::() + .with_context(|| format!("parse nonce on line {line_number}"))?; + let balance = extract_between(&line, "balance: ", ", bytecode_hash:")? + .parse::() + .with_context(|| format!("parse balance on line {line_number}"))?; + let bytecode_hash = + parse_optional_b256(extract_between(&line, "bytecode_hash: ", " })))")?)?; + upsert_account_leaf( + &mut account_leaves, + hashed_address, + nonce, + balance, + bytecode_hash, + line_number, + ); + continue; + } + + if line.contains("trie_type=Storage") && line.contains("Leaf(") { + let hashed_address = parse_b256(extract_storage_address(&line)?)?; + let leaf = extract_after(&line, "Leaf(")?; + let (hashed_slot, value) = leaf + .split_once(", ") + .ok_or_else(|| eyre!("invalid storage leaf on line {line_number}"))?; + let hashed_slot = parse_b256(hashed_slot)?; + let value = value + .split_once(")))") + .map(|(value, _)| value) + .ok_or_else(|| eyre!("invalid storage leaf terminator on line {line_number}"))? 
+ .parse::() + .with_context(|| format!("parse storage value on line {line_number}"))?; + upsert_storage_leaf( + &mut storage_leaves, + hashed_address, + hashed_slot, + value, + line_number, + ); + } + } + + Ok(TraceData { + state_branches: into_sorted(state_branches), + account_leaves: into_sorted(account_leaves), + storage_branches: into_sorted(storage_branches), + storage_leaves: into_sorted(storage_leaves), + storage_roots: into_sorted(storage_roots), + }) +} + +fn upsert_state_branch( + state_branches: &mut HashMap<(Nibbles, B256, bool), ObservedStateBranch>, + path: Nibbles, + observed_hash: B256, + children_are_in_trie: bool, + line_number: usize, +) { + state_branches + .entry((path, observed_hash, children_are_in_trie)) + .and_modify(|entry| entry.occurrences += 1) + .or_insert(ObservedStateBranch { + path, + observed_hash, + children_are_in_trie, + first_line: line_number, + occurrences: 1, + }); +} + +fn upsert_account_leaf( + account_leaves: &mut HashMap<(B256, u64, U256, Option), ObservedAccountLeaf>, + hashed_address: B256, + nonce: u64, + balance: U256, + bytecode_hash: Option, + line_number: usize, +) { + account_leaves + .entry((hashed_address, nonce, balance, bytecode_hash)) + .and_modify(|entry| entry.occurrences += 1) + .or_insert(ObservedAccountLeaf { + hashed_address, + nonce, + balance, + bytecode_hash, + first_line: line_number, + occurrences: 1, + }); +} + +fn upsert_storage_branch( + storage_branches: &mut HashMap<(B256, Nibbles, B256, bool), ObservedStorageBranch>, + hashed_address: B256, + path: Nibbles, + observed_hash: B256, + children_are_in_trie: bool, + line_number: usize, +) { + storage_branches + .entry((hashed_address, path, observed_hash, children_are_in_trie)) + .and_modify(|entry| entry.occurrences += 1) + .or_insert(ObservedStorageBranch { + hashed_address, + path, + observed_hash, + children_are_in_trie, + first_line: line_number, + occurrences: 1, + }); +} + +fn upsert_storage_leaf( + storage_leaves: &mut HashMap<(B256, 
B256, U256), ObservedStorageLeaf>, + hashed_address: B256, + hashed_slot: B256, + value: U256, + line_number: usize, +) { + storage_leaves + .entry((hashed_address, hashed_slot, value)) + .and_modify(|entry| entry.occurrences += 1) + .or_insert(ObservedStorageLeaf { + hashed_address, + hashed_slot, + value, + first_line: line_number, + occurrences: 1, + }); +} + +fn upsert_storage_root( + storage_roots: &mut HashMap<(B256, B256), ObservedStorageRoot>, + hashed_address: B256, + root: B256, + line_number: usize, +) { + storage_roots + .entry((hashed_address, root)) + .and_modify(|entry| entry.occurrences += 1) + .or_insert(ObservedStorageRoot { + hashed_address, + root, + first_line: line_number, + occurrences: 1, + }); +} + +fn into_sorted(bucket: HashMap) -> Vec +where + K: std::hash::Hash + Eq, + T: TraceLine, +{ + let mut values = bucket.into_values().collect::>(); + values.sort_by_key(|left| left.first_line()); + values +} + +fn parse_b256(value: &str) -> Result { + value.parse::().with_context(|| format!("parse B256 from {value}")) +} + +fn parse_optional_b256(value: &str) -> Result> { + if value == "None" { + return Ok(None) + } + let inner = value + .strip_prefix("Some(") + .and_then(|rest| rest.strip_suffix(')')) + .ok_or_else(|| eyre!("invalid optional B256: {value}"))?; + Ok(Some(parse_b256(inner)?)) +} + +fn parse_trace_nibbles(value: &str) -> Result { + let hex = value + .strip_prefix("0x") + .ok_or_else(|| eyre!("expected nibble string with 0x prefix: {value}"))?; + let mut nibbles = Vec::with_capacity(hex.len()); + for ch in hex.bytes() { + let nibble = match ch { + b'0'..=b'9' => ch - b'0', + b'a'..=b'f' => ch - b'a' + 10, + b'A'..=b'F' => ch - b'A' + 10, + _ => bail!("invalid hex nibble in {value}"), + }; + nibbles.push(nibble); + } + Ok(Nibbles::from_nibbles_unchecked(nibbles)) +} + +fn parse_bool(value: &str) -> Result { + match value { + "true" => Ok(true), + "false" => Ok(false), + _ => bail!("invalid boolean value: {value}"), + } +} + +fn 
#!/usr/bin/env bash
#
# Build and run the `compare_merkle_trace_to_db` example binary, forwarding all arguments.
#
# Usage:
#   compare-merkle-trace-to-db.sh <trace_file> <datadir> [--chain <chain>] [--max-results <n>]

set -euo pipefail

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd -- "${SCRIPT_DIR}/.." && pwd)

# Point cargo at the workspace manifest explicitly so the script works from any directory.
# The working directory is deliberately left untouched: the forwarded trace file and datadir
# arguments may be relative paths that must keep resolving against the caller's cwd.
exec cargo run \
    --manifest-path "${REPO_ROOT}/Cargo.toml" \
    --profile profiling \
    -p example-db-access \
    --bin compare_merkle_trace_to_db \
    -- "$@"
def sorted_records(bucket: dict[tuple[str, ...], dict[str, object]]) -> list[dict[str, object]]:
    """Return the bucket's records ordered by first, then last, trace line number.

    Both line fields are passed through ``int`` before comparison. The bucket itself is left
    unmodified; a new list is returned.
    """

    def line_span(record: dict[str, object]) -> tuple[int, int]:
        return int(record["first_line"]), int(record["last_line"])

    records = list(bucket.values())
    records.sort(key=line_span)
    return records
stage, + "root": state_root_match.group(1), + "duration": state_root_match.group(2), + "branches_added": int(state_root_match.group(3)), + "leaves_added": int(state_root_match.group(4)), + } + ) + continue + + if "trie::storage_root: calculated storage root" in line: + storage_root_match = STORAGE_ROOT_RE.search(line) + if storage_root_match: + storage_roots.append( + { + "line": line_number, + "stage": stage, + "hashed_address": storage_root_match.group(2), + "root": storage_root_match.group(1), + "duration": storage_root_match.group(3), + "branches_added": int(storage_root_match.group(4)), + "leaves_added": int(storage_root_match.group(5)), + } + ) + continue + + if "trie::node_iter: return=Ok(Some(" not in line: + continue + + branch_match = BRANCH_RE.search(line) + if branch_match: + node_payload = { + "stage": stage, + "key": branch_match.group(1), + "node_hash": branch_match.group(2), + "children_are_in_trie": bool_from_string(branch_match.group(3)), + } + if "trie_type=Storage" in line and storage_addr is not None: + storage_trie_node_occurrences += 1 + add_unique( + storage_trie_nodes, + ( + stage, + storage_addr, + node_payload["key"], + node_payload["node_hash"], + str(node_payload["children_are_in_trie"]), + ), + {**node_payload, "hashed_address": storage_addr}, + line_number, + ) + elif "trie_type=State" in line: + state_trie_node_occurrences += 1 + add_unique( + state_trie_nodes, + ( + stage, + node_payload["key"], + node_payload["node_hash"], + str(node_payload["children_are_in_trie"]), + ), + node_payload, + line_number, + ) + continue + + account_leaf_match = ACCOUNT_LEAF_RE.search(line) + if account_leaf_match and "trie_type=State" in line: + account_leaf_occurrences += 1 + bytecode_hash = account_leaf_match.group(4) + add_unique( + account_leaves, + ( + stage, + account_leaf_match.group(1), + account_leaf_match.group(2), + account_leaf_match.group(3), + bytecode_hash or "None", + ), + { + "stage": stage, + "hashed_address": 
account_leaf_match.group(1), + "account": { + "nonce": account_leaf_match.group(2), + "balance": account_leaf_match.group(3), + "bytecode_hash": bytecode_hash, + }, + }, + line_number, + ) + continue + + storage_leaf_match = STORAGE_LEAF_RE.search(line) + if storage_leaf_match and "trie_type=Storage" in line and storage_addr is not None: + storage_leaf_occurrences += 1 + add_unique( + storage_leaves, + ( + stage, + storage_addr, + storage_leaf_match.group(1), + storage_leaf_match.group(2), + ), + { + "stage": stage, + "hashed_address": storage_addr, + "hashed_slot": storage_leaf_match.group(1), + "value": storage_leaf_match.group(2), + }, + line_number, + ) + + return { + "trace_file": str(trace_path), + "summary": { + "stages": sorted(stages_seen), + "account_leaves": { + "unique": len(account_leaves), + "occurrences": account_leaf_occurrences, + }, + "storage_leaves": { + "unique": len(storage_leaves), + "occurrences": storage_leaf_occurrences, + }, + "state_trie_nodes": { + "unique": len(state_trie_nodes), + "occurrences": state_trie_node_occurrences, + }, + "storage_trie_nodes": { + "unique": len(storage_trie_nodes), + "occurrences": storage_trie_node_occurrences, + }, + "state_roots": len(state_roots), + "storage_roots": len(storage_roots), + }, + "account_leaves": sorted_records(account_leaves), + "storage_leaves": sorted_records(storage_leaves), + "state_trie_nodes": sorted_records(state_trie_nodes), + "storage_trie_nodes": sorted_records(storage_trie_nodes), + "state_roots": state_roots, + "storage_roots": storage_roots, + } + + +def print_summary(result: dict[str, object]) -> None: + summary = result["summary"] + if not isinstance(summary, dict): + raise TypeError("summary must be a dictionary") + + print(f"trace_file: {result['trace_file']}") + print(f"stages: {', '.join(summary['stages'])}") + print( + "account_leaves: " + f"unique={summary['account_leaves']['unique']} " + f"occurrences={summary['account_leaves']['occurrences']}" + ) + print( + 
"storage_leaves: " + f"unique={summary['storage_leaves']['unique']} " + f"occurrences={summary['storage_leaves']['occurrences']}" + ) + print( + "state_trie_nodes: " + f"unique={summary['state_trie_nodes']['unique']} " + f"occurrences={summary['state_trie_nodes']['occurrences']}" + ) + print( + "storage_trie_nodes: " + f"unique={summary['storage_trie_nodes']['unique']} " + f"occurrences={summary['storage_trie_nodes']['occurrences']}" + ) + print(f"state_roots: {summary['state_roots']}") + print(f"storage_roots: {summary['storage_roots']}") + + +def build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Extract touched Merkle-stage account/storage leaves and trie nodes from a trace log.", + ) + parser.add_argument("trace_file", type=Path, help="Path to the trace log file") + parser.add_argument( + "--summary", + action="store_true", + help="Print only high-level counts instead of the full JSON payload", + ) + parser.add_argument( + "--output", + type=Path, + help="Write the extracted payload to a file instead of stdout", + ) + return parser + + +def main() -> int: + parser = build_arg_parser() + args = parser.parse_args() + + if not args.trace_file.is_file(): + parser.error(f"trace file does not exist: {args.trace_file}") + + result = parse_trace(args.trace_file) + + if args.summary: + if args.output is not None: + with args.output.open("w", encoding="utf-8") as handle: + original_stdout = sys.stdout + try: + sys.stdout = handle + print_summary(result) + finally: + sys.stdout = original_stdout + return 0 + + print_summary(result) + return 0 + + payload = json.dumps(result, indent=2, sort_keys=False) + if args.output is not None: + args.output.write_text(payload + "\n", encoding="utf-8") + return 0 + + print(payload) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/repro-hoodi-partial-persistence-reorg.sh b/scripts/repro-hoodi-partial-persistence-reorg.sh new file mode 100755 index 
#!/usr/bin/env bash
#
# Reproduces a state-root mismatch on hoodi by restoring a datadir snapshot,
# starting reth with partial persistence and replaying reorg payload artifacts
# with reth-bench. Results are written to <artifacts-dir>/result.txt on exit.

set -euo pipefail

# Print the command-line help.
usage() {
  cat <<'EOF'
Usage: repro-hoodi-partial-persistence-reorg.sh [options]

Restores a hoodi datadir snapshot, starts reth with partial persistence, then
replays pre-generated reorg payload artifacts until a state-root mismatch is
observed, replay exits, or an optional timeout is reached.

Unlike repro-hoodi-partial-persistence-unwind.sh, this script does not crash the
node and does not run restart, unwind, or Merkle-stage follow-up steps.

Options:
  --snapshot PATH             Tar.zst snapshot to restore
                              (default: /mnt/data/hoodi.tar.zst)
  --datadir PATH              Restored reth datadir
                              (default: /mnt/data/hoodi)
  --jwt-secret PATH           JWT secret path
                              (default: <datadir>/jwt.hex)
  --payload-dir PATH          Directory containing payload_block_*.json files
                              (default: /mnt/data/hoodi-bal-payload-artifacts-10k-reorg5/payloads)
  --payload-count N           Number of payload artifacts to replay
                              (default: 20000)
  --expected-head N           Expected local head after restore
                              (default: 2613962)
  --start-block N             First block expected to be replayed
                              (default: 2613963)
  --artifacts-dir PATH        Directory for logs and summary output
                              (default: /tmp/reth-hoodi-reorg-<timestamp>)
  --start-timeout SECONDS     Seconds to wait for node RPC startup
                              (default: 180)
  --mismatch-timeout SECONDS  Seconds to wait for a state-root mismatch after
                              reth-bench starts (default: 0, no timeout)
  -h, --help                  Show this help

Exit codes:
  0  Script ran to completion. See result.txt for whether a state-root mismatch
     was observed.
  2  Setup/runtime failure prevented a conclusive run.
EOF
}

# Write a timestamped message to stderr.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*" >&2
}

# Backslash-escape regex metacharacters in $1 so it can be embedded in a pattern.
regex_escape() {
  printf '%s' "$1" | sed 's/[][(){}.^$+*?|\\/]/\\&/g'
}

# Print the node's current head as a 0x-prefixed hex string (eth_blockNumber on
# the local HTTP RPC). Returns 1 when the request fails; prints nothing when the
# response has no "result" field.
head_hex() {
  local response
  response=$(curl -fsS \
    -H 'Content-Type: application/json' \
    -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
    http://127.0.0.1:8545 2>/dev/null) || return 1
  response=${response//$'\n'/}
  sed -n 's/.*"result"[[:space:]]*:[[:space:]]*"\(0x[0-9a-fA-F]\+\)".*/\1/p' <<<"$response"
}

# Convert a 0x-prefixed hex string to decimal.
hex_to_dec() {
  printf '%d\n' "$((16#${1#0x}))"
}

# Succeed when pid $1 is gone or is a zombie (exited but not yet waited for).
pid_has_exited() {
  local pid="$1"
  local stat

  if ! kill -0 "$pid" 2>/dev/null; then
    return 0
  fi

  stat=$(ps -o stat= -p "$pid" 2>/dev/null || true)
  [[ "$stat" == *Z* ]]
}

# Poll once per second until pid $1 has exited; fail after $2 seconds.
wait_for_pid_exit() {
  local pid="$1"
  local timeout="$2"
  local elapsed=0

  while (( elapsed < timeout )); do
    if pid_has_exited "$pid"; then
      return 0
    fi
    sleep 1
    ((elapsed += 1))
  done

  return 1
}

# Reap the node process, store its exit status in NODE_EXIT_CODE and clear
# NODE_PID so it is not waited for a second time.
record_node_exit() {
  if [[ -z "${NODE_PID:-}" ]]; then
    return 0
  fi

  if wait "$NODE_PID"; then
    NODE_EXIT_CODE=0
  else
    NODE_EXIT_CODE=$?
  fi
  log "reth node exited with code ${NODE_EXIT_CODE}"
  NODE_PID=""
}

# Send SIGTERM to pid $1 (described by label $2) if it is still signalable.
stop_pid() {
  local pid="$1"
  local label="$2"

  if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
    log "Sending SIGTERM to ${label} (pid ${pid})"
    kill -TERM "$pid" 2>/dev/null || true
  fi
}

# Send SIGTERM to every process whose full command line matches regex $1.
stop_matching_processes() {
  local pattern="$1"
  local label="$2"
  local -a pids=()
  local pid

  while IFS= read -r pid; do
    [[ -n "$pid" ]] && pids+=("$pid")
  done < <(pgrep -f "$pattern" || true)

  if ((${#pids[@]} > 0)); then
    log "Sending SIGTERM to stale ${label} processes: ${pids[*]}"
    kill -TERM "${pids[@]}" 2>/dev/null || true
  fi
}

# Append "<name>=<shell-quoted command line>" to COMMANDS_FILE.
capture_command() {
  local name="$1"
  shift
  {
    printf '%s=' "$name"
    printf '%q ' "$@"
    printf '\n'
  } >>"$COMMANDS_FILE"
}

# Write the key=value run summary to SUMMARY_FILE.
write_summary() {
  {
    printf 'result=%s\n' "${RESULT:-unknown}"
    printf 'snapshot=%s\n' "$SNAPSHOT"
    printf 'datadir=%s\n' "$DATADIR"
    printf 'jwt_secret=%s\n' "$JWT_SECRET"
    printf 'payload_dir=%s\n' "$PAYLOAD_DIR"
    printf 'payload_count=%s\n' "$PAYLOAD_COUNT"
    printf 'expected_head=%s\n' "$EXPECTED_HEAD"
    printf 'start_block=%s\n' "$START_BLOCK"
    printf 'head_before=%s\n' "${HEAD_BEFORE:-unknown}"
    printf 'head_after=%s\n' "${HEAD_AFTER:-unknown}"
    printf 'bench_exit_code=%s\n' "${BENCH_EXIT_CODE:-unknown}"
    printf 'node_exit_code=%s\n' "${NODE_EXIT_CODE:-unknown}"
    printf 'mismatch_source=%s\n' "${MISMATCH_SOURCE:-not_found}"
    printf 'mismatch_line=%s\n' "${MISMATCH_LINE:-not_found}"
    printf 'artifacts_dir=%s\n' "$ARTIFACTS_DIR"
    printf 'node_log=%s\n' "$NODE_LOG"
    printf 'bench_log=%s\n' "$BENCH_LOG"
  } >"$SUMMARY_FILE"
}

# EXIT trap: reap or stop reth-bench and the node, then write the summary.
cleanup() {
  # reth-bench already finished: just collect its exit status.
  if [[ -n "${BENCH_PID:-}" ]] && pid_has_exited "$BENCH_PID"; then
    if wait "$BENCH_PID"; then
      BENCH_EXIT_CODE=0
    else
      BENCH_EXIT_CODE=$?
    fi
    BENCH_PID=""
  fi

  # reth-bench still running: terminate it and record how it exited.
  stop_pid "${BENCH_PID:-}" "reth-bench"
  if [[ -n "${BENCH_PID:-}" ]]; then
    if wait "${BENCH_PID}" 2>/dev/null; then
      BENCH_EXIT_CODE=0
    else
      BENCH_EXIT_CODE=$?
    fi
    BENCH_PID=""
  fi

  if [[ -n "${NODE_PID:-}" ]] && pid_has_exited "$NODE_PID"; then
    record_node_exit
  fi

  stop_pid "${NODE_PID:-}" "reth node"
  if [[ -n "${NODE_PID:-}" ]]; then
    wait "${NODE_PID}" 2>/dev/null || true
    NODE_PID=""
  fi

  write_summary
}

SNAPSHOT="/mnt/data/hoodi.tar.zst"
DATADIR="/mnt/data/hoodi"
JWT_SECRET=""
PAYLOAD_DIR="/mnt/data/hoodi-bal-payload-artifacts-10k-reorg5/payloads"
PAYLOAD_COUNT=20000
EXPECTED_HEAD=2613962
START_BLOCK=2613963
START_TIMEOUT=180
MISMATCH_TIMEOUT=0
RETH_BIN="/repos/reth/target/profiling/reth"
BENCH_BIN="/repos/reth/target/profiling/reth-bench"
CHAIN="hoodi"
RESULT="script_error"
HEAD_BEFORE=""
HEAD_AFTER=""
NODE_PID=""
BENCH_PID=""
BENCH_EXIT_CODE=""
NODE_EXIT_CODE=""
MISMATCH_SOURCE=""
MISMATCH_LINE=""
TIMESTAMP="$(date '+%Y%m%d-%H%M%S')"
ARTIFACTS_DIR="/tmp/reth-hoodi-reorg-${TIMESTAMP}"

while (($# > 0)); do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --snapshot|--datadir|--jwt-secret|--payload-dir|--payload-count|--expected-head|--start-block|--artifacts-dir|--start-timeout|--mismatch-timeout)
      # Under `set -u` a bare "$2" would abort with "unbound variable";
      # report the missing value as a usage error instead.
      if (($# < 2)); then
        echo "Missing value for argument: $1" >&2
        usage
        exit 2
      fi
      case "$1" in
        --snapshot) SNAPSHOT="$2" ;;
        --datadir) DATADIR="$2" ;;
        --jwt-secret) JWT_SECRET="$2" ;;
        --payload-dir) PAYLOAD_DIR="$2" ;;
        --payload-count) PAYLOAD_COUNT="$2" ;;
        --expected-head) EXPECTED_HEAD="$2" ;;
        --start-block) START_BLOCK="$2" ;;
        --artifacts-dir) ARTIFACTS_DIR="$2" ;;
        --start-timeout) START_TIMEOUT="$2" ;;
        --mismatch-timeout) MISMATCH_TIMEOUT="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage
      exit 2
      ;;
  esac
done

if [[ -z "$JWT_SECRET" ]]; then
  JWT_SECRET="${DATADIR}/jwt.hex"
fi

mkdir -p "$ARTIFACTS_DIR"
COMMANDS_FILE="${ARTIFACTS_DIR}/commands.txt"
SUMMARY_FILE="${ARTIFACTS_DIR}/result.txt"
NODE_LOG="${ARTIFACTS_DIR}/node.log"
BENCH_LOG="${ARTIFACTS_DIR}/bench.log"

trap cleanup EXIT

if [[ ! -x "$RETH_BIN" ]]; then
  log "Missing executable reth binary: $RETH_BIN"
  exit 2
fi

if [[ ! -x "$BENCH_BIN" ]]; then
  log "Missing executable reth-bench binary: $BENCH_BIN"
  exit 2
fi

if [[ ! -f "$SNAPSHOT" ]]; then
  log "Missing snapshot archive: $SNAPSHOT"
  exit 2
fi

if [[ ! -d "$PAYLOAD_DIR" ]]; then
  log "Missing payload directory: $PAYLOAD_DIR"
  exit 2
fi

if (( PAYLOAD_COUNT <= 0 )); then
  log "--payload-count must be greater than 0"
  exit 2
fi

# Stop any node left over from an earlier run against the same datadir.
NODE_PATTERN="^$(regex_escape "$RETH_BIN") node --datadir $(regex_escape "$DATADIR")( |$)"
stop_matching_processes "$NODE_PATTERN" "reth"
sleep 1

# Single definition of the node command line, so the command recorded in
# commands.txt is the one start_node actually runs. NODE_PATTERN relies on it
# starting with "<reth> node --datadir <datadir>".
NODE_CMD=(
  "$RETH_BIN" node
  --datadir "$DATADIR"
  --chain "$CHAIN"
  --http --http.addr 127.0.0.1 --http.port 8545 --http.api eth,net,web3,reth,testing
  --ws --ws.addr 127.0.0.1 --ws.port 8546 --ws.api eth,net,web3,reth
  --authrpc.addr 127.0.0.1 --authrpc.port 8551 --authrpc.jwtsecret "$JWT_SECRET"
  --disable-discovery
  --engine.persistence-threshold 10
  --engine.deferred-trie-blocks 3
  --engine.accept-execution-requests-hash
  --log.stdout.filter debug
  --color never
)

capture_command reth "${NODE_CMD[@]}"

# Replace DATADIR with a fresh extraction of SNAPSHOT. The archive may contain
# the datadir under its own name, under a single other top-level directory, or
# directly at the archive root; a directory counts as a datadir when it has both
# db/ and static_files/. Generates a JWT secret if the restored tree lacks one.
restore_snapshot() {
  local parent_dir
  local base_name
  local extract_root
  local candidate_datadir=""
  local -a nested_candidates=()
  local nested_dir

  parent_dir=$(dirname "$DATADIR")
  base_name=$(basename "$DATADIR")
  extract_root="${DATADIR}.extract.$$"

  log "Restoring snapshot ${SNAPSHOT} into ${DATADIR}"
  rm -rf "$DATADIR" "$extract_root"
  mkdir -p "$parent_dir" "$extract_root"
  tar --zstd -xf "$SNAPSHOT" -C "$extract_root"

  if [[ -d "${extract_root}/${base_name}/db" && -d "${extract_root}/${base_name}/static_files" ]]; then
    candidate_datadir="${extract_root}/${base_name}"
  else
    while IFS= read -r nested_dir; do
      if [[ -d "${nested_dir}/db" && -d "${nested_dir}/static_files" ]]; then
        nested_candidates+=("$nested_dir")
      fi
    done < <(find "$extract_root" -mindepth 1 -maxdepth 1 -type d | sort)

    if ((${#nested_candidates[@]} == 1)); then
      candidate_datadir="${nested_candidates[0]}"
    elif ((${#nested_candidates[@]} > 1)); then
      log "Snapshot layout produced multiple nested datadir candidates under ${extract_root}: ${nested_candidates[*]}"
      exit 2
    elif [[ -d "${extract_root}/db" && -d "${extract_root}/static_files" ]]; then
      candidate_datadir="$extract_root"
    fi
  fi

  if [[ -z "$candidate_datadir" ]]; then
    log "Snapshot layout did not produce an expected datadir under ${extract_root}"
    exit 2
  fi

  if [[ "$candidate_datadir" == "$extract_root" ]]; then
    mv "$extract_root" "$DATADIR"
  else
    mv "$candidate_datadir" "$DATADIR"
    rm -rf "$extract_root"
  fi

  if [[ ! -f "$JWT_SECRET" ]]; then
    log "Restored datadir is missing jwt secret; generating ${JWT_SECRET}"
    mkdir -p "$(dirname "$JWT_SECRET")"
    # Restrict permissions in a subshell so the tighter umask applies only to
    # the secret and not to every file the script creates afterwards.
    (
      umask 077
      {
        head -c 32 /dev/urandom | od -An -tx1 | tr -d ' \n'
        printf '\n'
      } >"$JWT_SECRET"
    )
  fi
}

# Launch the node in the background with core dumps disabled, logging to
# NODE_LOG. Sets NODE_PID.
start_node() {
  (
    ulimit -c 0
    "${NODE_CMD[@]}"
  ) >"$NODE_LOG" 2>&1 &
  NODE_PID=$!
}

# Poll until the node answers eth_blockNumber and print the head (hex).
# Fails when node pid $1 exits first or after $2 seconds.
wait_for_rpc_start() {
  local pid="$1"
  local timeout="$2"
  local elapsed=0
  local block_hex

  while (( elapsed < timeout )); do
    block_hex=$(head_hex || true)
    if [[ -n "$block_hex" ]]; then
      printf '%s\n' "$block_hex"
      return 0
    fi

    # pid_has_exited also treats a not-yet-reaped (zombie) node as exited.
    if pid_has_exited "$pid"; then
      log "Node exited before RPC became ready"
      return 1
    fi

    sleep 1
    ((elapsed += 1))
  done

  log "Timed out waiting for node RPC readiness"
  return 1
}

# Remove lock files under the datadir. Not invoked by this script's own flow.
remove_stale_locks() {
  rm -f "$DATADIR/db/lock" "$DATADIR/static_files/lock" "$DATADIR/rocksdb/LOCK"
}

# Search log file $2 for the first state-root mismatch message. On a hit, record
# it in MISMATCH_SOURCE ($1) / MISMATCH_LINE, set RESULT and succeed.
find_mismatch() {
  local source="$1"
  local log_file="$2"
  local line

  line=$(grep -Ei -m1 \
    'State root task returned incorrect state root|mismatched block state root|Failed to verify block state root' \
    "$log_file" 2>/dev/null || true)
  if [[ -n "$line" ]]; then
    MISMATCH_SOURCE="$source"
    MISMATCH_LINE="$line"
    RESULT="state_root_mismatch"
    return 0
  fi

  return 1
}

# Poll once per second until a mismatch is logged, reth-bench exits, the node
# exits, or MISMATCH_TIMEOUT (when > 0) elapses. Sets RESULT and always returns 0.
monitor_for_mismatch() {
  local start_epoch
  local elapsed
  local block_hex

  start_epoch=$(date +%s)
  while true; do
    if find_mismatch node "$NODE_LOG" || find_mismatch bench "$BENCH_LOG"; then
      log "Observed state-root mismatch in ${MISMATCH_SOURCE} log"
      if [[ -n "$NODE_PID" ]]; then
        if wait_for_pid_exit "$NODE_PID" 5; then
          record_node_exit
        else
          log "reth node is still running 5s after mismatch"
        fi
      fi
      return 0
    fi

    block_hex=$(head_hex || true)
    if [[ -n "$block_hex" ]]; then
      HEAD_AFTER=$(hex_to_dec "$block_hex")
    fi

    if [[ -n "$BENCH_PID" ]] && pid_has_exited "$BENCH_PID"; then
      if wait "$BENCH_PID"; then
        BENCH_EXIT_CODE=0
      else
        BENCH_EXIT_CODE=$?
      fi
      # Clear the pid on every path so cleanup does not wait on it a second
      # time and overwrite the recorded exit code.
      BENCH_PID=""

      # Re-scan the logs whatever the exit status: a mismatch may have been
      # logged between the check at the top of the loop and the bench exit.
      if find_mismatch node "$NODE_LOG" || find_mismatch bench "$BENCH_LOG"; then
        log "Observed state-root mismatch after reth-bench exit"
      elif (( BENCH_EXIT_CODE == 0 )); then
        RESULT="bench_completed_no_mismatch"
      else
        RESULT="bench_failed_no_mismatch"
      fi
      return 0
    fi

    if [[ -n "$NODE_PID" ]] && pid_has_exited "$NODE_PID"; then
      if find_mismatch node "$NODE_LOG" || find_mismatch bench "$BENCH_LOG"; then
        log "Observed state-root mismatch after node exit"
      else
        RESULT="node_exited_no_mismatch"
      fi
      record_node_exit
      return 0
    fi

    if (( MISMATCH_TIMEOUT > 0 )); then
      elapsed=$(($(date +%s) - start_epoch))
      if (( elapsed >= MISMATCH_TIMEOUT )); then
        RESULT="timeout_no_mismatch"
        return 0
      fi
    fi

    sleep 1
  done
}

restore_snapshot

log "Starting reth for reorg replay run"
start_node

HEAD_HEX=$(wait_for_rpc_start "$NODE_PID" "$START_TIMEOUT") || exit 2
HEAD_BEFORE=$(hex_to_dec "$HEAD_HEX")
HEAD_AFTER="$HEAD_BEFORE"
printf '%s\n' "$HEAD_BEFORE" >"${ARTIFACTS_DIR}/current_head_before.txt"

if (( HEAD_BEFORE != EXPECTED_HEAD )); then
  log "Expected restored head ${EXPECTED_HEAD}, got ${HEAD_BEFORE}"
  exit 2
fi

if (( HEAD_BEFORE + 1 != START_BLOCK )); then
  log "Expected first replay block ${START_BLOCK}, but restored head implies ${HEAD_BEFORE} -> $((HEAD_BEFORE + 1))"
  exit 2
fi

BENCH_ARGS=(
  "$BENCH_BIN" -vvv replay-payloads
  --reth-new-payload
  --wait-for-persistence always
  --jwt-secret "$JWT_SECRET"
  --engine-rpc-url http://127.0.0.1:8551
  --payload-dir "$PAYLOAD_DIR"
  --count "$PAYLOAD_COUNT"
  --output "$ARTIFACTS_DIR/reth-bench"
)

capture_command reth_bench "${BENCH_ARGS[@]}"

log "Running reth-bench replay-payloads for ${PAYLOAD_COUNT} payloads from ${PAYLOAD_DIR}"

"${BENCH_ARGS[@]}" >"$BENCH_LOG" 2>&1 &
BENCH_PID=$!
+ +monitor_for_mismatch + +log "Reorg repro result: ${RESULT}" diff --git a/scripts/repro-hoodi-partial-persistence-unwind.sh b/scripts/repro-hoodi-partial-persistence-unwind.sh new file mode 100755 index 00000000000..0c7df9d9ddc --- /dev/null +++ b/scripts/repro-hoodi-partial-persistence-unwind.sh @@ -0,0 +1,826 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: repro-hoodi-partial-persistence-unwind.sh [options] + +Restores a hoodi datadir snapshot, runs a partial-persistence sync replay with +reth-bench, kill -9s the node during the post-target persistence window, then +restarts the node and reports whether restart led to an unwind failure. + +Options: + --snapshot PATH Tar.zst snapshot to restore + (default: /mnt/data/hoodi.tar.zst) + --datadir PATH Restored reth datadir + (default: /mnt/data/hoodi) + --jwt-secret PATH JWT secret path + (default: /jwt.hex) + --rpc-url URL Remote hoodi RPC used by reth-bench + (default: https://rpc.hoodi.ethpandaops.io) + --expected-head N Expected local head after restore + (default: 2613962) + --start-block N First block expected to be replayed + (default: 2613963) + --target-block N Last block to replay before crashing + (default: 2614300) + --randomize-target-block Pick a random crash target between + --start-block and --target-block + --artifacts-dir PATH Directory for logs and summary output + (default: /tmp/reth-hoodi-unwind-) + --start-timeout SECONDS Seconds to wait for node RPC startup + (default: 180) + --target-timeout SECONDS Seconds to wait for local head to reach target + (default: 900) + --persistence-timeout SEC Seconds to wait for a persistence marker after + the target head is reached (default: 300) + --restart-timeout SECONDS Seconds to classify restart behavior + (default: 180) + -h, --help Show this help + +Exit codes: + 0 Script ran to completion. See result.txt for whether unwind succeeded, + failed, or was not triggered. 
+ 2 Setup/runtime failure prevented a conclusive result. +EOF +} + +log() { + printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*" >&2 +} + +regex_escape() { + printf '%s' "$1" | sed 's/[][(){}.^$+*?|\\/]/\\&/g' +} + +head_hex() { + local response + response=$(curl -fsS \ + -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \ + http://127.0.0.1:8545 2>/dev/null) || return 1 + response=${response//$'\n'/} + sed -n 's/.*"result"[[:space:]]*:[[:space:]]*"\(0x[0-9a-fA-F]\+\)".*/\1/p' <<<"$response" +} + +hex_to_dec() { + printf '%d\n' "$((16#${1#0x}))" +} + +wait_for_pid_exit() { + local pid="$1" + local timeout="$2" + local elapsed=0 + + while (( elapsed < timeout )); do + if ! kill -0 "$pid" 2>/dev/null; then + return 0 + fi + sleep 1 + ((elapsed += 1)) + done + + return 1 +} + +stop_pid() { + local pid="$1" + local signal="$2" + local label="$3" + + if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then + log "Sending SIG${signal} to ${label} (pid ${pid})" + kill "-${signal}" "$pid" 2>/dev/null || true + fi +} + +kill_matching_processes() { + local signal="$1" + local pattern="$2" + local label="$3" + local -a pids=() + local pid + + while IFS= read -r pid; do + [[ -n "$pid" ]] && pids+=("$pid") + done < <(pgrep -f "$pattern" || true) + + if ((${#pids[@]} > 0)); then + log "Sending SIG${signal} to stale ${label} processes: ${pids[*]}" + kill "-${signal}" "${pids[@]}" 2>/dev/null || true + fi +} + +capture_command() { + local name="$1" + shift + { + printf '%s=' "$name" + printf '%q ' "$@" + printf '\n' + } >>"$COMMANDS_FILE" +} + +write_summary() { + { + printf 'result=%s\n' "${RESULT:-unknown}" + printf 'snapshot=%s\n' "$SNAPSHOT" + printf 'datadir=%s\n' "$DATADIR" + printf 'jwt_secret=%s\n' "$JWT_SECRET" + printf 'remote_rpc_url=%s\n' "$REMOTE_RPC_URL" + printf 'expected_head=%s\n' "$EXPECTED_HEAD" + printf 'start_block=%s\n' "$START_BLOCK" + printf 'target_block=%s\n' "$TARGET_BLOCK" + printf 
'target_block_mode=%s\n' "$TARGET_BLOCK_MODE" + printf 'target_block_lower_bound=%s\n' "$TARGET_BLOCK_LOWER_BOUND" + printf 'target_block_upper_bound=%s\n' "$TARGET_BLOCK_UPPER_BOUND" + printf 'advance=%s\n' "${ADVANCE:-unknown}" + printf 'head_before=%s\n' "${HEAD_BEFORE:-unknown}" + printf 'head_after_crash=%s\n' "${HEAD_AT_CRASH:-unknown}" + printf 'head_after_restart=%s\n' "${HEAD_AFTER_RESTART:-unknown}" + printf 'artifacts_dir=%s\n' "$ARTIFACTS_DIR" + printf 'node1_log=%s\n' "$NODE1_LOG" + printf 'bench_log=%s\n' "$BENCH_LOG" + printf 'node2_log=%s\n' "$NODE2_LOG" + printf 'restart_trace_log=%s\n' "$RESTART_TRACE_LOG" + printf 'failed_unwind_target=%s\n' "${FAILED_UNWIND_TARGET:-unknown}" + printf 'drop_merkle_result=%s\n' "${DROP_MERKLE_RESULT:-not_run}" + printf 'drop_merkle_log=%s\n' "$DROP_MERKLE_LOG" + printf 'post_drop_unwind_result=%s\n' "${POST_DROP_UNWIND_RESULT:-not_run}" + printf 'post_drop_unwind_log=%s\n' "$POST_DROP_UNWIND_LOG" + printf 'post_drop_merkle_run_result=%s\n' "${POST_DROP_MERKLE_RUN_RESULT:-not_run}" + printf 'post_drop_merkle_run_log=%s\n' "$POST_DROP_MERKLE_RUN_LOG" + printf 'post_drop_merkle_run_trace_log=%s\n' "$POST_DROP_MERKLE_RUN_TRACE_LOG" + } >"$SUMMARY_FILE" +} + +cleanup() { + stop_pid "${BENCH_PID:-}" TERM "reth-bench" + if [[ -n "${BENCH_PID:-}" ]]; then + wait "${BENCH_PID}" 2>/dev/null || true + fi + + stop_pid "${NODE2_PID:-}" TERM "reth restart node" + if [[ -n "${NODE2_PID:-}" ]]; then + wait "${NODE2_PID}" 2>/dev/null || true + fi + + stop_pid "${NODE1_PID:-}" TERM "reth crash node" + if [[ -n "${NODE1_PID:-}" ]]; then + wait "${NODE1_PID}" 2>/dev/null || true + fi + + write_summary +} + +SNAPSHOT="/mnt/data/hoodi.tar.zst" +DATADIR="/mnt/data/hoodi" +JWT_SECRET="" +REMOTE_RPC_URL="https://rpc.hoodi.ethpandaops.io" +EXPECTED_HEAD=2613962 +START_BLOCK=2613963 +TARGET_BLOCK=2614300 +RANDOMIZE_TARGET_BLOCK=0 +START_TIMEOUT=180 +TARGET_TIMEOUT=900 +PERSISTENCE_TIMEOUT=300 +RESTART_TIMEOUT=180 
+RETH_BIN="/repos/reth/target/profiling/reth" +BENCH_BIN="/repos/reth/target/profiling/reth-bench" +CHAIN="hoodi" +MERKLE_TRACE_FILTER='info' +RESULT="script_error" +ADVANCE="" +TARGET_BLOCK_MODE="fixed" +TARGET_BLOCK_LOWER_BOUND="$TARGET_BLOCK" +TARGET_BLOCK_UPPER_BOUND="$TARGET_BLOCK" +HEAD_BEFORE="" +HEAD_AT_CRASH="" +HEAD_AFTER_RESTART="" +NODE1_PID="" +NODE2_PID="" +BENCH_PID="" +FAILED_UNWIND_TARGET="" +DROP_MERKLE_RESULT="not_run" +POST_DROP_UNWIND_RESULT="not_run" +POST_DROP_MERKLE_RUN_RESULT="not_run" +TIMESTAMP="$(date '+%Y%m%d-%H%M%S')" +ARTIFACTS_DIR="/tmp/reth-hoodi-unwind-${TIMESTAMP}" + +while (($# > 0)); do + case "$1" in + --snapshot) + SNAPSHOT="$2" + shift 2 + ;; + --datadir) + DATADIR="$2" + shift 2 + ;; + --jwt-secret) + JWT_SECRET="$2" + shift 2 + ;; + --rpc-url) + REMOTE_RPC_URL="$2" + shift 2 + ;; + --expected-head) + EXPECTED_HEAD="$2" + shift 2 + ;; + --start-block) + START_BLOCK="$2" + shift 2 + ;; + --target-block) + TARGET_BLOCK="$2" + shift 2 + ;; + --randomize-target-block) + RANDOMIZE_TARGET_BLOCK=1 + shift + ;; + --artifacts-dir) + ARTIFACTS_DIR="$2" + shift 2 + ;; + --start-timeout) + START_TIMEOUT="$2" + shift 2 + ;; + --target-timeout) + TARGET_TIMEOUT="$2" + shift 2 + ;; + --persistence-timeout) + PERSISTENCE_TIMEOUT="$2" + shift 2 + ;; + --restart-timeout) + RESTART_TIMEOUT="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage + exit 2 + ;; + esac +done + +if [[ -z "$JWT_SECRET" ]]; then + JWT_SECRET="${DATADIR}/jwt.hex" +fi + +mkdir -p "$ARTIFACTS_DIR" +COMMANDS_FILE="${ARTIFACTS_DIR}/commands.txt" +SUMMARY_FILE="${ARTIFACTS_DIR}/result.txt" +NODE1_LOG="${ARTIFACTS_DIR}/node1.log" +BENCH_LOG="${ARTIFACTS_DIR}/bench.log" +NODE2_LOG="${ARTIFACTS_DIR}/node2.log" +RESTART_TRACE_LOG="${ARTIFACTS_DIR}/restart-trace.log" +DROP_MERKLE_LOG="${ARTIFACTS_DIR}/drop-merkle.log" +POST_DROP_UNWIND_LOG="${ARTIFACTS_DIR}/post-drop-unwind.log" 
+POST_DROP_MERKLE_RUN_LOG="${ARTIFACTS_DIR}/post-drop-merkle-run.log" +POST_DROP_MERKLE_RUN_TRACE_LOG="not_captured" + +trap cleanup EXIT + +if [[ ! -x "$RETH_BIN" ]]; then + log "Missing executable reth binary: $RETH_BIN" + exit 2 +fi + +if [[ ! -x "$BENCH_BIN" ]]; then + log "Missing executable reth-bench binary: $BENCH_BIN" + exit 2 +fi + +if [[ ! -f "$SNAPSHOT" ]]; then + log "Missing snapshot archive: $SNAPSHOT" + exit 2 +fi + +NODE_PATTERN="^$(regex_escape "$RETH_BIN") node --datadir $(regex_escape "$DATADIR")( |$)" +kill_matching_processes TERM "$NODE_PATTERN" "reth" +sleep 1 +kill_matching_processes KILL "$NODE_PATTERN" "reth" + +capture_command reth "$RETH_BIN" node \ + --datadir "$DATADIR" \ + --chain "$CHAIN" \ + --http --http.addr 127.0.0.1 --http.port 8545 --http.api eth,net,web3,reth \ + --ws --ws.addr 127.0.0.1 --ws.port 8546 --ws.api eth,net,web3,reth \ + --authrpc.addr 127.0.0.1 --authrpc.port 8551 --authrpc.jwtsecret "$JWT_SECRET" \ + --disable-discovery \ + --engine.persistence-threshold 10 \ + --engine.deferred-trie-blocks 3 \ + --engine.accept-execution-requests-hash \ + --log.stdout.filter 'info,providers::db=debug,reth::providers::static_file=debug,reth::storage=debug,consensus::engine=debug' \ + --color never + +capture_command reth_restart "$RETH_BIN" node \ + --datadir "$DATADIR" \ + --chain "$CHAIN" \ + --http --http.addr 127.0.0.1 --http.port 8545 --http.api eth,net,web3,reth \ + --ws --ws.addr 127.0.0.1 --ws.port 8546 --ws.api eth,net,web3,reth \ + --authrpc.addr 127.0.0.1 --authrpc.port 8551 --authrpc.jwtsecret "$JWT_SECRET" \ + --disable-discovery \ + --engine.persistence-threshold 10 \ + --engine.deferred-trie-blocks 3 \ + --engine.accept-execution-requests-hash \ + --log.stdout.filter trace \ + --color never + +capture_command reth_stage_drop_merkle "$RETH_BIN" stage drop \ + --datadir "$DATADIR" \ + --chain "$CHAIN" \ + --log.stdout.filter info \ + --color never \ + merkle + +restore_snapshot() { + local parent_dir + local 
base_name + local extract_root + local candidate_datadir="" + local -a nested_candidates=() + local nested_dir + + parent_dir=$(dirname "$DATADIR") + base_name=$(basename "$DATADIR") + extract_root="${DATADIR}.extract.$$" + + log "Restoring snapshot ${SNAPSHOT} into ${DATADIR}" + rm -rf "$DATADIR" "$extract_root" + mkdir -p "$parent_dir" "$extract_root" + tar --zstd -xf "$SNAPSHOT" -C "$extract_root" + + if [[ -d "${extract_root}/${base_name}/db" && -d "${extract_root}/${base_name}/static_files" ]]; then + candidate_datadir="${extract_root}/${base_name}" + else + while IFS= read -r nested_dir; do + if [[ -d "${nested_dir}/db" && -d "${nested_dir}/static_files" ]]; then + nested_candidates+=("$nested_dir") + fi + done < <(find "$extract_root" -mindepth 1 -maxdepth 1 -type d | sort) + + if ((${#nested_candidates[@]} == 1)); then + candidate_datadir="${nested_candidates[0]}" + elif ((${#nested_candidates[@]} > 1)); then + log "Snapshot layout produced multiple nested datadir candidates under ${extract_root}: ${nested_candidates[*]}" + exit 2 + elif [[ -d "${extract_root}/db" && -d "${extract_root}/static_files" ]]; then + candidate_datadir="$extract_root" + fi + fi + + if [[ -z "$candidate_datadir" ]]; then + log "Snapshot layout did not produce an expected datadir under ${extract_root}" + exit 2 + fi + + if [[ "$candidate_datadir" == "$extract_root" ]]; then + mv "$extract_root" "$DATADIR" + else + mv "$candidate_datadir" "$DATADIR" + rm -rf "$extract_root" + fi + + if [[ ! 
-f "$JWT_SECRET" ]]; then
        log "Restored datadir is missing jwt secret; generating ${JWT_SECRET}"
        mkdir -p "$(dirname "$JWT_SECRET")"
        umask 077
        head -c 32 /dev/urandom | od -An -tx1 | tr -d ' \n' >"$JWT_SECRET"
        printf '\n' >>"$JWT_SECRET"
    fi
}

# Launch `reth node` in the background for the replay run.
#
# The node's stdout and stderr both go to the given log file, so the only thing
# this function writes to its own stdout is the PID of the background job.
#
# Arguments:
#   $1 - log file receiving the node's combined stdout/stderr
# Outputs:
#   PID of the backgrounded node
start_node() {
    local log_file="$1"

    "$RETH_BIN" node \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --http --http.addr 127.0.0.1 --http.port 8545 --http.api eth,net,web3,reth \
        --ws --ws.addr 127.0.0.1 --ws.port 8546 --ws.api eth,net,web3,reth \
        --authrpc.addr 127.0.0.1 --authrpc.port 8551 --authrpc.jwtsecret "$JWT_SECRET" \
        --disable-discovery \
        --engine.persistence-threshold 10 \
        --engine.deferred-trie-blocks 3 \
        --engine.accept-execution-requests-hash \
        --log.stdout.filter 'info,providers::db=debug,reth::providers::static_file=debug,reth::storage=debug,consensus::engine=debug' \
        --color never \
        >"$log_file" 2>&1 &
    echo $!
}

# Launch `reth node` in the background for the restart run, with trace logging.
#
# Uses the same node flags as start_node except for the log filter (`trace`).
# The combined output is piped through `tee`, which writes one copy to the
# trace log and one copy to the regular log file.
#
# Arguments:
#   $1 - log file receiving a copy of the node output
#   $2 - trace log receiving a copy of the node output
# Outputs:
#   PID of the backgrounded node
start_unwind_node() {
    local log_file="$1"
    local trace_log="$2"

    "$RETH_BIN" node \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --http --http.addr 127.0.0.1 --http.port 8545 --http.api eth,net,web3,reth \
        --ws --ws.addr 127.0.0.1 --ws.port 8546 --ws.api eth,net,web3,reth \
        --authrpc.addr 127.0.0.1 --authrpc.port 8551 --authrpc.jwtsecret "$JWT_SECRET" \
        --disable-discovery \
        --engine.persistence-threshold 10 \
        --engine.deferred-trie-blocks 3 \
        --engine.accept-execution-requests-hash \
        --log.stdout.filter trace \
        --color never \
        > >(tee "$trace_log" >"$log_file") 2>&1 &
    echo $!
}

# Poll once per second until `head_hex` yields a non-empty value.
#
# NOTE(review): `head_hex` is defined elsewhere in this file; presumably it
# queries the local RPC for the current head block number in hex - confirm.
#
# Arguments:
#   $1 - PID of the node being waited on
#   $2 - maximum number of one-second polls
#   $3 - label used in log messages
# Outputs:
#   the value returned by `head_hex` on success
# Returns:
#   0 once a head value was obtained; 1 if the process exited first or the
#   timeout elapsed
wait_for_rpc_start() {
    local pid="$1"
    local timeout="$2"
    local label="$3"
    local elapsed=0
    local block_hex

    while (( elapsed < timeout )); do
        block_hex=$(head_hex || true)
        if [[ -n "$block_hex" ]]; then
            printf '%s\n' "$block_hex"
            return 0
        fi

        if ! kill -0 "$pid" 2>/dev/null; then
            log "${label} exited before RPC became ready"
            return 1
        fi

        sleep 1
        ((elapsed += 1))
    done

    log "Timed out waiting for ${label} RPC readiness"
    return 1
}

# Poll once per second until the head reported by `head_hex` is at least the
# target block.
#
# Arguments:
#   $1 - PID of the node being waited on
#   $2 - target block number (decimal)
#   $3 - maximum number of one-second polls
# Outputs:
#   the decimal head that satisfied the target
# Returns:
#   0 when the target was reached; 1 if the node exited first or the timeout
#   elapsed
wait_for_target_head() {
    local pid="$1"
    local target="$2"
    local timeout="$3"
    local elapsed=0
    local block_hex
    local block_dec

    while (( elapsed < timeout )); do
        block_hex=$(head_hex || true)
        if [[ -n "$block_hex" ]]; then
            block_dec=$(hex_to_dec "$block_hex")
            if (( block_dec >= target )); then
                printf '%s\n' "$block_dec"
                return 0
            fi
        fi

        if ! kill -0 "$pid" 2>/dev/null; then
            log "Node exited before reaching target head ${target}"
            return 1
        fi

        sleep 1
        ((elapsed += 1))
    done

    log "Timed out waiting for local head to reach ${target}"
    return 1
}

# Wait until the node log, from a given line onwards, contains one of the
# persistence-related log messages.
#
# Arguments:
#   $1 - PID of the node being waited on
#   $2 - node log file to scan
#   $3 - first line (1-based) of the log to consider
#   $4 - timeout in seconds; the loop bound is inclusive, so the log is checked
#        at least once even with a timeout of 0
# Returns:
#   0 when a marker was found; 1 if the node exited first or the timeout elapsed
wait_for_persistence_marker() {
    local pid="$1"
    local log_file="$2"
    local start_line="$3"
    local timeout="$4"
    local elapsed=0

    while (( elapsed <= timeout )); do
        # `tail` feeds grep through process substitution rather than a
        # pipeline, so only grep's exit status decides the condition even if
        # grep stops reading after the first match.
        if grep -E -m1 \
            'save_blocks step plan|save_blocks trie paths|write_trie_updates|Persisting canonical chain|Appended block data range' \
            < <(tail -n "+${start_line}" "$log_file") \
            >/dev/null 2>&1; then
            return 0
        fi

        if ! kill -0 "$pid" 2>/dev/null; then
            log "Node exited before emitting a post-target persistence marker"
            return 1
        fi

        sleep 1
        ((elapsed += 1))
    done

    log "Timed out waiting for a post-target persistence marker"
    return 1
}

# Stop the background reth-bench process (if any) and reap it.
#
# A live process gets SIGTERM, then SIGKILL if it has not exited after the
# `wait_for_pid_exit` timeout of 10. In every case where BENCH_PID is set the
# process is reaped with `wait`, and BENCH_PID is cleared afterwards.
stop_bench() {
    if [[ -n "$BENCH_PID" ]]; then
        if kill -0 "$BENCH_PID" 2>/dev/null; then
            stop_pid "$BENCH_PID" TERM "reth-bench"
            if ! wait_for_pid_exit "$BENCH_PID" 10; then
                stop_pid "$BENCH_PID" KILL "reth-bench"
            fi
        fi

        # reth-bench is started directly by this shell, so `wait` can reap it.
        wait "$BENCH_PID" 2>/dev/null || true
    fi

    BENCH_PID=""
}

# Delete lock files that a killed node may have left behind in the datadir.
remove_stale_locks() {
    rm -f "$DATADIR/db/lock" "$DATADIR/static_files/lock" "$DATADIR/rocksdb/LOCK"
}

# Stop the restarted node (if any): SIGTERM first, SIGKILL if it is still alive
# after the `wait_for_pid_exit` timeout of 30. NODE2_PID is cleared afterwards.
stop_restart_node() {
    if [[ -n "$NODE2_PID" ]] && kill -0 "$NODE2_PID" 2>/dev/null; then
        stop_pid "$NODE2_PID" TERM "reth restart node"
        if ! wait_for_pid_exit "$NODE2_PID" 30; then
            stop_pid "$NODE2_PID" KILL "reth restart node"
            # NODE2_PID is obtained through command substitution, so the node
            # is not a child of this shell and the `wait` below cannot block on
            # it. Poll for the exit instead so callers do not touch the datadir
            # while the process is still going away.
            wait_for_pid_exit "$NODE2_PID" 10 || true
        fi
    fi

    if [[ -n "$NODE2_PID" ]]; then
        wait "$NODE2_PID" 2>/dev/null || true
    fi

    NODE2_PID=""
}

# Print the block number of the first `unwind_target=Unwind(<n>)` occurrence in
# a log file. Prints nothing when the log has no such entry.
#
# Arguments:
#   $1 - log file to scan
extract_unwind_target() {
    local log_file="$1"

    # POSIX basic regex only (`[0-9][0-9]*` instead of the GNU-only `\+`), and
    # sed quits after the first matching line instead of feeding the whole log
    # into `head -n1`.
    sed -n \
        -e '/unwind_target=Unwind([0-9][0-9]*)/{' \
        -e 's/.*unwind_target=Unwind(\([0-9][0-9]*\)).*/\1/p' \
        -e 'q' \
        -e '}' \
        "$log_file"
}

# Run `reth stage drop merkle`, writing its output to DROP_MERKLE_LOG.
# Returns the exit status of the reth command.
run_drop_merkle() {
    log "Dropping the Merkle stage before the targeted unwind rerun"
    "$RETH_BIN" stage drop \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --log.stdout.filter info \
        --color never \
        merkle \
        >"$DROP_MERKLE_LOG" 2>&1
}

# Run `reth stage unwind to-block <target>` with info-level logging, writing
# its output to POST_DROP_UNWIND_LOG.
#
# Arguments:
#   $1 - block number to unwind to
# Returns the exit status of the reth command.
run_post_drop_unwind() {
    local target="$1"

    log "Re-running unwind without trace capture to restore the pre-failure head"
    "$RETH_BIN" stage unwind \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --log.stdout.filter info \
        --color never \
        to-block "$target" \
        >"$POST_DROP_UNWIND_LOG" 2>&1
}

# Run `reth stage run merkle` from block 0 to the target in the background and
# wait for it, logging a heartbeat while it runs.
#
# Arguments:
#   $1 - last block of the Merkle stage run
# Returns the exit status of the reth command.
run_post_drop_merkle() {
    local target="$1"
    local merkle_pid
    local waited=0

    log "Rebuilding the Merkle stage without trace capture in ${POST_DROP_MERKLE_RUN_LOG}"
    "$RETH_BIN" stage run \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --from 0 \
        --to "$target" \
        --skip-unwind \
        --checkpoints \
        --commit \
        --disable-discovery \
        --log.stdout.filter "$MERKLE_TRACE_FILTER" \
        --color never \
        merkle \
        >"$POST_DROP_MERKLE_RUN_LOG" 2>&1 &
    merkle_pid=$!

    # Poll every second so completion is noticed promptly, but only emit the
    # heartbeat message every 300 seconds (first one immediately).
    while kill -0 "$merkle_pid" 2>/dev/null; do
        if (( waited % 300 == 0 )); then
            log "Waiting for the post-drop Merkle rebuild to finish"
        fi
        sleep 1
        ((waited += 1))
    done

    wait "$merkle_pid"
}

# Watch the restarted node and classify how the restart went.
#
# Sets the global RESULT to one of:
#   unwind_failed                   - a state-root/unwind failure message is in the log
#   unwind_succeeded                - RPC stayed up for 10 polls and an unwind was logged
#   no_unwind_detected              - RPC stayed up for 10 polls and no unwind was logged
#   restart_exited_before_rpc_ready - the node exited without a failure message
#   restart_timeout                 - none of the above happened within the timeout
# Also sets the global HEAD_AFTER_RESTART whenever `head_hex` returns a value.
#
# Arguments:
#   $1 - PID of the restarted node
#   $2 - node log file to scan
#   $3 - maximum number of one-second polls
# Returns:
#   0 for the first three results, 1 for the last two
classify_restart() {
    local pid="$1"
    local log_file="$2"
    local timeout="$3"
    local elapsed=0
    local saw_unwind=0
    local rpc_ready_at=-1
    local block_hex
    local failure_pattern='Failed to verify block state root|failed to run unwind|mismatched block state root'
    local unwind_pattern='Executing unwind after consistency check|inconsistency_source=partial state trie'

    while (( elapsed < timeout )); do
        if grep -E -q "$failure_pattern" "$log_file" 2>/dev/null; then
            RESULT="unwind_failed"
            return 0
        fi

        if grep -E -q "$unwind_pattern" "$log_file" 2>/dev/null; then
            saw_unwind=1
        fi

        block_hex=$(head_hex || true)
        if [[ -n "$block_hex" ]]; then
            HEAD_AFTER_RESTART=$(hex_to_dec "$block_hex")
            if (( rpc_ready_at < 0 )); then
                rpc_ready_at=$elapsed
                log "Restart RPC became ready at head ${HEAD_AFTER_RESTART}"
            fi
        fi

        # Keep observing for 10 more polls after RPC first answered before
        # declaring the restart healthy.
        if (( rpc_ready_at >= 0 && elapsed >= rpc_ready_at + 10 )); then
            if (( saw_unwind == 1 )); then
                RESULT="unwind_succeeded"
            else
                RESULT="no_unwind_detected"
            fi
            return 0
        fi

        if ! kill -0 "$pid" 2>/dev/null; then
            # Re-check the log: the failure message may have been written
            # between the check at the top of this iteration and the exit.
            if grep -E -q "$failure_pattern" "$log_file" 2>/dev/null; then
                RESULT="unwind_failed"
                return 0
            fi

            RESULT="restart_exited_before_rpc_ready"
            return 1
        fi

        sleep 1
        ((elapsed += 1))
    done

    RESULT="restart_timeout"
    return 1
}

restore_snapshot

log "Starting reth for replay run"
NODE1_PID=$(start_node "$NODE1_LOG")

HEAD_HEX=$(wait_for_rpc_start "$NODE1_PID" "$START_TIMEOUT" "initial node") || exit 2
HEAD_BEFORE=$(hex_to_dec "$HEAD_HEX")
printf '%s\n' "$HEAD_BEFORE" >"${ARTIFACTS_DIR}/current_head_before.txt"

# The restored snapshot must sit exactly one block before the first replay block.
if (( HEAD_BEFORE != EXPECTED_HEAD )); then
    log "Expected restored head ${EXPECTED_HEAD}, got ${HEAD_BEFORE}"
    exit 2
fi

if (( HEAD_BEFORE + 1 != START_BLOCK )); then
    log "Expected first replay block ${START_BLOCK}, but restored head implies ${HEAD_BEFORE} -> $((HEAD_BEFORE + 1))"
    exit 2
fi

if (( RANDOMIZE_TARGET_BLOCK == 1 )); then
    TARGET_BLOCK_MODE="randomized"
    TARGET_BLOCK_LOWER_BOUND="$START_BLOCK"
    TARGET_BLOCK_UPPER_BOUND="$TARGET_BLOCK"

    if (( TARGET_BLOCK_UPPER_BOUND < TARGET_BLOCK_LOWER_BOUND )); then
        log "Randomized target range ${TARGET_BLOCK_LOWER_BOUND}-${TARGET_BLOCK_UPPER_BOUND} is invalid"
        exit 2
    fi

    # $RANDOM only yields 15 bits (0-32767); combine two draws into 30 bits so
    # ranges wider than 32768 blocks can still reach their upper end.
    TARGET_BLOCK=$((TARGET_BLOCK_LOWER_BOUND + ((RANDOM << 15) | RANDOM) % (TARGET_BLOCK_UPPER_BOUND - TARGET_BLOCK_LOWER_BOUND + 1)))
    log "Randomized crash target block ${TARGET_BLOCK} (range ${TARGET_BLOCK_LOWER_BOUND}-${TARGET_BLOCK_UPPER_BOUND})"
else
    TARGET_BLOCK_MODE="fixed"
    TARGET_BLOCK_LOWER_BOUND="$TARGET_BLOCK"
    TARGET_BLOCK_UPPER_BOUND="$TARGET_BLOCK"
fi

ADVANCE=$((TARGET_BLOCK - HEAD_BEFORE))
if (( ADVANCE <= 0 )); then
    log "Target block ${TARGET_BLOCK} must be greater than restored head ${HEAD_BEFORE}"
    exit 2
fi

# Build the reth-bench command line once so the captured command and the
# command actually executed cannot drift apart.
REPLAY_BENCH_CMD=(
    "$BENCH_BIN" -vvv new-payload-fcu
    --rpc-url "$REMOTE_RPC_URL"
    --advance "$ADVANCE"
    --jwt-secret "$JWT_SECRET"
    --engine-rpc-url http://127.0.0.1:8551
    --local-rpc-url http://127.0.0.1:8545
    --ws-rpc-url ws://127.0.0.1:8546
)

capture_command reth_bench "${REPLAY_BENCH_CMD[@]}"

log "Running reth-bench with --advance ${ADVANCE} so replay begins at block ${START_BLOCK} and crashes at ${TARGET_BLOCK}"
"${REPLAY_BENCH_CMD[@]}" >"$BENCH_LOG" 2>&1 &
BENCH_PID=$!

HEAD_AT_CRASH=$(wait_for_target_head "$NODE1_PID" "$TARGET_BLOCK" "$TARGET_TIMEOUT") || exit 2
printf '%s\n' "$HEAD_AT_CRASH" >"${ARTIFACTS_DIR}/current_head_at_crash.txt"
# Allow for a small race where the target-head poll returns after the relevant
# persistence logs were already emitted.
POST_TARGET_LINE=$(( $(wc -l <"$NODE1_LOG") - 50 ))
(( POST_TARGET_LINE < 1 )) && POST_TARGET_LINE=1

log "Target head ${TARGET_BLOCK} reached; sending SIGTERM to trigger the final persistence flush"
stop_pid "$NODE1_PID" TERM "reth crash node"

log "Waiting for the shutdown-triggered persistence marker before crashing"
wait_for_persistence_marker "$NODE1_PID" "$NODE1_LOG" "$POST_TARGET_LINE" "$PERSISTENCE_TIMEOUT" || exit 2

log "Crashing reth with SIGKILL"
stop_pid "$NODE1_PID" KILL "reth crash node"
# NODE1_PID came from a command substitution, so the node is not a child of
# this shell and `wait` cannot block on it. Poll until the process is gone so
# the lock files are not removed underneath a process that is still exiting.
wait_for_pid_exit "$NODE1_PID" 10 || true
wait "$NODE1_PID" 2>/dev/null || true
NODE1_PID=""

stop_bench
remove_stale_locks

log "Restarting reth to classify unwind behavior"
NODE2_PID=$(start_unwind_node "$NODE2_LOG" "$RESTART_TRACE_LOG")
classify_restart "$NODE2_PID" "$NODE2_LOG" "$RESTART_TIMEOUT" || exit 2

FAILED_UNWIND_TARGET=$(extract_unwind_target "$NODE2_LOG" || true)
if [[ -n "$FAILED_UNWIND_TARGET" ]]; then
    printf '%s\n' "$FAILED_UNWIND_TARGET" >"${ARTIFACTS_DIR}/failed_unwind_target.txt"
fi

stop_restart_node
remove_stale_locks

# When the restart performed an unwind (successful or not), drop the Merkle
# stage, redo the unwind to the same target, and rebuild the Merkle stage.
if [[ "$RESULT" == "unwind_failed" || "$RESULT" == "unwind_succeeded" ]]; then
    if [[ -z "$FAILED_UNWIND_TARGET" ]]; then
        log "Failed to extract unwind_target from ${NODE2_LOG}"
        exit 2
    fi

    capture_command reth_stage_unwind "$RETH_BIN" stage unwind \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --log.stdout.filter info \
        --color never \
        to-block "$FAILED_UNWIND_TARGET"

    capture_command reth_stage_run_merkle "$RETH_BIN" stage run \
        --datadir "$DATADIR" \
        --chain "$CHAIN" \
        --from 0 \
        --to "$FAILED_UNWIND_TARGET" \
        --skip-unwind \
        --checkpoints \
        --commit \
        --disable-discovery \
        --log.stdout.filter "$MERKLE_TRACE_FILTER" \
        --color never \
        merkle

    if ! run_drop_merkle; then
        DROP_MERKLE_RESULT="failed"
        exit 2
    fi
    DROP_MERKLE_RESULT="ok"

    if ! run_post_drop_unwind "$FAILED_UNWIND_TARGET"; then
        POST_DROP_UNWIND_RESULT="failed"
        exit 2
    fi
    POST_DROP_UNWIND_RESULT="ok"

    remove_stale_locks

    if ! run_post_drop_merkle "$FAILED_UNWIND_TARGET"; then
        POST_DROP_MERKLE_RUN_RESULT="failed"
        exit 2
    fi
    POST_DROP_MERKLE_RUN_RESULT="ok"
else
    DROP_MERKLE_RESULT="skipped_no_unwind_target"
    POST_DROP_UNWIND_RESULT="skipped_no_unwind_target"
    POST_DROP_MERKLE_RUN_RESULT="skipped_no_unwind_target"
fi

log "Restart result: ${RESULT}"