diff --git a/Cargo.lock b/Cargo.lock index 3cf5728e6c9..ce9f449152d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7417,6 +7417,7 @@ dependencies = [ "fdlimit", "futures", "human_bytes", + "humantime", "itertools 0.14.0", "lz4", "proptest", @@ -10632,6 +10633,7 @@ dependencies = [ "alloy-primitives", "alloy-rlp", "alloy-trie", + "assert_matches", "auto_impl", "codspeed-criterion-compat", "itertools 0.14.0", diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml index 25b3a8b8faf..525a40fce88 100644 --- a/crates/cli/commands/Cargo.toml +++ b/crates/cli/commands/Cargo.toml @@ -51,7 +51,7 @@ reth-static-file-types = { workspace = true, features = ["clap"] } reth-static-file.workspace = true reth-trie = { workspace = true, features = ["metrics"] } reth-trie-db = { workspace = true, features = ["metrics"] } -reth-trie-common = { workspace = true, optional = true } +reth-trie-common.workspace = true reth-primitives-traits.workspace = true reth-discv4.workspace = true reth-discv5.workspace = true @@ -68,6 +68,7 @@ futures.workspace = true tokio.workspace = true # misc +humantime.workspace = true human_bytes.workspace = true eyre.workspace = true clap = { workspace = true, features = ["derive", "env"] } @@ -118,7 +119,7 @@ arbitrary = [ "reth-codecs/arbitrary", "reth-prune-types?/arbitrary", "reth-stages-types?/arbitrary", - "reth-trie-common?/arbitrary", + "reth-trie-common/arbitrary", "alloy-consensus/arbitrary", "reth-primitives-traits/arbitrary", "reth-ethereum-primitives/arbitrary", diff --git a/crates/cli/commands/src/db/mod.rs b/crates/cli/commands/src/db/mod.rs index 67b060f7e9a..fd7e577c44c 100644 --- a/crates/cli/commands/src/db/mod.rs +++ b/crates/cli/commands/src/db/mod.rs @@ -13,6 +13,7 @@ mod clear; mod diff; mod get; mod list; +mod repair_trie; mod stats; /// DB List TUI mod tui; @@ -48,6 +49,8 @@ pub enum Subcommands { }, /// Deletes all table entries Clear(clear::Command), + /// Verifies trie consistency and outputs any inconsistencies 
+ RepairTrie(repair_trie::Command), /// Lists current and local database versions Version, /// Returns the full database path @@ -135,6 +138,10 @@ impl> Command let Environment { provider_factory, .. } = self.env.init::(AccessRights::RW)?; command.execute(provider_factory)?; } + Subcommands::RepairTrie(command) => { + let Environment { provider_factory, .. } = self.env.init::(AccessRights::RW)?; + command.execute(provider_factory)?; + } Subcommands::Version => { let local_db_version = match get_db_version(&db_path) { Ok(version) => Some(version), diff --git a/crates/cli/commands/src/db/repair_trie.rs b/crates/cli/commands/src/db/repair_trie.rs new file mode 100644 index 00000000000..fcfa679b4ac --- /dev/null +++ b/crates/cli/commands/src/db/repair_trie.rs @@ -0,0 +1,163 @@ +use clap::Parser; +use reth_db_api::{ + cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}, + database::Database, + tables, + transaction::{DbTx, DbTxMut}, +}; +use reth_node_builder::NodeTypesWithDB; +use reth_provider::ProviderFactory; +use reth_trie::{ + verify::{Output, Verifier}, + Nibbles, +}; +use reth_trie_common::{StorageTrieEntry, StoredNibbles, StoredNibblesSubKey}; +use reth_trie_db::{DatabaseHashedCursorFactory, DatabaseTrieCursorFactory}; +use std::time::{Duration, Instant}; +use tracing::{info, warn}; + +/// The arguments for the `reth db repair-trie` command +#[derive(Parser, Debug)] +pub struct Command { + /// Only show inconsistencies without making any repairs + #[arg(long)] + dry_run: bool, +} + +impl Command { + /// Execute `db repair-trie` command + pub fn execute( + self, + provider_factory: ProviderFactory, + ) -> eyre::Result<()> { + // Get a database transaction directly from the database + let db = provider_factory.db_ref(); + let mut tx = db.tx_mut()?; + tx.disable_long_read_transaction_safety(); + + // Create the hashed cursor factory + let hashed_cursor_factory = DatabaseHashedCursorFactory::new(&tx); + + // Create the trie cursor factory + let trie_cursor_factory = 
DatabaseTrieCursorFactory::new(&tx); + + // Create the verifier + let verifier = Verifier::new(trie_cursor_factory, hashed_cursor_factory)?; + + let mut account_trie_cursor = tx.cursor_write::()?; + let mut storage_trie_cursor = tx.cursor_dup_write::()?; + + let mut inconsistent_nodes = 0; + let start_time = Instant::now(); + let mut last_progress_time = Instant::now(); + + // Iterate over the verifier and repair inconsistencies + for output_result in verifier { + let output = output_result?; + + if let Output::Progress(path) = output { + // Output progress every 5 seconds + if last_progress_time.elapsed() > Duration::from_secs(5) { + output_progress(path, start_time, inconsistent_nodes); + last_progress_time = Instant::now(); + } + continue + }; + + warn!("Inconsistency found, will repair: {output:?}"); + inconsistent_nodes += 1; + + if self.dry_run { + continue; + } + + match output { + Output::AccountExtra(path, _node) => { + // Extra account node in trie, remove it + let nibbles = StoredNibbles(path); + if account_trie_cursor.seek_exact(nibbles)?.is_some() { + account_trie_cursor.delete_current()?; + } + } + Output::StorageExtra(account, path, _node) => { + // Extra storage node in trie, remove it + let nibbles = StoredNibblesSubKey(path); + if storage_trie_cursor + .seek_by_key_subkey(account, nibbles.clone())? + .filter(|e| e.nibbles == nibbles) + .is_some() + { + storage_trie_cursor.delete_current()?; + } + } + Output::AccountWrong { path, expected: node, .. } | + Output::AccountMissing(path, node) => { + // Wrong/missing account node value, upsert it + let nibbles = StoredNibbles(path); + account_trie_cursor.upsert(nibbles, &node)?; + } + Output::StorageWrong { account, path, expected: node, .. 
} | + Output::StorageMissing(account, path, node) => { + // Wrong/missing storage node value, upsert it + let nibbles = StoredNibblesSubKey(path); + let entry = StorageTrieEntry { nibbles, node }; + storage_trie_cursor.upsert(account, &entry)?; + } + Output::Progress(_) => { + unreachable!() + } + } + } + + if inconsistent_nodes > 0 { + if self.dry_run { + info!("Found {} inconsistencies (dry run - no changes made)", inconsistent_nodes); + } else { + info!("Repaired {} inconsistencies", inconsistent_nodes); + tx.commit()?; + info!("Changes committed to database"); + } + } else { + info!("No inconsistencies found"); + } + + Ok(()) + } +} + +/// Output progress information based on the last seen account path. +fn output_progress(last_account: Nibbles, start_time: Instant, inconsistent_nodes: u64) { + // Calculate percentage based on position in the trie path space + // For progress estimation, we'll use the first few nibbles as an approximation + + // Convert the first 16 nibbles (8 bytes) to a u64 for progress calculation + let mut current_value: u64 = 0; + let nibbles_to_use = last_account.len().min(16); + + for i in 0..nibbles_to_use { + current_value = (current_value << 4) | (last_account.get(i).unwrap_or(0) as u64); + } + // Shift left to fill remaining bits if we have fewer than 16 nibbles + if nibbles_to_use < 16 { + current_value <<= (16 - nibbles_to_use) * 4; + } + + let progress_percent = current_value as f64 / u64::MAX as f64 * 100.0; + let progress_percent_str = format!("{progress_percent:.2}"); + + // Calculate ETA based on current speed + let elapsed = start_time.elapsed(); + let elapsed_secs = elapsed.as_secs_f64(); + + let estimated_total_time = + if progress_percent > 0.0 { elapsed_secs / (progress_percent / 100.0) } else { 0.0 }; + let remaining_time = estimated_total_time - elapsed_secs; + let eta_duration = Duration::from_secs(remaining_time as u64); + + info!( + progress_percent = progress_percent_str, + eta = 
%humantime::format_duration(eta_duration), + inconsistent_nodes, + "Repairing trie tables", + ); +} diff --git a/crates/trie/trie/Cargo.toml b/crates/trie/trie/Cargo.toml index adee3291b80..403d187e46a 100644 --- a/crates/trie/trie/Cargo.toml +++ b/crates/trie/trie/Cargo.toml @@ -57,6 +57,7 @@ revm-state.workspace = true triehash.workspace = true # misc +assert_matches.workspace = true criterion.workspace = true parking_lot.workspace = true pretty_assertions.workspace = true diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index 8accd447105..7efa00631d2 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -63,3 +63,6 @@ pub mod test_utils; /// Collection of mock types for testing. #[cfg(test)] pub mod mock; + +/// Verification of existing stored trie nodes against state data. +pub mod verify; diff --git a/crates/trie/trie/src/trie_cursor/depth_first.rs b/crates/trie/trie/src/trie_cursor/depth_first.rs new file mode 100644 index 00000000000..8e9b567ac68 --- /dev/null +++ b/crates/trie/trie/src/trie_cursor/depth_first.rs @@ -0,0 +1,401 @@ +use super::TrieCursor; +use crate::{BranchNodeCompact, Nibbles}; +use reth_storage_errors::db::DatabaseError; +use std::cmp::Ordering; +use tracing::trace; + +/// Compares two Nibbles in depth-first order. 
+/// +/// In depth-first ordering: +/// - Descendants come before their ancestors (children before parents) +/// - Siblings are ordered lexicographically +/// +/// # Example +/// +/// ```text +/// 0x11 comes before 0x1 (child before parent) +/// 0x12 comes before 0x1 (child before parent) +/// 0x11 comes before 0x12 (lexicographical among siblings) +/// 0x1 comes before 0x21 (lexicographical among siblings) +/// Result: 0x11, 0x12, 0x1, 0x21 +/// ``` +pub fn cmp(a: &Nibbles, b: &Nibbles) -> Ordering { + // If the two are equal length then compare them lexicographically + if a.len() == b.len() { + return a.cmp(b) + } + + // If one is a prefix of the other, then the other comes first + let common_prefix_len = a.common_prefix_length(b); + if a.len() == common_prefix_len { + return Ordering::Greater + } else if b.len() == common_prefix_len { + return Ordering::Less + } + + // Otherwise the nibble after the prefix determines the ordering. We know that neither is empty + // at this point, otherwise the previous if/else block would have caught it. + a.get_unchecked(common_prefix_len).cmp(&b.get_unchecked(common_prefix_len)) +} + +/// An iterator that traverses trie nodes in depth-first post-order. +/// +/// This iterator yields nodes in post-order traversal (children before parents), +/// which matches the `cmp` comparison function where descendants +/// come before their ancestors. +#[derive(Debug)] +pub struct DepthFirstTrieIterator { + /// The underlying trie cursor. + cursor: C, + /// Set to true once the trie cursor has done its initial seek to the root node. + initialized: bool, + /// Stack of nodes which have been fetched. Each node's path is a prefix of the next's. + stack: Vec<(Nibbles, BranchNodeCompact)>, + /// Nodes which are ready to be yielded from `next`. + next: Vec<(Nibbles, BranchNodeCompact)>, + /// Set to true once the cursor has been exhausted. 
+ complete: bool, +} + +impl DepthFirstTrieIterator { + /// Create a new depth-first iterator from a trie cursor. + pub fn new(cursor: C) -> Self { + Self { + cursor, + initialized: false, + stack: Default::default(), + next: Default::default(), + complete: false, + } + } + + fn push(&mut self, path: Nibbles, node: BranchNodeCompact) { + loop { + match self.stack.last() { + None => { + // If the stack is empty then we push this node onto it, as it may have child + // nodes which need to be yielded first. + self.stack.push((path, node)); + break + } + Some((top_path, _)) if path.starts_with(top_path) => { + // If the top of the stack is a prefix of this node, it means this node is a + // child of the top of the stack (and all other nodes on the stack). Push this + // node onto the stack, as future nodes may be children of it. + self.stack.push((path, node)); + break + } + Some((_, _)) => { + // The top of the stack is not a prefix of this node, therefore it is not a + // parent of this node. Yield the top of the stack, and loop back to see if this + // node is a child of the new top-of-stack. + self.next.push(self.stack.pop().expect("stack is not empty")); + } + } + } + + // We will have popped off the top of the stack in the order we want to yield nodes, but + // `next` is itself popped off so it needs to be reversed. + self.next.reverse(); + } + + fn fill_next(&mut self) -> Result<(), DatabaseError> { + debug_assert!(self.next.is_empty()); + + loop { + let Some((path, node)) = (if self.initialized { + self.cursor.next()? + } else { + self.initialized = true; + self.cursor.seek(Nibbles::new())? + }) else { + // Record that the cursor is empty and yield the stack. The stack is in reverse + // order of what we want to yield, but `next` is popped from, so we don't have to + // reverse it. 
+ self.complete = true; + self.next = core::mem::take(&mut self.stack); + return Ok(()) + }; + + trace!( + target: "trie::trie_cursor::depth_first", + ?path, + "Iterated from cursor", + ); + + self.push(path, node); + if !self.next.is_empty() { + return Ok(()) + } + } + } +} + +impl Iterator for DepthFirstTrieIterator { + type Item = Result<(Nibbles, BranchNodeCompact), DatabaseError>; + + fn next(&mut self) -> Option { + loop { + if let Some(next) = self.next.pop() { + return Some(Ok(next)) + } + + if self.complete { + return None + } + + if let Err(err) = self.fill_next() { + return Some(Err(err)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}; + use alloy_trie::TrieMask; + use std::{collections::BTreeMap, sync::Arc}; + + fn create_test_node(state_nibbles: &[u8], tree_nibbles: &[u8]) -> BranchNodeCompact { + let mut state_mask = TrieMask::default(); + for &nibble in state_nibbles { + state_mask.set_bit(nibble); + } + + let mut tree_mask = TrieMask::default(); + for &nibble in tree_nibbles { + tree_mask.set_bit(nibble); + } + + BranchNodeCompact { + state_mask, + tree_mask, + hash_mask: TrieMask::default(), + hashes: Arc::new(vec![]), + root_hash: None, + } + } + + #[test] + fn test_depth_first_cmp() { + // Test case 1: Child comes before parent + let child = Nibbles::from_nibbles([0x1, 0x1]); + let parent = Nibbles::from_nibbles([0x1]); + assert_eq!(cmp(&child, &parent), Ordering::Less); + assert_eq!(cmp(&parent, &child), Ordering::Greater); + + // Test case 2: Deeper descendant comes before ancestor + let deep = Nibbles::from_nibbles([0x1, 0x2, 0x3, 0x4]); + let ancestor = Nibbles::from_nibbles([0x1, 0x2]); + assert_eq!(cmp(&deep, &ancestor), Ordering::Less); + assert_eq!(cmp(&ancestor, &deep), Ordering::Greater); + + // Test case 3: Siblings use lexicographical ordering + let sibling1 = Nibbles::from_nibbles([0x1, 0x2]); + let sibling2 = Nibbles::from_nibbles([0x1, 0x3]); + 
assert_eq!(cmp(&sibling1, &sibling2), Ordering::Less); + assert_eq!(cmp(&sibling2, &sibling1), Ordering::Greater); + + // Test case 4: Different branches use lexicographical ordering + let branch1 = Nibbles::from_nibbles([0x1]); + let branch2 = Nibbles::from_nibbles([0x2]); + assert_eq!(cmp(&branch1, &branch2), Ordering::Less); + assert_eq!(cmp(&branch2, &branch1), Ordering::Greater); + + // Test case 5: Empty path comes after everything + let empty = Nibbles::new(); + let non_empty = Nibbles::from_nibbles([0x0]); + assert_eq!(cmp(&non_empty, &empty), Ordering::Less); + assert_eq!(cmp(&empty, &non_empty), Ordering::Greater); + + // Test case 6: Same paths are equal + let same1 = Nibbles::from_nibbles([0x1, 0x2, 0x3]); + let same2 = Nibbles::from_nibbles([0x1, 0x2, 0x3]); + assert_eq!(cmp(&same1, &same2), Ordering::Equal); + } + + #[test] + fn test_depth_first_ordering_complex() { + // Test the example from the conversation: 0x11, 0x12, 0x1, 0x2 + let mut paths = [ + Nibbles::from_nibbles([0x1]), // 0x1 + Nibbles::from_nibbles([0x2]), // 0x2 + Nibbles::from_nibbles([0x1, 0x1]), // 0x11 + Nibbles::from_nibbles([0x1, 0x2]), // 0x12 + ]; + + // Shuffle to ensure sorting works regardless of input order + paths.reverse(); + + // Sort using depth-first ordering + paths.sort_by(cmp); + + // Expected order: 0x11, 0x12, 0x1, 0x2 + assert_eq!(paths[0], Nibbles::from_nibbles([0x1, 0x1])); // 0x11 + assert_eq!(paths[1], Nibbles::from_nibbles([0x1, 0x2])); // 0x12 + assert_eq!(paths[2], Nibbles::from_nibbles([0x1])); // 0x1 + assert_eq!(paths[3], Nibbles::from_nibbles([0x2])); // 0x2 + } + + #[test] + fn test_depth_first_ordering_tree() { + // Test a more complex tree structure + let mut paths = vec![ + Nibbles::new(), // root (empty) + Nibbles::from_nibbles([0x1]), // 0x1 + Nibbles::from_nibbles([0x1, 0x1]), // 0x11 + Nibbles::from_nibbles([0x1, 0x1, 0x1]), // 0x111 + Nibbles::from_nibbles([0x1, 0x1, 0x2]), // 0x112 + Nibbles::from_nibbles([0x1, 0x2]), // 0x12 + 
Nibbles::from_nibbles([0x2]), // 0x2 + Nibbles::from_nibbles([0x2, 0x1]), // 0x21 + ]; + + // Shuffle + paths.reverse(); + + // Sort using depth-first ordering + paths.sort_by(cmp); + + // Expected depth-first order: + // All descendants come before ancestors + // Within same level, lexicographical order + assert_eq!(paths[0], Nibbles::from_nibbles([0x1, 0x1, 0x1])); // 0x111 (deepest in 0x1 branch) + assert_eq!(paths[1], Nibbles::from_nibbles([0x1, 0x1, 0x2])); // 0x112 (sibling of 0x111) + assert_eq!(paths[2], Nibbles::from_nibbles([0x1, 0x1])); // 0x11 (parent of 0x111, 0x112) + assert_eq!(paths[3], Nibbles::from_nibbles([0x1, 0x2])); // 0x12 (sibling of 0x11) + assert_eq!(paths[4], Nibbles::from_nibbles([0x1])); // 0x1 (parent of 0x11, 0x12) + assert_eq!(paths[5], Nibbles::from_nibbles([0x2, 0x1])); // 0x21 (child of 0x2) + assert_eq!(paths[6], Nibbles::from_nibbles([0x2])); // 0x2 (parent of 0x21) + assert_eq!(paths[7], Nibbles::new()); // root (empty, parent of all) + } + + #[test] + fn test_empty_trie() { + let factory = MockTrieCursorFactory::new(BTreeMap::new(), Default::default()); + let cursor = factory.account_trie_cursor().unwrap(); + let mut iter = DepthFirstTrieIterator::new(cursor); + assert!(iter.next().is_none()); + } + + #[test] + fn test_single_node() { + let path = Nibbles::from_nibbles([0x1, 0x2, 0x3]); + let node = create_test_node(&[0x4], &[0x5]); + + let mut nodes = BTreeMap::new(); + nodes.insert(path, node.clone()); + let factory = MockTrieCursorFactory::new(nodes, Default::default()); + let cursor = factory.account_trie_cursor().unwrap(); + let mut iter = DepthFirstTrieIterator::new(cursor); + + let result = iter.next().unwrap().unwrap(); + assert_eq!(result.0, path); + assert_eq!(result.1, node); + assert!(iter.next().is_none()); + } + + #[test] + fn test_depth_first_order() { + // Create a simple trie structure: + // root + // ├── 0x1 (has children 0x2 and 0x3) + // │ ├── 0x12 + // │ └── 0x13 + // └── 0x2 (has child 0x4) + // └── 0x24 
+ + let nodes = vec![ + // Root node with children at nibbles 1 and 2 + (Nibbles::default(), create_test_node(&[], &[0x1, 0x2])), + // Node at path 0x1 with children at nibbles 2 and 3 + (Nibbles::from_nibbles([0x1]), create_test_node(&[], &[0x2, 0x3])), + // Leaf nodes + (Nibbles::from_nibbles([0x1, 0x2]), create_test_node(&[0xF], &[])), + (Nibbles::from_nibbles([0x1, 0x3]), create_test_node(&[0xF], &[])), + // Node at path 0x2 with child at nibble 4 + (Nibbles::from_nibbles([0x2]), create_test_node(&[], &[0x4])), + // Leaf node + (Nibbles::from_nibbles([0x2, 0x4]), create_test_node(&[0xF], &[])), + ]; + + let nodes_map: BTreeMap<_, _> = nodes.into_iter().collect(); + let factory = MockTrieCursorFactory::new(nodes_map, Default::default()); + let cursor = factory.account_trie_cursor().unwrap(); + let iter = DepthFirstTrieIterator::new(cursor); + + // Expected post-order (depth-first with children before parents): + // 1. 0x12 (leaf, child of 0x1) + // 2. 0x13 (leaf, child of 0x1) + // 3. 0x1 (parent of 0x12 and 0x13) + // 4. 0x24 (leaf, child of 0x2) + // 5. 0x2 (parent of 0x24) + // 6. 
Root (parent of 0x1 and 0x2) + + let expected_order = vec![ + Nibbles::from_nibbles([0x1, 0x2]), + Nibbles::from_nibbles([0x1, 0x3]), + Nibbles::from_nibbles([0x1]), + Nibbles::from_nibbles([0x2, 0x4]), + Nibbles::from_nibbles([0x2]), + Nibbles::default(), + ]; + + let mut actual_order = Vec::new(); + for result in iter { + let (path, _) = result.unwrap(); + actual_order.push(path); + } + + assert_eq!(actual_order, expected_order); + } + + #[test] + fn test_complex_tree() { + // Create a more complex tree structure with multiple levels + let nodes = vec![ + // Root with multiple children + (Nibbles::default(), create_test_node(&[], &[0x0, 0x5, 0xA, 0xF])), + // Branch at 0x0 with children + (Nibbles::from_nibbles([0x0]), create_test_node(&[], &[0x1, 0x2])), + (Nibbles::from_nibbles([0x0, 0x1]), create_test_node(&[0x3], &[])), + (Nibbles::from_nibbles([0x0, 0x2]), create_test_node(&[0x4], &[])), + // Branch at 0x5 with no children (leaf) + (Nibbles::from_nibbles([0x5]), create_test_node(&[0xB], &[])), + // Branch at 0xA with deep nesting + (Nibbles::from_nibbles([0xA]), create_test_node(&[], &[0xB])), + (Nibbles::from_nibbles([0xA, 0xB]), create_test_node(&[], &[0xC])), + (Nibbles::from_nibbles([0xA, 0xB, 0xC]), create_test_node(&[0xD], &[])), + // Branch at 0xF (leaf) + (Nibbles::from_nibbles([0xF]), create_test_node(&[0xE], &[])), + ]; + + let nodes_map: BTreeMap<_, _> = nodes.into_iter().collect(); + let factory = MockTrieCursorFactory::new(nodes_map, Default::default()); + let cursor = factory.account_trie_cursor().unwrap(); + let iter = DepthFirstTrieIterator::new(cursor); + + // Verify post-order traversal (children before parents) + let expected_order = vec![ + Nibbles::from_nibbles([0x0, 0x1]), // leaf child of 0x0 + Nibbles::from_nibbles([0x0, 0x2]), // leaf child of 0x0 + Nibbles::from_nibbles([0x0]), // parent of 0x01 and 0x02 + Nibbles::from_nibbles([0x5]), // leaf + Nibbles::from_nibbles([0xA, 0xB, 0xC]), // deepest leaf + Nibbles::from_nibbles([0xA, 
0xB]), // parent of 0xABC + Nibbles::from_nibbles([0xA]), // parent of 0xAB + Nibbles::from_nibbles([0xF]), // leaf + Nibbles::default(), // root (last) + ]; + + let mut actual_order = Vec::new(); + for result in iter { + let (path, _node) = result.unwrap(); + actual_order.push(path); + } + + assert_eq!(actual_order, expected_order); + } +} diff --git a/crates/trie/trie/src/trie_cursor/mod.rs b/crates/trie/trie/src/trie_cursor/mod.rs index b05737f5c85..01eea4c40e6 100644 --- a/crates/trie/trie/src/trie_cursor/mod.rs +++ b/crates/trie/trie/src/trie_cursor/mod.rs @@ -11,11 +11,14 @@ pub mod subnode; /// Noop trie cursor implementations. pub mod noop; +/// Depth-first trie iterator. +pub mod depth_first; + /// Mock trie cursor implementations. #[cfg(test)] pub mod mock; -pub use self::{in_memory::*, subnode::CursorSubNode}; +pub use self::{depth_first::DepthFirstTrieIterator, in_memory::*, subnode::CursorSubNode}; /// Factory for creating trie cursors. #[auto_impl::auto_impl(&)] diff --git a/crates/trie/trie/src/verify.rs b/crates/trie/trie/src/verify.rs new file mode 100644 index 00000000000..21a27655fa9 --- /dev/null +++ b/crates/trie/trie/src/verify.rs @@ -0,0 +1,1009 @@ +use crate::{ + hashed_cursor::{HashedCursor, HashedCursorFactory}, + progress::{IntermediateStateRootState, StateRootProgress}, + trie::StateRoot, + trie_cursor::{ + depth_first::{self, DepthFirstTrieIterator}, + noop::NoopTrieCursorFactory, + TrieCursor, TrieCursorFactory, + }, + Nibbles, +}; +use alloy_primitives::B256; +use alloy_trie::BranchNodeCompact; +use reth_execution_errors::StateRootError; +use reth_storage_errors::db::DatabaseError; +use std::cmp::Ordering; +use tracing::trace; + +/// Used by [`StateRootBranchNodesIter`] to iterate over branch nodes in a state root. +#[derive(Debug)] +enum BranchNode { + Account(Nibbles, BranchNodeCompact), + Storage(B256, Nibbles, BranchNodeCompact), +} + +/// Iterates over branch nodes produced by a [`StateRoot`]. 
The `StateRoot` will only use the +/// hashed accounts/storages tables, meaning it is recomputing the trie from scratch without the use +/// of the trie tables. +/// +/// [`BranchNode`]s are iterated over such that: +/// * Account nodes and storage nodes may be interspersed. +/// * Storage nodes for the same account will be ordered by ascending path relative to each other. +/// * Account nodes will be ordered by ascending path relative to each other. +/// * All storage nodes for one account will finish before storage nodes for another account are +/// started. In other words, if the current storage account is not equal to the previous, the +/// previous has no more nodes. +#[derive(Debug)] +struct StateRootBranchNodesIter { + hashed_cursor_factory: H, + account_nodes: Vec<(Nibbles, BranchNodeCompact)>, + storage_tries: Vec<(B256, Vec<(Nibbles, BranchNodeCompact)>)>, + curr_storage: Option<(B256, Vec<(Nibbles, BranchNodeCompact)>)>, + intermediate_state: Option>, + complete: bool, +} + +impl StateRootBranchNodesIter { + fn new(hashed_cursor_factory: H) -> Self { + Self { + hashed_cursor_factory, + account_nodes: Default::default(), + storage_tries: Default::default(), + curr_storage: None, + intermediate_state: None, + complete: false, + } + } + + /// Sorts a Vec of updates such that it is ready to be yielded from the `next` method. We yield + /// by popping off of the account/storage vecs, so we sort them in reverse order. + /// + /// Depth-first sorting is used because this is the order that the `HashBuilder` computes + /// branch nodes internally, even if it produces them as `B256Map`s. + fn sort_updates(updates: &mut [(Nibbles, BranchNodeCompact)]) { + updates.sort_unstable_by(|a, b| depth_first::cmp(&b.0, &a.0)); + } +} + +impl Iterator for StateRootBranchNodesIter { + type Item = Result; + + fn next(&mut self) -> Option { + loop { + // If we already started iterating through a storage trie's updates, continue doing + // so. 
+ if let Some((account, storage_updates)) = self.curr_storage.as_mut() { + if let Some((path, node)) = storage_updates.pop() { + let node = BranchNode::Storage(*account, path, node); + return Some(Ok(node)) + } + } + + // If there's not a storage trie already being iterated over then check if there's a + // storage trie we could start iterating over. + if let Some((account, storage_updates)) = self.storage_tries.pop() { + debug_assert!(!storage_updates.is_empty()); + + self.curr_storage = Some((account, storage_updates)); + continue; + } + + // `storage_updates` is empty, check if there are account updates. + if let Some((path, node)) = self.account_nodes.pop() { + return Some(Ok(BranchNode::Account(path, node))) + } + + // All data from any previous runs of the `StateRoot` has been produced, run the next + // partial computation, unless `StateRootProgress::Complete` has been returned in which + // case iteration is over. + if self.complete { + return None + } + + let state_root = + StateRoot::new(NoopTrieCursorFactory, self.hashed_cursor_factory.clone()) + .with_intermediate_state(self.intermediate_state.take().map(|s| *s)); + + let updates = match state_root.root_with_progress() { + Err(err) => return Some(Err(err)), + Ok(StateRootProgress::Complete(_, _, updates)) => { + self.complete = true; + updates + } + Ok(StateRootProgress::Progress(intermediate_state, _, updates)) => { + self.intermediate_state = Some(intermediate_state); + updates + } + }; + + // collect account updates and sort them in descending order, so that when we pop them + // off the Vec they are popped in ascending order. 
+ self.account_nodes.extend(updates.account_nodes); + Self::sort_updates(&mut self.account_nodes); + + self.storage_tries = updates + .storage_tries + .into_iter() + .filter_map(|(account, t)| { + (!t.storage_nodes.is_empty()).then(|| { + let mut storage_nodes = t.storage_nodes.into_iter().collect::>(); + Self::sort_updates(&mut storage_nodes); + (account, storage_nodes) + }) + }) + .collect::>(); + + // `root_with_progress` will output storage updates ordered by their account hash. If + // `root_with_progress` only returns a partial result then it will pick up with where + // it left off in the storage trie on the next run. + // + // By sorting by the account we ensure that we continue with the partially processed + // trie (the last of the previous run) first. We sort in reverse order because we pop + // off of this Vec. + self.storage_tries.sort_unstable_by(|a, b| b.0.cmp(&a.0)); + + // loop back to the top. + } + } +} + +/// Output describes an inconsistency found when comparing the hashed state tables +/// ([`HashedCursorFactory`]) with that of the trie tables ([`TrieCursorFactory`]). The hashed +/// tables are considered the source of truth; outputs are on the part of the trie tables. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Output { + /// An extra account node was found. + AccountExtra(Nibbles, BranchNodeCompact), + /// An extra storage node was found. + StorageExtra(B256, Nibbles, BranchNodeCompact), + /// An account node had the wrong value. + AccountWrong { + /// Path of the node + path: Nibbles, + /// The node's expected value. + expected: BranchNodeCompact, + /// The node's found value. + found: BranchNodeCompact, + }, + /// A storage node had the wrong value. + StorageWrong { + /// The account the storage trie belongs to. + account: B256, + /// Path of the node + path: Nibbles, + /// The node's expected value. + expected: BranchNodeCompact, + /// The node's found value. + found: BranchNodeCompact, + }, + /// An account node was missing. 
+ AccountMissing(Nibbles, BranchNodeCompact), + /// A storage node was missing. + StorageMissing(B256, Nibbles, BranchNodeCompact), + /// Progress indicator with the last seen account path. + Progress(Nibbles), +} + +/// Verifies the contents of a trie table against some other data source which is able to produce +/// stored trie nodes. +#[derive(Debug)] +struct SingleVerifier { + account: Option, // None for accounts trie + trie_iter: I, + curr: Option<(Nibbles, BranchNodeCompact)>, +} + +impl SingleVerifier> { + fn new(account: Option, trie_cursor: C) -> Result { + let mut trie_iter = DepthFirstTrieIterator::new(trie_cursor); + let curr = trie_iter.next().transpose()?; + Ok(Self { account, trie_iter, curr }) + } + + const fn output_extra(&self, path: Nibbles, node: BranchNodeCompact) -> Output { + if let Some(account) = self.account { + Output::StorageExtra(account, path, node) + } else { + Output::AccountExtra(path, node) + } + } + + const fn output_wrong( + &self, + path: Nibbles, + expected: BranchNodeCompact, + found: BranchNodeCompact, + ) -> Output { + if let Some(account) = self.account { + Output::StorageWrong { account, path, expected, found } + } else { + Output::AccountWrong { path, expected, found } + } + } + + const fn output_missing(&self, path: Nibbles, node: BranchNodeCompact) -> Output { + if let Some(account) = self.account { + Output::StorageMissing(account, path, node) + } else { + Output::AccountMissing(path, node) + } + } + + /// Called with the next path and node in the canonical sequence of stored trie nodes. Will + /// append to the given `outputs` Vec if walking the trie cursor produces data + /// inconsistent with that given. + /// + /// `next` must be called with paths in depth-first order. + fn next( + &mut self, + outputs: &mut Vec, + path: Nibbles, + node: BranchNodeCompact, + ) -> Result<(), DatabaseError> { + loop { + // `curr` is None only if the end of the iterator has been reached. 
Any further nodes + // found must be considered missing. + if self.curr.is_none() { + outputs.push(self.output_missing(path, node)); + return Ok(()) + } + + let (curr_path, curr_node) = self.curr.as_ref().expect("not None"); + trace!(target: "trie::verify", account=?self.account, ?curr_path, ?path, "Current cursor node"); + + // Use depth-first ordering for comparison + match depth_first::cmp(&path, curr_path) { + Ordering::Less => { + // If the given path comes before the cursor's current path in depth-first + // order, then the given path was not produced by the cursor. + outputs.push(self.output_missing(path, node)); + return Ok(()) + } + Ordering::Equal => { + // If the current path matches the given one (happy path) but the nodes + // aren't equal then we produce a wrong node. Either way we want to move the + // iterator forward. + if *curr_node != node { + outputs.push(self.output_wrong(path, node, curr_node.clone())) + } + self.curr = self.trie_iter.next().transpose()?; + return Ok(()) + } + Ordering::Greater => { + // If the given path comes after the current path in depth-first order, + // it means the cursor's path was not found by the caller (otherwise it would + // have hit the equal case) and so is extraneous. + outputs.push(self.output_extra(*curr_path, curr_node.clone())); + self.curr = self.trie_iter.next().transpose()?; + // back to the top of the loop to check the latest `self.curr` value against the + // given path/node. + } + } + } + } + + /// Must be called once there are no more calls to `next` to be made. All further nodes produced + /// by the iterator will be considered extraneous. 
+ fn finalize(&mut self, outputs: &mut Vec) -> Result<(), DatabaseError> { + loop { + if let Some((curr_path, curr_node)) = self.curr.take() { + outputs.push(self.output_extra(curr_path, curr_node)); + self.curr = self.trie_iter.next().transpose()?; + } else { + return Ok(()) + } + } + } +} + +/// Checks that data stored in the trie database is consistent, using hashed accounts/storages +/// database tables as the source of truth. This will iteratively re-compute the entire trie based +/// on the hashed state, and produce any discovered [`Output`]s via the `next` method. +#[derive(Debug)] +pub struct Verifier { + trie_cursor_factory: T, + hashed_cursor_factory: H, + branch_node_iter: StateRootBranchNodesIter, + outputs: Vec, + account: SingleVerifier>, + storage: Option<(B256, SingleVerifier>)>, + complete: bool, +} + +impl Verifier { + /// Creates a new verifier instance. + pub fn new(trie_cursor_factory: T, hashed_cursor_factory: H) -> Result { + Ok(Self { + trie_cursor_factory: trie_cursor_factory.clone(), + hashed_cursor_factory: hashed_cursor_factory.clone(), + branch_node_iter: StateRootBranchNodesIter::new(hashed_cursor_factory), + outputs: Default::default(), + account: SingleVerifier::new(None, trie_cursor_factory.account_trie_cursor()?)?, + storage: None, + complete: false, + }) + } +} + +impl Verifier { + fn new_storage( + &mut self, + account: B256, + path: Nibbles, + node: BranchNodeCompact, + ) -> Result<(), DatabaseError> { + let trie_cursor = self.trie_cursor_factory.storage_trie_cursor(account)?; + let mut storage = SingleVerifier::new(Some(account), trie_cursor)?; + storage.next(&mut self.outputs, path, node)?; + self.storage = Some((account, storage)); + Ok(()) + } + + /// This method is called using the account hashes at the boundary of [`BranchNode::Storage`] + /// sequences, ie once the [`StateRootBranchNodesIter`] has begun yielding storage nodes for a + /// different account than it was yielding previously. 
All accounts between the two should have + /// empty storages. + fn verify_empty_storages( + &mut self, + last_account: B256, + next_account: B256, + start_inclusive: bool, + end_inclusive: bool, + ) -> Result<(), DatabaseError> { + let mut account_cursor = self.hashed_cursor_factory.hashed_account_cursor()?; + let mut account_seeked = false; + + if !start_inclusive { + account_seeked = true; + account_cursor.seek(last_account)?; + } + + loop { + let Some((curr_account, _)) = (if account_seeked { + account_cursor.next()? + } else { + account_seeked = true; + account_cursor.seek(last_account)? + }) else { + return Ok(()) + }; + + if curr_account < next_account || (end_inclusive && curr_account == next_account) { + trace!(target: "trie::verify", account = ?curr_account, "Verifying account has empty storage"); + + let mut storage_cursor = + self.trie_cursor_factory.storage_trie_cursor(curr_account)?; + let mut seeked = false; + while let Some((path, node)) = if seeked { + storage_cursor.next()? + } else { + seeked = true; + storage_cursor.seek(Nibbles::new())? + } { + self.outputs.push(Output::StorageExtra(curr_account, path, node)); + } + } else { + return Ok(()) + } + } + } + + fn try_next(&mut self) -> Result<(), StateRootError> { + match self.branch_node_iter.next().transpose()? { + None => { + self.account.finalize(&mut self.outputs)?; + if let Some((prev_account, storage)) = self.storage.as_mut() { + storage.finalize(&mut self.outputs)?; + + // If there was a previous storage account, and it is the final one, then we + // need to validate that all accounts coming after it have empty storages. + let prev_account = *prev_account; + + // Calculate the max possible account address. 
+ let mut max_account = B256::ZERO; + max_account.fill(0xff); + + self.verify_empty_storages(prev_account, max_account, false, true)?; + } + self.complete = true; + } + Some(BranchNode::Account(path, node)) => { + trace!(target: "trie::verify", ?path, "Account node from state root"); + self.account.next(&mut self.outputs, path, node)?; + // Push progress indicator + if !path.is_empty() { + self.outputs.push(Output::Progress(path)); + } + } + Some(BranchNode::Storage(account, path, node)) => { + trace!(target: "trie::verify", ?account, ?path, "Storage node from state root"); + match self.storage.as_mut() { + None => { + // First storage account - check for any empty storages before it + self.verify_empty_storages(B256::ZERO, account, true, false)?; + self.new_storage(account, path, node)?; + } + Some((prev_account, storage)) if *prev_account == account => { + storage.next(&mut self.outputs, path, node)?; + } + Some((prev_account, storage)) => { + storage.finalize(&mut self.outputs)?; + // Verify there are no storage entries between the previous account and the new one + let prev_account = *prev_account; + self.verify_empty_storages(prev_account, account, false, false)?; + self.new_storage(account, path, node)?; + } + } + } + } + + // If any outputs were appended we want to reverse them, so they are popped off + // in the same order they were appended. 
+ self.outputs.reverse(); + Ok(()) + } +} + +impl Iterator for Verifier { + type Item = Result; + + fn next(&mut self) -> Option { + loop { + if let Some(output) = self.outputs.pop() { + return Some(Ok(output)) + } + + if self.complete { + return None + } + + if let Err(err) = self.try_next() { + return Some(Err(err)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + hashed_cursor::mock::MockHashedCursorFactory, + trie_cursor::mock::{MockTrieCursor, MockTrieCursorFactory}, + }; + use alloy_primitives::{address, keccak256, map::B256Map, U256}; + use alloy_trie::TrieMask; + use assert_matches::assert_matches; + use reth_primitives_traits::Account; + use std::collections::BTreeMap; + + /// Helper function to create a simple test `BranchNodeCompact` + fn test_branch_node( + state_mask: u16, + tree_mask: u16, + hash_mask: u16, + hashes: Vec, + ) -> BranchNodeCompact { + // Ensure the number of hashes matches the number of bits set in hash_mask + let expected_hashes = hash_mask.count_ones() as usize; + let mut final_hashes = hashes; + let mut counter = 100u8; + while final_hashes.len() < expected_hashes { + final_hashes.push(B256::from([counter; 32])); + counter += 1; + } + final_hashes.truncate(expected_hashes); + + BranchNodeCompact::new( + TrieMask::new(state_mask), + TrieMask::new(tree_mask), + TrieMask::new(hash_mask), + final_hashes, + None, + ) + } + + /// Helper function to create a simple test `MockTrieCursor` + fn create_mock_cursor(trie_nodes: BTreeMap) -> MockTrieCursor { + let factory = MockTrieCursorFactory::new(trie_nodes, B256Map::default()); + factory.account_trie_cursor().unwrap() + } + + #[test] + fn test_state_root_branch_nodes_iter_empty() { + // Test with completely empty state + let factory = MockHashedCursorFactory::new(BTreeMap::new(), B256Map::default()); + let mut iter = StateRootBranchNodesIter::new(factory); + + // Collect all results - with empty state, should complete without producing nodes + let mut count = 0; + 
for result in iter.by_ref() { + assert!(result.is_ok(), "Unexpected error: {:?}", result.unwrap_err()); + count += 1; + // Prevent infinite loop in test + assert!(count <= 1000, "Too many iterations"); + } + + assert!(iter.complete); + } + + #[test] + fn test_state_root_branch_nodes_iter_basic() { + // Simple test with a few accounts and storage + let mut accounts = BTreeMap::new(); + let mut storage_tries = B256Map::default(); + + // Create test accounts + let addr1 = keccak256(address!("0000000000000000000000000000000000000001")); + accounts.insert( + addr1, + Account { + nonce: 1, + balance: U256::from(1000), + bytecode_hash: Some(keccak256(b"code1")), + }, + ); + + // Add storage for the account + let mut storage1 = BTreeMap::new(); + storage1.insert(keccak256(B256::from(U256::from(1))), U256::from(100)); + storage1.insert(keccak256(B256::from(U256::from(2))), U256::from(200)); + storage_tries.insert(addr1, storage1); + + let factory = MockHashedCursorFactory::new(accounts, storage_tries); + let mut iter = StateRootBranchNodesIter::new(factory); + + // Collect nodes and verify basic properties + let mut account_paths = Vec::new(); + let mut storage_paths_by_account: B256Map> = B256Map::default(); + let mut iterations = 0; + + for result in iter.by_ref() { + iterations += 1; + assert!(iterations <= 10000, "Too many iterations - possible infinite loop"); + + match result { + Ok(BranchNode::Account(path, _)) => { + account_paths.push(path); + } + Ok(BranchNode::Storage(account, path, _)) => { + storage_paths_by_account.entry(account).or_default().push(path); + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + // Verify account paths are in ascending order + for i in 1..account_paths.len() { + assert!( + account_paths[i - 1] < account_paths[i], + "Account paths should be in ascending order" + ); + } + + // Verify storage paths for each account are in ascending order + for (account, paths) in storage_paths_by_account { + for i in 1..paths.len() { + 
assert!( + paths[i - 1] < paths[i], + "Storage paths for account {:?} should be in ascending order", + account + ); + } + } + + assert!(iter.complete); + } + + #[test] + fn test_state_root_branch_nodes_iter_multiple_accounts() { + // Test with multiple accounts to verify ordering + let mut accounts = BTreeMap::new(); + let mut storage_tries = B256Map::default(); + + // Create multiple test addresses + for i in 1u8..=3 { + let addr = keccak256([i; 20]); + accounts.insert( + addr, + Account { + nonce: i as u64, + balance: U256::from(i as u64 * 1000), + bytecode_hash: (i == 2).then(|| keccak256([i])), + }, + ); + + // Add some storage for each account + let mut storage = BTreeMap::new(); + for j in 0..i { + storage.insert(keccak256(B256::from(U256::from(j))), U256::from(j as u64 * 10)); + } + if !storage.is_empty() { + storage_tries.insert(addr, storage); + } + } + + let factory = MockHashedCursorFactory::new(accounts, storage_tries); + let mut iter = StateRootBranchNodesIter::new(factory); + + // Track what we see + let mut seen_storage_accounts = Vec::new(); + let mut current_storage_account = None; + let mut iterations = 0; + + for result in iter.by_ref() { + iterations += 1; + assert!(iterations <= 10000, "Too many iterations"); + + match result { + Ok(BranchNode::Storage(account, _, _)) => { + if current_storage_account != Some(account) { + // Verify we don't revisit a storage account + assert!( + !seen_storage_accounts.contains(&account), + "Should not revisit storage account {:?}", + account + ); + seen_storage_accounts.push(account); + current_storage_account = Some(account); + } + } + Ok(BranchNode::Account(_, _)) => { + // Account nodes are fine + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + assert!(iter.complete); + } + + #[test] + fn test_single_verifier_new() { + // Test creating a new SingleVerifier for account trie + let trie_nodes = BTreeMap::from([( + Nibbles::from_nibbles([0x1]), + test_branch_node(0b1111, 0, 0, vec![]), + )]); + + 
let cursor = create_mock_cursor(trie_nodes); + let verifier = SingleVerifier::new(None, cursor).unwrap(); + + // Should have seeked to the beginning and found the first node + assert!(verifier.curr.is_some()); + } + + #[test] + fn test_single_verifier_next_exact_match() { + // Test when the expected node matches exactly + let node1 = test_branch_node(0b1111, 0, 0b1111, vec![B256::from([1u8; 32])]); + let node2 = test_branch_node(0b0101, 0b0001, 0b0100, vec![B256::from([2u8; 32])]); + + let trie_nodes = BTreeMap::from([ + (Nibbles::from_nibbles([0x1]), node1.clone()), + (Nibbles::from_nibbles([0x2]), node2), + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // Call next with the exact node that exists + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + + // Should have no outputs + assert!(outputs.is_empty()); + } + + #[test] + fn test_single_verifier_next_wrong_value() { + // Test when the path matches but value is different + let node_in_trie = test_branch_node(0b1111, 0, 0b1111, vec![B256::from([1u8; 32])]); + let node_expected = test_branch_node(0b0101, 0b0001, 0b0100, vec![B256::from([2u8; 32])]); + + let trie_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x1]), node_in_trie.clone())]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // Call next with different node value + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node_expected.clone()).unwrap(); + + // Should have one "wrong" output + assert_eq!(outputs.len(), 1); + assert_matches!( + &outputs[0], + Output::AccountWrong { path, expected, found } + if *path == Nibbles::from_nibbles([0x1]) && *expected == node_expected && *found == node_in_trie + ); + } + + #[test] + fn test_single_verifier_next_missing() { + // Test when expected node doesn't exist in trie + let 
node1 = test_branch_node(0b1111, 0, 0b1111, vec![B256::from([1u8; 32])]); + let node_missing = test_branch_node(0b0101, 0b0001, 0b0100, vec![B256::from([2u8; 32])]); + + let trie_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x3]), node1)]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // Call next with a node that comes before any in the trie + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node_missing.clone()).unwrap(); + + // Should have one "missing" output + assert_eq!(outputs.len(), 1); + assert_matches!( + &outputs[0], + Output::AccountMissing(path, node) + if *path == Nibbles::from_nibbles([0x1]) && *node == node_missing + ); + } + + #[test] + fn test_single_verifier_next_extra() { + // Test when trie has extra nodes not in expected + // Create a proper trie structure with root + let node_root = test_branch_node(0b1110, 0, 0b1110, vec![]); // root has children at 1, 2, 3 + let node1 = test_branch_node(0b0001, 0, 0b0001, vec![]); + let node2 = test_branch_node(0b0010, 0, 0b0010, vec![]); + let node3 = test_branch_node(0b0100, 0, 0b0100, vec![]); + + let trie_nodes = BTreeMap::from([ + (Nibbles::new(), node_root.clone()), + (Nibbles::from_nibbles([0x1]), node1.clone()), + (Nibbles::from_nibbles([0x2]), node2.clone()), + (Nibbles::from_nibbles([0x3]), node3.clone()), + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // The depth-first iterator produces in post-order: 0x1, 0x2, 0x3, root + // We only provide 0x1 and 0x3, skipping 0x2 and root + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x3]), node3).unwrap(); + verifier.finalize(&mut outputs).unwrap(); + + // Should have two "extra" outputs for nodes in the trie that we skipped + if outputs.len() != 2 { + 
eprintln!("Expected 2 outputs, got {}:", outputs.len()); + for inc in &outputs { + eprintln!(" {:?}", inc); + } + } + assert_eq!(outputs.len(), 2); + assert_matches!( + &outputs[0], + Output::AccountExtra(path, node) + if *path == Nibbles::from_nibbles([0x2]) && *node == node2 + ); + assert_matches!( + &outputs[1], + Output::AccountExtra(path, node) + if *path == Nibbles::new() && *node == node_root + ); + } + + #[test] + fn test_single_verifier_finalize() { + // Test finalize marks all remaining nodes as extra + let node_root = test_branch_node(0b1110, 0, 0b1110, vec![]); // root has children at 1, 2, 3 + let node1 = test_branch_node(0b0001, 0, 0b0001, vec![]); + let node2 = test_branch_node(0b0010, 0, 0b0010, vec![]); + let node3 = test_branch_node(0b0100, 0, 0b0100, vec![]); + + let trie_nodes = BTreeMap::from([ + (Nibbles::new(), node_root.clone()), + (Nibbles::from_nibbles([0x1]), node1.clone()), + (Nibbles::from_nibbles([0x2]), node2.clone()), + (Nibbles::from_nibbles([0x3]), node3.clone()), + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // The depth-first iterator produces in post-order: 0x1, 0x2, 0x3, root + // Process first two nodes correctly + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x2]), node2).unwrap(); + assert!(outputs.is_empty()); + + // Finalize - should mark remaining nodes (0x3 and root) as extra + verifier.finalize(&mut outputs).unwrap(); + + // Should have two extra nodes + assert_eq!(outputs.len(), 2); + assert_matches!( + &outputs[0], + Output::AccountExtra(path, node) + if *path == Nibbles::from_nibbles([0x3]) && *node == node3 + ); + assert_matches!( + &outputs[1], + Output::AccountExtra(path, node) + if *path == Nibbles::new() && *node == node_root + ); + } + + #[test] + fn test_single_verifier_storage_trie() { + // Test SingleVerifier for 
storage trie (with account set) + let account = B256::from([42u8; 32]); + let node = test_branch_node(0b1111, 0, 0b1111, vec![B256::from([1u8; 32])]); + + let trie_nodes = BTreeMap::from([(Nibbles::from_nibbles([0x1]), node)]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(Some(account), cursor).unwrap(); + let mut outputs = Vec::new(); + + // Call next with missing node + let missing_node = test_branch_node(0b0101, 0b0001, 0b0100, vec![B256::from([2u8; 32])]); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x0]), missing_node.clone()).unwrap(); + + // Should produce StorageMissing, not AccountMissing + assert_eq!(outputs.len(), 1); + assert_matches!( + &outputs[0], + Output::StorageMissing(acc, path, node) + if *acc == account && *path == Nibbles::from_nibbles([0x0]) && *node == missing_node + ); + } + + #[test] + fn test_single_verifier_empty_trie() { + // Test with empty trie cursor + let trie_nodes = BTreeMap::new(); + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // Any node should be marked as missing + let node = test_branch_node(0b1111, 0, 0b1111, vec![B256::from([1u8; 32])]); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node.clone()).unwrap(); + + assert_eq!(outputs.len(), 1); + assert_matches!( + &outputs[0], + Output::AccountMissing(path, n) + if *path == Nibbles::from_nibbles([0x1]) && *n == node + ); + } + + #[test] + fn test_single_verifier_depth_first_ordering() { + // Test that nodes must be provided in depth-first order + // Create nodes with proper parent-child relationships + let node_root = test_branch_node(0b0110, 0, 0b0110, vec![]); // root has children at 1 and 2 + let node1 = test_branch_node(0b0110, 0, 0b0110, vec![]); // 0x1 has children at 1 and 2 + let node11 = test_branch_node(0b0001, 0, 0b0001, vec![]); // 0x11 is a leaf + let node12 = test_branch_node(0b0010, 0, 0b0010, 
vec![]); // 0x12 is a leaf + let node2 = test_branch_node(0b0100, 0, 0b0100, vec![]); // 0x2 is a leaf + + // The depth-first iterator will iterate from the root in this order: + // root -> 0x1 -> 0x11, 0x12 (children of 0x1), then 0x2 + // But because of depth-first, we get: root, 0x1, 0x11, 0x12, 0x2 + let trie_nodes = BTreeMap::from([ + (Nibbles::new(), node_root.clone()), // root + (Nibbles::from_nibbles([0x1]), node1.clone()), // 0x1 + (Nibbles::from_nibbles([0x1, 0x1]), node11.clone()), // 0x11 + (Nibbles::from_nibbles([0x1, 0x2]), node12.clone()), // 0x12 + (Nibbles::from_nibbles([0x2]), node2.clone()), // 0x2 + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // The depth-first iterator produces nodes in post-order (children before parents) + // Order: 0x11, 0x12, 0x1, 0x2, root + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x1]), node11).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x2]), node12).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x2]), node2).unwrap(); + verifier.next(&mut outputs, Nibbles::new(), node_root).unwrap(); + verifier.finalize(&mut outputs).unwrap(); + + // All should match, no outputs + if !outputs.is_empty() { + eprintln!( + "Test test_single_verifier_depth_first_ordering failed with {} outputs:", + outputs.len() + ); + for inc in &outputs { + eprintln!(" {:?}", inc); + } + } + assert!(outputs.is_empty()); + } + + #[test] + fn test_single_verifier_wrong_depth_first_order() { + // Test that providing nodes in wrong order produces outputs + // Create a trie with parent-child relationship + let node_root = test_branch_node(0b0010, 0, 0b0010, vec![]); // root has child at 1 + let node1 = test_branch_node(0b0010, 0, 0b0010, vec![]); // 0x1 has child at 1 + let node11 = test_branch_node(0b0001, 0, 0b0001, 
vec![]); // 0x11 is a leaf + + let trie_nodes = BTreeMap::from([ + (Nibbles::new(), node_root.clone()), + (Nibbles::from_nibbles([0x1]), node1.clone()), + (Nibbles::from_nibbles([0x1, 0x1]), node11.clone()), + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // Process in WRONG order (skip root, provide child before processing all nodes correctly) + // The iterator will produce: root, 0x1, 0x11 + // But we provide: 0x11, root, 0x1 (completely wrong order) + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x1]), node11).unwrap(); + verifier.next(&mut outputs, Nibbles::new(), node_root).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + + // Should have outputs since we provided them in wrong order + assert!(!outputs.is_empty()); + } + + #[test] + fn test_single_verifier_complex_depth_first() { + // Test a complex tree structure with depth-first ordering + // Build a tree structure with proper parent-child relationships + let node_root = test_branch_node(0b0110, 0, 0b0110, vec![]); // root: children at nibbles 1 and 2 + let node1 = test_branch_node(0b0110, 0, 0b0110, vec![]); // 0x1: children at nibbles 1 and 2 + let node11 = test_branch_node(0b0110, 0, 0b0110, vec![]); // 0x11: children at nibbles 1 and 2 + let node111 = test_branch_node(0b0001, 0, 0b0001, vec![]); // 0x111: leaf + let node112 = test_branch_node(0b0010, 0, 0b0010, vec![]); // 0x112: leaf + let node12 = test_branch_node(0b0100, 0, 0b0100, vec![]); // 0x12: leaf + let node2 = test_branch_node(0b0010, 0, 0b0010, vec![]); // 0x2: child at nibble 1 + let node21 = test_branch_node(0b0001, 0, 0b0001, vec![]); // 0x21: leaf + + // Create the trie structure + let trie_nodes = BTreeMap::from([ + (Nibbles::new(), node_root.clone()), + (Nibbles::from_nibbles([0x1]), node1.clone()), + (Nibbles::from_nibbles([0x1, 0x1]), node11.clone()), + 
(Nibbles::from_nibbles([0x1, 0x1, 0x1]), node111.clone()), + (Nibbles::from_nibbles([0x1, 0x1, 0x2]), node112.clone()), + (Nibbles::from_nibbles([0x1, 0x2]), node12.clone()), + (Nibbles::from_nibbles([0x2]), node2.clone()), + (Nibbles::from_nibbles([0x2, 0x1]), node21.clone()), + ]); + + let cursor = create_mock_cursor(trie_nodes); + let mut verifier = SingleVerifier::new(None, cursor).unwrap(); + let mut outputs = Vec::new(); + + // The depth-first iterator produces nodes in post-order (children before parents) + // Order: 0x111, 0x112, 0x11, 0x12, 0x1, 0x21, 0x2, root + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x1, 0x1]), node111).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x1, 0x2]), node112).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x1]), node11).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1, 0x2]), node12).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x1]), node1).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x2, 0x1]), node21).unwrap(); + verifier.next(&mut outputs, Nibbles::from_nibbles([0x2]), node2).unwrap(); + verifier.next(&mut outputs, Nibbles::new(), node_root).unwrap(); + verifier.finalize(&mut outputs).unwrap(); + + // All should match, no outputs + if !outputs.is_empty() { + eprintln!( + "Test test_single_verifier_complex_depth_first failed with {} outputs:", + outputs.len() + ); + for inc in &outputs { + eprintln!(" {:?}", inc); + } + } + assert!(outputs.is_empty()); + } +} diff --git a/docs/vocs/docs/pages/cli/SUMMARY.mdx b/docs/vocs/docs/pages/cli/SUMMARY.mdx index d7582ab64c5..8158a9b94e4 100644 --- a/docs/vocs/docs/pages/cli/SUMMARY.mdx +++ b/docs/vocs/docs/pages/cli/SUMMARY.mdx @@ -18,6 +18,7 @@ - [`reth db clear`](/cli/reth/db/clear) - [`reth db clear mdbx`](/cli/reth/db/clear/mdbx) - [`reth db clear static-file`](/cli/reth/db/clear/static-file) + - [`reth db repair-trie`](/cli/reth/db/repair-trie) - [`reth db 
version`](/cli/reth/db/version) - [`reth db path`](/cli/reth/db/path) - [`reth download`](/cli/reth/download) diff --git a/docs/vocs/docs/pages/cli/reth/db.mdx b/docs/vocs/docs/pages/cli/reth/db.mdx index 28fb977f8b1..2553a1480f9 100644 --- a/docs/vocs/docs/pages/cli/reth/db.mdx +++ b/docs/vocs/docs/pages/cli/reth/db.mdx @@ -9,16 +9,17 @@ $ reth db --help Usage: reth db [OPTIONS] Commands: - stats Lists all the tables, their entry count and their size - list Lists the contents of a table - checksum Calculates the content checksum of a table - diff Create a diff between two database tables or two entire databases - get Gets the content of a table for the given key - drop Deletes all database entries - clear Deletes all table entries - version Lists current and local database versions - path Returns the full database path - help Print this message or the help of the given subcommand(s) + stats Lists all the tables, their entry count and their size + list Lists the contents of a table + checksum Calculates the content checksum of a table + diff Create a diff between two database tables or two entire databases + get Gets the content of a table for the given key + drop Deletes all database entries + clear Deletes all table entries + repair-trie Verifies trie consistency and outputs any inconsistencies + version Lists current and local database versions + path Returns the full database path + help Print this message or the help of the given subcommand(s) Options: -h, --help diff --git a/docs/vocs/docs/pages/cli/reth/db/repair-trie.mdx b/docs/vocs/docs/pages/cli/reth/db/repair-trie.mdx new file mode 100644 index 00000000000..f5058265196 --- /dev/null +++ b/docs/vocs/docs/pages/cli/reth/db/repair-trie.mdx @@ -0,0 +1,109 @@ +# reth db repair-trie + +Verifies trie consistency and outputs any inconsistencies + +```bash +$ reth db repair-trie --help +``` +```txt +Usage: reth db repair-trie [OPTIONS] + +Options: + --dry-run + Only show inconsistencies without making any repairs 
+ + -h, --help + Print help (see a summary with '-h') + +Datadir: + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, holesky, hoodi, dev + + [default: mainnet] + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + [default: terminal] + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + [default: terminal] + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.name + The prefix name of the log files + + [default: reth.log] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + + [default: always] + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file