From fa942b13ffbecbeac9b8ebbe33a14669694baa8c Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 20 Nov 2025 17:37:10 +0100 Subject: [PATCH 01/59] WIP: began wiring up proof targets --- crates/trie/trie/src/proof_v2/mod.rs | 167 ++++++++++++++++++-------- crates/trie/trie/src/proof_v2/node.rs | 31 +++-- 2 files changed, 138 insertions(+), 60 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index dc3fb24e6f9..2a6020561c6 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -12,9 +12,11 @@ use crate::{ trie_cursor::{TrieCursor, TrieStorageCursor}, }; use alloy_primitives::{B256, U256}; +use alloy_rlp::Encodable; use alloy_trie::TrieMask; use reth_execution_errors::trie::StateProofError; use reth_trie_common::{BranchNode, Nibbles, ProofTrieNode, RlpNode, TrieMasks, TrieNode}; +use std::iter::Peekable; use tracing::{instrument, trace}; mod value; @@ -54,6 +56,9 @@ pub struct ProofCalculator { /// and so on. When a branch is removed from `branch_stack` its children are removed from this /// one, and the branch is pushed onto this stack in their place (see [`Self::pop_branch`]. child_stack: Vec>, + /// The proofs which will be returned from the calculation. This gets taken at the end of every + /// proof call. + retained_proofs: Vec, /// Free-list of re-usable buffers of [`RlpNode`]s, used for encoding branch nodes to RLP. /// /// We are generally able to re-use these buffers across different branch nodes for the @@ -73,6 +78,7 @@ impl ProofCalculator { branch_stack: Vec::<_>::new(), branch_path: Nibbles::new(), child_stack: Vec::<_>::new(), + retained_proofs: Vec::<_>::new(), rlp_nodes_bufs: Vec::<_>::new(), rlp_encode_buf: Vec::<_>::new(), } @@ -200,7 +206,10 @@ where /// # Panics /// /// This method panics if `branch_stack` is empty. 
- fn pop_branch(&mut self) -> Result<(), StateProofError> { + fn pop_branch( + &mut self, + targets: &mut Peekable>, + ) -> Result<(), StateProofError> { let mut rlp_nodes_buf = self.take_rlp_nodes_buf(); let branch = self.branch_stack.pop().expect("branch_stack cannot be empty"); @@ -221,28 +230,42 @@ where ); let children = self.child_stack.drain(self.child_stack.len() - num_children..); - // We will be pushing the branch onto the child stack, which will require its parent - // extension's short key (if it has a parent extension). Calculate this short key from the - // `branch_path` prior to modifying the `branch_path`. - let short_key = trim_nibbles_prefix( - &self.branch_path, - self.branch_path.len() - branch.ext_len as usize, - ); - - // Update the branch_path. If this branch is the only branch then only its extension needs - // to be trimmed, otherwise we also need to remove its nibble from its parent. - let new_path_len = self.branch_path.len() - - branch.ext_len as usize - - if self.branch_stack.is_empty() { 0 } else { 1 }; - - debug_assert!(self.branch_path.len() >= new_path_len); - self.branch_path = self.branch_path.slice_unchecked(0, new_path_len); - // From here we will be encoding the branch node and pushing it onto the child stack, // replacing its children. - // Collect children into an `RlpNode` Vec by calling into_rlp on each. - for child in children { + // Create an iterator over the paths of each child in the branch. + let child_paths = TrieMaskIter(branch.state_mask).map(|nibble| { + let mut child_path = self.branch_path; + debug_assert!(child_path.len() < 64, "child_path {child_path:?} is too long to extend"); + child_path.extend_from_slice_unchecked(&[nibble]); + child_path + }); + + // Collect children into an `RlpNode` Vec by encoding each of them. + for (child_path, child) in child_paths.zip(children) { + // If the child path is a prefix of the target then we retain the proof. 
+ if let Some(curr_target) = targets.peek() && + curr_target.starts_with(&child_path) + { + // Convert to `ProofTrieNode`, which will be what is retained. If this node is a + // leaf then the `rlp_encode_buf` is taken by it and we have to allocate a new one + // going forward. + self.rlp_encode_buf.clear(); + let proof_node = + child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; + + // Use the `ProofTrieNode` to encode the `RlpNode` and push that into the + // `rlp_nodes_buf`. + self.rlp_encode_buf.clear(); + proof_node.node.encode(&mut self.rlp_encode_buf); + rlp_nodes_buf.push(RlpNode::from_rlp(&self.rlp_encode_buf)); + + continue; + } + + // If the child path is not being retained then we convert directly to an `RlpNode` + // using `into_rlp`. Since we are not retaining the node we can recover any `RlpNode` + // buffers for the free-list here, hence why we do this as a separate logical branch. self.rlp_encode_buf.clear(); let (child_rlp_node, freed_rlp_nodes_buf) = child.into_rlp(&mut self.rlp_encode_buf)?; rlp_nodes_buf.push(child_rlp_node); @@ -259,6 +282,13 @@ where "children length must match number of bits set in state_mask" ); + // Calculate the short key of the parent extension (if the branch has a parent extension). + // It's important to calculate this short key prior to modifying the `branch_path`. + let short_key = trim_nibbles_prefix( + &self.branch_path, + self.branch_path.len() - branch.ext_len as usize, + ); + // Construct the `BranchNode`. let branch_node = BranchNode::new(rlp_nodes_buf, branch.state_mask); @@ -272,12 +302,27 @@ where }; self.child_stack.push(branch_as_child); + + // Update the branch_path. If this branch is the only branch then only its extension needs + // to be trimmed, otherwise we also need to remove its nibble from its parent. 
+ let new_path_len = self.branch_path.len() - + branch.ext_len as usize - + if self.branch_stack.is_empty() { 0 } else { 1 }; + + debug_assert!(self.branch_path.len() >= new_path_len); + self.branch_path = self.branch_path.slice_unchecked(0, new_path_len); + Ok(()) } /// Adds a single leaf for a key to the stack, possibly collapsing an existing branch and/or /// creating a new one depending on the path of the key. - fn add_leaf(&mut self, key: Nibbles, val: VE::DeferredEncoder) -> Result<(), StateProofError> { + fn add_leaf( + &mut self, + targets: &mut Peekable>, + key: Nibbles, + val: VE::DeferredEncoder, + ) -> Result<(), StateProofError> { loop { // Get the branch currently being built. If there are no branches on the stack then it // means either the trie is empty or only a single leaf has been added previously. @@ -312,7 +357,7 @@ where // not the parent of the new key. In this case the current branch will have no more // children. We can pop it and loop back to the top to try again with its parent branch. if common_prefix_len < self.branch_path.len() { - self.pop_branch()?; + self.pop_branch(targets)?; continue } @@ -350,23 +395,32 @@ where // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] - let targets = { + let mut targets = { let mut prev: Option = None; - targets.into_iter().inspect(move |target| { - if let Some(prev) = prev { - debug_assert!( - prev <= *target, - "targets must be sorted lexicographically: {:?} > {:?}", - prev, - target - ); - } - prev = Some(*target); - }) + targets + .into_iter() + .inspect(move |target| { + if let Some(prev) = prev { + debug_assert!( + prev <= *target, + "targets must be sorted lexicographically: {:?} > {:?}", + prev, + target + ); + } + prev = Some(*target); + }) + .peekable() }; #[cfg(not(debug_assertions))] - let targets = targets.into_iter(); + let mut targets = targets.into_iter().peekable(); + + // If there are no targets then nothing could be returned, return early. 
+ if targets.peek().is_none() { + // TODO uncomment + //return Ok(Vec::new()) + } // Ensure initial state is cleared. By the end of the method call these should be empty once // again. @@ -374,10 +428,6 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.is_empty()); - // Silence unused variable warning for now - let _ = targets; - - let mut proof_nodes = Vec::new(); let mut hashed_cursor_current = self.hashed_cursor.seek(B256::ZERO)?; loop { trace!(target: TRACE_TARGET, ?hashed_cursor_current, "proof_inner loop"); @@ -395,13 +445,13 @@ where break }; - self.add_leaf(key, val)?; + self.add_leaf(&mut targets, key, val)?; hashed_cursor_current = self.hashed_cursor.next()?; } // Once there's no more leaves we can pop the remaining branches, if any. while !self.branch_stack.is_empty() { - self.pop_branch()?; + self.pop_branch(&mut targets)?; } // At this point the branch stack should be empty. If the child stack is empty it means no @@ -411,22 +461,21 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.len() < 2); - // Determine the root node based on the child stack, and push the proof of the root node - // onto the result stack. + // All targets match the root node, so always retain it. Determine the root node based on + // the child stack, and push the proof of the root node onto the result stack. let root_node = if let Some(node) = self.child_stack.pop() { self.rlp_encode_buf.clear(); - node.into_trie_node(&mut self.rlp_encode_buf)? + node.into_proof_trie_node(Nibbles::new(), &mut self.rlp_encode_buf)? 
} else { - TrieNode::EmptyRoot + ProofTrieNode { + path: Nibbles::new(), // root path + node: TrieNode::EmptyRoot, + masks: TrieMasks::none(), + } }; + self.retained_proofs.push(root_node); - proof_nodes.push(ProofTrieNode { - path: Nibbles::new(), // root path - node: root_node, - masks: TrieMasks::none(), - }); - - Ok(proof_nodes) + Ok(core::mem::take(&mut self.retained_proofs)) } } @@ -507,6 +556,20 @@ where } } +/// A helper type for iterating over the indexes of the non-zero bits of a TrieMask. +struct TrieMaskIter(TrieMask); + +impl Iterator for TrieMaskIter { + type Item = u8; + fn next(&mut self) -> Option { + let bit = self.0.first_set_bit_index(); + if let Some(bit) = bit { + self.0.unset_bit(bit); + } + bit + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/trie/trie/src/proof_v2/node.rs b/crates/trie/trie/src/proof_v2/node.rs index dfe9d15053b..660658e33bd 100644 --- a/crates/trie/trie/src/proof_v2/node.rs +++ b/crates/trie/trie/src/proof_v2/node.rs @@ -3,7 +3,8 @@ use alloy_rlp::Encodable; use alloy_trie::nodes::ExtensionNodeRef; use reth_execution_errors::trie::StateProofError; use reth_trie_common::{ - BranchNode, ExtensionNode, LeafNode, LeafNodeRef, Nibbles, RlpNode, TrieMask, TrieNode, + BranchNode, ExtensionNode, LeafNode, LeafNodeRef, Nibbles, ProofTrieNode, RlpNode, TrieMask, + TrieMasks, TrieNode, }; /// A trie node which is the child of a branch in the trie. @@ -71,20 +72,34 @@ impl ProofTrieBranchChild { } } - /// Converts this child into a [`TrieNode`]. - pub(crate) fn into_trie_node(self, buf: &mut Vec) -> Result { - match self { + /// Converts this child into a [`ProofTrieNode`] having the given path. 
+ pub(crate) fn into_proof_trie_node( + self, + path: Nibbles, + buf: &mut Vec, + ) -> Result { + let (node, masks) = match self { Self::Leaf { short_key, value } => { value.encode(buf)?; - Ok(TrieNode::Leaf(LeafNode::new(short_key, core::mem::take(buf)))) + (TrieNode::Leaf(LeafNode::new(short_key, core::mem::take(buf))), TrieMasks::none()) } Self::Extension { short_key, child } => { child.encode(buf); let child_rlp_node = RlpNode::from_rlp(buf); - Ok(TrieNode::Extension(ExtensionNode { key: short_key, child: child_rlp_node })) + ( + TrieNode::Extension(ExtensionNode { key: short_key, child: child_rlp_node }), + TrieMasks::none(), + ) } - Self::Branch(branch_node) => Ok(TrieNode::Branch(branch_node)), - } + // TODO store trie masks on branch + Self::Branch(branch_node) => (TrieNode::Branch(branch_node), TrieMasks::none()), + }; + + // Encode the `TrieNode` to the buffer, so we can return the `RlpNode` for it at the end. + buf.clear(); + node.encode(buf); + + Ok(ProofTrieNode { node, path, masks }) } /// Returns the short key of the child, if it is a leaf or extension, or empty if its a From 671d9f8f1e53f68f84343b7aa4d6e365ef2d47b8 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 21 Nov 2025 14:29:57 +0100 Subject: [PATCH 02/59] WIP: should_retain --- crates/trie/trie/src/proof_v2/mod.rs | 66 ++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 2a6020561c6..5d33e76aa75 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -199,6 +199,52 @@ where ); } + /// Returns true if the proof of a node at the given path should be retained. This may move the + /// `targets` iterator forward if the given path comes after the current target. 
+ fn should_retain( + &self, + targets: &mut Peekable>, + path: &Nibbles, + ) -> bool { + debug_assert!(self.retained_proofs.last().is_none_or( + |ProofTrieNode { path: last_retained_path, .. }| { + crate::trie_cursor::depth_first::cmp(path, last_retained_path) == + std::cmp::Ordering::Greater + } + ), + "should_retain called with path {path:?} which is not after previously retained node {:?} in depth-first order", + self.retained_proofs.last().map(|n| n.path), + ); + + // If the node in question is a prefix of the previously retained proof then we retain. + // + // This is required for cases where the target iterator is moved forward such that the + // next target is None or the current branch is not a prefix of it. In this case it's + // still possible, when popping the branch, that one of its children was retained due to + // the previous target, and therefore this node should be retained still. + if self.retained_proofs.last().is_some_and( + |ProofTrieNode { path: last_retained_path, .. }| last_retained_path.starts_with(path), + ) { + return true + } + + // Iterate targets forwards to catch up to the given path. Some notes here: + // - If `target.starts_with(path)`, then `path <= target`. + // - Therefore, if `target < path`, there is no situation where path could be retained by + // this target. + // - The ProofCalculator traverse the trie in depth-first order, meaning paths are provided + // to this method such that `prev_path.starts_with(path)` (handled in the previous + // if-block) OR `path > prev_path`. + while let Some(target) = targets.peek() && + target < path + { + targets.next(); + } + + // If the node in question is a prefix of the target then we retain + targets.peek().is_some_and(|t| t.starts_with(path)) + } + /// Pops the top branch off of the `branch_stack`, hashes its children on the `child_stack`, and /// replaces those children on the `child_stack`. The `branch_path` field will be updated /// accordingly. 
@@ -228,10 +274,11 @@ where self.child_stack.len() >= num_children, "Stack is missing necessary children" ); - let children = self.child_stack.drain(self.child_stack.len() - num_children..); - // From here we will be encoding the branch node and pushing it onto the child stack, - // replacing its children. + // We have to take the child_stack off self so we can still call other methods while + // draining it. + let mut child_stack = core::mem::take(&mut self.child_stack); + let children = child_stack.drain(child_stack.len() - num_children..); // Create an iterator over the paths of each child in the branch. let child_paths = TrieMaskIter(branch.state_mask).map(|nibble| { @@ -243,13 +290,11 @@ where // Collect children into an `RlpNode` Vec by encoding each of them. for (child_path, child) in child_paths.zip(children) { - // If the child path is a prefix of the target then we retain the proof. - if let Some(curr_target) = targets.peek() && - curr_target.starts_with(&child_path) - { + // If we should retain the child then do so. + if self.should_retain(targets, &child_path) { // Convert to `ProofTrieNode`, which will be what is retained. If this node is a - // leaf then the `rlp_encode_buf` is taken by it and we have to allocate a new one - // going forward. + // leaf then the `rlp_encode_buf` is taken by it and a new one will be allocated by + // the next encode call. self.rlp_encode_buf.clear(); let proof_node = child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; @@ -276,6 +321,9 @@ where } } + // Put the child_stack back, now that we're done draining from it. 
+ let _ = core::mem::replace(&mut self.child_stack, child_stack); + debug_assert_eq!( rlp_nodes_buf.len(), branch.state_mask.count_ones() as usize, From ef1dd3392409e38198a2e6ead9948075d60d077a Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 21 Nov 2025 14:55:09 +0100 Subject: [PATCH 03/59] WIP: RlpNode --- crates/trie/trie/src/proof_v2/mod.rs | 23 +++++++++++---- crates/trie/trie/src/proof_v2/node.rs | 40 +++++++++++++-------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 5d33e76aa75..771612bc701 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -337,16 +337,27 @@ where self.branch_path.len() - branch.ext_len as usize, ); - // Construct the `BranchNode`. - let branch_node = BranchNode::new(rlp_nodes_buf, branch.state_mask); + // Construct the `ProofTrieBranchChild` for the branch itself. + let branch_child = + ProofTrieBranchChild::Branch(BranchNode::new(rlp_nodes_buf, branch.state_mask)); // Wrap the `BranchNode` so it can be pushed onto the child stack. let branch_as_child = if short_key.is_empty() { - // If there is no extension then push a branch node - ProofTrieBranchChild::Branch(branch_node) + // If there is no extension then push the child as-is + branch_child } else { - // Otherwise push an extension node - ProofTrieBranchChild::Extension { short_key, child: branch_node } + // If there is an extension then encode the branch as an `RlpNode` and use it to + // construct the extension. + self.rlp_encode_buf.clear(); + let (branch_rlp_node, freed_rlp_nodes_buf) = + branch_child.into_rlp(&mut self.rlp_encode_buf)?; + + // If there is an `RlpNode` buffer which can be re-used then push it onto the free-list. 
+ if let Some(buf) = freed_rlp_nodes_buf { + self.rlp_nodes_bufs.push(buf); + } + + ProofTrieBranchChild::Extension { short_key, child: branch_rlp_node } }; self.child_stack.push(branch_as_child); diff --git a/crates/trie/trie/src/proof_v2/node.rs b/crates/trie/trie/src/proof_v2/node.rs index 660658e33bd..e74476c361b 100644 --- a/crates/trie/trie/src/proof_v2/node.rs +++ b/crates/trie/trie/src/proof_v2/node.rs @@ -17,15 +17,17 @@ pub(crate) enum ProofTrieBranchChild { /// The [`DeferredValueEncoder`] which will encode the leaf's value. value: RF, }, - /// An extension node whose child branch has not yet been converted to an [`RlpNode`] + /// An extension node whose child branch has been converted to an [`RlpNode`] Extension { /// The short key of the leaf. short_key: Nibbles, - /// The node of the child branch. - child: BranchNode, + /// The [`RlpNode`] of the child branch. + child: RlpNode, }, /// A branch node whose children have already been flattened into [`RlpNode`]s. Branch(BranchNode), + // A node whose type is not known, as it has already been converted to an [`RlpNode`]. + RlpNode(RlpNode), } impl ProofTrieBranchChild { @@ -59,20 +61,22 @@ impl ProofTrieBranchChild { Ok((RlpNode::from_rlp(&buf[value_enc_len..]), None)) } Self::Extension { short_key, child } => { - let (branch_rlp, rlp_buf) = Self::Branch(child).into_rlp(buf)?; - buf.clear(); - - ExtensionNodeRef::new(&short_key, branch_rlp.as_slice()).encode(buf); - Ok((RlpNode::from_rlp(buf), rlp_buf)) + ExtensionNodeRef::new(&short_key, child.as_slice()).encode(buf); + Ok((RlpNode::from_rlp(buf), None)) } Self::Branch(branch_node) => { branch_node.encode(buf); Ok((RlpNode::from_rlp(buf), Some(branch_node.stack))) } + Self::RlpNode(rlp_node) => Ok((rlp_node, None)), } } /// Converts this child into a [`ProofTrieNode`] having the given path. + /// + /// # Panics + /// + /// If called on a [`Self::RlpNode`]. 
pub(crate) fn into_proof_trie_node( self, path: Nibbles, @@ -84,15 +88,11 @@ impl ProofTrieBranchChild { (TrieNode::Leaf(LeafNode::new(short_key, core::mem::take(buf))), TrieMasks::none()) } Self::Extension { short_key, child } => { - child.encode(buf); - let child_rlp_node = RlpNode::from_rlp(buf); - ( - TrieNode::Extension(ExtensionNode { key: short_key, child: child_rlp_node }), - TrieMasks::none(), - ) + (TrieNode::Extension(ExtensionNode { key: short_key, child }), TrieMasks::none()) } // TODO store trie masks on branch Self::Branch(branch_node) => (TrieNode::Branch(branch_node), TrieMasks::none()), + Self::RlpNode(_) => panic!("Cannot call `into_proof_trie_node` on RlpNode"), }; // Encode the `TrieNode` to the buffer, so we can return the `RlpNode` for it at the end. @@ -103,11 +103,11 @@ impl ProofTrieBranchChild { } /// Returns the short key of the child, if it is a leaf or extension, or empty if its a - /// [`Self::Branch`]. + /// [`Self::Branch`] or [`Self::RlpNode`]. pub(crate) fn short_key(&self) -> &Nibbles { match self { Self::Leaf { short_key, .. } | Self::Extension { short_key, .. } => short_key, - Self::Branch(_) => { + Self::Branch(_) | Self::RlpNode(_) => { static EMPTY_NIBBLES: Nibbles = Nibbles::new(); &EMPTY_NIBBLES } @@ -123,17 +123,17 @@ impl ProofTrieBranchChild { /// /// - If the given len is longer than the short key /// - If the given len is the same as the length of a leaf's short key - /// - If the node is a [`Self::Branch`] + /// - If the node is a [`Self::Branch`] or [`Self::RlpNode`] pub(crate) fn trim_short_key_prefix(&mut self, len: usize) { match self { Self::Extension { short_key, child } if short_key.len() == len => { - *self = Self::Branch(core::mem::take(child)); + *self = Self::RlpNode(core::mem::take(child)); } Self::Leaf { short_key, .. } | Self::Extension { short_key, .. 
} => { *short_key = trim_nibbles_prefix(short_key, len); } - Self::Branch(_) => { - panic!("Cannot call `trim_short_key_prefix` on Branch") + Self::Branch(_) | Self::RlpNode(_) => { + panic!("Cannot call `trim_short_key_prefix` on Branch or RlpNode") } } } From 37a6219e06b1671edfb0bc715fe48b9cccf06b06 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 21 Nov 2025 15:24:11 +0100 Subject: [PATCH 04/59] commit_child --- crates/trie/trie/src/proof_v2/mod.rs | 124 +++++++++++++++------------ 1 file changed, 70 insertions(+), 54 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 771612bc701..d34e0c908cb 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -245,6 +245,55 @@ where targets.peek().is_some_and(|t| t.starts_with(path)) } + /// Takes a child which has been removed from the `child_stack` and converts it to an + /// [`RlpNode`]. + /// + /// Calling this method indicates that the child will not undergo any further modifications, and + /// therefore can be retained as a proof node if applicable. + fn commit_child( + &mut self, + targets: &mut Peekable>, + child_path: Nibbles, + child: ProofTrieBranchChild, + ) -> Result { + // If the child is already an `RlpNode` then there is nothing to do. + if let ProofTrieBranchChild::RlpNode(rlp_node) = child { + return Ok(rlp_node) + } + + // If we should retain the child then do so. + if self.should_retain(targets, &child_path) { + // Convert to `ProofTrieNode`, which will be what is retained. + // + // If this node is a leaf then the `rlp_encode_buf` is taken by it and a new one will be + // allocated by the next encode call. + // + // If it is a branch then its `rlp_nodes_buf` will be taken and not returned to the + // `rlp_nodes_bufs` free-list. 
+ self.rlp_encode_buf.clear(); + let proof_node = child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; + + // Use the `ProofTrieNode` to encode the `RlpNode` and push that into the + // `rlp_nodes_buf`. + self.rlp_encode_buf.clear(); + proof_node.node.encode(&mut self.rlp_encode_buf); + return Ok(RlpNode::from_rlp(&self.rlp_encode_buf)); + } + + // If the child path is not being retained then we convert directly to an `RlpNode` + // using `into_rlp`. Since we are not retaining the node we can recover any `RlpNode` + // buffers for the free-list here, hence why we do this as a separate logical branch. + self.rlp_encode_buf.clear(); + let (child_rlp_node, freed_rlp_nodes_buf) = child.into_rlp(&mut self.rlp_encode_buf)?; + + // If there is an `RlpNode` buffer which can be re-used then push it onto the free-list. + if let Some(buf) = freed_rlp_nodes_buf { + self.rlp_nodes_bufs.push(buf); + } + + Ok(child_rlp_node) + } + /// Pops the top branch off of the `branch_stack`, hashes its children on the `child_stack`, and /// replaces those children on the `child_stack`. The `branch_path` field will be updated /// accordingly. @@ -281,44 +330,23 @@ where let children = child_stack.drain(child_stack.len() - num_children..); // Create an iterator over the paths of each child in the branch. - let child_paths = TrieMaskIter(branch.state_mask).map(|nibble| { - let mut child_path = self.branch_path; - debug_assert!(child_path.len() < 64, "child_path {child_path:?} is too long to extend"); - child_path.extend_from_slice_unchecked(&[nibble]); - child_path - }); + let child_paths = { + let branch_path = self.branch_path; + debug_assert!( + branch_path.len() < 64, + "branch_path {branch_path:?} is too long to extend" + ); + TrieMaskIter(branch.state_mask).map(move |nibble| { + let mut child_path = branch_path; + child_path.extend_from_slice_unchecked(&[nibble]); + child_path + }) + }; - // Collect children into an `RlpNode` Vec by encoding each of them. 
+ // Collect children into an `RlpNode` Vec by committing and pushing each of them. for (child_path, child) in child_paths.zip(children) { - // If we should retain the child then do so. - if self.should_retain(targets, &child_path) { - // Convert to `ProofTrieNode`, which will be what is retained. If this node is a - // leaf then the `rlp_encode_buf` is taken by it and a new one will be allocated by - // the next encode call. - self.rlp_encode_buf.clear(); - let proof_node = - child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; - - // Use the `ProofTrieNode` to encode the `RlpNode` and push that into the - // `rlp_nodes_buf`. - self.rlp_encode_buf.clear(); - proof_node.node.encode(&mut self.rlp_encode_buf); - rlp_nodes_buf.push(RlpNode::from_rlp(&self.rlp_encode_buf)); - - continue; - } - - // If the child path is not being retained then we convert directly to an `RlpNode` - // using `into_rlp`. Since we are not retaining the node we can recover any `RlpNode` - // buffers for the free-list here, hence why we do this as a separate logical branch. - self.rlp_encode_buf.clear(); - let (child_rlp_node, freed_rlp_nodes_buf) = child.into_rlp(&mut self.rlp_encode_buf)?; + let child_rlp_node = self.commit_child(targets, child_path, child)?; rlp_nodes_buf.push(child_rlp_node); - - // If there is an `RlpNode` buffer which can be re-used then push it onto the free-list. - if let Some(buf) = freed_rlp_nodes_buf { - self.rlp_nodes_bufs.push(buf); - } } // Put the child_stack back, now that we're done draining from it. @@ -337,27 +365,15 @@ where self.branch_path.len() - branch.ext_len as usize, ); - // Construct the `ProofTrieBranchChild` for the branch itself. - let branch_child = - ProofTrieBranchChild::Branch(BranchNode::new(rlp_nodes_buf, branch.state_mask)); - // Wrap the `BranchNode` so it can be pushed onto the child stack. 
- let branch_as_child = if short_key.is_empty() { - // If there is no extension then push the child as-is - branch_child - } else { - // If there is an extension then encode the branch as an `RlpNode` and use it to - // construct the extension. - self.rlp_encode_buf.clear(); - let (branch_rlp_node, freed_rlp_nodes_buf) = - branch_child.into_rlp(&mut self.rlp_encode_buf)?; - - // If there is an `RlpNode` buffer which can be re-used then push it onto the free-list. - if let Some(buf) = freed_rlp_nodes_buf { - self.rlp_nodes_bufs.push(buf); - } + let mut branch_as_child = + ProofTrieBranchChild::Branch(BranchNode::new(rlp_nodes_buf, branch.state_mask)); - ProofTrieBranchChild::Extension { short_key, child: branch_rlp_node } + // If there is an extension then encode the branch as an `RlpNode` and use it to construct + // the extension in its place + if !short_key.is_empty() { + let branch_rlp_node = self.commit_child(targets, self.branch_path, branch_as_child)?; + branch_as_child = ProofTrieBranchChild::Extension { short_key, child: branch_rlp_node }; }; self.child_stack.push(branch_as_child); From cae9681cb7ef4f0dfaa26aad6d49096d23965d74 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 21 Nov 2025 16:31:29 +0100 Subject: [PATCH 05/59] WIP: proptests --- crates/trie/trie/src/proof_v2/mod.rs | 117 +++++++++++++++++++++------ 1 file changed, 92 insertions(+), 25 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index d34e0c908cb..ac7f9914ef6 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -216,6 +216,14 @@ where self.retained_proofs.last().map(|n| n.path), ); + trace!( + target: TRACE_TARGET, + ?path, + last_retained_path = ?self.retained_proofs.last().map(|n| n.path), + target = ?targets.peek(), + "should_retain: called", + ); + // If the node in question is a prefix of the previously retained proof then we retain. 
// // This is required for cases where the target iterator is moved forward such that the @@ -239,6 +247,11 @@ where target < path { targets.next(); + trace!( + target: TRACE_TARGET, + target = ?targets.peek(), + "target < path, next target", + ); } // If the node in question is a prefix of the target then we retain @@ -332,13 +345,9 @@ where // Create an iterator over the paths of each child in the branch. let child_paths = { let branch_path = self.branch_path; - debug_assert!( - branch_path.len() < 64, - "branch_path {branch_path:?} is too long to extend" - ); TrieMaskIter(branch.state_mask).map(move |nibble| { let mut child_path = branch_path; - child_path.extend_from_slice_unchecked(&[nibble]); + child_path.push_unchecked(nibble); child_path }) }; @@ -493,8 +502,8 @@ where // If there are no targets then nothing could be returned, return early. if targets.peek().is_none() { - // TODO uncomment - //return Ok(Vec::new()) + trace!(target: TRACE_TARGET, "Empty targets, returning"); + return Ok(Vec::new()) } // Ensure initial state is cleared. By the end of the method call these should be empty once @@ -631,7 +640,7 @@ where } } -/// A helper type for iterating over the indexes of the non-zero bits of a TrieMask. +/// A helper type for iterating over the indexes of the non-zero bits of a [`TrieMask`]. struct TrieMaskIter(TrieMask); impl Iterator for TrieMaskIter { @@ -653,10 +662,11 @@ mod tests { proof::Proof, trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, }; - use alloy_primitives::map::B256Map; + use alloy_primitives::map::{B256Map, B256Set}; use alloy_rlp::Decodable; + use assert_matches::assert_matches; use itertools::Itertools; - use reth_trie_common::{HashedPostState, MultiProofTargets}; + use reth_trie_common::{HashedPostState, MultiProofTargets, TrieNode}; use std::collections::BTreeMap; /// Target to use with the `tracing` crate. @@ -726,14 +736,29 @@ mod tests { /// proofs. 
/// /// This method calls both implementations with the given account targets and compares - /// the results. For now, it performs a basic comparison by checking that both succeed - /// and produce non-empty results. More detailed comparison logic can be added as needed. + /// the results. fn assert_proof( &self, - // For now ProofCalculator doesn't support real targets, we just compare calculated - // roots. - _targets: impl IntoIterator + Clone, + targets: impl IntoIterator + Clone, ) -> Result<(), StateProofError> { + // Convert B256 targets to Nibbles for proof_v2 + let targets_vec: Vec = targets.into_iter().collect(); + let nibbles_targets: Vec = targets_vec + .iter() + .map(|b256| { + // SAFETY: B256 is exactly 32 bytes + unsafe { Nibbles::unpack_unchecked(b256.as_slice()) } + }) + .sorted() + .collect(); + + // Convert B256 targets to MultiProofTargets for legacy implementation + // For account-only proofs, each account maps to an empty storage set + let legacy_targets = targets_vec + .iter() + .map(|addr| (*addr, B256Set::default())) + .collect::(); + // Create ProofCalculator (proof_v2) with account cursors let trie_cursor = self.trie_cursor_factory.account_trie_cursor()?; let hashed_cursor = self.hashed_cursor_factory.hashed_account_cursor()?; @@ -744,15 +769,15 @@ mod tests { self.hashed_cursor_factory.clone(), ); let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); - let proof_v2_result = proof_calculator.proof(&value_encoder, [Nibbles::new()])?; + let proof_v2_result = proof_calculator.proof(&value_encoder, nibbles_targets)?; // Call Proof::multiproof (legacy implementation) let proof_legacy_result = Proof::new(self.trie_cursor_factory.clone(), self.hashed_cursor_factory.clone()) - .multiproof(MultiProofTargets::default())?; + .multiproof(legacy_targets)?; // Decode and sort legacy proof nodes - let proof_legacy_nodes = proof_legacy_result + let mut proof_legacy_nodes = proof_legacy_result .account_subtree .iter() .map(|(path, 
node_enc)| { @@ -778,6 +803,17 @@ mod tests { .sorted_by_key(|n| n.path) .collect::>(); + // When no targets are given the legacy implementation will still produce the root node + // in the proof. This differs from the V2 implementation, which produces nothing when + // given no targets. + if targets_vec.is_empty() { + assert_matches!( + proof_legacy_nodes.pop(), + Some(ProofTrieNode { path, .. }) if path.is_empty() + ); + assert!(proof_legacy_nodes.is_empty()); + } + // Basic comparison: both should succeed and produce identical results assert_eq!(proof_legacy_nodes, proof_v2_result); @@ -824,26 +860,57 @@ mod tests { ) } + /// Generate a strategy for proof targets that are 80% from the `HashedPostState` accounts + /// and 20% random keys. + fn proof_targets_strategy(account_keys: Vec) -> impl Strategy> { + let num_accounts = account_keys.len(); + + // Generate between 0 and (num_accounts + 5) targets + let target_count = 0..=(num_accounts + 5); + + target_count.prop_flat_map(move |count| { + let account_keys = account_keys.clone(); + prop::collection::vec( + prop::bool::weighted(0.8).prop_flat_map(move |from_accounts| { + if from_accounts && !account_keys.is_empty() { + // 80% chance: pick from existing account keys + prop::sample::select(account_keys.clone()).boxed() + } else { + // 20% chance: generate random B256 + any::<[u8; 32]>().prop_map(B256::from).boxed() + } + }), + count, + ) + }) + } + proptest! { #![proptest_config(ProptestConfig::with_cases(5000))] /// Tests that ProofCalculator produces valid proofs for randomly generated - /// HashedPostState with empty target sets. + /// HashedPostState with proof targets. 
/// /// This test: /// - Generates random accounts in a HashedPostState + /// - Generates proof targets: 80% from existing account keys, 20% random /// - Creates a test harness with the generated state - /// - Calls assert_proof with an empty target set - /// - Verifies both ProofCalculator and legacy Proof succeed + /// - Calls assert_proof with the generated targets + /// - Verifies both ProofCalculator and legacy Proof produce equivalent results #[test] - fn proptest_proof_with_empty_targets( - post_state in hashed_post_state_strategy(), + fn proptest_proof_with_targets( + (post_state, targets) in hashed_post_state_strategy() + .prop_flat_map(|post_state| { + let account_keys: Vec = post_state.accounts.keys().copied().collect(); + let targets_strategy = proof_targets_strategy(account_keys); + (Just(post_state), targets_strategy) + }) ) { reth_tracing::init_test_tracing(); let harness = ProofTestHarness::new(post_state); - // Pass empty target set - harness.assert_proof(std::iter::empty()).expect("Proof generation failed"); + // Pass generated targets to both implementations + harness.assert_proof(targets).expect("Proof generation failed"); } } } From b45c7b9159ae70808fe05efc34a8ab4c88f955ed Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 21 Nov 2025 17:36:15 +0100 Subject: [PATCH 06/59] WIP: windowing --- crates/trie/trie/src/proof_v2/mod.rs | 139 +++++++++++++++------------ 1 file changed, 76 insertions(+), 63 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index ac7f9914ef6..992017c6558 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -9,14 +9,14 @@ use crate::{ hashed_cursor::{HashedCursor, HashedStorageCursor}, - trie_cursor::{TrieCursor, TrieStorageCursor}, + trie_cursor::{depth_first, TrieCursor, TrieStorageCursor}, }; use alloy_primitives::{B256, U256}; use alloy_rlp::Encodable; use alloy_trie::TrieMask; use 
reth_execution_errors::trie::StateProofError; use reth_trie_common::{BranchNode, Nibbles, ProofTrieNode, RlpNode, TrieMasks, TrieNode}; -use std::iter::Peekable; +use std::{cmp::Ordering, iter::Peekable}; use tracing::{instrument, trace}; mod value; @@ -85,6 +85,9 @@ impl ProofCalculator { } } +/// Helper type for the [`Iterator`] used to pass targets in from the caller. +type TargetsIter = Peekable>; + impl ProofCalculator where TC: TrieCursor, @@ -203,59 +206,29 @@ where /// `targets` iterator forward if the given path comes after the current target. fn should_retain( &self, - targets: &mut Peekable>, + targets: &mut TargetsIter>, path: &Nibbles, ) -> bool { + trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); debug_assert!(self.retained_proofs.last().is_none_or( |ProofTrieNode { path: last_retained_path, .. }| { - crate::trie_cursor::depth_first::cmp(path, last_retained_path) == - std::cmp::Ordering::Greater + depth_first::cmp(path, last_retained_path) == Ordering::Greater } ), "should_retain called with path {path:?} which is not after previously retained node {:?} in depth-first order", self.retained_proofs.last().map(|n| n.path), ); - trace!( - target: TRACE_TARGET, - ?path, - last_retained_path = ?self.retained_proofs.last().map(|n| n.path), - target = ?targets.peek(), - "should_retain: called", - ); - - // If the node in question is a prefix of the previously retained proof then we retain. - // - // This is required for cases where the target iterator is moved forward such that the - // next target is None or the current branch is not a prefix of it. In this case it's - // still possible, when popping the branch, that one of its children was retained due to - // the previous target, and therefore this node should be retained still. - if self.retained_proofs.last().is_some_and( - |ProofTrieNode { path: last_retained_path, .. 
}| last_retained_path.starts_with(path), - ) { - return true - } - - // Iterate targets forwards to catch up to the given path. Some notes here: - // - If `target.starts_with(path)`, then `path <= target`. - // - Therefore, if `target < path`, there is no situation where path could be retained by - // this target. - // - The ProofCalculator traverse the trie in depth-first order, meaning paths are provided - // to this method such that `prev_path.starts_with(path)` (handled in the previous - // if-block) OR `path > prev_path`. - while let Some(target) = targets.peek() && - target < path + // TODO docs + while let Some((_, Some(upper))) = targets.peek() && + depth_first::cmp(path, upper) != Ordering::Less { targets.next(); - trace!( - target: TRACE_TARGET, - target = ?targets.peek(), - "target < path, next target", - ); + trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); } // If the node in question is a prefix of the target then we retain - targets.peek().is_some_and(|t| t.starts_with(path)) + targets.peek().is_some_and(|(lower, _)| lower.starts_with(path)) } /// Takes a child which has been removed from the `child_stack` and converts it to an @@ -265,7 +238,7 @@ where /// therefore can be retained as a proof node if applicable. fn commit_child( &mut self, - targets: &mut Peekable>, + targets: &mut TargetsIter>, child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { @@ -276,6 +249,8 @@ where // If we should retain the child then do so. if self.should_retain(targets, &child_path) { + trace!(target: TRACE_TARGET, ?child_path, "Retaining child"); + // Convert to `ProofTrieNode`, which will be what is retained. 
// // If this node is a leaf then the `rlp_encode_buf` is taken by it and a new one will be @@ -286,10 +261,12 @@ where self.rlp_encode_buf.clear(); let proof_node = child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; - // Use the `ProofTrieNode` to encode the `RlpNode` and push that into the - // `rlp_nodes_buf`. + // Use the `ProofTrieNode` to encode the `RlpNode`, and then push it onto retained + // nodes before returning. self.rlp_encode_buf.clear(); proof_node.node.encode(&mut self.rlp_encode_buf); + + self.retained_proofs.push(proof_node); return Ok(RlpNode::from_rlp(&self.rlp_encode_buf)); } @@ -316,7 +293,7 @@ where /// This method panics if `branch_stack` is empty. fn pop_branch( &mut self, - targets: &mut Peekable>, + targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { let mut rlp_nodes_buf = self.take_rlp_nodes_buf(); let branch = self.branch_stack.pop().expect("branch_stack cannot be empty"); @@ -403,7 +380,7 @@ where /// creating a new one depending on the path of the key. fn add_leaf( &mut self, - targets: &mut Peekable>, + targets: &mut TargetsIter>, key: Nibbles, val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { @@ -479,26 +456,26 @@ where // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] - let mut targets = { + let targets = { let mut prev: Option = None; - targets - .into_iter() - .inspect(move |target| { - if let Some(prev) = prev { - debug_assert!( - prev <= *target, - "targets must be sorted lexicographically: {:?} > {:?}", - prev, - target - ); - } - prev = Some(*target); - }) - .peekable() + targets.into_iter().inspect(move |target| { + if let Some(prev) = prev { + debug_assert!( + prev <= *target, + "targets must be sorted lexicographically: {:?} > {:?}", + prev, + target + ); + } + prev = Some(*target); + }) }; #[cfg(not(debug_assertions))] - let mut targets = targets.into_iter().peekable(); + let targets = targets.into_iter(); + + // Wrap targets into a `TargetsIter`. 
+ let mut targets = WindowIter::new(targets).peekable(); // If there are no targets then nothing could be returned, return early. if targets.peek().is_none() { @@ -654,13 +631,49 @@ impl Iterator for TrieMaskIter { } } +/// `WindowIter` is a wrapper around an [`Iterator`] which allows viewing both previous and current +/// items on every iteration. It is similar to `itertools::tuple_windows`, except that the final +/// item returned will contain the previous item and `None` as the current. +struct WindowIter { + iter: I, + prev: Option, +} + +impl WindowIter { + /// Wraps an iterator with a [`WindowIter`]. + const fn new(iter: I) -> Self { + Self { iter, prev: None } + } +} + +impl> Iterator for WindowIter { + /// The iterator returns the previous and current items, respectively. If the underlying + /// iterator is exhausted then `Some(prev, None)` is returned on the subsequent call to + /// `WindowIter::next`, and `None` from the call after that. + type Item = (I::Item, Option); + + fn next(&mut self) -> Option { + match (self.prev, self.iter.next()) { + (None, None) => None, + (None, Some(v)) => { + self.prev = Some(v); + self.next() + } + (Some(v), next) => { + self.prev = next; + Some((v, next)) + } + } + } +} + #[cfg(test)] mod tests { use super::*; use crate::{ hashed_cursor::{mock::MockHashedCursorFactory, HashedCursorFactory}, proof::Proof, - trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, + trie_cursor::{depth_first, mock::MockTrieCursorFactory, TrieCursorFactory}, }; use alloy_primitives::map::{B256Map, B256Set}; use alloy_rlp::Decodable; @@ -800,7 +813,7 @@ mod tests { }, } }) - .sorted_by_key(|n| n.path) + .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) .collect::>(); // When no targets are given the legacy implementation will still produce the root node From a1def3d281df6f5074ff3e3acfe4b9a2caf565a7 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 24 Nov 2025 14:39:00 +0100 Subject: [PATCH 07/59] WIP: got it working --- 
crates/trie/trie/src/proof_v2/mod.rs | 364 ++++++++++++++++----------- 1 file changed, 222 insertions(+), 142 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 992017c6558..25e6c4be814 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -108,100 +108,6 @@ where .unwrap_or_else(|| Vec::with_capacity(16)) } - /// Pushes a new branch onto the `branch_stack`, while also pushing the given leaf onto the - /// `child_stack`. - /// - /// This method expects that there already exists a child on the `child_stack`, and that that - /// child has a non-zero short key. The new branch is constructed based on the top child from - /// the `child_stack` and the given leaf. - fn push_new_branch(&mut self, leaf_key: Nibbles, leaf_val: VE::DeferredEncoder) { - // First determine the new leaf's shortkey relative to the current branch. If there is no - // current branch then the short key is the full key. - let leaf_short_key = if self.branch_stack.is_empty() { - leaf_key - } else { - // When there is a current branch then trim off its path as well as the nibble that it - // has set for this leaf. - trim_nibbles_prefix(&leaf_key, self.branch_path.len() + 1) - }; - - trace!( - target: TRACE_TARGET, - ?leaf_short_key, - branch_path = ?self.branch_path, - "push_new_branch: called", - ); - - // Get the new branch's first child, which is the child on the top of the stack with which - // the new leaf shares the same nibble on the current branch. - let first_child = self - .child_stack - .last_mut() - .expect("push_branch can't be called with empty child_stack"); - - let first_child_short_key = first_child.short_key(); - debug_assert!( - !first_child_short_key.is_empty(), - "push_branch called when top child on stack is not a leaf or extension with a short key", - ); - - // Determine how many nibbles are shared between the new branch's first child and the new - // leaf. 
This common prefix will be the extension of the new branch - let common_prefix_len = first_child_short_key.common_prefix_length(&leaf_short_key); - - // Trim off the common prefix from the first child's short key, plus one nibble which will - // stored by the new branch itself in its state mask. - let first_child_nibble = first_child_short_key.get_unchecked(common_prefix_len); - first_child.trim_short_key_prefix(common_prefix_len + 1); - - // Similarly, trim off the common prefix, plus one nibble for the new branch, from the new - // leaf's short key. - let leaf_nibble = leaf_short_key.get_unchecked(common_prefix_len); - let leaf_short_key = trim_nibbles_prefix(&leaf_short_key, common_prefix_len + 1); - - // Push the new leaf onto the child stack; it will be the second child of the new branch. - // The new branch's first child is the child already on the top of the stack, for which - // we've already adjusted its short key. - self.child_stack - .push(ProofTrieBranchChild::Leaf { short_key: leaf_short_key, value: leaf_val }); - - // Construct the state mask of the new branch, and push the new branch onto the branch - // stack. - self.branch_stack.push(ProofTrieBranch { - ext_len: common_prefix_len as u8, - state_mask: { - let mut m = TrieMask::default(); - m.set_bit(first_child_nibble); - m.set_bit(leaf_nibble); - m - }, - tree_mask: TrieMask::default(), - hash_mask: TrieMask::default(), - }); - - // Update the branch path to reflect the new branch which was just pushed. Its path will be - // the path of the previous branch, plus the nibble shared by each child, plus the parent - // extension (denoted by a non-zero `ext_len`). Since the new branch's path is a prefix of - // the original leaf_key we can just slice that. - // - // If the branch is the first branch then we do not add the extra 1, as there is no nibble - // in a parent branch to account for. 
- let branch_path_len = self.branch_path.len() + - common_prefix_len + - if self.branch_stack.len() == 1 { 0 } else { 1 }; - self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); - - trace!( - target: TRACE_TARGET, - ?leaf_short_key, - ?common_prefix_len, - new_branch = ?self.branch_stack.last().expect("branch_stack was just pushed to"), - ?branch_path_len, - branch_path = ?self.branch_path, - "push_new_branch: returning", - ); - } - /// Returns true if the proof of a node at the given path should be retained. This may move the /// `targets` iterator forward if the given path comes after the current target. fn should_retain( @@ -242,6 +148,8 @@ where child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { + trace!(target: TRACE_TARGET, ?child_path, "commit_child: called"); + // If the child is already an `RlpNode` then there is nothing to do. if let ProofTrieBranchChild::RlpNode(rlp_node) = child { return Ok(rlp_node) @@ -284,6 +192,164 @@ where Ok(child_rlp_node) } + /// Returns the path of the child on top of the `child_stack`, or the root path if the stack is + /// empty. + fn last_child_path(&self) -> Nibbles { + // If there is no branch under construction then the top child must be the root child. + let Some(branch) = self.branch_stack.last() else { + return Nibbles::new(); + }; + + debug_assert_ne!(branch.state_mask.get(), 0, "branch.state_mask can never be zero"); + // TODO export BITS off of `TrieMask`. + let last_nibble = u16::BITS - branch.state_mask.leading_zeros() - 1; + + let mut child_path = self.branch_path; + debug_assert!(child_path.len() < 64); + child_path.push_unchecked(last_nibble as u8); + child_path + } + + /// Calls [`Self::commit_child`] on the last child of `child_stack`, replacing it with a + /// [`ProofTrieBranchChild::RlpNode`]. + /// + /// NOTE that this method call relies on the `state_mask` of the top branch of the + /// `branch_stack` to determine the last child's path. 
When committing the last child prior to + /// pushing a new child, it's important to set the new child's `state_mask` bit _after_ the call + /// to this method. + /// + /// # Panics + /// + /// This method panics if the `child_stack` is empty. + fn commit_last_child( + &mut self, + targets: &mut TargetsIter>, + ) -> Result<(), StateProofError> { + let child = self + .child_stack + .pop() + .expect("`commit_last_child` cannot be called with empty `child_stack`"); + + // If the child is already an `RlpNode` then there is nothing to do, push it back on with no + // changes. + if let ProofTrieBranchChild::RlpNode(_) = child { + self.child_stack.push(child); + return Ok(()) + } + + let child_path = self.last_child_path(); + let child_rlp_node = self.commit_child(targets, child_path, child)?; + + // Replace the child on the stack + self.child_stack.push(ProofTrieBranchChild::RlpNode(child_rlp_node)); + Ok(()) + } + + /// Pushes a new branch onto the `branch_stack`, while also pushing the given leaf onto the + /// `child_stack`. + /// + /// This method expects that there already exists a child on the `child_stack`, and that that + /// child has a non-zero short key. The new branch is constructed based on the top child from + /// the `child_stack` and the given leaf. + fn push_new_branch( + &mut self, + targets: &mut TargetsIter>, + leaf_key: Nibbles, + leaf_val: VE::DeferredEncoder, + ) -> Result<(), StateProofError> { + // First determine the new leaf's shortkey relative to the current branch. If there is no + // current branch then the short key is the full key. + let leaf_short_key = if self.branch_stack.is_empty() { + leaf_key + } else { + // When there is a current branch then trim off its path as well as the nibble that it + // has set for this leaf. 
+ trim_nibbles_prefix(&leaf_key, self.branch_path.len() + 1) + }; + + trace!( + target: TRACE_TARGET, + ?leaf_short_key, + branch_path = ?self.branch_path, + "push_new_branch: called", + ); + + // Get the new branch's first child, which is the child on the top of the stack with which + // the new leaf shares the same nibble on the current branch. + let first_child = self + .child_stack + .last_mut() + .expect("push_new_branch can't be called with empty child_stack"); + + let first_child_short_key = first_child.short_key(); + debug_assert!( + !first_child_short_key.is_empty(), + "push_new_branch called when top child on stack is not a leaf or extension with a short key", + ); + + // Determine how many nibbles are shared between the new branch's first child and the new + // leaf. This common prefix will be the extension of the new branch + let common_prefix_len = first_child_short_key.common_prefix_length(&leaf_short_key); + + // Trim off the common prefix from the first child's short key, plus one nibble which will + // stored by the new branch itself in its state mask. + let first_child_nibble = first_child_short_key.get_unchecked(common_prefix_len); + first_child.trim_short_key_prefix(common_prefix_len + 1); + + // Similarly, trim off the common prefix, plus one nibble for the new branch, from the new + // leaf's short key. + let leaf_nibble = leaf_short_key.get_unchecked(common_prefix_len); + let leaf_short_key = trim_nibbles_prefix(&leaf_short_key, common_prefix_len + 1); + + // Push the new branch onto the branch stack. We do not yet set the `state_mask` bit of the + // new leaf so that we can first commit the branch's first child. + self.branch_stack.push(ProofTrieBranch { + ext_len: common_prefix_len as u8, + state_mask: TrieMask::new(1 << first_child_nibble), + tree_mask: TrieMask::default(), + hash_mask: TrieMask::default(), + }); + + // Update the branch path to reflect the new branch which was just pushed. 
Its path will be + // the path of the previous branch, plus the nibble shared by each child, plus the parent + // extension (denoted by a non-zero `ext_len`). Since the new branch's path is a prefix of + // the original leaf_key we can just slice that. + // + // If the branch is the first branch then we do not add the extra 1, as there is no nibble + // in a parent branch to account for. + let branch_path_len = self.branch_path.len() + + common_prefix_len + + if self.branch_stack.len() == 1 { 0 } else { 1 }; + self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); + + // Before pushing the new leaf onto the `child_stack` we need to commit the previous last + // child (ie the first child of this new branch), so that only `child_stack`'s final child + // is a non-RlpNode. We have already adjusted this child's short-key to be correct. + self.commit_last_child(targets)?; + + // Once the first child is committed we set the new child's bit on the new branch's + // `state_mask` and push that child; it will be the second child of the new branch. + self.branch_stack + .last_mut() + .expect("branch was just pushed") + .state_mask + .set_bit(leaf_nibble); + + self.child_stack + .push(ProofTrieBranchChild::Leaf { short_key: leaf_short_key, value: leaf_val }); + + trace!( + target: TRACE_TARGET, + ?leaf_short_key, + ?common_prefix_len, + new_branch = ?self.branch_stack.last().expect("branch_stack was just pushed to"), + ?branch_path_len, + branch_path = ?self.branch_path, + "push_new_branch: returning", + ); + + Ok(()) + } /// Pops the top branch off of the `branch_stack`, hashes its children on the `child_stack`, and /// replaces those children on the `child_stack`. The `branch_path` field will be updated /// accordingly. 
@@ -295,49 +361,41 @@ where &mut self, targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { - let mut rlp_nodes_buf = self.take_rlp_nodes_buf(); - let branch = self.branch_stack.pop().expect("branch_stack cannot be empty"); - trace!( target: TRACE_TARGET, - ?branch, + branch = ?self.branch_stack.last(), branch_path = ?self.branch_path, + child_stack_len = ?self.child_stack.len(), "pop_branch: called", ); + // Ensure the final child on the child stack has been committed, as this method expects all + // children of the branch to have been committed. + self.commit_last_child(targets)?; + + let mut rlp_nodes_buf = self.take_rlp_nodes_buf(); + let branch = self.branch_stack.pop().expect("branch_stack cannot be empty"); + // Take the branch's children off the stack, using the state mask to determine how many // there are. let num_children = branch.state_mask.count_ones() as usize; debug_assert!(num_children > 1, "A branch must have at least two children"); debug_assert!( self.child_stack.len() >= num_children, - "Stack is missing necessary children" + "Stack is missing necessary children ({num_children:?})" ); - // We have to take the child_stack off self so we can still call other methods while - // draining it. - let mut child_stack = core::mem::take(&mut self.child_stack); - let children = child_stack.drain(child_stack.len() - num_children..); - - // Create an iterator over the paths of each child in the branch. - let child_paths = { - let branch_path = self.branch_path; - TrieMaskIter(branch.state_mask).map(move |nibble| { - let mut child_path = branch_path; - child_path.push_unchecked(nibble); - child_path - }) - }; - // Collect children into an `RlpNode` Vec by committing and pushing each of them. - for (child_path, child) in child_paths.zip(children) { - let child_rlp_node = self.commit_child(targets, child_path, child)?; + for child in self.child_stack.drain(self.child_stack.len() - num_children..) 
{ + let ProofTrieBranchChild::RlpNode(child_rlp_node) = child else { + panic!( + "all branch child must have been committed, found {}", + std::any::type_name_of_val(&child) + ); + }; rlp_nodes_buf.push(child_rlp_node); } - // Put the child_stack back, now that we're done draining from it. - let _ = core::mem::replace(&mut self.child_stack, child_stack); - debug_assert_eq!( rlp_nodes_buf.len(), branch.state_mask.count_ones() as usize, @@ -385,10 +443,20 @@ where val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { loop { - // Get the branch currently being built. If there are no branches on the stack then it - // means either the trie is empty or only a single leaf has been added previously. - let curr_branch = match self.branch_stack.last_mut() { - Some(curr_branch) => curr_branch, + trace!( + target: TRACE_TARGET, + ?key, + branch_stack_len = ?self.branch_stack.len(), + branch_path = ?self.branch_path, + child_stack_len = ?self.child_stack.len(), + "add_leaf loop", + ); + + // Get the `state_mask` of the branch currently being built. If there are no branches on + // the stack then it means either the trie is empty or only a single leaf has been added + // previously. + let curr_branch_state_mask = match self.branch_stack.last() { + Some(curr_branch) => curr_branch.state_mask, None if self.child_stack.is_empty() => { // If the child stack is empty then this is the first leaf, push it and be done self.child_stack @@ -405,7 +473,7 @@ where .expect("already checked for emptiness") .short_key() .is_empty()); - self.push_new_branch(key, val); + self.push_new_branch(targets, key, val)?; return Ok(()) } }; @@ -427,11 +495,21 @@ where // directly, otherwise a new branch must be created in-between this branch and that // existing child. let nibble = key.get_unchecked(common_prefix_len); - if curr_branch.state_mask.is_bit_set(nibble) { + if curr_branch_state_mask.is_bit_set(nibble) { // This method will also push the new leaf onto the `child_stack`. 
- self.push_new_branch(key, val); + self.push_new_branch(targets, key, val)?; } else { - curr_branch.state_mask.set_bit(nibble); + // Commit the previous child of the branch, so that only `child_stack`'s final child + // is a non-RlpNode. This must be done prior to setting the `state_mask` bit for the + // new child. + self.commit_last_child(targets)?; + + // Set `state_mask` bit for the new child. + self.branch_stack + .last_mut() + .expect("already checked that `branch_stack` isn't empty") + .state_mask + .set_bit(nibble); // Add this leaf as a new child of the current branch (no intermediate branch // needed). @@ -491,7 +569,18 @@ where let mut hashed_cursor_current = self.hashed_cursor.seek(B256::ZERO)?; loop { - trace!(target: TRACE_TARGET, ?hashed_cursor_current, "proof_inner loop"); + trace!( + target: TRACE_TARGET, + ?hashed_cursor_current, + branch_stack_len = ?self.branch_stack.len(), + branch_path = ?self.branch_path, + child_stack_len = ?self.child_stack.len(), + "proof_inner loop", + ); + + // Sanity check before making any further changes: + // If there is a branch, there must be at least two children + debug_assert!(self.branch_stack.last().is_none_or(|_| self.child_stack.len() >= 2)); // Fetch the next leaf from the hashed cursor, converting the key to Nibbles and // immediately creating the DeferredValueEncoder so that encoding of the leaf value can @@ -536,6 +625,11 @@ where }; self.retained_proofs.push(root_node); + trace!( + target: TRACE_TARGET, + retained_proofs_len = ?self.retained_proofs.len(), + "proof_inner: returning", + ); Ok(core::mem::take(&mut self.retained_proofs)) } } @@ -617,20 +711,6 @@ where } } -/// A helper type for iterating over the indexes of the non-zero bits of a [`TrieMask`]. 
-struct TrieMaskIter(TrieMask); - -impl Iterator for TrieMaskIter { - type Item = u8; - fn next(&mut self) -> Option { - let bit = self.0.first_set_bit_index(); - if let Some(bit) = bit { - self.0.unset_bit(bit); - } - bit - } -} - /// `WindowIter` is a wrapper around an [`Iterator`] which allows viewing both previous and current /// items on every iteration. It is similar to `itertools::tuple_windows`, except that the final /// item returned will contain the previous item and `None` as the current. From f5d06582016edf5391004a4f08894d3a5d5cd2aa Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 24 Nov 2025 15:10:39 +0100 Subject: [PATCH 08/59] WIP: cleanup --- crates/trie/trie/src/proof_v2/mod.rs | 79 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 25e6c4be814..f97dabbec21 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -148,8 +148,6 @@ where child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { - trace!(target: TRACE_TARGET, ?child_path, "commit_child: called"); - // If the child is already an `RlpNode` then there is nothing to do. if let ProofTrieBranchChild::RlpNode(rlp_node) = child { return Ok(rlp_node) @@ -245,6 +243,37 @@ where Ok(()) } + /// Pushes a new leaf node onto a branch, setting its `state_mask` bit. + /// + /// # Panics + /// + /// - If `branch_stack` is empty + /// - If the leaf's nibble is already set in the branch's `state_mask`. + fn push_new_leaf( + &mut self, + targets: &mut TargetsIter>, + leaf_nibble: u8, + leaf_short_key: Nibbles, + leaf_val: VE::DeferredEncoder, + ) -> Result<(), StateProofError> { + // Before pushing the new leaf onto the `child_stack` we need to commit the previous last + // child (ie the first child of this new branch), so that only `child_stack`'s final child + // is a non-RlpNode. 
+ self.commit_last_child(targets)?; + + // Once the first child is committed we set the new child's bit on the top branch's + // `state_mask` and push that child. + let branch = self.branch_stack.last_mut().expect("branch_stack cannot be empty"); + + debug_assert!(!branch.state_mask.is_bit_set(leaf_nibble)); + branch.state_mask.set_bit(leaf_nibble); + + self.child_stack + .push(ProofTrieBranchChild::Leaf { short_key: leaf_short_key, value: leaf_val }); + + Ok(()) + } + /// Pushes a new branch onto the `branch_stack`, while also pushing the given leaf onto the /// `child_stack`. /// @@ -302,7 +331,7 @@ where let leaf_short_key = trim_nibbles_prefix(&leaf_short_key, common_prefix_len + 1); // Push the new branch onto the branch stack. We do not yet set the `state_mask` bit of the - // new leaf so that we can first commit the branch's first child. + // new leaf; `push_new_leaf` will do that. self.branch_stack.push(ProofTrieBranch { ext_len: common_prefix_len as u8, state_mask: TrieMask::new(1 << first_child_nibble), @@ -322,21 +351,9 @@ where if self.branch_stack.len() == 1 { 0 } else { 1 }; self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); - // Before pushing the new leaf onto the `child_stack` we need to commit the previous last - // child (ie the first child of this new branch), so that only `child_stack`'s final child - // is a non-RlpNode. We have already adjusted this child's short-key to be correct. - self.commit_last_child(targets)?; - - // Once the first child is committed we set the new child's bit on the new branch's - // `state_mask` and push that child; it will be the second child of the new branch. - self.branch_stack - .last_mut() - .expect("branch was just pushed") - .state_mask - .set_bit(leaf_nibble); - - self.child_stack - .push(ProofTrieBranchChild::Leaf { short_key: leaf_short_key, value: leaf_val }); + // Push the new leaf onto the new branch. 
This step depends on the top branch being in the + // correct state, so must be done last. + self.push_new_leaf(targets, leaf_nibble, leaf_short_key, leaf_val)?; trace!( target: TRACE_TARGET, @@ -449,7 +466,7 @@ where branch_stack_len = ?self.branch_stack.len(), branch_path = ?self.branch_path, child_stack_len = ?self.child_stack.len(), - "add_leaf loop", + "add_leaf: loop", ); // Get the `state_mask` of the branch currently being built. If there are no branches on @@ -499,24 +516,8 @@ where // This method will also push the new leaf onto the `child_stack`. self.push_new_branch(targets, key, val)?; } else { - // Commit the previous child of the branch, so that only `child_stack`'s final child - // is a non-RlpNode. This must be done prior to setting the `state_mask` bit for the - // new child. - self.commit_last_child(targets)?; - - // Set `state_mask` bit for the new child. - self.branch_stack - .last_mut() - .expect("already checked that `branch_stack` isn't empty") - .state_mask - .set_bit(nibble); - - // Add this leaf as a new child of the current branch (no intermediate branch - // needed). 
- self.child_stack.push(ProofTrieBranchChild::Leaf { - short_key: key.slice_unchecked(common_prefix_len + 1, key.len()), - value: val, - }); + let short_key = key.slice_unchecked(common_prefix_len + 1, key.len()); + self.push_new_leaf(targets, nibble, short_key, val)?; } return Ok(()) @@ -530,7 +531,7 @@ where value_encoder: &VE, targets: impl IntoIterator, ) -> Result, StateProofError> { - trace!(target: TRACE_TARGET, "proof_inner called"); + trace!(target: TRACE_TARGET, "proof_inner: called"); // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] @@ -575,7 +576,7 @@ where branch_stack_len = ?self.branch_stack.len(), branch_path = ?self.branch_path, child_stack_len = ?self.child_stack.len(), - "proof_inner loop", + "proof_inner: loop", ); // Sanity check before making any further changes: From 9bbb792a6a1c1fd0478ddc0c886cef6f633a2908 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 24 Nov 2025 16:57:12 +0100 Subject: [PATCH 09/59] WIP: benches --- crates/trie/trie/Cargo.toml | 7 + crates/trie/trie/benches/proof_v2.rs | 250 ++++++++++++++++++++++ crates/trie/trie/src/hashed_cursor/mod.rs | 2 +- crates/trie/trie/src/lib.rs | 2 +- crates/trie/trie/src/trie_cursor/mod.rs | 2 +- 5 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 crates/trie/trie/benches/proof_v2.rs diff --git a/crates/trie/trie/Cargo.toml b/crates/trie/trie/Cargo.toml index 403d187e46a..504f1bc6c2f 100644 --- a/crates/trie/trie/Cargo.toml +++ b/crates/trie/trie/Cargo.toml @@ -42,6 +42,7 @@ metrics = { workspace = true, optional = true } # `test-utils` feature triehash = { workspace = true, optional = true } +parking_lot = { workspace = true, optional = true } [dev-dependencies] # reth @@ -86,6 +87,7 @@ serde = [ ] test-utils = [ "triehash", + "parking_lot", "reth-primitives-traits/test-utils", "reth-trie-common/test-utils", "reth-ethereum-primitives/test-utils", @@ -101,3 +103,8 @@ harness = false name = "trie_root" required-features = ["test-utils"] 
harness = false + +[[bench]] +name = "proof_v2" +required-features = ["test-utils"] +harness = false diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs new file mode 100644 index 00000000000..250e6b576b9 --- /dev/null +++ b/crates/trie/trie/benches/proof_v2.rs @@ -0,0 +1,250 @@ +#![allow(missing_docs, unreachable_pub)] +use alloy_primitives::{ + map::{B256Map, B256Set}, + B256, U256, +}; +use alloy_rlp::Decodable; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use itertools::Itertools; +use proptest::{prelude::*, strategy::ValueTree, test_runner::TestRunner}; +use reth_primitives_traits::Account; +use reth_trie::{ + hashed_cursor::{mock::MockHashedCursorFactory, HashedCursorFactory}, + proof::Proof, + proof_v2::{ProofCalculator, SyncAccountValueEncoder}, + trie_cursor::{depth_first, mock::MockTrieCursorFactory, TrieCursorFactory}, +}; +use reth_trie_common::{ + HashedPostState, MultiProofTargets, Nibbles, ProofTrieNode, TrieMasks, TrieNode, +}; +use std::collections::BTreeMap; + +/// Benchmark for proof_v2 implementation. +/// +/// This benchmark tests the performance of the new proof calculator that generates +/// merkle proofs using only leaf data, across multiple dataset sizes and target counts. 
+pub fn proof_v2_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("Proof V2"); + group.sample_size(20); + + // Test across multiple dataset sizes and target counts + for dataset_size in [100, 500, 1_000] { + for num_targets in [1, 10, 50, 100] { + // Skip combinations where targets > dataset (doesn't make sense) + if num_targets > dataset_size { + continue; + } + + let (hashed_post_state, targets, _target_b256s) = + generate_test_data(dataset_size, num_targets); + + // Create mock cursor factories from the hashed post state + let (trie_cursor_factory, hashed_cursor_factory) = + create_cursor_factories(&hashed_post_state); + + // Benchmark ID includes both dimensions: dataset_size/num_targets + let bench_id = format!("v2/dataset_{}/targets_{}", dataset_size, num_targets); + group.bench_function(BenchmarkId::new("account_proof", bench_id), |b| { + b.iter(|| { + let trie_cursor = trie_cursor_factory + .account_trie_cursor() + .expect("Failed to create trie cursor"); + let hashed_cursor = hashed_cursor_factory + .hashed_account_cursor() + .expect("Failed to create hashed cursor"); + + let value_encoder = SyncAccountValueEncoder::new( + trie_cursor_factory.clone(), + hashed_cursor_factory.clone(), + ); + + let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); + proof_calculator + .proof(&value_encoder, targets.clone()) + .expect("Proof generation failed") + }) + }); + } + } +} + +/// Benchmark for legacy proof implementation. +/// +/// This benchmark tests the performance of the original proof calculator that uses +/// trie walking for comparison with the proof_v2 implementation, across multiple +/// dataset sizes and target counts. 
+pub fn proof_legacy_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("Proof V2"); + group.sample_size(20); + + // Test across multiple dataset sizes and target counts + for dataset_size in [100, 500, 1_000] { + for num_targets in [1, 10, 50, 100] { + // Skip combinations where targets > dataset (doesn't make sense) + if num_targets > dataset_size { + continue; + } + + let (hashed_post_state, _targets, target_b256s) = + generate_test_data(dataset_size, num_targets); + + // Create mock cursor factories from the hashed post state + let (trie_cursor_factory, hashed_cursor_factory) = + create_cursor_factories(&hashed_post_state); + + // Convert B256 targets to MultiProofTargets (account targets with empty storage sets) + let legacy_targets: MultiProofTargets = + target_b256s.iter().map(|addr| (*addr, B256Set::default())).collect(); + + // Benchmark ID includes both dimensions: dataset_size/num_targets + let bench_id = format!("legacy/dataset_{}/targets_{}", dataset_size, num_targets); + // Benchmark account proof generation using legacy implementation + // This includes decoding and sorting to match what proof_v2 returns + group.bench_function(BenchmarkId::new("account_proof", bench_id), |b| { + b.iter(|| { + let proof_result = + Proof::new(trie_cursor_factory.clone(), hashed_cursor_factory.clone()) + .multiproof(legacy_targets.clone()) + .expect("Legacy proof generation failed"); + + // Decode and sort legacy proof nodes (same as in proof_v2 tests) + let _proof_nodes: Vec = proof_result + .account_subtree + .iter() + .map(|(path, node_enc)| { + let mut buf = node_enc.as_ref(); + let node = TrieNode::decode(&mut buf) + .expect("legacy implementation should produce valid proof nodes"); + + ProofTrieNode { + path: *path, + node, + masks: TrieMasks { + hash_mask: proof_result + .branch_node_hash_masks + .get(path) + .copied(), + tree_mask: proof_result + .branch_node_tree_masks + .get(path) + .copied(), + }, + } + }) + .sorted_by(|a, b| 
depth_first::cmp(&a.path, &b.path)) + .collect(); + }) + }); + } + } +} + +/// Generate test data for benchmarking. +/// +/// Returns a tuple of: +/// - `HashedPostState` with random accounts +/// - Proof targets (Nibbles) that are 80% from existing accounts, 20% random +/// - Proof targets (B256) for legacy implementation +fn generate_test_data( + dataset_size: usize, + num_targets: usize, +) -> (HashedPostState, Vec, Vec) { + let mut runner = TestRunner::deterministic(); + + // Generate random accounts + let accounts_strategy = + proptest::collection::vec((any::<[u8; 32]>(), account_strategy()), dataset_size); + + let accounts = accounts_strategy.new_tree(&mut runner).unwrap().current(); + + // Convert to HashedPostState + let account_map: B256Map<_> = accounts + .iter() + .map(|(addr_bytes, account)| (B256::from(*addr_bytes), Some(*account))) + .collect(); + + // All accounts have empty storages + let storages = + account_map.keys().copied().map(|addr| (addr, Default::default())).collect::>(); + + let hashed_post_state = HashedPostState { accounts: account_map.clone(), storages }; + + // Generate proof targets: 80% from existing accounts, 20% random + let account_keys: Vec = account_map.keys().copied().collect(); + + let targets_strategy = proptest::collection::vec( + prop::bool::weighted(0.8).prop_flat_map(move |from_accounts| { + if from_accounts && !account_keys.is_empty() { + prop::sample::select(account_keys.clone()).boxed() + } else { + any::<[u8; 32]>().prop_map(B256::from).boxed() + } + }), + num_targets, + ); + + let target_b256s = targets_strategy.new_tree(&mut runner).unwrap().current(); + + // Convert B256 targets to sorted Nibbles + let mut targets: Vec = target_b256s + .iter() + .map(|b256| { + // SAFETY: B256 is exactly 32 bytes + unsafe { Nibbles::unpack_unchecked(b256.as_slice()) } + }) + .collect(); + targets.sort(); + + (hashed_post_state, targets, target_b256s) +} + +/// Generate a strategy for Account values +fn account_strategy() -> impl 
Strategy { + (any::(), any::(), any::<[u8; 32]>()).prop_map(|(nonce, balance, code_hash)| { + Account { nonce, balance: U256::from(balance), bytecode_hash: Some(B256::from(code_hash)) } + }) +} + +/// Create cursor factories from a `HashedPostState`. +/// +/// This mimics the test harness pattern from the proof_v2 tests. +fn create_cursor_factories( + post_state: &HashedPostState, +) -> (MockTrieCursorFactory, MockHashedCursorFactory) { + // Extract accounts from post state, filtering out None (deleted accounts) + let hashed_accounts: BTreeMap = post_state + .accounts + .iter() + .filter_map(|(addr, account)| account.map(|acc| (*addr, acc))) + .collect(); + + // Extract storage tries from post state + let hashed_storage_tries: B256Map> = post_state + .storages + .iter() + .map(|(addr, hashed_storage)| { + // Convert HashedStorage to BTreeMap, filtering out zero values (deletions) + let storage_map: BTreeMap = hashed_storage + .storage + .iter() + .filter_map(|(slot, value)| (*value != U256::ZERO).then_some((*slot, *value))) + .collect(); + (*addr, storage_map) + }) + .collect(); + + // Ensure that there's a storage trie dataset for every storage trie, even if empty + let storage_trie_nodes: B256Map> = + hashed_storage_tries.keys().copied().map(|addr| (addr, Default::default())).collect(); + + // Create mock hashed cursor factory populated with the post state data + let hashed_cursor_factory = MockHashedCursorFactory::new(hashed_accounts, hashed_storage_tries); + + // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) + let trie_cursor_factory = MockTrieCursorFactory::new(BTreeMap::new(), storage_trie_nodes); + + (trie_cursor_factory, hashed_cursor_factory) +} + +criterion_group!(proof_comparison, proof_v2_benchmark, proof_legacy_benchmark); +criterion_main!(proof_comparison); diff --git a/crates/trie/trie/src/hashed_cursor/mod.rs b/crates/trie/trie/src/hashed_cursor/mod.rs index 90d70c1c68c..3b9d29964f8 100644 --- 
a/crates/trie/trie/src/hashed_cursor/mod.rs +++ b/crates/trie/trie/src/hashed_cursor/mod.rs @@ -10,7 +10,7 @@ pub use post_state::*; pub mod noop; /// Mock trie cursor implementations. -#[cfg(test)] +#[cfg(any(test, feature = "test-utils"))] pub mod mock; /// Metrics tracking hashed cursor implementations. diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index aebcbab7af8..aef322fb7cc 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -64,7 +64,7 @@ pub mod metrics; pub mod test_utils; /// Collection of mock types for testing. -#[cfg(test)] +#[cfg(any(test, feature = "test-utils"))] pub mod mock; /// Verification of existing stored trie nodes against state data. diff --git a/crates/trie/trie/src/trie_cursor/mod.rs b/crates/trie/trie/src/trie_cursor/mod.rs index 32fd17c996e..ce6b852af43 100644 --- a/crates/trie/trie/src/trie_cursor/mod.rs +++ b/crates/trie/trie/src/trie_cursor/mod.rs @@ -15,7 +15,7 @@ pub mod noop; pub mod depth_first; /// Mock trie cursor implementations. -#[cfg(test)] +#[cfg(any(test, feature = "test-utils"))] pub mod mock; /// Metrics tracking trie cursor implementations. From f29fd85874afc7c6db70e89da5567c451e0a0477 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 24 Nov 2025 17:39:19 +0100 Subject: [PATCH 10/59] docs --- crates/trie/trie/src/proof_v2/mod.rs | 61 +++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index f97dabbec21..91133a06ab7 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -55,6 +55,13 @@ pub struct ProofCalculator { /// The children for the bottom branch in `branch_stack` are found at the bottom of this stack, /// and so on. When a branch is removed from `branch_stack` its children are removed from this /// one, and the branch is pushed onto this stack in their place (see [`Self::pop_branch`]. 
+ /// + /// Children on the `child_stack` are converted to [`ProofTrieBranchChild::RlpNode`]s via the + /// [`Self::commit_child`] method. Committing a child indicates that no further changes are + /// expected to happen to it (e.g. splitting its short key when inserting a new branch). Given + /// that keys are consumed in lexicographical order, only the most last child on the stack can + /// ever be modified, and therefore all children besides the last are expected to be + /// [`ProofTrieBranchChild::RlpNode`]s. child_stack: Vec>, /// The proofs which will be returned from the calculation. This gets taken at the end of every /// proof call. @@ -110,6 +117,38 @@ where /// Returns true if the proof of a node at the given path should be retained. This may move the /// `targets` iterator forward if the given path comes after the current target. + /// + /// This method takes advantage of the [`WindowIter`] component of [`TargetsIter`] to only check + /// a single target at a time. The [`WindowIter`] allows us to look at a current target and the + /// next target simultaneously, forming an end-exclusive range. + /// + /// ``` + /// * targets: [ 0x012, 0x045, 0x678 ] + /// * targets.next() returns: + /// - (0x012, Some(0x045)): covers (0x012..0x045) + /// - (0x045, Some(0x678)): covers (0x045..0x678) + /// - (0x678, None): covers (0x678..) + /// ``` + /// + /// As long as the path which is passed in lies within that range we can continue to use the + /// current target. Once the path goes beyond that range (ie path >= next target) then we can be + /// sure that no further paths will be in the range, and we can iterate forward. + /// + /// ``` + /// * Given: + /// - path: 0x04 + /// - targets returns (0x012, Some(0x045)) + /// + /// * 0x04 comes _after_ 0x045 in depth-first order, so (0x012..0x045) does not contain 0x04. + /// + /// * targets.next() is called. + /// * targets.peek() now returns (0x045, Some(0x678)). This does contain 0x04. 
+ /// * 0x04 is a prefix of 0x045, and so is retained. + /// ``` + /// + /// Because paths in the trie are visited in depth-first order, it's imperative that targets are + /// given in depth-first order as well. If the targets where generated off of B256s, which is + /// the common-case, then this is equivalent to lexicographical order. fn should_retain( &self, targets: &mut TargetsIter>, @@ -125,7 +164,8 @@ where self.retained_proofs.last().map(|n| n.path), ); - // TODO docs + // If the path isn't in the current range then iterate forward until it is (or until there + // is no upper bound, indicating unbounded). while let Some((_, Some(upper))) = targets.peek() && depth_first::cmp(path, upper) != Ordering::Less { @@ -243,7 +283,8 @@ where Ok(()) } - /// Pushes a new leaf node onto a branch, setting its `state_mask` bit. + /// Creates a new leaf node on a branch, setting its `state_mask` bit and pushing the leaf onto + /// the `child_stack`. /// /// # Panics /// @@ -540,8 +581,8 @@ where targets.into_iter().inspect(move |target| { if let Some(prev) = prev { debug_assert!( - prev <= *target, - "targets must be sorted lexicographically: {:?} > {:?}", + depth_first::cmp(&prev, target) != Ordering::Greater, + "targets must be sorted depth-first, instead {:?} > {:?}", prev, target ); @@ -643,8 +684,8 @@ where { /// Generate a proof for the given targets. /// - /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any - /// target. The returned nodes will be sorted lexicographically by path. + /// Given depth-first sorted targets, returns nodes whose paths are a prefix of any target. The + /// returned nodes will be sorted lexicographically by path. /// /// # Panics /// @@ -676,8 +717,8 @@ where /// Generate a proof for a storage trie at the given hashed address. /// - /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any - /// target. The returned nodes will be sorted lexicographically by path. 
+ /// Given depth-first sorted targets, returns nodes whose paths are a prefix of any target. The + /// returned nodes will be sorted lexicographically by path. /// /// # Panics /// @@ -935,7 +976,7 @@ mod tests { /// Generate a strategy for `HashedPostState` with random accounts fn hashed_post_state_strategy() -> impl Strategy { - prop::collection::vec((any::<[u8; 32]>(), account_strategy()), 0..20).prop_map( + prop::collection::vec((any::<[u8; 32]>(), account_strategy()), 0..40).prop_map( |accounts| { let account_map = accounts .into_iter() @@ -980,7 +1021,7 @@ mod tests { } proptest! { - #![proptest_config(ProptestConfig::with_cases(5000))] + #![proptest_config(ProptestConfig::with_cases(8000))] /// Tests that ProofCalculator produces valid proofs for randomly generated /// HashedPostState with proof targets. From d82dc1532b8c81daac91fb359aa17035787060bf Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 24 Nov 2025 17:53:42 +0100 Subject: [PATCH 11/59] docs --- crates/trie/trie/src/proof_v2/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 91133a06ab7..417862f2a52 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -122,8 +122,8 @@ where /// a single target at a time. The [`WindowIter`] allows us to look at a current target and the /// next target simultaneously, forming an end-exclusive range. /// - /// ``` - /// * targets: [ 0x012, 0x045, 0x678 ] + /// ```text + /// * Given targets: [ 0x012, 0x045, 0x678 ] /// * targets.next() returns: /// - (0x012, Some(0x045)): covers (0x012..0x045) /// - (0x045, Some(0x678)): covers (0x045..0x678) @@ -134,15 +134,17 @@ where /// current target. Once the path goes beyond that range (ie path >= next target) then we can be /// sure that no further paths will be in the range, and we can iterate forward. 
/// - /// ``` + /// ```text /// * Given: /// - path: 0x04 - /// - targets returns (0x012, Some(0x045)) + /// - targets.peek() returns (0x012, Some(0x045)) /// /// * 0x04 comes _after_ 0x045 in depth-first order, so (0x012..0x045) does not contain 0x04. /// /// * targets.next() is called. + /// /// * targets.peek() now returns (0x045, Some(0x678)). This does contain 0x04. + /// /// * 0x04 is a prefix of 0x045, and so is retained. /// ``` /// @@ -239,7 +241,6 @@ where }; debug_assert_ne!(branch.state_mask.get(), 0, "branch.state_mask can never be zero"); - // TODO export BITS off of `TrieMask`. let last_nibble = u16::BITS - branch.state_mask.leading_zeros() - 1; let mut child_path = self.branch_path; From fdeb1e2ca6a97a348e242d6ab2c25ce74d763fdc Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 11:22:58 +0100 Subject: [PATCH 12/59] Apply suggestions from code review Co-authored-by: YK --- crates/trie/trie/src/proof_v2/mod.rs | 8 +++++--- crates/trie/trie/src/proof_v2/node.rs | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 417862f2a52..45c4973c64f 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -59,7 +59,7 @@ pub struct ProofCalculator { /// Children on the `child_stack` are converted to [`ProofTrieBranchChild::RlpNode`]s via the /// [`Self::commit_child`] method. Committing a child indicates that no further changes are /// expected to happen to it (e.g. splitting its short key when inserting a new branch). Given - /// that keys are consumed in lexicographical order, only the most last child on the stack can + /// that keys are consumed in lexicographical order, only the last child on the stack can /// ever be modified, and therefore all children besides the last are expected to be /// [`ProofTrieBranchChild::RlpNode`]s. 
child_stack: Vec>, @@ -115,7 +115,9 @@ where .unwrap_or_else(|| Vec::with_capacity(16)) } - /// Returns true if the proof of a node at the given path should be retained. This may move the + /// Returns true if the proof of a node at the given path should be retained. + /// A node is retained if its path is a prefix of any target. + /// This may move the /// `targets` iterator forward if the given path comes after the current target. /// /// This method takes advantage of the [`WindowIter`] component of [`TargetsIter`] to only check @@ -149,7 +151,7 @@ where /// ``` /// /// Because paths in the trie are visited in depth-first order, it's imperative that targets are - /// given in depth-first order as well. If the targets where generated off of B256s, which is + /// given in depth-first order as well. If the targets were generated off of B256s, which is /// the common-case, then this is equivalent to lexicographical order. fn should_retain( &self, diff --git a/crates/trie/trie/src/proof_v2/node.rs b/crates/trie/trie/src/proof_v2/node.rs index e74476c361b..53a8190cbbf 100644 --- a/crates/trie/trie/src/proof_v2/node.rs +++ b/crates/trie/trie/src/proof_v2/node.rs @@ -26,7 +26,7 @@ pub(crate) enum ProofTrieBranchChild { }, /// A branch node whose children have already been flattened into [`RlpNode`]s. Branch(BranchNode), - // A node whose type is not known, as it has already been converted to an [`RlpNode`]. + /// A node whose type is not known, as it has already been converted to an [`RlpNode`]. 
RlpNode(RlpNode), } From d83745da5804389b0e031bad901bc8d1a83f5744 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 11:46:52 +0100 Subject: [PATCH 13/59] remove an Option check in the common case from should_retain --- crates/trie/trie/src/proof_v2/mod.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 45c4973c64f..440b49b7a42 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -115,7 +115,7 @@ where .unwrap_or_else(|| Vec::with_capacity(16)) } - /// Returns true if the proof of a node at the given path should be retained. + /// Returns true if the proof of a node at the given path should be retained. /// A node is retained if its path is a prefix of any target. /// This may move the /// `targets` iterator forward if the given path comes after the current target. @@ -168,17 +168,19 @@ where self.retained_proofs.last().map(|n| n.path), ); + let &(mut lower, mut upper) = targets.peek().expect("targets is never exhausted"); + // If the path isn't in the current range then iterate forward until it is (or until there // is no upper bound, indicating unbounded). 
- while let Some((_, Some(upper))) = targets.peek() && - depth_first::cmp(path, upper) != Ordering::Less - { + while upper.is_some_and(|upper| depth_first::cmp(path, &upper) != Ordering::Less) { targets.next(); trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); + let &(l, u) = targets.peek().expect("targets is never exhausted"); + (lower, upper) = (l, u); } // If the node in question is a prefix of the target then we retain - targets.peek().is_some_and(|(lower, _)| lower.starts_with(path)) + lower.starts_with(path) } /// Takes a child which has been removed from the `child_stack` and converts it to an From 1ed2ab4ac9d15d0656237a12da48eb6a2d44180f Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 12:05:59 +0100 Subject: [PATCH 14/59] Get rid of recursion in WindowIter --- crates/trie/trie/src/proof_v2/mod.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 440b49b7a42..c323318e84d 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -780,15 +780,16 @@ impl> Iterator for WindowIter { type Item = (I::Item, Option); fn next(&mut self) -> Option { - match (self.prev, self.iter.next()) { - (None, None) => None, - (None, Some(v)) => { - self.prev = Some(v); - self.next() - } - (Some(v), next) => { - self.prev = next; - Some((v, next)) + loop { + match (self.prev, self.iter.next()) { + (None, None) => return None, + (None, Some(v)) => { + self.prev = Some(v); + } + (Some(v), next) => { + self.prev = next; + return Some((v, next)) + } } } } From 9cf79a28ddca9bc044eb4b9c10ed6ee18be1d8fc Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 13:08:08 +0100 Subject: [PATCH 15/59] Fix up benchmarks to do proper comparisons --- crates/trie/trie/benches/proof_v2.rs | 217 ++++++++++++--------------- 1 file changed, 92 insertions(+), 125 
deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index 250e6b576b9..8142a04ee2f 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -4,7 +4,7 @@ use alloy_primitives::{ B256, U256, }; use alloy_rlp::Decodable; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use itertools::Itertools; use proptest::{prelude::*, strategy::ValueTree, test_runner::TestRunner}; use reth_primitives_traits::Account; @@ -19,136 +19,16 @@ use reth_trie_common::{ }; use std::collections::BTreeMap; -/// Benchmark for proof_v2 implementation. -/// -/// This benchmark tests the performance of the new proof calculator that generates -/// merkle proofs using only leaf data, across multiple dataset sizes and target counts. -pub fn proof_v2_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("Proof V2"); - group.sample_size(20); - - // Test across multiple dataset sizes and target counts - for dataset_size in [100, 500, 1_000] { - for num_targets in [1, 10, 50, 100] { - // Skip combinations where targets > dataset (doesn't make sense) - if num_targets > dataset_size { - continue; - } - - let (hashed_post_state, targets, _target_b256s) = - generate_test_data(dataset_size, num_targets); - - // Create mock cursor factories from the hashed post state - let (trie_cursor_factory, hashed_cursor_factory) = - create_cursor_factories(&hashed_post_state); - - // Benchmark ID includes both dimensions: dataset_size/num_targets - let bench_id = format!("v2/dataset_{}/targets_{}", dataset_size, num_targets); - group.bench_function(BenchmarkId::new("account_proof", bench_id), |b| { - b.iter(|| { - let trie_cursor = trie_cursor_factory - .account_trie_cursor() - .expect("Failed to create trie cursor"); - let hashed_cursor = hashed_cursor_factory - .hashed_account_cursor() - .expect("Failed to 
create hashed cursor"); - - let value_encoder = SyncAccountValueEncoder::new( - trie_cursor_factory.clone(), - hashed_cursor_factory.clone(), - ); - - let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); - proof_calculator - .proof(&value_encoder, targets.clone()) - .expect("Proof generation failed") - }) - }); - } - } -} - -/// Benchmark for legacy proof implementation. -/// -/// This benchmark tests the performance of the original proof calculator that uses -/// trie walking for comparison with the proof_v2 implementation, across multiple -/// dataset sizes and target counts. -pub fn proof_legacy_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("Proof V2"); - group.sample_size(20); - - // Test across multiple dataset sizes and target counts - for dataset_size in [100, 500, 1_000] { - for num_targets in [1, 10, 50, 100] { - // Skip combinations where targets > dataset (doesn't make sense) - if num_targets > dataset_size { - continue; - } - - let (hashed_post_state, _targets, target_b256s) = - generate_test_data(dataset_size, num_targets); - - // Create mock cursor factories from the hashed post state - let (trie_cursor_factory, hashed_cursor_factory) = - create_cursor_factories(&hashed_post_state); - - // Convert B256 targets to MultiProofTargets (account targets with empty storage sets) - let legacy_targets: MultiProofTargets = - target_b256s.iter().map(|addr| (*addr, B256Set::default())).collect(); - - // Benchmark ID includes both dimensions: dataset_size/num_targets - let bench_id = format!("legacy/dataset_{}/targets_{}", dataset_size, num_targets); - // Benchmark account proof generation using legacy implementation - // This includes decoding and sorting to match what proof_v2 returns - group.bench_function(BenchmarkId::new("account_proof", bench_id), |b| { - b.iter(|| { - let proof_result = - Proof::new(trie_cursor_factory.clone(), hashed_cursor_factory.clone()) - .multiproof(legacy_targets.clone()) - .expect("Legacy 
proof generation failed"); - - // Decode and sort legacy proof nodes (same as in proof_v2 tests) - let _proof_nodes: Vec = proof_result - .account_subtree - .iter() - .map(|(path, node_enc)| { - let mut buf = node_enc.as_ref(); - let node = TrieNode::decode(&mut buf) - .expect("legacy implementation should produce valid proof nodes"); - - ProofTrieNode { - path: *path, - node, - masks: TrieMasks { - hash_mask: proof_result - .branch_node_hash_masks - .get(path) - .copied(), - tree_mask: proof_result - .branch_node_tree_masks - .get(path) - .copied(), - }, - } - }) - .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) - .collect(); - }) - }); - } - } -} - /// Generate test data for benchmarking. /// /// Returns a tuple of: /// - `HashedPostState` with random accounts /// - Proof targets (Nibbles) that are 80% from existing accounts, 20% random -/// - Proof targets (B256) for legacy implementation +/// - Equivalent [`MultiProofTargets`] for legacy implementation fn generate_test_data( dataset_size: usize, num_targets: usize, -) -> (HashedPostState, Vec, Vec) { +) -> (HashedPostState, Vec, MultiProofTargets) { let mut runner = TestRunner::deterministic(); // Generate random accounts @@ -195,7 +75,10 @@ fn generate_test_data( .collect(); targets.sort(); - (hashed_post_state, targets, target_b256s) + let legacy_targets: MultiProofTargets = + target_b256s.iter().map(|addr| (*addr, B256Set::default())).collect(); + + (hashed_post_state, targets, legacy_targets) } /// Generate a strategy for Account values @@ -246,5 +129,89 @@ fn create_cursor_factories( (trie_cursor_factory, hashed_cursor_factory) } -criterion_group!(proof_comparison, proof_v2_benchmark, proof_legacy_benchmark); +// Benchmark comparing legacy and V2 implementations +fn bench_proof_algos(c: &mut Criterion) { + let mut group = c.benchmark_group("Proof"); + for dataset_size in [10240 /* 128, 1024, 10240, 102400 */] { + for num_targets in [512 /* 1, 8, 16, 64, 128, 512, 2048 */] { + let (hashed_post_state, 
targets, legacy_targets) = + generate_test_data(dataset_size, num_targets); + + // Create mock cursor factories from the hashed post state + let (trie_cursor_factory, hashed_cursor_factory) = + create_cursor_factories(&hashed_post_state); + + let bench_name = format!("dataset_{dataset_size}/targets_{num_targets}"); + + group.bench_function(BenchmarkId::new("Legacy", &bench_name), |b| { + b.iter_batched( + || legacy_targets.clone(), + |targets| { + let proof_result = + Proof::new(trie_cursor_factory.clone(), hashed_cursor_factory.clone()) + .multiproof(targets) + .expect("Legacy proof generation failed"); + + // Decode and sort legacy proof nodes, so output is the same as V2 + let _proof_nodes: Vec = proof_result + .account_subtree + .iter() + .map(|(path, node_enc)| { + let mut buf = node_enc.as_ref(); + let node = TrieNode::decode(&mut buf).expect( + "legacy implementation should produce valid proof nodes", + ); + + ProofTrieNode { + path: *path, + node, + masks: TrieMasks { + hash_mask: proof_result + .branch_node_hash_masks + .get(path) + .copied(), + tree_mask: proof_result + .branch_node_tree_masks + .get(path) + .copied(), + }, + } + }) + .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) + .collect(); + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function(BenchmarkId::new("V2", &bench_name), |b| { + let value_encoder = SyncAccountValueEncoder::new( + trie_cursor_factory.clone(), + hashed_cursor_factory.clone(), + ); + + let trie_cursor = trie_cursor_factory + .account_trie_cursor() + .expect("Failed to create trie cursor"); + let hashed_cursor = hashed_cursor_factory + .hashed_account_cursor() + .expect("Failed to create hashed cursor"); + + let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); + + b.iter_batched( + || targets.clone(), + |targets| { + proof_calculator + .proof(&value_encoder, targets.into_iter()) + .expect("Proof generation failed"); + }, + BatchSize::SmallInput, + ); + }); + } + } +} + 
+criterion_group!(proof_comparison, bench_proof_algos); criterion_main!(proof_comparison); From 9c074e67a04b0d8b18cebe2dc95b11e928b4850b Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 13:33:01 +0100 Subject: [PATCH 16/59] Switch to using storage tries --- crates/trie/trie/benches/proof_v2.rs | 113 +++++++++++++-------------- 1 file changed, 53 insertions(+), 60 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index 8142a04ee2f..4204f7fd70a 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -7,55 +7,62 @@ use alloy_rlp::Decodable; use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use itertools::Itertools; use proptest::{prelude::*, strategy::ValueTree, test_runner::TestRunner}; -use reth_primitives_traits::Account; use reth_trie::{ hashed_cursor::{mock::MockHashedCursorFactory, HashedCursorFactory}, - proof::Proof, - proof_v2::{ProofCalculator, SyncAccountValueEncoder}, + proof::StorageProof, + proof_v2::StorageProofCalculator, trie_cursor::{depth_first, mock::MockTrieCursorFactory, TrieCursorFactory}, }; use reth_trie_common::{ - HashedPostState, MultiProofTargets, Nibbles, ProofTrieNode, TrieMasks, TrieNode, + HashedPostState, HashedStorage, Nibbles, ProofTrieNode, TrieMasks, TrieNode, }; use std::collections::BTreeMap; /// Generate test data for benchmarking. 
/// /// Returns a tuple of: -/// - `HashedPostState` with random accounts -/// - Proof targets (Nibbles) that are 80% from existing accounts, 20% random -/// - Equivalent [`MultiProofTargets`] for legacy implementation +/// - Hashed address for the storage trie +/// - `HashedPostState` with random storage slots +/// - Proof targets (Nibbles) that are 80% from existing slots, 20% random +/// - Equivalent [`B256Set`] for legacy implementation fn generate_test_data( dataset_size: usize, num_targets: usize, -) -> (HashedPostState, Vec, MultiProofTargets) { +) -> (B256, HashedPostState, Vec, B256Set) { let mut runner = TestRunner::deterministic(); - // Generate random accounts - let accounts_strategy = - proptest::collection::vec((any::<[u8; 32]>(), account_strategy()), dataset_size); + // Use a fixed hashed address for the storage trie + let hashed_address = B256::from([0x42; 32]); - let accounts = accounts_strategy.new_tree(&mut runner).unwrap().current(); + // Generate random storage slots (key -> value) + let storage_strategy = + proptest::collection::vec((any::<[u8; 32]>(), any::()), dataset_size); - // Convert to HashedPostState - let account_map: B256Map<_> = accounts + let storage_entries = storage_strategy.new_tree(&mut runner).unwrap().current(); + + // Convert to storage map + let storage_map: B256Map = storage_entries .iter() - .map(|(addr_bytes, account)| (B256::from(*addr_bytes), Some(*account))) + .map(|(slot_bytes, value)| (B256::from(*slot_bytes), U256::from(*value))) .collect(); - // All accounts have empty storages - let storages = - account_map.keys().copied().map(|addr| (addr, Default::default())).collect::>(); + // Create HashedPostState with single account's storage + let mut storages = B256Map::default(); + let hashed_storage = HashedStorage { + wiped: false, + storage: storage_map.iter().map(|(k, v)| (*k, *v)).collect(), + }; + storages.insert(hashed_address, hashed_storage); - let hashed_post_state = HashedPostState { accounts: 
account_map.clone(), storages }; + let hashed_post_state = HashedPostState { accounts: B256Map::default(), storages }; - // Generate proof targets: 80% from existing accounts, 20% random - let account_keys: Vec = account_map.keys().copied().collect(); + // Generate proof targets: 80% from existing slots, 20% random + let slot_keys: Vec = storage_map.keys().copied().collect(); let targets_strategy = proptest::collection::vec( - prop::bool::weighted(0.8).prop_flat_map(move |from_accounts| { - if from_accounts && !account_keys.is_empty() { - prop::sample::select(account_keys.clone()).boxed() + prop::bool::weighted(0.8).prop_flat_map(move |from_slots| { + if from_slots && !slot_keys.is_empty() { + prop::sample::select(slot_keys.clone()).boxed() } else { any::<[u8; 32]>().prop_map(B256::from).boxed() } @@ -65,7 +72,7 @@ fn generate_test_data( let target_b256s = targets_strategy.new_tree(&mut runner).unwrap().current(); - // Convert B256 targets to sorted Nibbles + // Convert B256 targets to sorted Nibbles for V2 let mut targets: Vec = target_b256s .iter() .map(|b256| { @@ -75,32 +82,18 @@ fn generate_test_data( .collect(); targets.sort(); - let legacy_targets: MultiProofTargets = - target_b256s.iter().map(|addr| (*addr, B256Set::default())).collect(); - - (hashed_post_state, targets, legacy_targets) -} + // Create B256Set for legacy + let legacy_targets: B256Set = target_b256s.into_iter().collect(); -/// Generate a strategy for Account values -fn account_strategy() -> impl Strategy { - (any::(), any::(), any::<[u8; 32]>()).prop_map(|(nonce, balance, code_hash)| { - Account { nonce, balance: U256::from(balance), bytecode_hash: Some(B256::from(code_hash)) } - }) + (hashed_address, hashed_post_state, targets, legacy_targets) } -/// Create cursor factories from a `HashedPostState`. +/// Create cursor factories from a `HashedPostState` for storage trie testing. /// /// This mimics the test harness pattern from the proof_v2 tests. 
fn create_cursor_factories( post_state: &HashedPostState, ) -> (MockTrieCursorFactory, MockHashedCursorFactory) { - // Extract accounts from post state, filtering out None (deleted accounts) - let hashed_accounts: BTreeMap = post_state - .accounts - .iter() - .filter_map(|(addr, account)| account.map(|acc| (*addr, acc))) - .collect(); - // Extract storage tries from post state let hashed_storage_tries: B256Map> = post_state .storages @@ -120,8 +113,9 @@ fn create_cursor_factories( let storage_trie_nodes: B256Map> = hashed_storage_tries.keys().copied().map(|addr| (addr, Default::default())).collect(); - // Create mock hashed cursor factory populated with the post state data - let hashed_cursor_factory = MockHashedCursorFactory::new(hashed_accounts, hashed_storage_tries); + // Create mock hashed cursor factory populated with the storage data + // No accounts needed for storage trie testing + let hashed_cursor_factory = MockHashedCursorFactory::new(BTreeMap::new(), hashed_storage_tries); // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) let trie_cursor_factory = MockTrieCursorFactory::new(BTreeMap::new(), storage_trie_nodes); @@ -134,7 +128,7 @@ fn bench_proof_algos(c: &mut Criterion) { let mut group = c.benchmark_group("Proof"); for dataset_size in [10240 /* 128, 1024, 10240, 102400 */] { for num_targets in [512 /* 1, 8, 16, 64, 128, 512, 2048 */] { - let (hashed_post_state, targets, legacy_targets) = + let (hashed_address, hashed_post_state, targets, legacy_targets) = generate_test_data(dataset_size, num_targets); // Create mock cursor factories from the hashed post state @@ -147,14 +141,17 @@ fn bench_proof_algos(c: &mut Criterion) { b.iter_batched( || legacy_targets.clone(), |targets| { - let proof_result = - Proof::new(trie_cursor_factory.clone(), hashed_cursor_factory.clone()) - .multiproof(targets) - .expect("Legacy proof generation failed"); + let proof_result = StorageProof::new_hashed( + trie_cursor_factory.clone(), + 
hashed_cursor_factory.clone(), + hashed_address, + ) + .storage_multiproof(targets) + .expect("Legacy proof generation failed"); // Decode and sort legacy proof nodes, so output is the same as V2 let _proof_nodes: Vec = proof_result - .account_subtree + .subtree .iter() .map(|(path, node_enc)| { let mut buf = node_enc.as_ref(); @@ -185,25 +182,21 @@ fn bench_proof_algos(c: &mut Criterion) { }); group.bench_function(BenchmarkId::new("V2", &bench_name), |b| { - let value_encoder = SyncAccountValueEncoder::new( - trie_cursor_factory.clone(), - hashed_cursor_factory.clone(), - ); - let trie_cursor = trie_cursor_factory - .account_trie_cursor() + .storage_trie_cursor(hashed_address) .expect("Failed to create trie cursor"); let hashed_cursor = hashed_cursor_factory - .hashed_account_cursor() + .hashed_storage_cursor(hashed_address) .expect("Failed to create hashed cursor"); - let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); + let mut proof_calculator = + StorageProofCalculator::new_storage(trie_cursor, hashed_cursor); b.iter_batched( || targets.clone(), |targets| { proof_calculator - .proof(&value_encoder, targets.into_iter()) + .storage_proof(hashed_address, targets.into_iter()) .expect("Proof generation failed"); }, BatchSize::SmallInput, From 062bb785921497397dbabb763aa1296ebce01816 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 14:54:16 +0100 Subject: [PATCH 17/59] Some perf improvements --- crates/trie/trie/benches/proof_v2.rs | 42 ++++----------------------- crates/trie/trie/src/proof_v2/mod.rs | 16 +++++----- crates/trie/trie/src/proof_v2/node.rs | 14 +++++---- 3 files changed, 22 insertions(+), 50 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index 4204f7fd70a..c347c616519 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -3,19 +3,15 @@ use alloy_primitives::{ map::{B256Map, B256Set}, B256, U256, }; -use 
alloy_rlp::Decodable; use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; -use itertools::Itertools; use proptest::{prelude::*, strategy::ValueTree, test_runner::TestRunner}; use reth_trie::{ hashed_cursor::{mock::MockHashedCursorFactory, HashedCursorFactory}, proof::StorageProof, proof_v2::StorageProofCalculator, - trie_cursor::{depth_first, mock::MockTrieCursorFactory, TrieCursorFactory}, -}; -use reth_trie_common::{ - HashedPostState, HashedStorage, Nibbles, ProofTrieNode, TrieMasks, TrieNode, + trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, }; +use reth_trie_common::{HashedPostState, HashedStorage, Nibbles}; use std::collections::BTreeMap; /// Generate test data for benchmarking. @@ -126,8 +122,8 @@ fn create_cursor_factories( // Benchmark comparing legacy and V2 implementations fn bench_proof_algos(c: &mut Criterion) { let mut group = c.benchmark_group("Proof"); - for dataset_size in [10240 /* 128, 1024, 10240, 102400 */] { - for num_targets in [512 /* 1, 8, 16, 64, 128, 512, 2048 */] { + for dataset_size in [128, 1024, 10240] { + for num_targets in [1, 16, 64, 128, 512, 2048] { let (hashed_address, hashed_post_state, targets, legacy_targets) = generate_test_data(dataset_size, num_targets); @@ -141,41 +137,13 @@ fn bench_proof_algos(c: &mut Criterion) { b.iter_batched( || legacy_targets.clone(), |targets| { - let proof_result = StorageProof::new_hashed( + StorageProof::new_hashed( trie_cursor_factory.clone(), hashed_cursor_factory.clone(), hashed_address, ) .storage_multiproof(targets) .expect("Legacy proof generation failed"); - - // Decode and sort legacy proof nodes, so output is the same as V2 - let _proof_nodes: Vec = proof_result - .subtree - .iter() - .map(|(path, node_enc)| { - let mut buf = node_enc.as_ref(); - let node = TrieNode::decode(&mut buf).expect( - "legacy implementation should produce valid proof nodes", - ); - - ProofTrieNode { - path: *path, - node, - masks: TrieMasks { - hash_mask: 
proof_result - .branch_node_hash_masks - .get(path) - .copied(), - tree_mask: proof_result - .branch_node_tree_masks - .get(path) - .copied(), - }, - } - }) - .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) - .collect(); }, BatchSize::SmallInput, ); diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index c323318e84d..b6763873e6a 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -28,6 +28,9 @@ use node::*; /// Target to use with the `tracing` crate. static TRACE_TARGET: &str = "trie::proof_v2"; +/// Number of bytes to pre-allocate for [`ProofCalculator`]'s `rlp_encode_buf` field. +const RLP_ENCODE_BUF_SIZE: usize = 1024; + /// A proof calculator that generates merkle proofs using only leaf data. /// /// The calculator: @@ -78,7 +81,7 @@ pub struct ProofCalculator { impl ProofCalculator { /// Create a new [`ProofCalculator`] instance for calculating account proofs. - pub const fn new(trie_cursor: TC, hashed_cursor: HC) -> Self { + pub fn new(trie_cursor: TC, hashed_cursor: HC) -> Self { Self { trie_cursor, hashed_cursor, @@ -87,7 +90,7 @@ impl ProofCalculator { child_stack: Vec::<_>::new(), retained_proofs: Vec::<_>::new(), rlp_nodes_bufs: Vec::<_>::new(), - rlp_encode_buf: Vec::<_>::new(), + rlp_encode_buf: Vec::<_>::with_capacity(RLP_ENCODE_BUF_SIZE), } } } @@ -205,11 +208,8 @@ where // Convert to `ProofTrieNode`, which will be what is retained. // - // If this node is a leaf then the `rlp_encode_buf` is taken by it and a new one will be - // allocated by the next encode call. - // - // If it is a branch then its `rlp_nodes_buf` will be taken and not returned to the - // `rlp_nodes_bufs` free-list. + // If this node is a branch then its `rlp_nodes_buf` will be taken and not returned to + // the `rlp_nodes_bufs` free-list. 
self.rlp_encode_buf.clear(); let proof_node = child.into_proof_trie_node(child_path, &mut self.rlp_encode_buf)?; @@ -716,7 +716,7 @@ where HC: HashedStorageCursor, { /// Create a new [`StorageProofCalculator`] instance. - pub const fn new_storage(trie_cursor: TC, hashed_cursor: HC) -> Self { + pub fn new_storage(trie_cursor: TC, hashed_cursor: HC) -> Self { Self::new(trie_cursor, hashed_cursor) } diff --git a/crates/trie/trie/src/proof_v2/node.rs b/crates/trie/trie/src/proof_v2/node.rs index 53a8190cbbf..536665f19ae 100644 --- a/crates/trie/trie/src/proof_v2/node.rs +++ b/crates/trie/trie/src/proof_v2/node.rs @@ -85,7 +85,15 @@ impl ProofTrieBranchChild { let (node, masks) = match self { Self::Leaf { short_key, value } => { value.encode(buf)?; - (TrieNode::Leaf(LeafNode::new(short_key, core::mem::take(buf))), TrieMasks::none()) + // Counter-intuitively a clone is better here than a `core::mem::take`. If we take + // the buffer then future RLP-encodes will need to re-allocate a new one, and + // RLP-encodes after those may need a bigger buffer and therefore re-alloc again. + // + // By cloning here we do a single allocation of exactly the size we need to take + // this value, and the passed in buffer can remain with whatever large capacity it + // already has. + let rlp_val = buf.clone(); + (TrieNode::Leaf(LeafNode::new(short_key, rlp_val)), TrieMasks::none()) } Self::Extension { short_key, child } => { (TrieNode::Extension(ExtensionNode { key: short_key, child }), TrieMasks::none()) @@ -95,10 +103,6 @@ impl ProofTrieBranchChild { Self::RlpNode(_) => panic!("Cannot call `into_proof_trie_node` on RlpNode"), }; - // Encode the `TrieNode` to the buffer, so we can return the `RlpNode` for it at the end. 
- buf.clear(); - node.encode(buf); - Ok(ProofTrieNode { node, path, masks }) } From f17b7609689af93caed6deaffc674cddfb3484ff Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 15:27:02 +0100 Subject: [PATCH 18/59] Simplify mock factory creation --- crates/trie/trie/benches/proof_v2.rs | 22 ++---------- crates/trie/trie/src/hashed_cursor/mock.rs | 28 +++++++++++++++ crates/trie/trie/src/proof_v2/mod.rs | 40 ++++------------------ crates/trie/trie/src/trie_cursor/mock.rs | 22 ++++++++++++ 4 files changed, 59 insertions(+), 53 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index c347c616519..6e871ce7e76 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -90,28 +90,12 @@ fn generate_test_data( fn create_cursor_factories( post_state: &HashedPostState, ) -> (MockTrieCursorFactory, MockHashedCursorFactory) { - // Extract storage tries from post state - let hashed_storage_tries: B256Map> = post_state - .storages - .iter() - .map(|(addr, hashed_storage)| { - // Convert HashedStorage to BTreeMap, filtering out zero values (deletions) - let storage_map: BTreeMap = hashed_storage - .storage - .iter() - .filter_map(|(slot, value)| (*value != U256::ZERO).then_some((*slot, *value))) - .collect(); - (*addr, storage_map) - }) - .collect(); - // Ensure that there's a storage trie dataset for every storage trie, even if empty let storage_trie_nodes: B256Map> = - hashed_storage_tries.keys().copied().map(|addr| (addr, Default::default())).collect(); + post_state.storages.keys().copied().map(|addr| (addr, Default::default())).collect(); - // Create mock hashed cursor factory populated with the storage data - // No accounts needed for storage trie testing - let hashed_cursor_factory = MockHashedCursorFactory::new(BTreeMap::new(), hashed_storage_tries); + // Create mock hashed cursor factory from the post state + let hashed_cursor_factory = 
MockHashedCursorFactory::from_hashed_post_state(post_state.clone()); // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) let trie_cursor_factory = MockTrieCursorFactory::new(BTreeMap::new(), storage_trie_nodes); diff --git a/crates/trie/trie/src/hashed_cursor/mock.rs b/crates/trie/trie/src/hashed_cursor/mock.rs index fd3e8d2f25f..527226c774b 100644 --- a/crates/trie/trie/src/hashed_cursor/mock.rs +++ b/crates/trie/trie/src/hashed_cursor/mock.rs @@ -7,6 +7,7 @@ use alloy_primitives::{map::B256Map, B256, U256}; use parking_lot::{Mutex, MutexGuard}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; +use reth_trie_common::HashedPostState; use tracing::instrument; /// Mock hashed cursor factory. @@ -37,6 +38,33 @@ impl MockHashedCursorFactory { } } + /// Creates a new mock hashed cursor factory from a `HashedPostState`. + pub fn from_hashed_post_state(post_state: HashedPostState) -> Self { + // Extract accounts from post state, filtering out None (deleted accounts) + let hashed_accounts: BTreeMap = post_state + .accounts + .into_iter() + .filter_map(|(addr, account)| account.map(|acc| (addr, acc))) + .collect(); + + // Extract storages from post state + let hashed_storages: B256Map> = post_state + .storages + .into_iter() + .map(|(addr, hashed_storage)| { + // Convert HashedStorage to BTreeMap, filtering out zero values (deletions) + let storage_map: BTreeMap = hashed_storage + .storage + .into_iter() + .filter_map(|(slot, value)| (value != U256::ZERO).then_some((slot, value))) + .collect(); + (addr, storage_map) + }) + .collect(); + + Self::new(hashed_accounts, hashed_storages) + } + /// Returns a reference to the list of visited hashed account keys. 
pub fn visited_account_keys(&self) -> MutexGuard<'_, Vec>> { self.visited_account_keys.lock() diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index b6763873e6a..295bebe4c84 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -833,38 +833,16 @@ mod tests { fn new(post_state: HashedPostState) -> Self { trace!(target: TRACE_TARGET, ?post_state, "Creating ProofTestHarness"); - // Extract accounts from post state, filtering out None (deleted accounts) - let hashed_accounts: BTreeMap = post_state - .accounts - .into_iter() - .filter_map(|(addr, account)| account.map(|acc| (addr, acc))) - .collect(); - - // Extract storage tries from post state - let hashed_storage_tries: B256Map> = post_state - .storages - .into_iter() - .map(|(addr, hashed_storage)| { - // Convert HashedStorage to BTreeMap, filtering out zero values (deletions) - let storage_map: BTreeMap = hashed_storage - .storage - .into_iter() - .filter_map(|(slot, value)| (value != U256::ZERO).then_some((slot, value))) - .collect(); - (addr, storage_map) - }) - .collect(); - // Ensure that there's a storage trie dataset for every storage trie, even if empty. 
- let storage_trie_nodes: B256Map> = hashed_storage_tries + let storage_trie_nodes: B256Map> = post_state + .storages .keys() .copied() .map(|addr| (addr, Default::default())) .collect(); - // Create mock hashed cursor factory populated with the post state data - let hashed_cursor_factory = - MockHashedCursorFactory::new(hashed_accounts, hashed_storage_tries); + // Create mock hashed cursor factory from the post state + let hashed_cursor_factory = MockHashedCursorFactory::from_hashed_post_state(post_state); // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) let trie_cursor_factory = @@ -884,14 +862,8 @@ mod tests { ) -> Result<(), StateProofError> { // Convert B256 targets to Nibbles for proof_v2 let targets_vec: Vec = targets.into_iter().collect(); - let nibbles_targets: Vec = targets_vec - .iter() - .map(|b256| { - // SAFETY: B256 is exactly 32 bytes - unsafe { Nibbles::unpack_unchecked(b256.as_slice()) } - }) - .sorted() - .collect(); + let nibbles_targets: Vec = + targets_vec.iter().map(|b256| Nibbles::unpack(b256.as_slice())).sorted().collect(); // Convert B256 targets to MultiProofTargets for legacy implementation // For account-only proofs, each account maps to an empty storage set diff --git a/crates/trie/trie/src/trie_cursor/mock.rs b/crates/trie/trie/src/trie_cursor/mock.rs index cbb2b0ffb1a..ee59fe4fceb 100644 --- a/crates/trie/trie/src/trie_cursor/mock.rs +++ b/crates/trie/trie/src/trie_cursor/mock.rs @@ -9,6 +9,7 @@ use crate::{ }; use alloy_primitives::{map::B256Map, B256}; use reth_storage_errors::db::DatabaseError; +use reth_trie_common::updates::TrieUpdates; /// Mock trie cursor factory. #[derive(Clone, Default, Debug)] @@ -37,6 +38,27 @@ impl MockTrieCursorFactory { } } + /// Creates a new mock trie cursor factory from `TrieUpdates`. 
+ pub fn from_trie_updates(updates: TrieUpdates) -> Self { + // Convert account nodes from HashMap to BTreeMap + let account_trie_nodes: BTreeMap = + updates.account_nodes.into_iter().collect(); + + // Convert storage tries + let storage_tries: B256Map> = updates + .storage_tries + .into_iter() + .map(|(addr, storage_updates)| { + // Convert storage nodes from HashMap to BTreeMap + let storage_nodes: BTreeMap = + storage_updates.storage_nodes.into_iter().collect(); + (addr, storage_nodes) + }) + .collect(); + + Self::new(account_trie_nodes, storage_tries) + } + /// Returns a reference to the list of visited account keys. pub fn visited_account_keys(&self) -> MutexGuard<'_, Vec>> { self.visited_account_keys.lock() From e227e083f1ccf43ed14e97ea52f802b80343b84d Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 15:39:24 +0100 Subject: [PATCH 19/59] consts --- crates/trie/trie/src/hashed_cursor/mock.rs | 2 +- crates/trie/trie/src/trie_cursor/mock.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/mock.rs b/crates/trie/trie/src/hashed_cursor/mock.rs index 527226c774b..63f1b138fe2 100644 --- a/crates/trie/trie/src/hashed_cursor/mock.rs +++ b/crates/trie/trie/src/hashed_cursor/mock.rs @@ -129,7 +129,7 @@ pub struct MockHashedCursor { impl MockHashedCursor { /// Creates a new mock hashed cursor for accounts with the given values and key tracking. - pub fn new( + pub const fn new( values: Arc>, visited_keys: Arc>>>, ) -> Self { diff --git a/crates/trie/trie/src/trie_cursor/mock.rs b/crates/trie/trie/src/trie_cursor/mock.rs index ee59fe4fceb..5f29a6734bf 100644 --- a/crates/trie/trie/src/trie_cursor/mock.rs +++ b/crates/trie/trie/src/trie_cursor/mock.rs @@ -126,7 +126,7 @@ pub struct MockTrieCursor { impl MockTrieCursor { /// Creates a new mock trie cursor for accounts with the given trie nodes and key tracking. 
- pub fn new( + pub const fn new( trie_nodes: Arc>, visited_keys: Arc>>>, ) -> Self { From 9046bf619181df8c7f906a6ec1436f8d07006e5e Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 25 Nov 2025 16:50:40 +0100 Subject: [PATCH 20/59] docs --- crates/trie/trie/benches/proof_v2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index 6e871ce7e76..d592091cee4 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -86,7 +86,7 @@ fn generate_test_data( /// Create cursor factories from a `HashedPostState` for storage trie testing. /// -/// This mimics the test harness pattern from the proof_v2 tests. +/// This mimics the test harness pattern from the `proof_v2` tests. fn create_cursor_factories( post_state: &HashedPostState, ) -> (MockTrieCursorFactory, MockHashedCursorFactory) { From 844609dd62221ff8c203fed61a10b562010edab6 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 27 Nov 2025 17:27:18 +0100 Subject: [PATCH 21/59] WIP: super basic usage of cached branches --- crates/trie/trie/src/proof_v2/mod.rs | 240 +++++++++++++++++++++++---- 1 file changed, 210 insertions(+), 30 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 1d818a90e9e..28b1de38cf8 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -13,7 +13,7 @@ use crate::{ }; use alloy_primitives::{B256, U256}; use alloy_rlp::Encodable; -use alloy_trie::TrieMask; +use alloy_trie::{BranchNodeCompact, TrieMask}; use reth_execution_errors::trie::StateProofError; use reth_trie_common::{BranchNode, Nibbles, ProofTrieNode, RlpNode, TrieMasks, TrieNode}; use std::{cmp::Ordering, iter::Peekable}; @@ -66,6 +66,10 @@ pub struct ProofCalculator { /// ever be modified, and therefore all children besides the last are expected to be /// [`ProofTrieBranchChild::RlpNode`]s. 
child_stack: Vec>, + /// Cached branch data pulled from the `trie_cursor`. The calculator will use the cached + /// [`BranchNodeCompact::hashes`] to skip over the calculation of sub-tries in the overall + /// trie. The cached hashes cannot be used for any paths which are prefixes of a proof target. + cached_branch_stack: Vec<(Nibbles, BranchNodeCompact)>, /// The proofs which will be returned from the calculation. This gets taken at the end of every /// proof call. retained_proofs: Vec, @@ -85,14 +89,55 @@ impl ProofCalculator { Self { trie_cursor, hashed_cursor, - branch_stack: Vec::<_>::new(), + branch_stack: Vec::<_>::with_capacity(64), branch_path: Nibbles::new(), child_stack: Vec::<_>::new(), + cached_branch_stack: Vec::<_>::with_capacity(64), retained_proofs: Vec::<_>::new(), rlp_nodes_bufs: Vec::<_>::new(), rlp_encode_buf: Vec::<_>::with_capacity(RLP_ENCODE_BUF_SIZE), } } + + /// Returns whether the given path lies within the lower/upper bound of a portion of the target + /// set (presumably obtained via `targets.peek()`. See [`Self::should_retain`] to understand + /// how the targets lower/upper bounds work. + /// + /// This method assumes depth-first ordering. + /// + /// # Returns + /// + /// - [`Ordering::Less`] if `path` is less than the lower bound. + /// - [`Ordering::Equal`] if `path` is greater-or-equal to the lower bound, and less than the + /// upper bound (ie it is in-range). + /// - [`Ordering::Greater`] if `path` is greater-or-equal to the upper bound. + #[expect(unused)] + fn cmp_targets(path: &Nibbles, bounds: &(Nibbles, Option)) -> Ordering { + debug_assert!( + bounds + .1 + .as_ref() + .is_none_or(|upper| depth_first::cmp(&bounds.0, upper) != Ordering::Greater), + "lower bound {:?} is greater than upper bound {:?} (depth-first)", + bounds.0, + bounds.1, + ); + + match bounds { + (lower, _) if depth_first::cmp(path, lower) == Ordering::Less => Ordering::Less, + (_, None) => { + // None indicates no upper-bound. 
We've already determined that path is >= lower, + // so it must be in-range. + Ordering::Equal + } + (_, Some(upper)) if depth_first::cmp(path, upper) == Ordering::Less => { + // Upper bound is exclusive. If path is less the upper bound and not less than the + // lower bound then it is in-range. + Ordering::Equal + } + (_, _) => Ordering::Greater, + } + } } /// Helper type for the [`Iterator`] used to pass targets in from the caller. @@ -236,6 +281,26 @@ where Ok(child_rlp_node) } + /// Returns the path of the child of the currently under-construction branch at the given + /// nibble. + fn child_path_at(&self, nibble: u8) -> Nibbles { + let mut child_path = self.branch_path; + debug_assert!(child_path.len() < 64); + child_path.push_unchecked(nibble); + child_path + } + + /// Returns index of the highest nibble which is set in the mask. + /// + /// # Panics + /// + /// Will panic in debug mode if the mask is empty. + #[inline] + fn highest_set_nibble(mask: TrieMask) -> u8 { + debug_assert!(!mask.is_empty()); + (u16::BITS - mask.leading_zeros() - 1) as u8 + } + /// Returns the path of the child on top of the `child_stack`, or the root path if the stack is /// empty. fn last_child_path(&self) -> Nibbles { @@ -244,13 +309,7 @@ where return Nibbles::new(); }; - debug_assert_ne!(branch.state_mask.get(), 0, "branch.state_mask can never be zero"); - let last_nibble = u16::BITS - branch.state_mask.leading_zeros() - 1; - - let mut child_path = self.branch_path; - debug_assert!(child_path.len() < 64); - child_path.push_unchecked(last_nibble as u8); - child_path + self.child_path_at(Self::highest_set_nibble(branch.state_mask)) } /// Calls [`Self::commit_child`] on the last child of `child_stack`, replacing it with a @@ -499,7 +558,7 @@ where /// Adds a single leaf for a key to the stack, possibly collapsing an existing branch and/or /// creating a new one depending on the path of the key. 
- fn add_leaf( + fn push_leaf( &mut self, targets: &mut TargetsIter>, key: Nibbles, @@ -512,12 +571,12 @@ where branch_stack_len = ?self.branch_stack.len(), branch_path = ?self.branch_path, child_stack_len = ?self.child_stack.len(), - "add_leaf: loop", + "push_leaf: loop", ); - // Get the `state_mask` of the branch currently being built. If there are no branches on - // the stack then it means either the trie is empty or only a single leaf has been added - // previously. + // Get the `state_mask` of the branch currently being built. If there are no branches + // on the stack then it means either the trie is empty or only a single leaf has been + // added previously. let curr_branch_state_mask = match self.branch_stack.last() { Some(curr_branch) => curr_branch.state_mask, None if self.child_stack.is_empty() => { @@ -570,6 +629,79 @@ where } } + // Notes: + // - Will not be called mid-branch; either child_stack will be empty, or the last child will be + // a branch/extension. + fn next_uncached_subtrie(&mut self) -> Result { + loop { + // The cached branch stack is initialized with the node closest to root as part of + // `proof_inner`, so if the stack is empty it means either there are no cached nodes or + // they've been exhausted. + let Some((cached_path, cached_branch)) = self.cached_branch_stack.last() else { + todo!() + }; + + // If the current key belongs to a sub-trie which comes before this one in the ordering + // then we don't have any cached trie data for that sub-trie, return None indicating to + // iterate forward normally. + //if &curr_key_nibbles < cached_path { + // todo!() + //} + + if &self.branch_path != cached_path { + todo!() + } + + let Some(curr_branch) = self.branch_stack.last_mut() else { + // If the branch stack is empty then we should... push a new one? 
+ todo!() + }; + + let cached_state_mask = cached_branch.state_mask.get(); + let curr_state_mask = curr_branch.state_mask.get(); + + // Determine all child nibbles which are set in the cached branch but not the + // under-construction branch. + let next_child_nibbles = curr_state_mask ^ cached_state_mask; + debug_assert_eq!( + cached_state_mask | next_child_nibbles, cached_state_mask, + "curr_branch has state_mask bits set which aren't set on cached_branch. curr_branch:{:?}", + curr_state_mask, + ); + + if next_child_nibbles == 0 { + todo!( + "If all children have been constructed then we should pop this cached branch" + ); + } + + // Determine the next nibble of the branch which has not yet been constructed, and set + // its bit on the `state_mask`. + let child_nibble = next_child_nibbles.trailing_zeros() as u8; + curr_branch.state_mask.set_bit(child_nibble); + + // If the `hash_mask` bit is set for the next child it means the child's hash is cached + // in the `cached_branch`. We can use that instead of re-calculating the hash of the + // entire sub-trie. + // + // TODO we cannot use the cached value if any of this sub-trie might be within the + // target set. + if cached_branch.hash_mask.is_bit_set(child_nibble) { + let num_prev_children = curr_state_mask.count_ones(); + let hash = cached_branch.hashes[num_prev_children as usize]; + self.child_stack.push(ProofTrieBranchChild::RlpNode(RlpNode::word_rlp(&hash))); + continue + } + + // TODO check if the child is a cached branch node + + // It is required to recalculate the sub-trie for this child using the leaves. Return + // the child path, indicating that all keys with this prefix should be iterated over + // and their sub-trie root (aka this child node) calculated. + return Ok(self.child_path_at(child_nibble)); + } + } + /// Internal implementation of proof calculation. Assumes both cursors have already been reset. /// See docs on [`Self::proof`] for expected behavior. 
fn proof_inner( @@ -614,14 +746,33 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.is_empty()); - let mut hashed_cursor_current = self.hashed_cursor.seek(B256::ZERO)?; + // Initialize the `cached_branch_stack` with the node closest to root. + if let Some(cached_branch) = self.trie_cursor.seek(Nibbles::new())? { + self.cached_branch_stack.push(cached_branch); + } + + // Initialize the hashed cursor to None to indicate it hasn't been seeked yet. + let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; + + // A helper closure for mapping entries returned from the `hashed_cursor`, converting the + // key to Nibbles and immediately creating the DeferredValueEncoder so that encoding of the + // leaf value can begin ASAP. + let map_hashed_cursor_entry = |(key_b256, val): (B256, _)| { + debug_assert_eq!(key_b256.len(), 32); + // SAFETY: key is a B256 and so is exactly 32-bytes. + let key = unsafe { Nibbles::unpack_unchecked(key_b256.as_slice()) }; + let val = value_encoder.deferred_encoder(key_b256, val); + (key, val) + }; + loop { trace!( target: TRACE_TARGET, - ?hashed_cursor_current, + hashed_cursor_current = ?hashed_cursor_current.as_ref().map(|kv| kv.0), branch_stack_len = ?self.branch_stack.len(), branch_path = ?self.branch_path, child_stack_len = ?self.child_stack.len(), + cached_branch_path = ?self.cached_branch_stack.last().map(|cached| cached.0), "proof_inner: loop", ); @@ -629,21 +780,50 @@ where // If there is a branch, there must be at least two children debug_assert!(self.branch_stack.last().is_none_or(|_| self.child_stack.len() >= 2)); - // Fetch the next leaf from the hashed cursor, converting the key to Nibbles and - // immediately creating the DeferredValueEncoder so that encoding of the leaf value can - // begin ASAP. - let Some((key, val)) = hashed_cursor_current.map(|(key_b256, val)| { - debug_assert_eq!(key_b256.len(), 32); - // SAFETY: key is a B256 and so is exactly 32-bytes. 
- let key = unsafe { Nibbles::unpack_unchecked(key_b256.as_slice()) }; - let val = value_encoder.deferred_encoder(key_b256, val); - (key, val) - }) else { - break - }; + // Determine the next subtrie of the overall trie whose hash is not cached. The path of + // the subtrie indicates the path to its root node, as well as the prefix all children + // the subtrie will have. + let subtrie_path = self.next_uncached_subtrie()?; + + // Calculate the exclusive upper bound of node paths in the subtrie, with None + // indicating unbounded. + let subtrie_upper_bound = subtrie_path.increment(); + + // If the cursor hasn't been used, or the last iterated key is prior to this subtrie's + // key range, then seek forward to the subtrie. + if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &subtrie_path) { + let subtrie_lower_key = B256::right_padding_from(&subtrie_path.pack()); + hashed_cursor_current = + self.hashed_cursor.seek(subtrie_lower_key)?.map(map_hashed_cursor_entry); + } + + // Loop over all keys in the subtrie, calling `push_leaf` on each. + while let Some((key, _)) = hashed_cursor_current && + subtrie_upper_bound.is_none_or(|upper_bound| key < upper_bound) + { + let (key, val) = hashed_cursor_current.expect("while-let checks for Some"); + self.push_leaf(&mut targets, key, val)?; + hashed_cursor_current = self.hashed_cursor.next()?.map(map_hashed_cursor_entry); + } - self.add_leaf(&mut targets, key, val)?; - hashed_cursor_current = self.hashed_cursor.next()?; + // Once outside the while-loop `hashed_cursor_current` will be at the first key after + // the subtrie. This may be the first key of the next uncached subtrie, in which case + // no seek will be done on the next loop (see the `hashed_cursor_current.is_none_or` + // call above). + // + // If the `hashed_cursor_current` is None then there are no more keys at all, meaning + // the trie couldn't possible have more data and we should complete computation. 
+ if hashed_cursor_current.is_none() { + break; + } + + // Pop off all branches of the subtrie, including the subtrie itself. After this loop + // the top of the `child_stack` will be the root node of the subtrie, and the top of + // the `branch_stack` (if the subtrie wasn't the root of the overall trie) will be the + // subtrie's parent. + while self.branch_path.starts_with(&subtrie_path) { + self.pop_branch(&mut targets)?; + } } // Once there's no more leaves we can pop the remaining branches, if any. From a181bcdd4ba95c0f0498b5fef50d540c3c91cd55 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 28 Nov 2025 13:21:03 +0100 Subject: [PATCH 22/59] WIP: Basic implementation done, needs testing --- crates/trie/trie/src/proof_v2/mod.rs | 248 +++++++++++++++++++-------- 1 file changed, 175 insertions(+), 73 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 28b1de38cf8..5344d0ee2d2 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -163,6 +163,17 @@ where .unwrap_or_else(|| Vec::with_capacity(16)) } + // Returns zero if `branch_stack` is empty, one otherwise. + // + // This is used when working with the `ext_len` field of [`ProofTrieBranch`]. The `ext_len` is + // calculated by taking the difference of the current `branch_path` and the new branch's path; + // if the new branch has a parent branch (ie `branch_stack` is not empty) then 1 is subtracted + // from the `ext_len` to account for the child's nibble on the parent. + #[inline] + fn maybe_parent_nibble(&self) -> usize { + !self.branch_stack.is_empty() as usize + } + /// Returns true if the proof of a node at the given path should be retained. /// A node is retained if its path is a prefix of any target. 
/// This may move the @@ -435,6 +446,17 @@ where let leaf_nibble = leaf_short_key.get_unchecked(common_prefix_len); let leaf_short_key = trim_nibbles_prefix(&leaf_short_key, common_prefix_len + 1); + // Update the branch path to reflect the new branch about to be pushed. Its path will be + // the path of the previous branch, plus the nibble shared by each child, plus the parent + // extension (denoted by a non-zero `ext_len`). Since the new branch's path is a prefix of + // the original leaf_key we can just slice that. + // + // If the branch is the first branch then we do not add the extra 1, as there is no nibble + // in a parent branch to account for. + let branch_path_len = + self.branch_path.len() + common_prefix_len + self.maybe_parent_nibble(); + self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); + // Push the new branch onto the branch stack. We do not yet set the `state_mask` bit of the // new leaf; `push_new_leaf` will do that. self.branch_stack.push(ProofTrieBranch { @@ -444,18 +466,6 @@ where hash_mask: TrieMask::default(), }); - // Update the branch path to reflect the new branch which was just pushed. Its path will be - // the path of the previous branch, plus the nibble shared by each child, plus the parent - // extension (denoted by a non-zero `ext_len`). Since the new branch's path is a prefix of - // the original leaf_key we can just slice that. - // - // If the branch is the first branch then we do not add the extra 1, as there is no nibble - // in a parent branch to account for. - let branch_path_len = self.branch_path.len() + - common_prefix_len + - if self.branch_stack.len() == 1 { 0 } else { 1 }; - self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); - // Push the new leaf onto the new branch. This step depends on the top branch being in the // correct state, so must be done last. self.push_new_leaf(targets, leaf_nibble, leaf_short_key, leaf_val)?; @@ -546,9 +556,8 @@ where // Update the branch_path. 
If this branch is the only branch then only its extension needs // to be trimmed, otherwise we also need to remove its nibble from its parent. - let new_path_len = self.branch_path.len() - - branch.ext_len as usize - - if self.branch_stack.is_empty() { 0 } else { 1 }; + let new_path_len = + self.branch_path.len() - branch.ext_len as usize - self.maybe_parent_nibble(); debug_assert!(self.branch_path.len() >= new_path_len); self.branch_path = self.branch_path.slice_unchecked(0, new_path_len); @@ -629,33 +638,102 @@ where } } - // Notes: - // - Will not be called mid-branch; either child_stack will be empty, or the last child will be - // a branch/extension. - fn next_uncached_subtrie(&mut self) -> Result { + // TODO docs + // TODO re-evaluate how next_cached_branch works... might be possible to not always call next + // when taking it. + fn next_uncached_key_range( + &mut self, + targets: &mut TargetsIter>, + next_cached_branch: &mut Option<(Nibbles, BranchNodeCompact)>, + hashed_key_current: Option<&Nibbles>, + ) -> Result<(Nibbles, Option), StateProofError> { loop { - // The cached branch stack is initialized with the node closest to root as part of - // `proof_inner`, so if the stack is empty it means either there are no cached nodes or - // they've been exhausted. - let Some((cached_path, cached_branch)) = self.cached_branch_stack.last() else { - todo!() - }; + // TODO might be possible to move this out of the loop? + // Determine the current cached branch node. + // Note: Cloning the `cached_branch` is cheap because it uses an Arc. + let (cached_path, cached_branch) = + match (self.cached_branch_stack.last(), &next_cached_branch) { + (Some(cached), _) => { + // If the `cached_branch_stack` is not empty then its last is the current + cached.clone() + } + (_, Some(_)) => { + // If `cached_branch_stack` is empty but there is an unconsumed cached + // branch from the cursor then we consume that branch, pushing it onto the + // stack. 
+ let cached = core::mem::take(next_cached_branch).expect("is some"); + *next_cached_branch = self.trie_cursor.next()?; + self.cached_branch_stack.push(cached.clone()); + cached + } + (None, None) => { + // If both stack and cursor are empty then there are no more cached nodes, + // return an open range to indicate that the rest of the trie should be + // calculated solely from leaves. + return Ok((hashed_key_current.copied().unwrap_or_else(Nibbles::new), None)); + } + }; + + // TODO might be possible to move this out of the loop? + // The current hashed key indicates the first key after the previous uncached range, + // or None if this is the first call to this method. If the key is not caught up to + // this cached branch it means there are portions of the trie prior to this branch + // which need to be computed; return the range up to this branch to make that happen. + if hashed_key_current.is_none_or(|k| k < &cached_path) { + return Ok(( + // If this is the first call to this method then start computation from zero + hashed_key_current.copied().unwrap_or_else(Nibbles::new), + Some(cached_path), + )); + } - // If the current key belongs to a sub-trie which comes before this one in the ordering - // then we don't have any cached trie data for that sub-trie, return None indicating to - // iterate forward normally. - //if &curr_key_nibbles < cached_path { - // todo!() - //} + // We can assert that this method doesn't let the currently active branch get ahead of + // the cached one. + debug_assert!( + self.branch_path <= cached_path, + "branch_path {:?} is after cached_path {cached_path:?}", + self.branch_path + ); - if &self.branch_path != cached_path { - todo!() + // All trie data prior to this cached branch has been computed. Any branches which were + // under-construction previously, and which are not on the same path as this cached + // branch, can be assumed to be completed; they will not have any further keys added to + // them. 
+ while !cached_path.starts_with(&self.branch_path) { + self.pop_branch(targets)?; } - let Some(curr_branch) = self.branch_stack.last_mut() else { - // If the branch stack is empty then we should... push a new one? - todo!() - }; + // Since we've popped all branches which don't start with cached_path, branch_path at + // this point must be equal to or shorter than cached_path. + debug_assert!( + self.branch_path.len() < cached_path.len() || self.branch_path == cached_path, + "branch_path {:?} is different-or-longer-than cached_path {cached_path:?}", + self.branch_path + ); + + // If the branch_path != cached_path it means the branch_stack is either empty, or the + // top branch is the parent of this cached branch. Either way we push a branch + // corresponding to the cached one onto the stack, so we can begin constructing it. + if self.branch_path != cached_path { + // The length of the extension will be the difference of the lengths of the cached + // branch and its parent if any. + let ext_len = + (cached_path.len() - self.branch_path.len() - self.maybe_parent_nibble()) as u8; + self.branch_stack.push(ProofTrieBranch { + ext_len, + state_mask: cached_branch.state_mask, + tree_mask: cached_branch.tree_mask, + hash_mask: cached_branch.hash_mask, + }); + self.branch_path = cached_path; + } + + // At this point the top of the branch stack is the same branch which was found in the + // cache. + let curr_branch = self + .branch_stack + .last_mut() + .expect("top of branch_stack corresponds to cached branch"); let cached_state_mask = cached_branch.state_mask.get(); let curr_state_mask = curr_branch.state_mask.get(); @@ -669,10 +747,12 @@ where curr_state_mask, ); + // If there are no further children to construct for this branch then pop it off both + // stacks and loop using the parent branch. 
if next_child_nibbles == 0 { - todo!( - "If all children have been constructed then we should pop this cached branch" - ); + self.cached_branch_stack.pop(); + self.pop_branch(targets)?; + continue } // Determine the next nibble of the branch which has not yet been constructed, and set @@ -693,12 +773,36 @@ where continue } - // TODO check if the child is a cached branch node + // We now want to check if there is a cached branch node at this child. The cached + // branch node may be the node at this child directly, or this child may be an + // extension and the cached branch is the child of that extension. + let child_path = self.child_path_at(child_nibble); + + // All trie nodes prior to `child_path` will not be modified further, so we can seek + // the cached cursor to the next cached node at-or-after `child_path`. + if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && + next_cached_path < child_path + { + *next_cached_branch = self.trie_cursor.seek(child_path)?; + } - // It is required to recalculate the sub-trie for this child using the leaves. Return - // the child path, indicating that all keys with this prefix should be iterated over - // and their sub-trie root (aka this child node) calculated. - return Ok(self.child_path_at(child_nibble)); + // If the next cached branch node is a child of the child path then we can assume it is + // the cached branch for this child. We push it onto the `cached_branch_stack` and loop + // back to the top. + if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && + next_cached_path.starts_with(&child_path) + { + let cached = core::mem::take(next_cached_branch).expect("is some"); + *next_cached_branch = self.trie_cursor.next()?; + self.cached_branch_stack.push(cached); + continue; + } + + // There is no cached data for the sub-trie at this child, we must recalculate the + // sub-trie root (this child) using the leaves. Return the range of keys based on the + // child path. 
+ let child_path_upper = child_path.increment(); + return Ok((child_path, child_path_upper)); } } @@ -746,13 +850,21 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.is_empty()); + // Initialize the hashed cursor to None to indicate it hasn't been seeked yet. + let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; + // Initialize the `cached_branch_stack` with the node closest to root. if let Some(cached_branch) = self.trie_cursor.seek(Nibbles::new())? { self.cached_branch_stack.push(cached_branch); } - // Initialize the hashed cursor to None to indicate it hasn't been seeked yet. - let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; + // `next_cached_branch` will always be the next _unconsumed_ cached node. If the + // `cached_branch_stack` is empty then the seek in the previous step returned None, + // indicating there are no trie nodes. + let mut next_cached_branch = (!self.cached_branch_stack.is_empty()) + .then(|| self.trie_cursor.next().transpose()) + .flatten() + .transpose()?; // A helper closure for mapping entries returned from the `hashed_cursor`, converting the // key to Nibbles and immediately creating the DeferredValueEncoder so that encoding of the @@ -780,26 +892,24 @@ where // If there is a branch, there must be at least two children debug_assert!(self.branch_stack.last().is_none_or(|_| self.child_stack.len() >= 2)); - // Determine the next subtrie of the overall trie whose hash is not cached. The path of - // the subtrie indicates the path to its root node, as well as the prefix all children - // the subtrie will have. - let subtrie_path = self.next_uncached_subtrie()?; - - // Calculate the exclusive upper bound of node paths in the subtrie, with None - // indicating unbounded. 
- let subtrie_upper_bound = subtrie_path.increment(); - - // If the cursor hasn't been used, or the last iterated key is prior to this subtrie's - // key range, then seek forward to the subtrie. - if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &subtrie_path) { - let subtrie_lower_key = B256::right_padding_from(&subtrie_path.pack()); + // Determine the range of keys of the overall trie which need to be re-computed. + let (lower_bound, upper_bound) = self.next_uncached_key_range( + &mut targets, + &mut next_cached_branch, + hashed_cursor_current.as_ref().map(|kv| &kv.0), + )?; + + // If the cursor hasn't been used, or the last iterated key is prior to this range's + // key range, then seek forward to at least the first key. + if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &lower_bound) { + let lower_key = B256::right_padding_from(&lower_bound.pack()); hashed_cursor_current = - self.hashed_cursor.seek(subtrie_lower_key)?.map(map_hashed_cursor_entry); + self.hashed_cursor.seek(lower_key)?.map(map_hashed_cursor_entry); } - // Loop over all keys in the subtrie, calling `push_leaf` on each. + // Loop over all keys in the range, calling `push_leaf` on each. while let Some((key, _)) = hashed_cursor_current && - subtrie_upper_bound.is_none_or(|upper_bound| key < upper_bound) + upper_bound.is_none_or(|upper_bound| key < upper_bound) { let (key, val) = hashed_cursor_current.expect("while-let checks for Some"); self.push_leaf(&mut targets, key, val)?; @@ -807,23 +917,15 @@ where } // Once outside the while-loop `hashed_cursor_current` will be at the first key after - // the subtrie. This may be the first key of the next uncached subtrie, in which case + // the range. This may be the first key of the next uncached range, in which case // no seek will be done on the next loop (see the `hashed_cursor_current.is_none_or` // call above). 
// // If the `hashed_cursor_current` is None then there are no more keys at all, meaning - // the trie couldn't possible have more data and we should complete computation. + // the trie couldn't possibly have more data and we should complete computation. if hashed_cursor_current.is_none() { break; } - - // Pop off all branches of the subtrie, including the subtrie itself. After this loop - // the top of the `child_stack` will be the root node of the subtrie, and the top of - // the `branch_stack` (if the subtrie wasn't the root of the overall trie) will be the - // subtrie's parent. - while self.branch_path.starts_with(&subtrie_path) { - self.pop_branch(&mut targets)?; - } } // Once there's no more leaves we can pop the remaining branches, if any. From 8cfb54407ac10eb62056a007edc751fed04b9c84 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 28 Nov 2025 14:54:07 +0100 Subject: [PATCH 23/59] WIP: retain proofs --- crates/trie/trie/src/hashed_cursor/mock.rs | 7 ++- crates/trie/trie/src/proof_v2/mod.rs | 69 ++++++++++------------ crates/trie/trie/src/proof_v2/value.rs | 3 +- 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/crates/trie/trie/src/hashed_cursor/mock.rs b/crates/trie/trie/src/hashed_cursor/mock.rs index 63f1b138fe2..15edd97adee 100644 --- a/crates/trie/trie/src/hashed_cursor/mock.rs +++ b/crates/trie/trie/src/hashed_cursor/mock.rs @@ -48,7 +48,7 @@ impl MockHashedCursorFactory { .collect(); // Extract storages from post state - let hashed_storages: B256Map> = post_state + let mut hashed_storages: B256Map> = post_state .storages .into_iter() .map(|(addr, hashed_storage)| { @@ -62,6 +62,11 @@ impl MockHashedCursorFactory { }) .collect(); + // Ensure all accounts have at least an empty storage + for account in hashed_accounts.keys() { + hashed_storages.entry(*account).or_default(); + } + Self::new(hashed_accounts, hashed_storages) } diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 
5344d0ee2d2..cda1cd98347 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -170,7 +170,7 @@ where // if the new branch has a parent branch (ie `branch_stack` is not empty) then 1 is subtracted // from the `ext_len` to account for the child's nibble on the parent. #[inline] - fn maybe_parent_nibble(&self) -> usize { + const fn maybe_parent_nibble(&self) -> usize { !self.branch_stack.is_empty() as usize } @@ -756,17 +756,20 @@ where } // Determine the next nibble of the branch which has not yet been constructed, and set - // its bit on the `state_mask`. + // its bit on the `state_mask`, and determine the child's full path. let child_nibble = next_child_nibbles.trailing_zeros() as u8; curr_branch.state_mask.set_bit(child_nibble); + let child_path = self.child_path_at(child_nibble); // If the `hash_mask` bit is set for the next child it means the child's hash is cached // in the `cached_branch`. We can use that instead of re-calculating the hash of the // entire sub-trie. // - // TODO we cannot use the cached value if any of this sub-trie might be within the - // target set. - if cached_branch.hash_mask.is_bit_set(child_nibble) { + // If the child needs to be retained for a proof then we should not use the cached + // hash, and instead continue on to calculate its node manually. + if cached_branch.hash_mask.is_bit_set(child_nibble) && + !self.should_retain(targets, &child_path) + { let num_prev_children = curr_state_mask.count_ones(); let hash = cached_branch.hashes[num_prev_children as usize]; self.child_stack.push(ProofTrieBranchChild::RlpNode(RlpNode::word_rlp(&hash))); @@ -776,7 +779,6 @@ where // We now want to check if there is a cached branch node at this child. The cached // branch node may be the node at this child directly, or this child may be an // extension and the cached branch is the child of that extension. 
- let child_path = self.child_path_at(child_nibble); // All trie nodes prior to `child_path` will not be modified further, so we can seek // the cached cursor to the next cached node at-or-after `child_path`. @@ -786,7 +788,7 @@ where *next_cached_branch = self.trie_cursor.seek(child_path)?; } - // If the next cached branch node is a child of the child path then we can assume it is + // If the next cached branch node is a child of `child_path` then we can assume it is // the cached branch for this child. We push it onto the `cached_branch_stack` and loop // back to the top. if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && @@ -811,22 +813,17 @@ where fn proof_inner( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { trace!(target: TRACE_TARGET, "proof_inner: called"); // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] let targets = { - let mut prev: Option = None; + let mut prev: Option = None; targets.into_iter().inspect(move |target| { if let Some(prev) = prev { - debug_assert!( - depth_first::cmp(&prev, target) != Ordering::Greater, - "targets must be sorted depth-first, instead {:?} > {:?}", - prev, - target - ); + debug_assert!(&prev <= target, "prev:{prev:?} target:{target:?}"); } prev = Some(*target); }) @@ -835,6 +832,12 @@ where #[cfg(not(debug_assertions))] let targets = targets.into_iter(); + // Convert B256 targets into Nibbles. + let targets = targets.into_iter().map(|key| { + // SAFETY: key is a B256 and so is exactly 32-bytes. + unsafe { Nibbles::unpack_unchecked(key.as_slice()) } + }); + // Wrap targets into a `TargetsIter`. let mut targets = WindowIter::new(targets).peekable(); @@ -971,8 +974,8 @@ where { /// Generate a proof for the given targets. /// - /// Given depth-first sorted targets, returns nodes whose paths are a prefix of any target. The - /// returned nodes will be sorted lexicographically by path. 
+ /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any + /// target. The returned nodes will be sorted lexicographically by path. /// /// # Panics /// @@ -981,7 +984,7 @@ where pub fn proof( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { self.trie_cursor.reset(); self.hashed_cursor.reset(); @@ -1004,8 +1007,8 @@ where /// Generate a proof for a storage trie at the given hashed address. /// - /// Given depth-first sorted targets, returns nodes whose paths are a prefix of any target. The - /// returned nodes will be sorted lexicographically by path. + /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any + /// target. The returned nodes will be sorted lexicographically by path. /// /// # Panics /// @@ -1014,7 +1017,7 @@ where pub fn storage_proof( &mut self, hashed_address: B256, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { /// Static storage value encoder instance used by all storage proofs. static STORAGE_VALUE_ENCODER: StorageValueEncoder = StorageValueEncoder; @@ -1115,9 +1118,10 @@ mod tests { fn new(post_state: HashedPostState) -> Self { trace!(target: TRACE_TARGET, ?post_state, "Creating ProofTestHarness"); - // Ensure that there's a storage trie dataset for every storage trie, even if empty. + // Ensure that there's an storage trie dataset for every account, to make the mocks + // happy. let storage_trie_nodes: B256Map> = post_state - .storages + .accounts .keys() .copied() .map(|addr| (addr, Default::default())) @@ -1140,12 +1144,10 @@ mod tests { /// the results. 
fn assert_proof( &self, - targets: impl IntoIterator + Clone, + targets: impl IntoIterator, ) -> Result<(), StateProofError> { - // Convert B256 targets to Nibbles for proof_v2 - let targets_vec: Vec = targets.into_iter().collect(); - let nibbles_targets: Vec = - targets_vec.iter().map(|b256| Nibbles::unpack(b256.as_slice())).sorted().collect(); + let targets_vec = targets.into_iter().sorted().collect::>(); + // Convert B256 targets to MultiProofTargets for legacy implementation // For account-only proofs, each account maps to an empty storage set let legacy_targets = targets_vec @@ -1163,7 +1165,7 @@ mod tests { self.hashed_cursor_factory.clone(), ); let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); - let proof_v2_result = proof_calculator.proof(&value_encoder, nibbles_targets)?; + let proof_v2_result = proof_calculator.proof(&value_encoder, targets_vec.clone())?; // Call Proof::multiproof (legacy implementation) let proof_legacy_result = @@ -1242,14 +1244,7 @@ mod tests { .map(|(addr_bytes, account)| (B256::from(addr_bytes), Some(account))) .collect::>(); - // All accounts have empty storages. - let storages = account_map - .keys() - .copied() - .map(|addr| (addr, Default::default())) - .collect::>(); - - HashedPostState { accounts: account_map, storages } + HashedPostState { accounts: account_map, ..Default::default() } }, ) } diff --git a/crates/trie/trie/src/proof_v2/value.rs b/crates/trie/trie/src/proof_v2/value.rs index 9f5f97a2718..b97e7579d4d 100644 --- a/crates/trie/trie/src/proof_v2/value.rs +++ b/crates/trie/trie/src/proof_v2/value.rs @@ -7,7 +7,6 @@ use alloy_primitives::{B256, U256}; use alloy_rlp::Encodable; use reth_execution_errors::trie::StateProofError; use reth_primitives_traits::Account; -use reth_trie_common::Nibbles; use std::rc::Rc; /// A trait for deferred RLP-encoding of leaf values. @@ -124,7 +123,7 @@ where // Compute storage root by calling storage_proof with the root path as a target. 
// This returns just the root node of the storage trie. let storage_root = storage_proof_calculator - .storage_proof(self.hashed_address, [Nibbles::new()]) + .storage_proof(self.hashed_address, [B256::ZERO]) .map(|nodes| { // Encode the root node to RLP and hash it let root_node = From f8f5477965733441f7d84f7a8e5de049537f0c62 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 28 Nov 2025 16:32:07 +0100 Subject: [PATCH 24/59] WIP: tests working, code broken --- crates/trie/trie/src/proof_v2/mod.rs | 230 +++++++++++++++++---------- 1 file changed, 148 insertions(+), 82 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index cda1cd98347..df8e8415f86 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -326,22 +326,17 @@ where /// Calls [`Self::commit_child`] on the last child of `child_stack`, replacing it with a /// [`ProofTrieBranchChild::RlpNode`]. /// + /// If `child_stack` is empty then this is a no-op. + /// /// NOTE that this method call relies on the `state_mask` of the top branch of the /// `branch_stack` to determine the last child's path. When committing the last child prior to /// pushing a new child, it's important to set the new child's `state_mask` bit _after_ the call /// to this method. - /// - /// # Panics - /// - /// This method panics if the `child_stack` is empty. fn commit_last_child( &mut self, targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { - let child = self - .child_stack - .pop() - .expect("`commit_last_child` cannot be called with empty `child_stack`"); + let Some(child) = self.child_stack.pop() else { return Ok(()) }; // If the child is already an `RlpNode` then there is nothing to do, push it back on with no // changes. @@ -638,9 +633,61 @@ where } } + /// Given the lower and upper bounds (exclusive) of a range of keys, iterates over the + /// `hashed_cursor` and calculates all trie nodes possible based on those keys. 
If the upper + /// bound is None then it is considered unbounded. + /// + /// It is expected that this method is "driven" by `next_uncached_key_range`, which decides + /// which ranges of keys need to be calculated based on what cached trie data is available. + #[instrument( + target = TRACE_TARGET, + level = "trace", + skip(self, value_encoder, targets, hashed_cursor_current), + )] + fn calculate_key_range( + &mut self, + value_encoder: &VE, + targets: &mut TargetsIter>, + hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, + lower_bound: Nibbles, + upper_bound: Option, + ) -> Result<(), StateProofError> { + // A helper closure for mapping entries returned from the `hashed_cursor`, converting the + // key to Nibbles and immediately creating the DeferredValueEncoder so that encoding of the + // leaf value can begin ASAP. + let map_hashed_cursor_entry = |(key_b256, val): (B256, _)| { + debug_assert_eq!(key_b256.len(), 32); + // SAFETY: key is a B256 and so is exactly 32-bytes. + let key = unsafe { Nibbles::unpack_unchecked(key_b256.as_slice()) }; + let val = value_encoder.deferred_encoder(key_b256, val); + (key, val) + }; + + // If the cursor hasn't been used, or the last iterated key is prior to this range's + // key range, then seek forward to at least the first key. + if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &lower_bound) { + let lower_key = B256::right_padding_from(&lower_bound.pack()); + *hashed_cursor_current = + self.hashed_cursor.seek(lower_key)?.map(map_hashed_cursor_entry); + } + + // Loop over all keys in the range, calling `push_leaf` on each. 
+ while let Some((key, _)) = hashed_cursor_current.as_ref() && + upper_bound.is_none_or(|upper_bound| key < &upper_bound) + { + let (key, val) = + core::mem::take(hashed_cursor_current).expect("while-let checks for Some"); + self.push_leaf(targets, key, val)?; + *hashed_cursor_current = self.hashed_cursor.next()?.map(map_hashed_cursor_entry); + } + + Ok(()) + } + // TODO docs // TODO re-evaluate how next_cached_branch works... might be possible to not always call next // when taking it. + #[instrument(target = TRACE_TARGET, level = "trace", skip_all)] fn next_uncached_key_range( &mut self, targets: &mut TargetsIter>, @@ -667,6 +714,7 @@ where cached } (None, None) => { + trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); // If both stack and cursor are empty then there are no more cached nodes, // return an open range to indicate that the rest of the trie should be // calculated solely from leaves. @@ -674,6 +722,17 @@ where } }; + trace!( + target: TRACE_TARGET, + ?hashed_key_current, + branch_path = ?self.branch_path, + branch_state_mask = ?self.branch_stack.last().map(|b| b.state_mask), + ?cached_path, + cached_branch_state_mask = ?cached_branch.state_mask, + cached_branch_hash_mask = ?cached_branch.hash_mask, + "loop", + ); + // TODO might be possible to move this out of the loop? // The current hashed key indicates the first key after the previous uncached range, // or None if this is the first call to this method. If the key is not caught up to @@ -687,14 +746,6 @@ where )); } - // We can assert that this method doesn't let the currently active branch get ahead of - // the cached one. - debug_assert!( - self.branch_path <= cached_path, - "branch_path {:?} is after cached_path {cached_path:?}", - self.branch_path - ); - // All trie data prior to this cached branch has been computed. 
Any branches which were // under-construction previously, and which are not on the same path as this cached // branch, can be assumed to be completed; they will not have any further keys added to @@ -721,19 +772,23 @@ where (cached_path.len() - self.branch_path.len() - self.maybe_parent_nibble()) as u8; self.branch_stack.push(ProofTrieBranch { ext_len, - state_mask: cached_branch.state_mask, + state_mask: TrieMask::new(0), tree_mask: cached_branch.tree_mask, hash_mask: cached_branch.hash_mask, }); self.branch_path = cached_path; + trace!( + target: TRACE_TARGET, + branch=?self.branch_stack.last(), + branch_path=?self.branch_path, + "pushed cached branch", + ); } // At this point the top of the branch stack is the same branch which was found in the // cache. - let curr_branch = self - .branch_stack - .last_mut() - .expect("top of branch_stack corresponds to cached branch"); + let curr_branch = + self.branch_stack.last().expect("top of branch_stack corresponds to cached branch"); let cached_state_mask = cached_branch.state_mask.get(); let curr_state_mask = curr_branch.state_mask.get(); @@ -750,15 +805,21 @@ where // If there are no further children to construct for this branch then pop it off both // stacks and loop using the parent branch. if next_child_nibbles == 0 { + trace!( + target: TRACE_TARGET, + path=?cached_path, + ?curr_branch, + ?cached_branch, + "No further children, popping branch", + ); self.cached_branch_stack.pop(); self.pop_branch(targets)?; continue } - // Determine the next nibble of the branch which has not yet been constructed, and set - // its bit on the `state_mask`, and determine the child's full path. + // Determine the next nibble of the branch which has not yet been constructed, and + // determine the child's full path. 
let child_nibble = next_child_nibbles.trailing_zeros() as u8; - curr_branch.state_mask.set_bit(child_nibble); let child_path = self.child_path_at(child_nibble); // If the `hash_mask` bit is set for the next child it means the child's hash is cached @@ -770,9 +831,25 @@ where if cached_branch.hash_mask.is_bit_set(child_nibble) && !self.should_retain(targets, &child_path) { - let num_prev_children = curr_state_mask.count_ones(); - let hash = cached_branch.hashes[num_prev_children as usize]; + let hash_idx = + cached_branch.hash_mask.count_ones() - curr_state_mask.count_ones() - 1; + let hash = cached_branch.hashes[hash_idx as usize]; + + trace!( + target: TRACE_TARGET, + ?child_path, + ?hash_idx, + ?hash, + "Using cached hash for child", + ); + self.child_stack.push(ProofTrieBranchChild::RlpNode(RlpNode::word_rlp(&hash))); + self.branch_stack + .last_mut() + .expect("already asserted there is a last branch") + .state_mask + .set_bit(child_nibble); + continue } @@ -785,6 +862,7 @@ where if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && next_cached_path < child_path { + trace!(target: TRACE_TARGET, ?child_path, "Seeking trie cursor to child path"); *next_cached_branch = self.trie_cursor.seek(child_path)?; } @@ -795,6 +873,7 @@ where next_cached_path.starts_with(&child_path) { let cached = core::mem::take(next_cached_branch).expect("is some"); + trace!(target: TRACE_TARGET, ?child_path, ?cached, "Pushing cached branch for child"); *next_cached_branch = self.trie_cursor.next()?; self.cached_branch_stack.push(cached); continue; @@ -804,6 +883,12 @@ where // sub-trie root (this child) using the leaves. Return the range of keys based on the // child path. 
let child_path_upper = child_path.increment(); + trace!( + target: TRACE_TARGET, + lower=?child_path, + upper=?child_path_upper, + "Returning key range to calculate", + ); return Ok((child_path, child_path_upper)); } } @@ -869,32 +954,7 @@ where .flatten() .transpose()?; - // A helper closure for mapping entries returned from the `hashed_cursor`, converting the - // key to Nibbles and immediately creating the DeferredValueEncoder so that encoding of the - // leaf value can begin ASAP. - let map_hashed_cursor_entry = |(key_b256, val): (B256, _)| { - debug_assert_eq!(key_b256.len(), 32); - // SAFETY: key is a B256 and so is exactly 32-bytes. - let key = unsafe { Nibbles::unpack_unchecked(key_b256.as_slice()) }; - let val = value_encoder.deferred_encoder(key_b256, val); - (key, val) - }; - loop { - trace!( - target: TRACE_TARGET, - hashed_cursor_current = ?hashed_cursor_current.as_ref().map(|kv| kv.0), - branch_stack_len = ?self.branch_stack.len(), - branch_path = ?self.branch_path, - child_stack_len = ?self.child_stack.len(), - cached_branch_path = ?self.cached_branch_stack.last().map(|cached| cached.0), - "proof_inner: loop", - ); - - // Sanity check before making any further changes: - // If there is a branch, there must be at least two children - debug_assert!(self.branch_stack.last().is_none_or(|_| self.child_stack.len() >= 2)); - // Determine the range of keys of the overall trie which need to be re-computed. let (lower_bound, upper_bound) = self.next_uncached_key_range( &mut targets, @@ -902,27 +962,17 @@ where hashed_cursor_current.as_ref().map(|kv| &kv.0), )?; - // If the cursor hasn't been used, or the last iterated key is prior to this range's - // key range, then seek forward to at least the first key. 
- if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &lower_bound) { - let lower_key = B256::right_padding_from(&lower_bound.pack()); - hashed_cursor_current = - self.hashed_cursor.seek(lower_key)?.map(map_hashed_cursor_entry); - } - - // Loop over all keys in the range, calling `push_leaf` on each. - while let Some((key, _)) = hashed_cursor_current && - upper_bound.is_none_or(|upper_bound| key < upper_bound) - { - let (key, val) = hashed_cursor_current.expect("while-let checks for Some"); - self.push_leaf(&mut targets, key, val)?; - hashed_cursor_current = self.hashed_cursor.next()?.map(map_hashed_cursor_entry); - } + // Calculate the trie for that range of keys + self.calculate_key_range( + value_encoder, + &mut targets, + &mut hashed_cursor_current, + lower_bound, + upper_bound, + )?; - // Once outside the while-loop `hashed_cursor_current` will be at the first key after - // the range. This may be the first key of the next uncached range, in which case - // no seek will be done on the next loop (see the `hashed_cursor_current.is_none_or` - // call above). + // Once outside `calculate_key_range`, `hashed_cursor_current` will be at the first key + // after the range. // // If the `hashed_cursor_current` is None then there are no more keys at all, meaning // the trie couldn't possibly have more data and we should complete computation. @@ -1092,8 +1142,10 @@ mod tests { use alloy_rlp::Decodable; use assert_matches::assert_matches; use itertools::Itertools; - use reth_trie_common::{HashedPostState, MultiProofTargets, TrieNode}; - use std::collections::BTreeMap; + use reth_trie_common::{ + updates::{StorageTrieUpdates, TrieUpdates}, + HashedPostState, MultiProofTargets, TrieNode, + }; /// Target to use with the `tracing` crate. static TRACE_TARGET: &str = "trie::proof_v2::tests"; @@ -1113,26 +1165,40 @@ mod tests { /// Creates a new test harness from a `HashedPostState`. 
/// /// The `HashedPostState` is used to populate the mock hashed cursor factory directly. - /// The trie cursor factory is empty by default, suitable for testing the leaf-only - /// proof calculator. + /// The trie cursor factory is initialized from TrieUpdates generated by StateRoot. fn new(post_state: HashedPostState) -> Self { trace!(target: TRACE_TARGET, ?post_state, "Creating ProofTestHarness"); - // Ensure that there's an storage trie dataset for every account, to make the mocks - // happy. - let storage_trie_nodes: B256Map> = post_state + // Create empty trie cursor factory to serve as the initial state for StateRoot + // Ensure that there's a storage trie dataset for every account, to make + // `MockTrieCursorFactory` happy. + let storage_tries: B256Map<_> = post_state .accounts .keys() .copied() - .map(|addr| (addr, Default::default())) + .map(|addr| (addr, StorageTrieUpdates::default())) .collect(); + let empty_trie_cursor_factory = MockTrieCursorFactory::from_trie_updates(TrieUpdates { + storage_tries: storage_tries.clone(), + ..Default::default() + }); + // Create mock hashed cursor factory from the post state let hashed_cursor_factory = MockHashedCursorFactory::from_hashed_post_state(post_state); - // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) - let trie_cursor_factory = - MockTrieCursorFactory::new(BTreeMap::new(), storage_trie_nodes); + // Generate TrieUpdates using StateRoot + let (_root, mut trie_updates) = + crate::StateRoot::new(empty_trie_cursor_factory, hashed_cursor_factory.clone()) + .root_with_updates() + .expect("StateRoot should succeed"); + + // Continue using empty storage tries for each account, to keep `MockTrieCursorFactory` + // happy. 
+ trie_updates.storage_tries = storage_tries; + + // Initialize trie cursor factory from the generated TrieUpdates + let trie_cursor_factory = MockTrieCursorFactory::from_trie_updates(trie_updates); Self { trie_cursor_factory, hashed_cursor_factory } } From 550b95398273a16ea6e0ad13313d7d7b44c9c759 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 1 Dec 2025 15:53:23 +0100 Subject: [PATCH 25/59] WIP: failing tests still --- crates/trie/trie/src/proof_v2/mod.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index df8e8415f86..a3fa8ad4bff 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -766,6 +766,10 @@ where // top branch is the parent of this cached branch. Either way we push a branch // corresponding to the cached one onto the stack, so we can begin constructing it. if self.branch_path != cached_path { + if !self.child_stack.is_empty() { + self.commit_last_child(targets)?; + } + // The length of the extension will be the difference of the lengths of the cached // branch and its parent if any. 
let ext_len = @@ -781,7 +785,7 @@ where target: TRACE_TARGET, branch=?self.branch_stack.last(), branch_path=?self.branch_path, - "pushed cached branch", + "Pushed cached branch", ); } @@ -831,8 +835,7 @@ where if cached_branch.hash_mask.is_bit_set(child_nibble) && !self.should_retain(targets, &child_path) { - let hash_idx = - cached_branch.hash_mask.count_ones() - curr_state_mask.count_ones() - 1; + let hash_idx = cached_branch.hash_mask.count_ones() - curr_state_mask.count_ones(); let hash = cached_branch.hashes[hash_idx as usize]; trace!( From a52dfd23502132853da3e48c0c43ef3e39d03fb5 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 1 Dec 2025 18:27:33 +0100 Subject: [PATCH 26/59] WIP: almost working --- crates/trie/trie/src/proof_v2/mod.rs | 190 +++++++++++++++------------ 1 file changed, 106 insertions(+), 84 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index a3fa8ad4bff..a1cb4529a8b 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -98,46 +98,6 @@ impl ProofCalculator { rlp_encode_buf: Vec::<_>::with_capacity(RLP_ENCODE_BUF_SIZE), } } - - /// Returns whether the given path lies within the lower/upper bound of a portion of the target - /// set (presumably obtained via `targets.peek()`. See [`Self::should_retain`] to understand - /// how the targets lower/upper bounds work. - /// - /// This method assumes depth-first ordering. - /// - /// # Returns - /// - /// - [`Ordering::Less`] if `path` is less than the lower bound. - /// - [`Ordering::Equal`] if `path` is greater-or-equal to the lower bound, and less than the - /// upper bound (ie it is in-range). - /// - [`Ordering::Greater`] if `path` is greater-or-equal to the upper bound. 
- #[expect(unused)] - fn cmp_targets(path: &Nibbles, bounds: &(Nibbles, Option)) -> Ordering { - debug_assert!( - bounds - .1 - .as_ref() - .is_none_or(|upper| depth_first::cmp(&bounds.0, upper) != Ordering::Greater), - "lower bound {:?} is greater than upper bound {:?} (depth-first)", - bounds.0, - bounds.1, - ); - - match bounds { - (lower, _) if depth_first::cmp(path, lower) == Ordering::Less => Ordering::Less, - (_, None) => { - // None indicates no upper-bound. We've already determined that path is >= lower, - // so it must be in-range. - Ordering::Equal - } - (_, Some(upper)) if depth_first::cmp(path, upper) == Ordering::Less => { - // Upper bound is exclusive. If path is less the upper bound and not less than the - // lower bound then it is in-range. - Ordering::Equal - } - (_, _) => Ordering::Greater, - } - } } /// Helper type for the [`Iterator`] used to pass targets in from the caller. @@ -212,34 +172,47 @@ where /// Because paths in the trie are visited in depth-first order, it's imperative that targets are /// given in depth-first order as well. If the targets were generated off of B256s, which is /// the common-case, then this is equivalent to lexicographical order. + /// + /// If `lexicographic` is true then ordering is checked using lexicographic order, not + /// depth-first. This is used when checking cached branch nodes, which we visit in lexicographic + /// order. fn should_retain( &self, targets: &mut TargetsIter>, path: &Nibbles, + lexicographic: bool, ) -> bool { - trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); + let cmp_fn = if lexicographic { std::cmp::Ord::cmp } else { depth_first::cmp }; + + trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), ?lexicographic, "should_retain: called"); debug_assert!(self.retained_proofs.last().is_none_or( |ProofTrieNode { path: last_retained_path, .. 
}| { - depth_first::cmp(path, last_retained_path) == Ordering::Greater + cmp_fn(path, last_retained_path) == Ordering::Greater } ), - "should_retain called with path {path:?} which is not after previously retained node {:?} in depth-first order", + "should_retain called with path {path:?} which is not after previously retained node {:?} (lexicographic order:{lexicographic:?})", self.retained_proofs.last().map(|n| n.path), ); let &(mut lower, mut upper) = targets.peek().expect("targets is never exhausted"); - // If the path isn't in the current range then iterate forward until it is (or until there - // is no upper bound, indicating unbounded). - while upper.is_some_and(|upper| depth_first::cmp(path, &upper) != Ordering::Less) { - targets.next(); - trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); - let &(l, u) = targets.peek().expect("targets is never exhausted"); - (lower, upper) = (l, u); - } + loop { + // If the node in question is a prefix of the target then we retain + if lower.starts_with(path) { + return true + } - // If the node in question is a prefix of the target then we retain - lower.starts_with(path) + // If the path isn't in the current range then iterate forward until it is (or until + // there is no upper bound, indicating unbounded). + if upper.is_some_and(|upper| cmp_fn(path, &upper) != Ordering::Less) { + targets.next(); + trace!(target: TRACE_TARGET, target = ?targets.peek(), ?lexicographic, "upper target <= path, next target"); + let &(l, u) = targets.peek().expect("targets is never exhausted"); + (lower, upper) = (l, u); + } else { + return false + } + } } /// Takes a child which has been removed from the `child_stack` and converts it to an @@ -259,7 +232,7 @@ where } // If we should retain the child then do so. 
- if self.should_retain(targets, &child_path) { + if self.should_retain(targets, &child_path, false) { trace!(target: TRACE_TARGET, ?child_path, "Retaining child"); // Convert to `ProofTrieNode`, which will be what is retained. @@ -346,6 +319,9 @@ where } let child_path = self.last_child_path(); + // TODO theoretically `commit_child` only needs to convert to an `RlpNode` if it's going to + // retain the proof, otherwise we could leave the child as-is on the stack and convert it + // when popping the branch, giving more time to the DeferredEncoder to do async work. let child_rlp_node = self.commit_child(targets, child_path, child)?; // Replace the child on the stack @@ -513,10 +489,12 @@ where ); // Collect children into an `RlpNode` Vec by committing and pushing each of them. - for child in self.child_stack.drain(self.child_stack.len() - num_children..) { + for (idx, child) in + self.child_stack.drain(self.child_stack.len() - num_children..).enumerate() + { let ProofTrieBranchChild::RlpNode(child_rlp_node) = child else { panic!( - "all branch child must have been committed, found {}", + "all branch children must have been committed, found {} at index {idx:?}", std::any::type_name_of_val(&child) ); }; @@ -734,16 +712,29 @@ where ); // TODO might be possible to move this out of the loop? + // + // TODO A lot of calculations start with something like: + // ``` + // Returning key range to calculate in order to catch up to cached branch lower=Nibbles(0x) upper=Some(Nibbles(0x0)) + // ``` + // ...which is not strictly necessary, if 0x0 is a branch then there can't be children + // prior. + // // The current hashed key indicates the first key after the previous uncached range, // or None if this is the first call to this method. If the key is not caught up to // this cached branch it means there are portions of the trie prior to this branch // which need to be computed; return the range up to this branch to make that happen. 
if hashed_key_current.is_none_or(|k| k < &cached_path) { - return Ok(( - // If this is the first call to this method then start computation from zero - hashed_key_current.copied().unwrap_or_else(Nibbles::new), - Some(cached_path), - )); + // If this is the first call to this method then start computation from zero + let lower = hashed_key_current.copied().unwrap_or_else(Nibbles::new); + let upper = Some(cached_path); + trace!( + target: TRACE_TARGET, + ?lower, + ?upper, + "Returning key range to calculate in order to catch up to cached branch", + ); + return Ok((lower, upper)); } // All trie data prior to this cached branch has been computed. Any branches which were @@ -766,10 +757,22 @@ where // top branch is the parent of this cached branch. Either way we push a branch // corresponding to the cached one onto the stack, so we can begin constructing it. if self.branch_path != cached_path { + // `commit_last_child` relies on the last set bit of the parent branch's + // `state_mask` to determine the path of the last child on the `child_stack`. Since + // we are about to change that mask we need to commit that last child first. if !self.child_stack.is_empty() { self.commit_last_child(targets)?; } + // When pushing a new branch we need to set its child nibble in the `state_mask` of + // its parent, if there is one. + if let Some(parent_branch) = self.branch_stack.last_mut() { + // We've asserted above that branch_path.len() < cached_path.len(), so this + // `get_unchecked` is safe. + let child_nibble = cached_path.get_unchecked(self.branch_path.len()); + parent_branch.state_mask.set_bit(child_nibble); + } + // The length of the extension will be the difference of the lengths of the cached // branch and its parent if any. let ext_len = @@ -832,28 +835,45 @@ where // // If the child needs to be retained for a proof then we should not use the cached // hash, and instead continue on to calculate its node manually. 
- if cached_branch.hash_mask.is_bit_set(child_nibble) && - !self.should_retain(targets, &child_path) - { - let hash_idx = cached_branch.hash_mask.count_ones() - curr_state_mask.count_ones(); - let hash = cached_branch.hashes[hash_idx as usize]; - - trace!( - target: TRACE_TARGET, - ?child_path, - ?hash_idx, - ?hash, - "Using cached hash for child", - ); - - self.child_stack.push(ProofTrieBranchChild::RlpNode(RlpNode::word_rlp(&hash))); - self.branch_stack - .last_mut() - .expect("already asserted there is a last branch") - .state_mask - .set_bit(child_nibble); + if cached_branch.hash_mask.is_bit_set(child_nibble) { + // Commit the last child. We do this here for two reasons: + // - `commit_last_child` will check if the last child needs to be retained. We need + // to check that before the subsequent `should_retain` call here to prevent + // `targets` from being moved beyond the last child before it is checked. + // - If we do end up using the cached hash value, then we will need to commit the + // last child before pushing a new one onto the stack anyway. + if !self.child_stack.is_empty() { + self.commit_last_child(targets)?; + } - continue + if !self.should_retain(targets, &child_path, false) { + // Pull this child's hash out of the cached branch node. To get the hash's index + // we first need to calculate which cached hash's have already been used by this + // branch (if any), and subtract that count from the count of total cached + // hashes. 
+ let curr_hashed_used_mask = cached_branch.hash_mask.get() & curr_state_mask; + let hash_idx = cached_branch.hash_mask.count_ones() - + curr_hashed_used_mask.count_ones() - + 1; + let hash = cached_branch.hashes[hash_idx as usize]; + + trace!( + target: TRACE_TARGET, + ?child_path, + ?hash_idx, + ?hash, + "Using cached hash for child", + ); + + self.child_stack.push(ProofTrieBranchChild::RlpNode(RlpNode::word_rlp(&hash))); + self.branch_stack + .last_mut() + .expect("already asserted there is a last branch") + .state_mask + .set_bit(child_nibble); + + continue + } } // We now want to check if there is a cached branch node at this child. The cached @@ -890,7 +910,7 @@ where target: TRACE_TARGET, lower=?child_path, upper=?child_path_upper, - "Returning key range to calculate", + "Returning sub-trie's key range to calculate", ); return Ok((child_path, child_path_upper)); } @@ -1359,7 +1379,9 @@ mod tests { fn proptest_proof_with_targets( (post_state, targets) in hashed_post_state_strategy() .prop_flat_map(|post_state| { - let account_keys: Vec = post_state.accounts.keys().copied().collect(); + let mut account_keys: Vec = post_state.accounts.keys().copied().collect(); + // Sort to ensure deterministic order when using PROPTEST_RNG_SEED + account_keys.sort_unstable(); let targets_strategy = proof_targets_strategy(account_keys); (Just(post_state), targets_strategy) }) From 34ffc5fbf705bca3a38490001d43cfeee8dc1c10 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 2 Dec 2025 16:14:15 +0100 Subject: [PATCH 27/59] WIP: closer! 
--- crates/trie/trie/src/proof_v2/mod.rs | 281 +++++++++++++++++---------- 1 file changed, 180 insertions(+), 101 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index a1cb4529a8b..a1a6898ac83 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -31,6 +31,17 @@ static TRACE_TARGET: &str = "trie::proof_v2"; /// Number of bytes to pre-allocate for [`ProofCalculator`]'s `rlp_encode_buf` field. const RLP_ENCODE_BUF_SIZE: usize = 1024; +/// A [`Nibbles`] which contains 64 zero nibbles. +static PATH_ALL_ZEROS: Nibbles = { + let mut path = Nibbles::new(); + let mut i = 0; + while i < 64 { + path.push_unchecked(0); + i += 1; + } + path +}; + /// A proof calculator that generates merkle proofs using only leaf data. /// /// The calculator: @@ -344,8 +355,7 @@ where leaf_val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { // Before pushing the new leaf onto the `child_stack` we need to commit the previous last - // child (ie the first child of this new branch), so that only `child_stack`'s final child - // is a non-RlpNode. + // child, so that only `child_stack`'s final child is a non-RlpNode. self.commit_last_child(targets)?; // Once the first child is committed we set the new child's bit on the top branch's @@ -361,37 +371,25 @@ where Ok(()) } - /// Pushes a new branch onto the `branch_stack`, while also pushing the given leaf onto the - /// `child_stack`. + /// Pushes a new branch onto the `branch_stack` based on the path and short key of the last + /// child on the `child_stack` and the path of the next child which will be pushed on to the + /// stack after this call. /// - /// This method expects that there already exists a child on the `child_stack`, and that that - /// child has a non-zero short key. The new branch is constructed based on the top child from - /// the `child_stack` and the given leaf. 
- fn push_new_branch( - &mut self, - targets: &mut TargetsIter>, - leaf_key: Nibbles, - leaf_val: VE::DeferredEncoder, - ) -> Result<(), StateProofError> { - // First determine the new leaf's shortkey relative to the current branch. If there is no - // current branch then the short key is the full key. - let leaf_short_key = if self.branch_stack.is_empty() { - leaf_key + /// Returns the nibble of the branch's `state_mask` which should be set for the new child, and + /// short key that the next child should use. + fn push_new_branch(&mut self, new_child_path: Nibbles) -> (u8, Nibbles) { + // First determine the new child's shortkey relative to the current branch. If there is no + // current branch then the short key is the full path. + let new_child_short_key = if self.branch_stack.is_empty() { + new_child_path } else { // When there is a current branch then trim off its path as well as the nibble that it // has set for this leaf. - trim_nibbles_prefix(&leaf_key, self.branch_path.len() + 1) + trim_nibbles_prefix(&new_child_path, self.branch_path.len() + 1) }; - trace!( - target: TRACE_TARGET, - ?leaf_short_key, - branch_path = ?self.branch_path, - "push_new_branch: called", - ); - // Get the new branch's first child, which is the child on the top of the stack with which - // the new leaf shares the same nibble on the current branch. + // the new child shares the same nibble on the current branch. let first_child = self .child_stack .last_mut() @@ -404,8 +402,8 @@ where ); // Determine how many nibbles are shared between the new branch's first child and the new - // leaf. This common prefix will be the extension of the new branch - let common_prefix_len = first_child_short_key.common_prefix_length(&leaf_short_key); + // child. 
This common prefix will be the extension of the new branch + let common_prefix_len = first_child_short_key.common_prefix_length(&new_child_short_key); // Trim off the common prefix from the first child's short key, plus one nibble which will // stored by the new branch itself in its state mask. @@ -413,23 +411,24 @@ where first_child.trim_short_key_prefix(common_prefix_len + 1); // Similarly, trim off the common prefix, plus one nibble for the new branch, from the new - // leaf's short key. - let leaf_nibble = leaf_short_key.get_unchecked(common_prefix_len); - let leaf_short_key = trim_nibbles_prefix(&leaf_short_key, common_prefix_len + 1); + // child's short key. + let new_child_nibble = new_child_short_key.get_unchecked(common_prefix_len); + let new_child_short_key = trim_nibbles_prefix(&new_child_short_key, common_prefix_len + 1); // Update the branch path to reflect the new branch about to be pushed. Its path will be // the path of the previous branch, plus the nibble shared by each child, plus the parent // extension (denoted by a non-zero `ext_len`). Since the new branch's path is a prefix of - // the original leaf_key we can just slice that. + // the original new_child_path we can just slice that. // - // If the branch is the first branch then we do not add the extra 1, as there is no nibble - // in a parent branch to account for. + // If the new branch is the first branch then we do not add the extra 1, as there is no + // nibble in a parent branch to account for. let branch_path_len = self.branch_path.len() + common_prefix_len + self.maybe_parent_nibble(); - self.branch_path = leaf_key.slice_unchecked(0, branch_path_len); + self.branch_path = new_child_path.slice_unchecked(0, branch_path_len); - // Push the new branch onto the branch stack. We do not yet set the `state_mask` bit of the - // new leaf; `push_new_leaf` will do that. + // Push the new branch onto the `branch_stack`. 
We do not yet set the `state_mask` bit of + // the new child; whatever actually pushes the child onto the `child_stack` is expected to + // do that. self.branch_stack.push(ProofTrieBranch { ext_len: common_prefix_len as u8, state_mask: TrieMask::new(1 << first_child_nibble), @@ -437,22 +436,18 @@ where hash_mask: TrieMask::default(), }); - // Push the new leaf onto the new branch. This step depends on the top branch being in the - // correct state, so must be done last. - self.push_new_leaf(targets, leaf_nibble, leaf_short_key, leaf_val)?; - trace!( target: TRACE_TARGET, - ?leaf_short_key, + ?new_child_path, ?common_prefix_len, - new_branch = ?self.branch_stack.last().expect("branch_stack was just pushed to"), - ?branch_path_len, + ?first_child_nibble, branch_path = ?self.branch_path, - "push_new_branch: returning", + "Pushed new branch", ); - Ok(()) + (new_child_nibble, new_child_short_key) } + /// Pops the top branch off of the `branch_stack`, hashes its children on the `child_stack`, and /// replaces those children on the `child_stack`. The `branch_path` field will be updated /// accordingly. @@ -577,7 +572,8 @@ where .expect("already checked for emptiness") .short_key() .is_empty()); - self.push_new_branch(targets, key, val)?; + let (nibble, short_key) = self.push_new_branch(key); + self.push_new_leaf(targets, nibble, short_key, val)?; return Ok(()) } }; @@ -600,8 +596,11 @@ where // existing child. let nibble = key.get_unchecked(common_prefix_len); if curr_branch_state_mask.is_bit_set(nibble) { - // This method will also push the new leaf onto the `child_stack`. - self.push_new_branch(targets, key, val)?; + // Push a new branch which splits the short key of the existing child at this + // nibble. + let (nibble, short_key) = self.push_new_branch(key); + // Push the new leaf onto the new branch. 
+ self.push_new_leaf(targets, nibble, short_key, val)?; } else { let short_key = key.slice_unchecked(common_prefix_len + 1, key.len()); self.push_new_leaf(targets, nibble, short_key, val)?; @@ -662,6 +661,106 @@ where Ok(()) } + /// Constructs and returns a new [`ProofTrieBranch`] based on an existing [`BranchNodeCompact`]. + #[inline] + const fn new_from_cached_branch( + cached_branch: &BranchNodeCompact, + ext_len: u8, + ) -> ProofTrieBranch { + ProofTrieBranch { + ext_len, + state_mask: TrieMask::new(0), + tree_mask: cached_branch.tree_mask, + hash_mask: cached_branch.hash_mask, + } + } + + // TODO docs + fn push_new_cached_branch( + &mut self, + targets: &mut TargetsIter>, + cached_branch: &BranchNodeCompact, + cached_branch_path: Nibbles, + ) -> Result<(), StateProofError> { + debug_assert!( + cached_branch_path.starts_with(&self.branch_path), + "push_new_cached_branch called with path {cached_branch_path:?} which is not a child of current branch {:?}", + self.branch_path, + ); + + let parent_branch = self.branch_stack.last(); + + // If both stacks are empty then there were no leaves before this cached branch, push it and + // be done; the extension of the branch will be its full path. + if self.child_stack.is_empty() && parent_branch.is_none() { + self.branch_path = cached_branch_path; + self.branch_stack + .push(Self::new_from_cached_branch(cached_branch, cached_branch_path.len() as u8)); + return Ok(()) + } + + // Get the nibble which should be set in the parent branch's `state_mask` for this new + // branch. + let cached_branch_nibble = cached_branch_path.get_unchecked(self.branch_path.len()); + + // We calculate the `ext_len` of the new branch, and potentially update its nibble if a new + // parent branch is inserted here, based on the state of the parent branch. 
+ let (cached_branch_nibble, ext_len) = if parent_branch + .is_none_or(|parent_branch| parent_branch.state_mask.is_bit_set(cached_branch_nibble)) + { + // If the `child_stack` is not empty but the `branch_stack` is then it implies that + // there must be a leaf or extension at the root of the trie whose short-key will get + // split by a new branch, which will become the parent of both that leaf/extension and + // this new branch. + // + // Similarly, if there is a branch on the `branch_stack` but its `state_mask` bit for + // this new branch is already set, then there must be a leaf/extension with a short-key + // to be split. + debug_assert_eq!(self.child_stack.len(), 1); + debug_assert!(!self + .child_stack + .last() + .expect("already checked for emptiness") + .short_key() + .is_empty()); + + // Split that leaf/extension's short key with a new branch. + let (nibble, short_key) = self.push_new_branch(cached_branch_path); + (nibble, short_key.len()) + } else { + // If there is a parent branch but its `state_mask` bit for this branch is not set + // then we can simply calculate the `ext_len` based on the difference of each, minus + // 1 to account for the nibble in the `state_mask`. + (cached_branch_nibble, cached_branch_path.len() - self.branch_path.len() - 1) + }; + + // `commit_last_child` relies on the last set bit of the parent branch's `state_mask` to + // determine the path of the last child on the `child_stack`. Since we are about to + // change that mask we need to commit that last child first. + self.commit_last_child(targets)?; + + // When pushing a new branch we need to set its child nibble in the `state_mask` of + // its parent, if there is one. + if let Some(parent_branch) = self.branch_stack.last_mut() { + // We've asserted above that branch_path.len() < cached_path.len(), so this + // `get_unchecked` is safe. + parent_branch.state_mask.set_bit(cached_branch_nibble); + } + + // Finally update the `branch_path` and push the new branch. 
+ self.branch_path = cached_branch_path; + self.branch_stack.push(Self::new_from_cached_branch(cached_branch, ext_len as u8)); + + trace!( + target: TRACE_TARGET, + branch=?self.branch_stack.last(), + branch_path=?self.branch_path, + "Pushed cached branch", + ); + + Ok(()) + } + // TODO docs // TODO re-evaluate how next_cached_branch works... might be possible to not always call next // when taking it. @@ -671,7 +770,14 @@ where targets: &mut TargetsIter>, next_cached_branch: &mut Option<(Nibbles, BranchNodeCompact)>, hashed_key_current: Option<&Nibbles>, - ) -> Result<(Nibbles, Option), StateProofError> { + ) -> Result)>, StateProofError> { + // `lower_bound` will be used to track the lower bound of the range which is returned from + // this method. If this is None then there are no further keys which need to be processed. + // + // This starts off being based off of the hashed cursor's current position, which is the + // next key which hasn't been processed. If that is None then we start from zero. + let mut lower_bound = Some(hashed_key_current.copied().unwrap_or_else(Nibbles::new)); + loop { // TODO might be possible to move this out of the loop? // Determine the current cached branch node. @@ -696,7 +802,7 @@ where // If both stack and cursor are empty then there are no more cached nodes, // return an open range to indicate that the rest of the trie should be // calculated solely from leaves. - return Ok((hashed_key_current.copied().unwrap_or_else(Nibbles::new), None)); + return Ok(lower_bound.map(|lower| (lower, None))); } }; @@ -713,28 +819,22 @@ where // TODO might be possible to move this out of the loop? // - // TODO A lot of calculations start with something like: - // ``` - // Returning key range to calculate in order to catch up to cached branch lower=Nibbles(0x) upper=Some(Nibbles(0x0)) - // ``` - // ...which is not strictly necessary, if 0x0 is a branch then there can't be children - // prior. 
- // // The current hashed key indicates the first key after the previous uncached range, // or None if this is the first call to this method. If the key is not caught up to // this cached branch it means there are portions of the trie prior to this branch // which need to be computed; return the range up to this branch to make that happen. - if hashed_key_current.is_none_or(|k| k < &cached_path) { - // If this is the first call to this method then start computation from zero - let lower = hashed_key_current.copied().unwrap_or_else(Nibbles::new); - let upper = Some(cached_path); + // + // TODO update docs + if hashed_key_current.is_none_or(|k| k < &cached_path) && + !PATH_ALL_ZEROS.starts_with(&cached_path) + { + let range = lower_bound.map(|lower| (lower, Some(cached_path))); trace!( target: TRACE_TARGET, - ?lower, - ?upper, + ?range, "Returning key range to calculate in order to catch up to cached branch", ); - return Ok((lower, upper)); + return Ok(range); } // All trie data prior to this cached branch has been computed. Any branches which were @@ -757,39 +857,7 @@ where // top branch is the parent of this cached branch. Either way we push a branch // corresponding to the cached one onto the stack, so we can begin constructing it. if self.branch_path != cached_path { - // `commit_last_child` relies on the last set bit of the parent branch's - // `state_mask` to determine the path of the last child on the `child_stack`. Since - // we are about to change that mask we need to commit that last child first. - if !self.child_stack.is_empty() { - self.commit_last_child(targets)?; - } - - // When pushing a new branch we need to set its child nibble in the `state_mask` of - // its parent, if there is one. - if let Some(parent_branch) = self.branch_stack.last_mut() { - // We've asserted above that branch_path.len() < cached_path.len(), so this - // `get_unchecked` is safe. 
- let child_nibble = cached_path.get_unchecked(self.branch_path.len()); - parent_branch.state_mask.set_bit(child_nibble); - } - - // The length of the extension will be the difference of the lengths of the cached - // branch and its parent if any. - let ext_len = - (cached_path.len() - self.branch_path.len() - self.maybe_parent_nibble()) as u8; - self.branch_stack.push(ProofTrieBranch { - ext_len, - state_mask: TrieMask::new(0), - tree_mask: cached_branch.tree_mask, - hash_mask: cached_branch.hash_mask, - }); - self.branch_path = cached_path; - trace!( - target: TRACE_TARGET, - branch=?self.branch_stack.last(), - branch_path=?self.branch_path, - "Pushed cached branch", - ); + self.push_new_cached_branch(targets, &cached_branch, cached_path)?; } // At this point the top of the branch stack is the same branch which was found in the @@ -821,6 +889,12 @@ where ); self.cached_branch_stack.pop(); self.pop_branch(targets)?; + + // The just-popped branch is completely processed; we know there can be no more keys + // with that prefix. Set the lower bound which can be returned from this method to + // be the next possible prefix, if any. + lower_bound = cached_path.increment(); + continue } @@ -912,7 +986,7 @@ where upper=?child_path_upper, "Returning sub-trie's key range to calculate", ); - return Ok((child_path, child_path_upper)); + return Ok(Some((child_path, child_path_upper))); } } @@ -979,11 +1053,16 @@ where loop { // Determine the range of keys of the overall trie which need to be re-computed. - let (lower_bound, upper_bound) = self.next_uncached_key_range( + let Some((lower_bound, upper_bound)) = self.next_uncached_key_range( &mut targets, &mut next_cached_branch, hashed_cursor_current.as_ref().map(|kv| &kv.0), - )?; + )? + else { + // If `next_uncached_key_range` determines that there can be no more keys then + // complete the computation. 
+ break; + }; // Calculate the trie for that range of keys self.calculate_key_range( From 82c150925b90bb8da532b7381feb22b644512be2 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 2 Dec 2025 17:08:53 +0100 Subject: [PATCH 28/59] WIP: IT WORKS --- crates/trie/trie/src/proof_v2/mod.rs | 114 ++++++++++++++++---------- crates/trie/trie/src/proof_v2/node.rs | 26 +++--- 2 files changed, 81 insertions(+), 59 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index a1a6898ac83..ecfc20f6fbd 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -432,8 +432,7 @@ where self.branch_stack.push(ProofTrieBranch { ext_len: common_prefix_len as u8, state_mask: TrieMask::new(1 << first_child_nibble), - tree_mask: TrieMask::default(), - hash_mask: TrieMask::default(), + masks: TrieMasks::none(), }); trace!( @@ -510,8 +509,10 @@ where ); // Wrap the `BranchNode` so it can be pushed onto the child stack. - let mut branch_as_child = - ProofTrieBranchChild::Branch(BranchNode::new(rlp_nodes_buf, branch.state_mask)); + let mut branch_as_child = ProofTrieBranchChild::Branch { + node: BranchNode::new(rlp_nodes_buf, branch.state_mask), + masks: branch.masks, + }; // If there is an extension then encode the branch as an `RlpNode` and use it to construct // the extension in its place @@ -670,8 +671,10 @@ where ProofTrieBranch { ext_len, state_mask: TrieMask::new(0), - tree_mask: cached_branch.tree_mask, - hash_mask: cached_branch.hash_mask, + masks: TrieMasks { + tree_mask: Some(cached_branch.tree_mask), + hash_mask: Some(cached_branch.hash_mask), + }, } } @@ -716,7 +719,6 @@ where // Similarly, if there is a branch on the `branch_stack` but its `state_mask` bit for // this new branch is already set, then there must be a leaf/extension with a short-key // to be split. 
- debug_assert_eq!(self.child_stack.len(), 1); debug_assert!(!self .child_stack .last() @@ -776,39 +778,58 @@ where // // This starts off being based off of the hashed cursor's current position, which is the // next key which hasn't been processed. If that is None then we start from zero. + // + // TODO better name and docs let mut lower_bound = Some(hashed_key_current.copied().unwrap_or_else(Nibbles::new)); loop { // TODO might be possible to move this out of the loop? // Determine the current cached branch node. // Note: Cloning the `cached_branch` is cheap because it uses an Arc. - let (cached_path, cached_branch) = - match (self.cached_branch_stack.last(), &next_cached_branch) { - (Some(cached), _) => { - // If the `cached_branch_stack` is not empty then its last is the current - cached.clone() - } - (_, Some(_)) => { - // If `cached_branch_stack` is empty but there is an unconsumed cached - // branch from the cursor then we consume that branch, pushing it onto the - // stack. - let cached = core::mem::take(next_cached_branch).expect("is some"); - *next_cached_branch = self.trie_cursor.next()?; - self.cached_branch_stack.push(cached.clone()); - cached - } - (None, None) => { - trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); - // If both stack and cursor are empty then there are no more cached nodes, - // return an open range to indicate that the rest of the trie should be - // calculated solely from leaves. - return Ok(lower_bound.map(|lower| (lower, None))); - } - }; + let (cached_path, cached_branch) = match ( + self.cached_branch_stack.last(), + &next_cached_branch, + lower_bound.as_ref(), + ) { + (None, None, _) | (_, _, None) => { + trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); + // If both stack and cursor are empty then there are no more cached nodes, + // return an open range to indicate that the rest of the trie should be + // calculated solely from leaves. 
+ // + // If the `lower_bound` indicates that there can be no more data then this will + // return None to indicate end of computation. + return Ok(lower_bound.map(|lower| (lower, None))); + } + (Some(cached), _, _) => { + // If the `cached_branch_stack` is not empty then its last is the current + cached.clone() + } + (_, Some((next_cached_path, _)), Some(lower_bound)) + if next_cached_path < lower_bound => + { + // If `cached_branch_stack` is empty but there is an unconsumed cached branch, + // we would want to use that. However if that cached branch belongs to a range + // which has already been processed then we can't use it, instead we seek + // forward and try again. + *next_cached_branch = self.trie_cursor.seek(*lower_bound)?; + continue + } + (_, Some(_), _) => { + // If `cached_branch_stack` is empty but there is an unconsumed cached + // branch from the cursor then we consume that branch, pushing it onto the + // stack. + let cached = core::mem::take(next_cached_branch).expect("is some"); + *next_cached_branch = self.trie_cursor.next()?; + self.cached_branch_stack.push(cached.clone()); + trace!(target: TRACE_TARGET, ?cached, "Pushed next trie node onto cached_branch_stack"); + cached + } + }; trace!( target: TRACE_TARGET, - ?hashed_key_current, + ?lower_bound, branch_path = ?self.branch_path, branch_state_mask = ?self.branch_stack.last().map(|b| b.state_mask), ?cached_path, @@ -922,14 +943,12 @@ where if !self.should_retain(targets, &child_path, false) { // Pull this child's hash out of the cached branch node. To get the hash's index - // we first need to calculate which cached hash's have already been used by this - // branch (if any), and subtract that count from the count of total cached - // hashes. + // we first need to calculate the mask of which cached hash's have already been + // used by this branch (if any). The number of set bits in that mask will be the + // index of the next hash in the array to use. 
let curr_hashed_used_mask = cached_branch.hash_mask.get() & curr_state_mask; - let hash_idx = cached_branch.hash_mask.count_ones() - - curr_hashed_used_mask.count_ones() - - 1; - let hash = cached_branch.hashes[hash_idx as usize]; + let hash_idx = curr_hashed_used_mask.count_ones() as usize; + let hash = cached_branch.hashes[hash_idx]; trace!( target: TRACE_TARGET, @@ -1338,6 +1357,7 @@ mod tests { // Call Proof::multiproof (legacy implementation) let proof_legacy_result = Proof::new(self.trie_cursor_factory.clone(), self.hashed_cursor_factory.clone()) + .with_branch_node_masks(true) .multiproof(legacy_targets)?; // Decode and sort legacy proof nodes @@ -1349,10 +1369,12 @@ mod tests { let node = TrieNode::decode(&mut buf) .expect("legacy implementation should not produce malformed proof nodes"); - ProofTrieNode { - path: *path, - node, - masks: TrieMasks { + // The legacy proof calculator will calculate masks for the root node, even + // though we never store the root node so the masks for it aren't really valid. 
+ let masks = if path.is_empty() { + TrieMasks::none() + } else { + TrieMasks { hash_mask: proof_legacy_result .branch_node_hash_masks .get(path) @@ -1361,8 +1383,10 @@ mod tests { .branch_node_tree_masks .get(path) .copied(), - }, - } + } + }; + + ProofTrieNode { path: *path, node, masks } }) .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) .collect::>(); @@ -1379,7 +1403,7 @@ mod tests { } // Basic comparison: both should succeed and produce identical results - assert_eq!(proof_legacy_nodes, proof_v2_result); + pretty_assertions::assert_eq!(proof_legacy_nodes, proof_v2_result); Ok(()) } diff --git a/crates/trie/trie/src/proof_v2/node.rs b/crates/trie/trie/src/proof_v2/node.rs index 536665f19ae..9300123fbe3 100644 --- a/crates/trie/trie/src/proof_v2/node.rs +++ b/crates/trie/trie/src/proof_v2/node.rs @@ -25,7 +25,12 @@ pub(crate) enum ProofTrieBranchChild { child: RlpNode, }, /// A branch node whose children have already been flattened into [`RlpNode`]s. - Branch(BranchNode), + Branch { + /// The node itself, for use during RLP encoding. + node: BranchNode, + /// Bitmasks carried over from cached `BranchNodeCompact` values, if any. + masks: TrieMasks, + }, /// A node whose type is not known, as it has already been converted to an [`RlpNode`]. RlpNode(RlpNode), } @@ -64,7 +69,7 @@ impl ProofTrieBranchChild { ExtensionNodeRef::new(&short_key, child.as_slice()).encode(buf); Ok((RlpNode::from_rlp(buf), None)) } - Self::Branch(branch_node) => { + Self::Branch { node: branch_node, .. 
} => { branch_node.encode(buf); Ok((RlpNode::from_rlp(buf), Some(branch_node.stack))) } @@ -98,8 +103,7 @@ impl ProofTrieBranchChild { Self::Extension { short_key, child } => { (TrieNode::Extension(ExtensionNode { key: short_key, child }), TrieMasks::none()) } - // TODO store trie masks on branch - Self::Branch(branch_node) => (TrieNode::Branch(branch_node), TrieMasks::none()), + Self::Branch { node, masks } => (TrieNode::Branch(node), masks), Self::RlpNode(_) => panic!("Cannot call `into_proof_trie_node` on RlpNode"), }; @@ -111,7 +115,7 @@ impl ProofTrieBranchChild { pub(crate) fn short_key(&self) -> &Nibbles { match self { Self::Leaf { short_key, .. } | Self::Extension { short_key, .. } => short_key, - Self::Branch(_) | Self::RlpNode(_) => { + Self::Branch { .. } | Self::RlpNode(_) => { static EMPTY_NIBBLES: Nibbles = Nibbles::new(); &EMPTY_NIBBLES } @@ -136,7 +140,7 @@ impl ProofTrieBranchChild { Self::Leaf { short_key, .. } | Self::Extension { short_key, .. } => { *short_key = trim_nibbles_prefix(short_key, len); } - Self::Branch(_) | Self::RlpNode(_) => { + Self::Branch { .. } | Self::RlpNode(_) => { panic!("Cannot call `trim_short_key_prefix` on Branch or RlpNode") } } @@ -153,14 +157,8 @@ pub(crate) struct ProofTrieBranch { /// A mask tracking which child nibbles are set on the branch so far. There will be a single /// child on the stack for each set bit. pub(crate) state_mask: TrieMask, - /// A subset of `state_mask`. Each bit is set if the `state_mask` bit is set and: - /// - The child is a branch which is stored in the DB. - /// - The child is an extension whose child branch is stored in the DB. - #[expect(unused)] - pub(crate) tree_mask: TrieMask, - /// A subset of `state_mask`. Each bit is set if the hash for the child is cached in the DB. - #[expect(unused)] - pub(crate) hash_mask: TrieMask, + /// Bitmasks which are subsets of `state_mask`. + pub(crate) masks: TrieMasks, } /// Trims the first `len` nibbles from the head of the given `Nibbles`. 
From 7af1f44b0ab65b0137182f453c4c519d3ff8df1d Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 2 Dec 2025 18:16:09 +0100 Subject: [PATCH 29/59] quick fix --- crates/trie/trie/src/proof_v2/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index ecfc20f6fbd..59cd58e0832 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -278,6 +278,7 @@ where /// Returns the path of the child of the currently under-construction branch at the given /// nibble. + #[inline] fn child_path_at(&self, nibble: u8) -> Nibbles { let mut child_path = self.branch_path; debug_assert!(child_path.len() < 64); @@ -965,6 +966,10 @@ where .state_mask .set_bit(child_nibble); + // Update the `lower_bound` to indicate that the child whose bit was just set is + // completely processed. + lower_bound = self.child_path_at(child_nibble).increment(); + continue } } @@ -1467,7 +1472,7 @@ mod tests { } proptest! { - #![proptest_config(ProptestConfig::with_cases(8000))] + #![proptest_config(ProptestConfig::with_cases(80000))] /// Tests that ProofCalculator produces valid proofs for randomly generated /// HashedPostState with proof targets. From 97ef69717df95912700d4fec5456deb0d0522b28 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 2 Dec 2025 18:37:47 +0100 Subject: [PATCH 30/59] reduce proptest iterations --- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 59cd58e0832..0962cf1d7a7 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -1472,7 +1472,7 @@ mod tests { } proptest! 
{ - #![proptest_config(ProptestConfig::with_cases(80000))] + #![proptest_config(ProptestConfig::with_cases(8000))] /// Tests that ProofCalculator produces valid proofs for randomly generated /// HashedPostState with proof targets. From d70738265833f6d403daa6dd941e96779f02a142 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 15:38:10 +0100 Subject: [PATCH 31/59] Simplify trie cursor usage --- crates/trie/trie/src/proof_v2/mod.rs | 133 +++++++++++++++++---------- 1 file changed, 84 insertions(+), 49 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 0962cf1d7a7..afb681cfdce 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -42,6 +42,47 @@ static PATH_ALL_ZEROS: Nibbles = { path }; +/// Used to track the state of the trie cursor, allowing us to differentiate a node having been +/// taken from the cursor having been exhausted. +#[derive(Debug)] +enum TrieCursorState { + /// Cursor is seeked to this path and the node has not been used yet. + Available(Nibbles, BranchNodeCompact), + /// Cursor is seeked to this path, but the node has been used. + Taken(Nibbles), + /// Cursor has been exhausted. + Exhausted, +} + +impl TrieCursorState { + /// Creates a [`Self`] based on an entry returned from the cursor itself. + fn new(entry: Option<(Nibbles, BranchNodeCompact)>) -> Self { + entry.map_or(Self::Exhausted, |(path, node)| Self::Available(path, node)) + } + + /// Returns the path the cursor is seeked to, or None if it's exhausted. + const fn path(&self) -> Option<&Nibbles> { + match self { + Self::Available(path, _) | Self::Taken(path) => Some(path), + Self::Exhausted => None, + } + } + + /// Takes the path and node from a [`Self::Available`]. Panics if not [`Self::Available`]. 
+ fn take(&mut self) -> (Nibbles, BranchNodeCompact) { + let Self::Available(path, _) = self else { + panic!("take called on non-Available: {self:?}") + }; + + let path = *path; + let Self::Available(path, node) = core::mem::replace(self, Self::Taken(path)) else { + unreachable!("already checked that self is Self::Available"); + }; + + (path, node) + } +} + /// A proof calculator that generates merkle proofs using only leaf data. /// /// The calculator: @@ -771,7 +812,7 @@ where fn next_uncached_key_range( &mut self, targets: &mut TargetsIter>, - next_cached_branch: &mut Option<(Nibbles, BranchNodeCompact)>, + trie_cursor_state: &mut TrieCursorState, hashed_key_current: Option<&Nibbles>, ) -> Result)>, StateProofError> { // `lower_bound` will be used to track the lower bound of the range which is returned from @@ -786,15 +827,19 @@ where loop { // TODO might be possible to move this out of the loop? // Determine the current cached branch node. - // Note: Cloning the `cached_branch` is cheap because it uses an Arc. + // Note: cloning is cheap because BranchNodeCompact uses Arcs. let (cached_path, cached_branch) = match ( self.cached_branch_stack.last(), - &next_cached_branch, + &trie_cursor_state, lower_bound.as_ref(), ) { - (None, None, _) | (_, _, None) => { + (Some(cached), _, _) => { + // If the `cached_branch_stack` is not empty then its last is the current + cached.clone() + } + (None, TrieCursorState::Exhausted, _) | (_, _, None) => { trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); - // If both stack and cursor are empty then there are no more cached nodes, + // If both stack and trie cursor are empty then there are no more cached nodes, // return an open range to indicate that the rest of the trie should be // calculated solely from leaves. // @@ -802,29 +847,25 @@ where // return None to indicate end of computation. 
return Ok(lower_bound.map(|lower| (lower, None))); } - (Some(cached), _, _) => { - // If the `cached_branch_stack` is not empty then its last is the current - cached.clone() - } - (_, Some((next_cached_path, _)), Some(lower_bound)) - if next_cached_path < lower_bound => + (_, cursor_state, Some(lower_bound)) + if cursor_state.path().expect("not exhausted") < lower_bound => { - // If `cached_branch_stack` is empty but there is an unconsumed cached branch, - // we would want to use that. However if that cached branch belongs to a range - // which has already been processed then we can't use it, instead we seek - // forward and try again. - *next_cached_branch = self.trie_cursor.seek(*lower_bound)?; + // If `cached_branch_stack` is empty then we want to get a new cached branch + // node from the cursor. If the trie cursor is seeked to a branch which has + // already been processed then we can't use it, instead we seek forward and try + // again. + *trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(*lower_bound)?); continue } - (_, Some(_), _) => { - // If `cached_branch_stack` is empty but there is an unconsumed cached - // branch from the cursor then we consume that branch, pushing it onto the - // stack. - let cached = core::mem::take(next_cached_branch).expect("is some"); - *next_cached_branch = self.trie_cursor.next()?; - self.cached_branch_stack.push(cached.clone()); - trace!(target: TRACE_TARGET, ?cached, "Pushed next trie node onto cached_branch_stack"); - cached + (_, TrieCursorState::Taken(path), _) => { + panic!("trie cursor at {path:?} had its node taken, but lower_bound {lower_bound:?} is still lte"); + } + (_, TrieCursorState::Available(_, _), _) => { + // If `cached_branch_stack` is empty but there is an available cached branch + // from the trie cursor then we consume that branch, pushing it onto the stack. 
+ self.cached_branch_stack.push(trie_cursor_state.take()); + trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); + self.cached_branch_stack.last().expect("just pushed").clone() } }; @@ -968,6 +1009,7 @@ where // Update the `lower_bound` to indicate that the child whose bit was just set is // completely processed. + // TODO redundant call to child_path_at? could just be child_path? lower_bound = self.child_path_at(child_nibble).increment(); continue @@ -978,25 +1020,28 @@ where // branch node may be the node at this child directly, or this child may be an // extension and the cached branch is the child of that extension. - // All trie nodes prior to `child_path` will not be modified further, so we can seek - // the cached cursor to the next cached node at-or-after `child_path`. - if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && - next_cached_path < child_path - { + // All trie nodes prior to `child_path` will not be modified further, so we can seek the + // trie cursor to the next cached node at-or-after `child_path`. + if trie_cursor_state.path().is_some_and(|path| path < &child_path) { trace!(target: TRACE_TARGET, ?child_path, "Seeking trie cursor to child path"); - *next_cached_branch = self.trie_cursor.seek(child_path)?; + *trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(child_path)?); } // If the next cached branch node is a child of `child_path` then we can assume it is // the cached branch for this child. We push it onto the `cached_branch_stack` and loop // back to the top. 
- if let Some(next_cached_path) = next_cached_branch.as_ref().map(|kv| kv.0) && + if let TrieCursorState::Available(next_cached_path, next_cached_branch) = + &trie_cursor_state && next_cached_path.starts_with(&child_path) { - let cached = core::mem::take(next_cached_branch).expect("is some"); - trace!(target: TRACE_TARGET, ?child_path, ?cached, "Pushing cached branch for child"); - *next_cached_branch = self.trie_cursor.next()?; - self.cached_branch_stack.push(cached); + trace!( + target: TRACE_TARGET, + ?child_path, + ?next_cached_path, + ?next_cached_branch, + "Pushing cached branch for child", + ); + self.cached_branch_stack.push(trie_cursor_state.take()); continue; } @@ -1062,24 +1107,14 @@ where // Initialize the hashed cursor to None to indicate it hasn't been seeked yet. let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; - // Initialize the `cached_branch_stack` with the node closest to root. - if let Some(cached_branch) = self.trie_cursor.seek(Nibbles::new())? { - self.cached_branch_stack.push(cached_branch); - } - - // `next_cached_branch` will always be the next _unconsumed_ cached node. If the - // `cached_branch_stack` is empty then the seek in the previous step returned None, - // indicating there are no trie nodes. - let mut next_cached_branch = (!self.cached_branch_stack.is_empty()) - .then(|| self.trie_cursor.next().transpose()) - .flatten() - .transpose()?; + // Initialize the `trie_cursor_state` with the node closest to root. + let mut trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(Nibbles::new())?); loop { // Determine the range of keys of the overall trie which need to be re-computed. let Some((lower_bound, upper_bound)) = self.next_uncached_key_range( &mut targets, - &mut next_cached_branch, + &mut trie_cursor_state, hashed_cursor_current.as_ref().map(|kv| &kv.0), )? 
else { From fb4ed8faabddc118c47184df45dae2365f9ef403 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 16:14:04 +0100 Subject: [PATCH 32/59] Simplifications to next_uncached_key_range loop --- crates/trie/trie/src/proof_v2/mod.rs | 73 ++++++++++++++-------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index afb681cfdce..1d697477a23 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -815,6 +815,16 @@ where trie_cursor_state: &mut TrieCursorState, hashed_key_current: Option<&Nibbles>, ) -> Result)>, StateProofError> { + // All trie data prior to the current cached branch, if any, has been computed. Any branches + // which were under-construction previously, and which are not on the same path as this + // cached branch, can be assumed to be completed; they will not have any further keys added + // to them. + if let Some(cached_path) = self.cached_branch_stack.last().map(|kv| kv.0) { + while !cached_path.starts_with(&self.branch_path) { + self.pop_branch(targets)?; + } + } + // `lower_bound` will be used to track the lower bound of the range which is returned from // this method. If this is None then there are no further keys which need to be processed. // @@ -825,7 +835,6 @@ where let mut lower_bound = Some(hashed_key_current.copied().unwrap_or_else(Nibbles::new)); loop { - // TODO might be possible to move this out of the loop? // Determine the current cached branch node. // Note: cloning is cheap because BranchNodeCompact uses Arcs. let (cached_path, cached_branch) = match ( @@ -847,7 +856,7 @@ where // return None to indicate end of computation. 
return Ok(lower_bound.map(|lower| (lower, None))); } - (_, cursor_state, Some(lower_bound)) + (None, cursor_state, Some(lower_bound)) if cursor_state.path().expect("not exhausted") < lower_bound => { // If `cached_branch_stack` is empty then we want to get a new cached branch @@ -857,21 +866,40 @@ where *trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(*lower_bound)?); continue } - (_, TrieCursorState::Taken(path), _) => { - panic!("trie cursor at {path:?} had its node taken, but lower_bound {lower_bound:?} is still lte"); + (None, TrieCursorState::Taken(path), _) => { + panic!("trie cursor at {path:?} had its node taken, but is >= lower_bound {lower_bound:?}"); } - (_, TrieCursorState::Available(_, _), _) => { + (None, TrieCursorState::Available(_, _), _) => { // If `cached_branch_stack` is empty but there is an available cached branch // from the trie cursor then we consume that branch, pushing it onto the stack. self.cached_branch_stack.push(trie_cursor_state.take()); trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); - self.cached_branch_stack.last().expect("just pushed").clone() + + let (cached_path, cached_branch) = + self.cached_branch_stack.last().expect("just pushed"); + + // The current hashed key indicates the first key after the previous uncached + // range, or None if this is the first call to this method. If the key is not + // caught up to the next cached branch it means there are portions of the trie + // prior to that branch which need to be computed; return the uncomputed range + // up to that branch to make that happen. + // + // If the next next cached branch's path is all zeros then we can skip this + // catch-up step, because there cannot be any keys prior to that range. 
+ if hashed_key_current.is_none_or(|k| k < cached_path) && + !PATH_ALL_ZEROS.starts_with(cached_path) + { + let range = lower_bound.map(|lower| (lower, Some(*cached_path))); + trace!(target: TRACE_TARGET, ?range, "Returning key range to calculate in order to catch up to cached branch"); + return Ok(range); + } + + (*cached_path, cached_branch.clone()) } }; trace!( target: TRACE_TARGET, - ?lower_bound, branch_path = ?self.branch_path, branch_state_mask = ?self.branch_stack.last().map(|b| b.state_mask), ?cached_path, @@ -880,34 +908,6 @@ where "loop", ); - // TODO might be possible to move this out of the loop? - // - // The current hashed key indicates the first key after the previous uncached range, - // or None if this is the first call to this method. If the key is not caught up to - // this cached branch it means there are portions of the trie prior to this branch - // which need to be computed; return the range up to this branch to make that happen. - // - // TODO update docs - if hashed_key_current.is_none_or(|k| k < &cached_path) && - !PATH_ALL_ZEROS.starts_with(&cached_path) - { - let range = lower_bound.map(|lower| (lower, Some(cached_path))); - trace!( - target: TRACE_TARGET, - ?range, - "Returning key range to calculate in order to catch up to cached branch", - ); - return Ok(range); - } - - // All trie data prior to this cached branch has been computed. Any branches which were - // under-construction previously, and which are not on the same path as this cached - // branch, can be assumed to be completed; they will not have any further keys added to - // them. - while !cached_path.starts_with(&self.branch_path) { - self.pop_branch(targets)?; - } - // Since we've popped all branches which don't start with cached_path, branch_path at // this point must be equal to or shorter than cached_path. debug_assert!( @@ -1009,8 +1009,7 @@ where // Update the `lower_bound` to indicate that the child whose bit was just set is // completely processed. 
- // TODO redundant call to child_path_at? could just be child_path? - lower_bound = self.child_path_at(child_nibble).increment(); + lower_bound = child_path.increment(); continue } From aff632a226335c2d0231133f10f96be52362dd3f Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 16:59:13 +0100 Subject: [PATCH 33/59] Reduce Arc clones --- crates/trie/trie/src/proof_v2/mod.rs | 76 +++++++++++++++++++--------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 1d697477a23..f2f9d801c4d 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -720,16 +720,21 @@ where } } - // TODO docs + /// Pushes a new branch onto the `branch_stack` which is based on a cached branch obtained via + /// the trie cursor. + /// + /// If there is already a child at the top branch of `branch_stack` occupying this new branch's + /// nibble then that child will have its short-key split with another new branch, and this + /// cached branch will be a child of that splitting branch. fn push_new_cached_branch( &mut self, targets: &mut TargetsIter>, + cached_path: Nibbles, cached_branch: &BranchNodeCompact, - cached_branch_path: Nibbles, ) -> Result<(), StateProofError> { debug_assert!( - cached_branch_path.starts_with(&self.branch_path), - "push_new_cached_branch called with path {cached_branch_path:?} which is not a child of current branch {:?}", + cached_path.starts_with(&self.branch_path), + "push_new_cached_branch called with path {cached_path:?} which is not a child of current branch {:?}", self.branch_path, ); @@ -738,15 +743,15 @@ where // If both stacks are empty then there were no leaves before this cached branch, push it and // be done; the extension of the branch will be its full path. 
if self.child_stack.is_empty() && parent_branch.is_none() { - self.branch_path = cached_branch_path; + self.branch_path = cached_path; self.branch_stack - .push(Self::new_from_cached_branch(cached_branch, cached_branch_path.len() as u8)); + .push(Self::new_from_cached_branch(cached_branch, cached_path.len() as u8)); return Ok(()) } // Get the nibble which should be set in the parent branch's `state_mask` for this new // branch. - let cached_branch_nibble = cached_branch_path.get_unchecked(self.branch_path.len()); + let cached_branch_nibble = cached_path.get_unchecked(self.branch_path.len()); // We calculate the `ext_len` of the new branch, and potentially update its nibble if a new // parent branch is inserted here, based on the state of the parent branch. @@ -769,13 +774,13 @@ where .is_empty()); // Split that leaf/extension's short key with a new branch. - let (nibble, short_key) = self.push_new_branch(cached_branch_path); + let (nibble, short_key) = self.push_new_branch(cached_path); (nibble, short_key.len()) } else { // If there is a parent branch but its `state_mask` bit for this branch is not set // then we can simply calculate the `ext_len` based on the difference of each, minus // 1 to account for the nibble in the `state_mask`. - (cached_branch_nibble, cached_branch_path.len() - self.branch_path.len() - 1) + (cached_branch_nibble, cached_path.len() - self.branch_path.len() - 1) }; // `commit_last_child` relies on the last set bit of the parent branch's `state_mask` to @@ -792,7 +797,7 @@ where } // Finally update the `branch_path` and push the new branch. - self.branch_path = cached_branch_path; + self.branch_path = cached_path; self.branch_stack.push(Self::new_from_cached_branch(cached_branch, ext_len as u8)); trace!( @@ -805,9 +810,21 @@ where Ok(()) } - // TODO docs - // TODO re-evaluate how next_cached_branch works... might be possible to not always call next - // when taking it. 
+ /// Accepts the current state of both hashed and trie cursors, and determines the next range of + /// hashed keys which need to be processed using [`Self::push_leaf`]. + /// + /// This method will use cached branch node data from the trie cursor to skip over all possible + /// ranges of keys, to reduce computation as much as possible. + /// + /// # Returns + /// + /// - `None`: No more data to process, finish computation + /// + /// - `Some(lower, None)`: Indicates to call `push_leaf` on all keys starting at `lower`, with + /// no upper bound. This method won't be called again after this. + /// + /// - `Some(lower, Some(upper))`: Indicates to call `push_leaf` on all keys starting at `lower`, + /// up to but excluding `upper`, and then call this method once done. #[instrument(target = TRACE_TARGET, level = "trace", skip_all)] fn next_uncached_key_range( &mut self, @@ -836,16 +853,16 @@ where loop { // Determine the current cached branch node. - // Note: cloning is cheap because BranchNodeCompact uses Arcs. + // + // NOTE we pop off the `cached_branch_stack` because cloning the `BranchNodeCompact` + // means cloning an Arc, which incurs synchronization overhead. We have to be sure to + // push the cached branch back onto the stack once done. 
let (cached_path, cached_branch) = match ( - self.cached_branch_stack.last(), + self.cached_branch_stack.pop(), &trie_cursor_state, lower_bound.as_ref(), ) { - (Some(cached), _, _) => { - // If the `cached_branch_stack` is not empty then its last is the current - cached.clone() - } + (Some(cached), _, _) => cached, (None, TrieCursorState::Exhausted, _) | (_, _, None) => { trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); // If both stack and trie cursor are empty then there are no more cached nodes, @@ -875,8 +892,7 @@ where self.cached_branch_stack.push(trie_cursor_state.take()); trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); - let (cached_path, cached_branch) = - self.cached_branch_stack.last().expect("just pushed"); + let (cached_path, _) = self.cached_branch_stack.last().expect("just pushed"); // The current hashed key indicates the first key after the previous uncached // range, or None if this is the first call to this method. If the key is not @@ -894,7 +910,7 @@ where return Ok(range); } - (*cached_path, cached_branch.clone()) + self.cached_branch_stack.pop().expect("just pushed") } }; @@ -920,7 +936,7 @@ where // top branch is the parent of this cached branch. Either way we push a branch // corresponding to the cached one onto the stack, so we can begin constructing it. if self.branch_path != cached_path { - self.push_new_cached_branch(targets, &cached_branch, cached_path)?; + self.push_new_cached_branch(targets, cached_path, &cached_branch)?; } // At this point the top of the branch stack is the same branch which was found in the @@ -950,9 +966,11 @@ where ?cached_branch, "No further children, popping branch", ); - self.cached_branch_stack.pop(); self.pop_branch(targets)?; + // no need to pop from `cached_branch_stack`, the current cached branch is already + // popped (see note at the top of the loop). 
+ // The just-popped branch is completely processed; we know there can be no more keys // with that prefix. Set the lower bound which can be returned from this method to // be the next possible prefix, if any. @@ -1011,6 +1029,9 @@ where // completely processed. lower_bound = child_path.increment(); + // Push the current cached branch back onto the stack before looping. + self.cached_branch_stack.push((cached_path, cached_branch)); + continue } } @@ -1033,6 +1054,9 @@ where &trie_cursor_state && next_cached_path.starts_with(&child_path) { + // Push the current cached branch back on before pushing its child and then looping + self.cached_branch_stack.push((cached_path, cached_branch)); + trace!( target: TRACE_TARGET, ?child_path, @@ -1054,6 +1078,10 @@ where upper=?child_path_upper, "Returning sub-trie's key range to calculate", ); + + // Push the current cached branch back onto the stack before returning. + self.cached_branch_stack.push((cached_path, cached_branch)); + return Ok(Some((child_path, child_path_upper))); } } From 2147e95544c97f8257d42ff0a6c7b441ee8633fa Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 17:05:34 +0100 Subject: [PATCH 34/59] Docs --- crates/trie/trie/src/proof_v2/mod.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index f2f9d801c4d..5aed8c98165 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -830,7 +830,7 @@ where &mut self, targets: &mut TargetsIter>, trie_cursor_state: &mut TrieCursorState, - hashed_key_current: Option<&Nibbles>, + hashed_key_current_path: Option, ) -> Result)>, StateProofError> { // All trie data prior to the current cached branch, if any, has been computed. Any branches // which were under-construction previously, and which are not on the same path as this @@ -849,7 +849,7 @@ where // next key which hasn't been processed. 
If that is None then we start from zero. // // TODO better name and docs - let mut lower_bound = Some(hashed_key_current.copied().unwrap_or_else(Nibbles::new)); + let mut lower_bound = Some(hashed_key_current_path.unwrap_or_default()); loop { // Determine the current cached branch node. @@ -895,14 +895,15 @@ where let (cached_path, _) = self.cached_branch_stack.last().expect("just pushed"); // The current hashed key indicates the first key after the previous uncached - // range, or None if this is the first call to this method. If the key is not - // caught up to the next cached branch it means there are portions of the trie - // prior to that branch which need to be computed; return the uncomputed range - // up to that branch to make that happen. + // range, or None if this is the first call to this method. // - // If the next next cached branch's path is all zeros then we can skip this - // catch-up step, because there cannot be any keys prior to that range. - if hashed_key_current.is_none_or(|k| k < cached_path) && + // If the key is not caught up to the next cached branch it means there are + // portions of the trie prior to that branch which need to be computed; return + // the uncomputed range up to that branch to make that happen. + // + // If the next cached branch's path is all zeros then we can skip this catch-up + // step, because there cannot be any keys prior to that range. + if hashed_key_current_path.is_none_or(|k| &k < cached_path) && !PATH_ALL_ZEROS.starts_with(cached_path) { let range = lower_bound.map(|lower| (lower, Some(*cached_path))); @@ -1142,7 +1143,7 @@ where let Some((lower_bound, upper_bound)) = self.next_uncached_key_range( &mut targets, &mut trie_cursor_state, - hashed_cursor_current.as_ref().map(|kv| &kv.0), + hashed_cursor_current.as_ref().map(|kv| kv.0), )? 
else { // If `next_uncached_key_range` determines that there can be no more keys then From dea820c5be5cc3c74323d13bc67665725b564df8 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 17:27:30 +0100 Subject: [PATCH 35/59] adjustments after reviewing --- crates/trie/trie/src/proof_v2/mod.rs | 35 +++++++++++----------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 5aed8c98165..961df29da89 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -42,8 +42,8 @@ static PATH_ALL_ZEROS: Nibbles = { path }; -/// Used to track the state of the trie cursor, allowing us to differentiate a node having been -/// taken from the cursor having been exhausted. +/// Used to track the state of the trie cursor, allowing us to differentiate between a branch having +/// been taken (used as a cached branch) and the cursor having been exhausted. #[derive(Debug)] enum TrieCursorState { /// Cursor is seeked to this path and the node has not been used yet. @@ -224,25 +224,18 @@ where /// Because paths in the trie are visited in depth-first order, it's imperative that targets are /// given in depth-first order as well. If the targets were generated off of B256s, which is /// the common-case, then this is equivalent to lexicographical order. - /// - /// If `lexicographic` is true then ordering is checked using lexicographic order, not - /// depth-first. This is used when checking cached branch nodes, which we visit in lexicographic - /// order. 
fn should_retain( &self, targets: &mut TargetsIter>, path: &Nibbles, - lexicographic: bool, ) -> bool { - let cmp_fn = if lexicographic { std::cmp::Ord::cmp } else { depth_first::cmp }; - - trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), ?lexicographic, "should_retain: called"); + trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); debug_assert!(self.retained_proofs.last().is_none_or( |ProofTrieNode { path: last_retained_path, .. }| { - cmp_fn(path, last_retained_path) == Ordering::Greater + depth_first::cmp(path, last_retained_path) == Ordering::Greater } ), - "should_retain called with path {path:?} which is not after previously retained node {:?} (lexicographic order:{lexicographic:?})", + "should_retain called with path {path:?} which is not after previously retained node {:?} in depth-first order", self.retained_proofs.last().map(|n| n.path), ); @@ -256,9 +249,9 @@ where // If the path isn't in the current range then iterate forward until it is (or until // there is no upper bound, indicating unbounded). - if upper.is_some_and(|upper| cmp_fn(path, &upper) != Ordering::Less) { + if upper.is_some_and(|upper| depth_first::cmp(path, &upper) != Ordering::Less) { targets.next(); - trace!(target: TRACE_TARGET, target = ?targets.peek(), ?lexicographic, "upper target <= path, next target"); + trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); let &(l, u) = targets.peek().expect("targets is never exhausted"); (lower, upper) = (l, u); } else { @@ -284,7 +277,7 @@ where } // If we should retain the child then do so. - if self.should_retain(targets, &child_path, false) { + if self.should_retain(targets, &child_path) { trace!(target: TRACE_TARGET, ?child_path, "Retaining child"); // Convert to `ProofTrieNode`, which will be what is retained. 
@@ -726,7 +719,7 @@ where /// If there is already a child at the top branch of `branch_stack` occupying this new branch's /// nibble then that child will have its short-key split with another new branch, and this /// cached branch will be a child of that splitting branch. - fn push_new_cached_branch( + fn push_cached_branch( &mut self, targets: &mut TargetsIter>, cached_path: Nibbles, @@ -734,7 +727,7 @@ where ) -> Result<(), StateProofError> { debug_assert!( cached_path.starts_with(&self.branch_path), - "push_new_cached_branch called with path {cached_path:?} which is not a child of current branch {:?}", + "push_cached_branch called with path {cached_path:?} which is not a child of current branch {:?}", self.branch_path, ); @@ -937,7 +930,7 @@ where // top branch is the parent of this cached branch. Either way we push a branch // corresponding to the cached one onto the stack, so we can begin constructing it. if self.branch_path != cached_path { - self.push_new_cached_branch(targets, cached_path, &cached_branch)?; + self.push_cached_branch(targets, cached_path, &cached_branch)?; } // At this point the top of the branch stack is the same branch which was found in the @@ -998,11 +991,9 @@ where // `targets` from being moved beyond the last child before it is checked. // - If we do end up using the cached hash value, then we will need to commit the // last child before pushing a new one onto the stack anyway. - if !self.child_stack.is_empty() { - self.commit_last_child(targets)?; - } + self.commit_last_child(targets)?; - if !self.should_retain(targets, &child_path, false) { + if !self.should_retain(targets, &child_path) { // Pull this child's hash out of the cached branch node. To get the hash's index // we first need to calculate the mask of which cached hash's have already been // used by this branch (if any). 
The number of set bits in that mask will be the From 28b1a770b3122d25dac3f3df3a5b278e5ab11d0e Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 18:10:55 +0100 Subject: [PATCH 36/59] Benches --- crates/trie/trie/benches/proof_v2.rs | 54 ++++++++++++++++++---------- crates/trie/trie/src/proof_v2/mod.rs | 4 --- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index d592091cee4..99a17d54666 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -11,20 +11,19 @@ use reth_trie::{ proof_v2::StorageProofCalculator, trie_cursor::{mock::MockTrieCursorFactory, TrieCursorFactory}, }; -use reth_trie_common::{HashedPostState, HashedStorage, Nibbles}; -use std::collections::BTreeMap; +use reth_trie_common::{HashedPostState, HashedStorage}; /// Generate test data for benchmarking. /// /// Returns a tuple of: /// - Hashed address for the storage trie /// - `HashedPostState` with random storage slots -/// - Proof targets (Nibbles) that are 80% from existing slots, 20% random +/// - Proof targets as B256 (sorted) for V2 implementation /// - Equivalent [`B256Set`] for legacy implementation fn generate_test_data( dataset_size: usize, num_targets: usize, -) -> (B256, HashedPostState, Vec, B256Set) { +) -> (B256, HashedPostState, Vec, B256Set) { let mut runner = TestRunner::deterministic(); // Use a fixed hashed address for the storage trie @@ -68,14 +67,8 @@ fn generate_test_data( let target_b256s = targets_strategy.new_tree(&mut runner).unwrap().current(); - // Convert B256 targets to sorted Nibbles for V2 - let mut targets: Vec = target_b256s - .iter() - .map(|b256| { - // SAFETY: B256 is exactly 32 bytes - unsafe { Nibbles::unpack_unchecked(b256.as_slice()) } - }) - .collect(); + // Sort B256 targets for V2 (storage_proof expects sorted targets) + let mut targets: Vec = target_b256s.clone(); targets.sort(); // Create B256Set for legacy 
@@ -86,19 +79,42 @@ fn generate_test_data( /// Create cursor factories from a `HashedPostState` for storage trie testing. /// -/// This mimics the test harness pattern from the `proof_v2` tests. +/// This mimics the test harness pattern from the `proof_v2` tests by using StateRoot +/// to generate TrieUpdates from the HashedPostState. fn create_cursor_factories( post_state: &HashedPostState, ) -> (MockTrieCursorFactory, MockHashedCursorFactory) { - // Ensure that there's a storage trie dataset for every storage trie, even if empty - let storage_trie_nodes: B256Map> = - post_state.storages.keys().copied().map(|addr| (addr, Default::default())).collect(); + use reth_trie::{updates::StorageTrieUpdates, StateRoot}; + + // Create empty trie cursor factory to serve as the initial state for StateRoot + // Ensure that there's a storage trie dataset for every storage account + let storage_tries: B256Map<_> = post_state + .storages + .keys() + .copied() + .map(|addr| (addr, StorageTrieUpdates::default())) + .collect(); + + let empty_trie_cursor_factory = + MockTrieCursorFactory::from_trie_updates(reth_trie_common::updates::TrieUpdates { + storage_tries: storage_tries.clone(), + ..Default::default() + }); // Create mock hashed cursor factory from the post state let hashed_cursor_factory = MockHashedCursorFactory::from_hashed_post_state(post_state.clone()); - // Create empty trie cursor factory (leaf-only calculator doesn't need trie nodes) - let trie_cursor_factory = MockTrieCursorFactory::new(BTreeMap::new(), storage_trie_nodes); + // Generate TrieUpdates using StateRoot + let (_root, mut trie_updates) = + StateRoot::new(empty_trie_cursor_factory, hashed_cursor_factory.clone()) + .root_with_updates() + .expect("StateRoot should succeed"); + + // Continue using empty storage tries for each account + trie_updates.storage_tries = storage_tries; + + // Initialize trie cursor factory from the generated TrieUpdates + let trie_cursor_factory = 
MockTrieCursorFactory::from_trie_updates(trie_updates); (trie_cursor_factory, hashed_cursor_factory) } @@ -148,7 +164,7 @@ fn bench_proof_algos(c: &mut Criterion) { || targets.clone(), |targets| { proof_calculator - .storage_proof(hashed_address, targets.into_iter()) + .storage_proof(hashed_address, targets) .expect("Proof generation failed"); }, BatchSize::SmallInput, diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 961df29da89..2741f37b0e0 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -784,8 +784,6 @@ where // When pushing a new branch we need to set its child nibble in the `state_mask` of // its parent, if there is one. if let Some(parent_branch) = self.branch_stack.last_mut() { - // We've asserted above that branch_path.len() < cached_path.len(), so this - // `get_unchecked` is safe. parent_branch.state_mask.set_bit(cached_branch_nibble); } @@ -840,8 +838,6 @@ where // // This starts off being based off of the hashed cursor's current position, which is the // next key which hasn't been processed. If that is None then we start from zero. - // - // TODO better name and docs let mut lower_bound = Some(hashed_key_current_path.unwrap_or_default()); loop { From c58e4f9004b598a75c40696b1659f3c8cb923e89 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 3 Dec 2025 18:19:06 +0100 Subject: [PATCH 37/59] Benches doc clippy --- crates/trie/trie/benches/proof_v2.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index 99a17d54666..e5123ddc9a1 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -79,8 +79,8 @@ fn generate_test_data( /// Create cursor factories from a `HashedPostState` for storage trie testing. 
/// -/// This mimics the test harness pattern from the `proof_v2` tests by using StateRoot -/// to generate TrieUpdates from the HashedPostState. +/// This mimics the test harness pattern from the `proof_v2` tests by using `StateRoot` +/// to generate `TrieUpdates` from the `HashedPostState`. fn create_cursor_factories( post_state: &HashedPostState, ) -> (MockTrieCursorFactory, MockHashedCursorFactory) { From c5c1def6cfdf9b1bd092b0d2d0ef10e9696358eb Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 4 Dec 2025 11:02:26 +0100 Subject: [PATCH 38/59] Update crates/trie/trie/src/proof_v2/mod.rs Co-authored-by: YK --- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 2741f37b0e0..3a490f32098 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -836,7 +836,7 @@ where // `lower_bound` will be used to track the lower bound of the range which is returned from // this method. If this is None then there are no further keys which need to be processed. // - // This starts off being based off of the hashed cursor's current position, which is the + // This starts off being based on the hashed cursor's current position, which is the // next key which hasn't been processed. If that is None then we start from zero. 
let mut lower_bound = Some(hashed_key_current_path.unwrap_or_default()); From d020d18125ab0acc359dcf3bc70920582c498234 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 4 Dec 2025 11:03:27 +0100 Subject: [PATCH 39/59] Update crates/trie/trie/src/proof_v2/mod.rs Co-authored-by: YK --- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 3a490f32098..82ec9802de1 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -991,7 +991,7 @@ where if !self.should_retain(targets, &child_path) { // Pull this child's hash out of the cached branch node. To get the hash's index - // we first need to calculate the mask of which cached hash's have already been + // we first need to calculate the mask of which cached hashes have already been // used by this branch (if any). The number of set bits in that mask will be the // index of the next hash in the array to use. let curr_hashed_used_mask = cached_branch.hash_mask.get() & curr_state_mask; From 4333aae10435c1f36e5c830876a8de81502b8e56 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 4 Dec 2025 11:04:35 +0100 Subject: [PATCH 40/59] clippy docs --- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 82ec9802de1..be53c244743 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -1341,7 +1341,7 @@ mod tests { /// Creates a new test harness from a `HashedPostState`. /// /// The `HashedPostState` is used to populate the mock hashed cursor factory directly. - /// The trie cursor factory is initialized from TrieUpdates generated by StateRoot. + /// The trie cursor factory is initialized from `TrieUpdates` generated by `StateRoot`. 
fn new(post_state: HashedPostState) -> Self { trace!(target: TRACE_TARGET, ?post_state, "Creating ProofTestHarness"); From ca8eadaa6029f2bda947509f2ae5970edcb9ac23 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 11:34:14 +0100 Subject: [PATCH 41/59] Update crates/trie/trie/src/proof_v2/mod.rs Co-authored-by: YK --- crates/trie/trie/src/proof_v2/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index be53c244743..fbfaa5d3988 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -823,9 +823,10 @@ where trie_cursor_state: &mut TrieCursorState, hashed_key_current_path: Option, ) -> Result)>, StateProofError> { + // Pop any under-construction branches that are now complete. // All trie data prior to the current cached branch, if any, has been computed. Any branches // which were under-construction previously, and which are not on the same path as this - // cached branch, can be assumed to be completed; they will not have any further keys added + // cached branch, can be assumed to be completed; they will not have any further keys added. // to them. 
if let Some(cached_path) = self.cached_branch_stack.last().map(|kv| kv.0) { while !cached_path.starts_with(&self.branch_path) { From fc621348b023e04819f952ce1a4d4a9baadd88aa Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 12:56:33 +0100 Subject: [PATCH 42/59] PR feedback --- crates/trie/trie/src/proof_v2/mod.rs | 253 ++++++++++++++++----------- 1 file changed, 147 insertions(+), 106 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index fbfaa5d3988..0d55439deee 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -42,47 +42,6 @@ static PATH_ALL_ZEROS: Nibbles = { path }; -/// Used to track the state of the trie cursor, allowing us to differentiate between a branch having -/// been taken (used as a cached branch) and the cursor having been exhausted. -#[derive(Debug)] -enum TrieCursorState { - /// Cursor is seeked to this path and the node has not been used yet. - Available(Nibbles, BranchNodeCompact), - /// Cursor is seeked to this path, but the node has been used. - Taken(Nibbles), - /// Cursor has been exhausted. - Exhausted, -} - -impl TrieCursorState { - /// Creates a [`Self`] based on an entry returned from the cursor itself. - fn new(entry: Option<(Nibbles, BranchNodeCompact)>) -> Self { - entry.map_or(Self::Exhausted, |(path, node)| Self::Available(path, node)) - } - - /// Returns the path the cursor is seeked to, or None if it's exhausted. - const fn path(&self) -> Option<&Nibbles> { - match self { - Self::Available(path, _) | Self::Taken(path) => Some(path), - Self::Exhausted => None, - } - } - - /// Takes the path and node from a [`Self::Available`]. Panics if not [`Self::Available`]. 
- fn take(&mut self) -> (Nibbles, BranchNodeCompact) { - let Self::Available(path, _) = self else { - panic!("take called on non-Available: {self:?}") - }; - - let path = *path; - let Self::Available(path, node) = core::mem::replace(self, Self::Taken(path)) else { - unreachable!("already checked that self is Self::Available"); - }; - - (path, node) - } -} - /// A proof calculator that generates merkle proofs using only leaf data. /// /// The calculator: @@ -393,8 +352,8 @@ where // child, so that only `child_stack`'s final child is a non-RlpNode. self.commit_last_child(targets)?; - // Once the first child is committed we set the new child's bit on the top branch's - // `state_mask` and push that child. + // Once the last child is committed we set the new child's bit on the top branch's + // `state_mask` and push that new child. let branch = self.branch_stack.last_mut().expect("branch_stack cannot be empty"); debug_assert!(!branch.state_mask.is_bit_set(leaf_nibble)); @@ -801,6 +760,77 @@ where Ok(()) } + /// Attempts to pop off the top branch of the `cached_branch_stack`, returning + /// [`PopCachedBranchOutcome::Popped`] on success. Returns other variants to indicate that the + /// stack is empty and what to do about it. + /// + /// This method only returns [`PopCachedBranchOutcome::CalculateLeaves`] if there is a cached + /// branch on top of the stack. + #[inline] + fn try_pop_cached_branch( + &mut self, + trie_cursor_state: &mut TrieCursorState, + uncalculated_lower_bound: &Option, + ) -> Result { + // If there is a branch on top of the stack we use that. + if let Some(cached) = self.cached_branch_stack.pop() { + return Ok(PopCachedBranchOutcome::Popped(cached)); + } + + // There is no cached branch on the stack. It's possible that another one exists + // farther on in the trie, but we perform some checks first to prevent unnecessary + // attempts to find it. 
+ + // If the `uncalculated_lower_bound` is None it indicates that there can be no more + // leaf data, so similarly there be no more branches. + let Some(uncalculated_lower_bound) = uncalculated_lower_bound else { + return Ok(PopCachedBranchOutcome::Exhausted) + }; + + // If [`TrieCursorState::path`] returns None it means that the cursor has been + // exhausted, so there can be no more cached data. + let Some(trie_cursor_path) = trie_cursor_state.path() else { + return Ok(PopCachedBranchOutcome::Exhausted) + }; + + // If the trie cursor is seeked to a branch whose leaves have already been processed + // then we can't use it, instead we seek forward and try again. + if trie_cursor_path < uncalculated_lower_bound { + *trie_cursor_state = + TrieCursorState::new(self.trie_cursor.seek(*uncalculated_lower_bound)?); + + // Having just seeked forward we need to check if the cursor is now exhausted. + if matches!(trie_cursor_state, TrieCursorState::Exhausted) { + return Ok(PopCachedBranchOutcome::Exhausted) + }; + } + + // At this point we can be sure that the cursor is in an `Available` state. We know for + // sure it's not `Exhausted` because of the call to `path` above, and we know it's not + // `Taken` because we push all taken branches onto the `cached_branch_stack`, and the + // stack is empty. + // + // We will use this `Available` cached branch as our next branch. + self.cached_branch_stack.push(trie_cursor_state.take()); + trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); + + let (cached_path, _) = self.cached_branch_stack.last().expect("just pushed"); + + // If the calculated range is not caught up to the next cached branch it means there + // are portions of the trie prior to that branch which may need to be calculated; + // return the uncalculated range up to that branch to make that happen. 
+ // + // If the next cached branch's path is all zeros then we can skip this catch-up step, + // because there cannot be any keys prior to that range. + if uncalculated_lower_bound < cached_path && !PATH_ALL_ZEROS.starts_with(cached_path) { + let range = (*uncalculated_lower_bound, Some(*cached_path)); + trace!(target: TRACE_TARGET, ?range, "Returning key range to calculate in order to catch up to cached branch"); + return Ok(PopCachedBranchOutcome::CalculateLeaves(range)); + } + + Ok(PopCachedBranchOutcome::Popped(self.cached_branch_stack.pop().expect("just pushed"))) + } + /// Accepts the current state of both hashed and trie cursors, and determines the next range of /// hashed keys which need to be processed using [`Self::push_leaf`]. /// @@ -834,74 +864,33 @@ where } } - // `lower_bound` will be used to track the lower bound of the range which is returned from - // this method. If this is None then there are no further keys which need to be processed. + // `uncalculated_lower_bound` tracks the lower bound of node paths which have yet to be + // visited, either via the hashed key cursor (`calculate_key_range`) or trie cursor (this + // method). If this is None then there are no further nodes which could exist. // // This starts off being based on the hashed cursor's current position, which is the - // next key which hasn't been processed. If that is None then we start from zero. - let mut lower_bound = Some(hashed_key_current_path.unwrap_or_default()); + // next hashed key which hasn't been processed. If that is None then we start from zero. + let mut uncalculated_lower_bound = Some(hashed_key_current_path.unwrap_or_default()); loop { - // Determine the current cached branch node. + // Pop the currently cached branch node. // // NOTE we pop off the `cached_branch_stack` because cloning the `BranchNodeCompact` // means cloning an Arc, which incurs synchronization overhead. We have to be sure to // push the cached branch back onto the stack once done. 
- let (cached_path, cached_branch) = match ( - self.cached_branch_stack.pop(), - &trie_cursor_state, - lower_bound.as_ref(), - ) { - (Some(cached), _, _) => cached, - (None, TrieCursorState::Exhausted, _) | (_, _, None) => { - trace!(target: TRACE_TARGET, "Exhausted cached trie nodes"); - // If both stack and trie cursor are empty then there are no more cached nodes, - // return an open range to indicate that the rest of the trie should be - // calculated solely from leaves. - // - // If the `lower_bound` indicates that there can be no more data then this will - // return None to indicate end of computation. - return Ok(lower_bound.map(|lower| (lower, None))); - } - (None, cursor_state, Some(lower_bound)) - if cursor_state.path().expect("not exhausted") < lower_bound => - { - // If `cached_branch_stack` is empty then we want to get a new cached branch - // node from the cursor. If the trie cursor is seeked to a branch which has - // already been processed then we can't use it, instead we seek forward and try - // again. - *trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(*lower_bound)?); - continue - } - (None, TrieCursorState::Taken(path), _) => { - panic!("trie cursor at {path:?} had its node taken, but is >= lower_bound {lower_bound:?}"); + let (cached_path, cached_branch) = match self + .try_pop_cached_branch(trie_cursor_state, &uncalculated_lower_bound)? + { + PopCachedBranchOutcome::Popped(cached) => cached, + PopCachedBranchOutcome::Exhausted => { + // If cached branches are exhausted it's possible that there is still an + // unbounded range of leaves to be processed. `uncalculated_lower_bound` is + // used to return that range. 
+ trace!(target: TRACE_TARGET, ?uncalculated_lower_bound, "Exhausted cached trie nodes"); + return Ok(uncalculated_lower_bound.map(|lower| (lower, None))); } - (None, TrieCursorState::Available(_, _), _) => { - // If `cached_branch_stack` is empty but there is an available cached branch - // from the trie cursor then we consume that branch, pushing it onto the stack. - self.cached_branch_stack.push(trie_cursor_state.take()); - trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); - - let (cached_path, _) = self.cached_branch_stack.last().expect("just pushed"); - - // The current hashed key indicates the first key after the previous uncached - // range, or None if this is the first call to this method. - // - // If the key is not caught up to the next cached branch it means there are - // portions of the trie prior to that branch which need to be computed; return - // the uncomputed range up to that branch to make that happen. - // - // If the next cached branch's path is all zeros then we can skip this catch-up - // step, because there cannot be any keys prior to that range. - if hashed_key_current_path.is_none_or(|k| &k < cached_path) && - !PATH_ALL_ZEROS.starts_with(cached_path) - { - let range = lower_bound.map(|lower| (lower, Some(*cached_path))); - trace!(target: TRACE_TARGET, ?range, "Returning key range to calculate in order to catch up to cached branch"); - return Ok(range); - } - - self.cached_branch_stack.pop().expect("just pushed") + PopCachedBranchOutcome::CalculateLeaves(range) => { + return Ok(Some(range)); } }; @@ -965,7 +954,7 @@ where // The just-popped branch is completely processed; we know there can be no more keys // with that prefix. Set the lower bound which can be returned from this method to // be the next possible prefix, if any. 
- lower_bound = cached_path.increment(); + uncalculated_lower_bound = cached_path.increment(); continue } @@ -1014,9 +1003,9 @@ where .state_mask .set_bit(child_nibble); - // Update the `lower_bound` to indicate that the child whose bit was just set is - // completely processed. - lower_bound = child_path.increment(); + // Update the `uncalculated_lower_bound` to indicate that the child whose bit + // was just set is completely processed. + uncalculated_lower_bound = child_path.increment(); // Push the current cached branch back onto the stack before looping. self.cached_branch_stack.push((cached_path, cached_branch)); @@ -1307,6 +1296,58 @@ impl> Iterator for WindowIter { } } +/// Used to track the state of the trie cursor, allowing us to differentiate between a branch having +/// been taken (used as a cached branch) and the cursor having been exhausted. +#[derive(Debug)] +enum TrieCursorState { + /// Cursor is seeked to this path and the node has not been used yet. + Available(Nibbles, BranchNodeCompact), + /// Cursor is seeked to this path, but the node has been used. + Taken(Nibbles), + /// Cursor has been exhausted. + Exhausted, +} + +impl TrieCursorState { + /// Creates a [`Self`] based on an entry returned from the cursor itself. + fn new(entry: Option<(Nibbles, BranchNodeCompact)>) -> Self { + entry.map_or(Self::Exhausted, |(path, node)| Self::Available(path, node)) + } + + /// Returns the path the cursor is seeked to, or None if it's exhausted. + const fn path(&self) -> Option<&Nibbles> { + match self { + Self::Available(path, _) | Self::Taken(path) => Some(path), + Self::Exhausted => None, + } + } + + /// Takes the path and node from a [`Self::Available`]. Panics if not [`Self::Available`]. 
+ fn take(&mut self) -> (Nibbles, BranchNodeCompact) { + let Self::Available(path, _) = self else { + panic!("take called on non-Available: {self:?}") + }; + + let path = *path; + let Self::Available(path, node) = core::mem::replace(self, Self::Taken(path)) else { + unreachable!("already checked that self is Self::Available"); + }; + + (path, node) + } +} + +/// Describes the state of the currently cached branch node (if any). +enum PopCachedBranchOutcome { + /// Cached branch has been popped from the `cached_branch_stack` and is ready to be used. + Popped((Nibbles, BranchNodeCompact)), + /// All cached branches have been exhausted. + Exhausted, + /// Need to calculate leaves from this range (exclusive upper) before the cached branch + /// (catch-up range). If None then + CalculateLeaves((Nibbles, Option)), +} + #[cfg(test)] mod tests { use super::*; From 9c1a55eda0f80fe00e71161dcdca1839525aad02 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 17:23:48 +0100 Subject: [PATCH 43/59] improve tests a bit --- Cargo.lock | 1 + crates/trie/trie/Cargo.toml | 1 + crates/trie/trie/src/proof_v2/mod.rs | 68 ++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 74867a1b269..f40242c5770 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10831,6 +10831,7 @@ dependencies = [ "pretty_assertions", "proptest", "proptest-arbitrary-interop", + "rand 0.9.2", "reth-ethereum-primitives", "reth-execution-errors", "reth-metrics", diff --git a/crates/trie/trie/Cargo.toml b/crates/trie/trie/Cargo.toml index 504f1bc6c2f..ed83494b52b 100644 --- a/crates/trie/trie/Cargo.toml +++ b/crates/trie/trie/Cargo.toml @@ -64,6 +64,7 @@ parking_lot.workspace = true pretty_assertions.workspace = true proptest-arbitrary-interop.workspace = true proptest.workspace = true +rand.workspace = true [features] metrics = ["reth-metrics", "dep:metrics"] diff --git a/crates/trie/trie/src/proof_v2/mod.rs 
b/crates/trie/trie/src/proof_v2/mod.rs index 0d55439deee..d41bec40e70 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -1352,14 +1352,21 @@ enum PopCachedBranchOutcome { mod tests { use super::*; use crate::{ - hashed_cursor::{mock::MockHashedCursorFactory, HashedCursorFactory}, + hashed_cursor::{ + mock::MockHashedCursorFactory, HashedCursorFactory, HashedCursorMetricsCache, + InstrumentedHashedCursor, + }, proof::Proof, - trie_cursor::{depth_first, mock::MockTrieCursorFactory, TrieCursorFactory}, + trie_cursor::{ + depth_first, mock::MockTrieCursorFactory, InstrumentedTrieCursor, TrieCursorFactory, + TrieCursorMetricsCache, + }, }; use alloy_primitives::map::{B256Map, B256Set}; use alloy_rlp::Decodable; use assert_matches::assert_matches; use itertools::Itertools; + use reth_primitives_traits::Account; use reth_trie_common::{ updates::{StorageTrieUpdates, TrieUpdates}, HashedPostState, MultiProofTargets, TrieNode, @@ -1385,8 +1392,6 @@ mod tests { /// The `HashedPostState` is used to populate the mock hashed cursor factory directly. /// The trie cursor factory is initialized from `TrieUpdates` generated by `StateRoot`. fn new(post_state: HashedPostState) -> Self { - trace!(target: TRACE_TARGET, ?post_state, "Creating ProofTestHarness"); - // Create empty trie cursor factory to serve as the initial state for StateRoot // Ensure that there's a storage trie dataset for every account, to make // `MockTrieCursorFactory` happy. 
@@ -1443,6 +1448,13 @@ mod tests { let trie_cursor = self.trie_cursor_factory.account_trie_cursor()?; let hashed_cursor = self.hashed_cursor_factory.hashed_account_cursor()?; + // Collect metrics for cursors + let mut trie_cursor_metrics = TrieCursorMetricsCache::default(); + let trie_cursor = InstrumentedTrieCursor::new(trie_cursor, &mut trie_cursor_metrics); + let mut hashed_cursor_metrics = HashedCursorMetricsCache::default(); + let hashed_cursor = + InstrumentedHashedCursor::new(hashed_cursor, &mut hashed_cursor_metrics); + // Call ProofCalculator::proof with account targets let value_encoder = SyncAccountValueEncoder::new( self.trie_cursor_factory.clone(), @@ -1451,6 +1463,10 @@ mod tests { let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); let proof_v2_result = proof_calculator.proof(&value_encoder, targets_vec.clone())?; + // Output metrics + trace!(target: TRACE_TARGET, ?trie_cursor_metrics, "V2 trie cursor metrics"); + trace!(target: TRACE_TARGET, ?hashed_cursor_metrics, "V2 hashed cursor metrics"); + // Call Proof::multiproof (legacy implementation) let proof_legacy_result = Proof::new(self.trie_cursor_factory.clone(), self.hashed_cursor_factory.clone()) @@ -1510,7 +1526,6 @@ mod tests { use super::*; use alloy_primitives::{map::B256Map, U256}; use proptest::prelude::*; - use reth_primitives_traits::Account; use reth_trie_common::HashedPostState; /// Generate a strategy for Account values @@ -1526,7 +1541,7 @@ mod tests { /// Generate a strategy for `HashedPostState` with random accounts fn hashed_post_state_strategy() -> impl Strategy { - prop::collection::vec((any::<[u8; 32]>(), account_strategy()), 0..40).prop_map( + prop::collection::vec((any::<[u8; 32]>(), account_strategy()), 0..=100).prop_map( |accounts| { let account_map = accounts .into_iter() @@ -1565,7 +1580,7 @@ mod tests { proptest! 
{ #![proptest_config(ProptestConfig::with_cases(8000))] - + #[test] /// Tests that ProofCalculator produces valid proofs for randomly generated /// HashedPostState with proof targets. /// @@ -1575,7 +1590,6 @@ mod tests { /// - Creates a test harness with the generated state /// - Calls assert_proof with the generated targets /// - Verifies both ProofCalculator and legacy Proof produce equivalent results - #[test] fn proptest_proof_with_targets( (post_state, targets) in hashed_post_state_strategy() .prop_flat_map(|post_state| { @@ -1594,4 +1608,42 @@ mod tests { } } } + + #[test] + fn test_big_trie() { + use rand::prelude::*; + + reth_tracing::init_test_tracing(); + let mut rng = rand::rngs::SmallRng::seed_from_u64(1); + + let mut rand_b256 = || { + let mut buf: [u8; 32] = [0; 32]; + rng.fill_bytes(&mut buf); + B256::from_slice(&buf) + }; + + // Generate random HashedPostState. + let mut post_state = HashedPostState::default(); + for _ in 0..10240 { + let hashed_addr = rand_b256(); + let account = Account { bytecode_hash: Some(hashed_addr), ..Default::default() }; + post_state.accounts.insert(hashed_addr, Some(account)); + } + + // Collect targets; partially from real keys, partially random keys which probably won't + // exist. 
+ let num_real_targets = post_state.accounts.len() * 5; + let mut targets = + post_state.accounts.keys().copied().sorted().take(num_real_targets).collect::>(); + for _ in 0..post_state.accounts.len() / 5 { + targets.push(rand_b256()); + } + targets.sort(); + + // Create test harness + let harness = ProofTestHarness::new(post_state); + + // Assert the proof + harness.assert_proof(targets).expect("Proof generation failed"); + } } From e25fdef8bde9f67ea7beaeccba0f91a05a35d794 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 17:30:29 +0100 Subject: [PATCH 44/59] Fix up try_pop logic a bit --- crates/trie/trie/src/proof_v2/mod.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index d41bec40e70..f0fb5b1042c 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -811,10 +811,8 @@ where // stack is empty. // // We will use this `Available` cached branch as our next branch. - self.cached_branch_stack.push(trie_cursor_state.take()); - trace!(target: TRACE_TARGET, cached=?self.cached_branch_stack.last(), "Pushed next trie node onto cached_branch_stack"); - - let (cached_path, _) = self.cached_branch_stack.last().expect("just pushed"); + let cached = trie_cursor_state.take(); + trace!(target: TRACE_TARGET, cached=?cached, "Pushed next trie node onto cached_branch_stack"); // If the calculated range is not caught up to the next cached branch it means there // are portions of the trie prior to that branch which may need to be calculated; @@ -822,13 +820,19 @@ where // // If the next cached branch's path is all zeros then we can skip this catch-up step, // because there cannot be any keys prior to that range. 
+ let cached_path = &cached.0; if uncalculated_lower_bound < cached_path && !PATH_ALL_ZEROS.starts_with(cached_path) { let range = (*uncalculated_lower_bound, Some(*cached_path)); trace!(target: TRACE_TARGET, ?range, "Returning key range to calculate in order to catch up to cached branch"); + + // Push the cached branch onto the stack so it's available once the leaf range is done + // being calculated. + self.cached_branch_stack.push(cached); + return Ok(PopCachedBranchOutcome::CalculateLeaves(range)); } - Ok(PopCachedBranchOutcome::Popped(self.cached_branch_stack.pop().expect("just pushed"))) + Ok(PopCachedBranchOutcome::Popped(cached)) } /// Accepts the current state of both hashed and trie cursors, and determines the next range of From d3d640d88925ae9fbf0554571ef76be1dfa99ba3 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 18:28:34 +0100 Subject: [PATCH 45/59] increment_and_strip_trailing_zeros --- crates/trie/trie/src/proof_v2/mod.rs | 50 ++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index f0fb5b1042c..3a3bf3d7f32 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -958,7 +958,7 @@ where // The just-popped branch is completely processed; we know there can be no more keys // with that prefix. Set the lower bound which can be returned from this method to // be the next possible prefix, if any. - uncalculated_lower_bound = cached_path.increment(); + uncalculated_lower_bound = increment_and_strip_trailing_zeros(&cached_path); continue } @@ -1009,7 +1009,7 @@ where // Update the `uncalculated_lower_bound` to indicate that the child whose bit // was just set is completely processed. - uncalculated_lower_bound = child_path.increment(); + uncalculated_lower_bound = increment_and_strip_trailing_zeros(&child_path); // Push the current cached branch back onto the stack before looping. 
self.cached_branch_stack.push((cached_path, cached_branch)); @@ -1053,7 +1053,7 @@ where // There is no cached data for the sub-trie at this child, we must recalculate the // sub-trie root (this child) using the leaves. Return the range of keys based on the // child path. - let child_path_upper = child_path.increment(); + let child_path_upper = increment_and_strip_trailing_zeros(&child_path); trace!( target: TRACE_TARGET, lower=?child_path, @@ -1352,6 +1352,26 @@ enum PopCachedBranchOutcome { CalculateLeaves((Nibbles, Option)), } +/// Increments the nibbles and strips any trailing zeros. +/// +/// This function wraps `Nibbles::increment` and when it returns a value with trailing zeros, +/// it strips those zeros using bit manipulation on the underlying U256. +fn increment_and_strip_trailing_zeros(nibbles: &Nibbles) -> Option { + let mut result = nibbles.increment()?; + + // If result is empty, just return it + if result.is_empty() { + return Some(result); + } + + // Get access to the underlying U256 to detect trailing zeros + let uint_val = *result.as_mut_uint_unchecked(); + let non_zero_prefix_len = 64 - (uint_val.trailing_zeros() / 4); + result.truncate(non_zero_prefix_len); + + Some(result) +} + #[cfg(test)] mod tests { use super::*; @@ -1650,4 +1670,28 @@ mod tests { // Assert the proof harness.assert_proof(targets).expect("Proof generation failed"); } + + #[test] + fn test_increment_and_strip_trailing_zeros() { + let test_cases: Vec<(Nibbles, Option)> = vec![ + // Basic increment without trailing zeros + (Nibbles::from_nibbles([0x1, 0x2, 0x3]), Some(Nibbles::from_nibbles([0x1, 0x2, 0x4]))), + // Increment with trailing zeros - should be stripped + (Nibbles::from_nibbles([0x0, 0x0, 0xF]), Some(Nibbles::from_nibbles([0x0, 0x1]))), + (Nibbles::from_nibbles([0x0, 0xF, 0xF]), Some(Nibbles::from_nibbles([0x1]))), + // Overflow case + (Nibbles::from_nibbles([0xF, 0xF, 0xF]), None), + // Empty nibbles + (Nibbles::new(), None), + // Single nibble + 
(Nibbles::from_nibbles([0x5]), Some(Nibbles::from_nibbles([0x6]))), + // All Fs except last - results in trailing zeros after increment + (Nibbles::from_nibbles([0xE, 0xF, 0xF]), Some(Nibbles::from_nibbles([0xF]))), + ]; + + for (input, expected) in test_cases { + let result = increment_and_strip_trailing_zeros(&input); + assert_eq!(result, expected, "Failed for input: {:?}", input); + } + } } From 308a8dd514732bb1fa6157e8d2b01d3169999fb0 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 5 Dec 2025 18:56:28 +0100 Subject: [PATCH 46/59] rand/serde --- crates/trie/trie/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/trie/trie/Cargo.toml b/crates/trie/trie/Cargo.toml index ed83494b52b..d3540adda88 100644 --- a/crates/trie/trie/Cargo.toml +++ b/crates/trie/trie/Cargo.toml @@ -85,6 +85,7 @@ serde = [ "revm-state/serde", "parking_lot/serde", "reth-ethereum-primitives/serde", + "rand/serde", ] test-utils = [ "triehash", From c45778f086869c67f368b9801f3938a44585f2cf Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 10 Dec 2025 17:59:10 +0100 Subject: [PATCH 47/59] WIP: Target type --- crates/trie/trie/src/proof_v2/mod.rs | 51 ++++++++++++------------- crates/trie/trie/src/proof_v2/target.rs | 32 ++++++++++++++++ crates/trie/trie/src/proof_v2/value.rs | 2 +- 3 files changed, 57 insertions(+), 28 deletions(-) create mode 100644 crates/trie/trie/src/proof_v2/target.rs diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 3a3bf3d7f32..5793f6efc75 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -25,6 +25,9 @@ pub use value::*; mod node; use node::*; +mod target; +pub use target::*; + /// Target to use with the `tracing` crate. static TRACE_TARGET: &str = "trie::proof_v2"; @@ -185,7 +188,7 @@ where /// the common-case, then this is equivalent to lexicographical order. 
fn should_retain( &self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, path: &Nibbles, ) -> bool { trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); @@ -202,13 +205,13 @@ where loop { // If the node in question is a prefix of the target then we retain - if lower.starts_with(path) { + if lower.key.starts_with(path) { return true } // If the path isn't in the current range then iterate forward until it is (or until // there is no upper bound, indicating unbounded). - if upper.is_some_and(|upper| depth_first::cmp(path, &upper) != Ordering::Less) { + if upper.is_some_and(|upper| depth_first::cmp(path, &upper.key) != Ordering::Less) { targets.next(); trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); let &(l, u) = targets.peek().expect("targets is never exhausted"); @@ -226,7 +229,7 @@ where /// therefore can be retained as a proof node if applicable. fn commit_child( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { @@ -312,7 +315,7 @@ where /// to this method. fn commit_last_child( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { let Some(child) = self.child_stack.pop() else { return Ok(()) }; @@ -343,7 +346,7 @@ where /// - If the leaf's nibble is already set in the branch's `state_mask`. fn push_new_leaf( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, leaf_nibble: u8, leaf_short_key: Nibbles, leaf_val: VE::DeferredEncoder, @@ -450,7 +453,7 @@ where /// This method panics if `branch_stack` is empty. fn pop_branch( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { trace!( target: TRACE_TARGET, @@ -532,7 +535,7 @@ where /// creating a new one depending on the path of the key. 
fn push_leaf( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, key: Nibbles, val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { @@ -619,7 +622,7 @@ where fn calculate_key_range( &mut self, value_encoder: &VE, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, lower_bound: Nibbles, upper_bound: Option, @@ -680,7 +683,7 @@ where /// cached branch will be a child of that splitting branch. fn push_cached_branch( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, cached_path: Nibbles, cached_branch: &BranchNodeCompact, ) -> Result<(), StateProofError> { @@ -853,7 +856,7 @@ where #[instrument(target = TRACE_TARGET, level = "trace", skip_all)] fn next_uncached_key_range( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter>, trie_cursor_state: &mut TrieCursorState, hashed_key_current_path: Option, ) -> Result)>, StateProofError> { @@ -1073,31 +1076,25 @@ where fn proof_inner( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { trace!(target: TRACE_TARGET, "proof_inner: called"); // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] let targets = { - let mut prev: Option = None; + let mut prev: Option = None; targets.into_iter().inspect(move |target| { if let Some(prev) = prev { - debug_assert!(&prev <= target, "prev:{prev:?} target:{target:?}"); + debug_assert!(prev <= target.key, "prev:{prev:?} target:{target:?}"); } - prev = Some(*target); + prev = Some(target.key); }) }; #[cfg(not(debug_assertions))] let targets = targets.into_iter(); - // Convert B256 targets into Nibbles. - let targets = targets.into_iter().map(|key| { - // SAFETY: key is a B256 and so is exactly 32-bytes. - unsafe { Nibbles::unpack_unchecked(key.as_slice()) } - }); - // Wrap targets into a `TargetsIter`. 
let mut targets = WindowIter::new(targets).peekable(); @@ -1204,7 +1201,7 @@ where pub fn proof( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { self.trie_cursor.reset(); self.hashed_cursor.reset(); @@ -1237,7 +1234,7 @@ where pub fn storage_proof( &mut self, hashed_address: B256, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result, StateProofError> { /// Static storage value encoder instance used by all storage proofs. static STORAGE_VALUE_ENCODER: StorageValueEncoder = StorageValueEncoder; @@ -1457,7 +1454,7 @@ mod tests { /// the results. fn assert_proof( &self, - targets: impl IntoIterator, + targets: impl IntoIterator, ) -> Result<(), StateProofError> { let targets_vec = targets.into_iter().sorted().collect::>(); @@ -1465,7 +1462,7 @@ mod tests { // For account-only proofs, each account maps to an empty storage set let legacy_targets = targets_vec .iter() - .map(|addr| (*addr, B256Set::default())) + .map(|target| (B256::from_slice(&target.key.pack()), B256Set::default())) .collect::(); // Create ProofCalculator (proof_v2) with account cursors @@ -1628,7 +1625,7 @@ mod tests { let harness = ProofTestHarness::new(post_state); // Pass generated targets to both implementations - harness.assert_proof(targets).expect("Proof generation failed"); + harness.assert_proof(targets.into_iter().map(Into::into)).expect("Proof generation failed"); } } } @@ -1668,7 +1665,7 @@ mod tests { let harness = ProofTestHarness::new(post_state); // Assert the proof - harness.assert_proof(targets).expect("Proof generation failed"); + harness.assert_proof(targets.into_iter().map(Into::into)).expect("Proof generation failed"); } #[test] diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs new file mode 100644 index 00000000000..946242b4e73 --- /dev/null +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -0,0 +1,32 @@ +use alloy_primitives::B256; +use 
reth_trie_common::Nibbles; + +/// Target describes a proof target. For every proof target given, the [`crate::ProofCalculator`] +/// will calculate and return all nodes whose path is a prefix of the target's `key`. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Target { + pub(crate) key: Nibbles, + pub(crate) min_len: u8, +} + +impl Target { + /// Returns a new [`Target`] which matches all trie nodes whose path is a prefix of this key. + pub fn new(key: B256) -> Self { + // SAFETY: key is a B256 and so is exactly 32-bytes. + let key = unsafe { Nibbles::unpack_unchecked(key.as_slice()) }; + Self { key, min_len: 0 } + } + + /// Only match trie nodes whose path is at least this long. + pub fn with_min_len(mut self, min_len: u8) -> Self { + debug_assert!(min_len <= 64); + self.min_len = min_len; + self + } +} + +impl From for Target { + fn from(key: B256) -> Self { + Self::new(key) + } +} diff --git a/crates/trie/trie/src/proof_v2/value.rs b/crates/trie/trie/src/proof_v2/value.rs index b97e7579d4d..843eeada04b 100644 --- a/crates/trie/trie/src/proof_v2/value.rs +++ b/crates/trie/trie/src/proof_v2/value.rs @@ -123,7 +123,7 @@ where // Compute storage root by calling storage_proof with the root path as a target. // This returns just the root node of the storage trie. 
let storage_root = storage_proof_calculator - .storage_proof(self.hashed_address, [B256::ZERO]) + .storage_proof(self.hashed_address, [B256::ZERO.into()]) .map(|nodes| { // Encode the root node to RLP and hash it let root_node = From b52eaf259b135db88774f9c8879d5b98bc405e9f Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 11 Dec 2025 14:49:16 +0100 Subject: [PATCH 48/59] WIP: ref hell --- crates/trie/trie/src/proof_v2/mod.rs | 74 ++++++++++---------- crates/trie/trie/src/proof_v2/target.rs | 89 +++++++++++++++++++++++-- crates/trie/trie/src/proof_v2/value.rs | 2 +- 3 files changed, 123 insertions(+), 42 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 5793f6efc75..7130714c74f 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -115,7 +115,7 @@ impl ProofCalculator { } /// Helper type for the [`Iterator`] used to pass targets in from the caller. -type TargetsIter = Peekable>; +type TargetsIter<'a, I> = Peekable>; impl ProofCalculator where @@ -186,9 +186,9 @@ where /// Because paths in the trie are visited in depth-first order, it's imperative that targets are /// given in depth-first order as well. If the targets were generated off of B256s, which is /// the common-case, then this is equivalent to lexicographical order. - fn should_retain( + fn should_retain<'a>( &self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, path: &Nibbles, ) -> bool { trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); @@ -227,9 +227,9 @@ where /// /// Calling this method indicates that the child will not undergo any further modifications, and /// therefore can be retained as a proof node if applicable. 
- fn commit_child( + fn commit_child<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { @@ -313,9 +313,9 @@ where /// `branch_stack` to determine the last child's path. When committing the last child prior to /// pushing a new child, it's important to set the new child's `state_mask` bit _after_ the call /// to this method. - fn commit_last_child( + fn commit_last_child<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, ) -> Result<(), StateProofError> { let Some(child) = self.child_stack.pop() else { return Ok(()) }; @@ -344,9 +344,9 @@ where /// /// - If `branch_stack` is empty /// - If the leaf's nibble is already set in the branch's `state_mask`. - fn push_new_leaf( + fn push_new_leaf<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, leaf_nibble: u8, leaf_short_key: Nibbles, leaf_val: VE::DeferredEncoder, @@ -451,9 +451,9 @@ where /// # Panics /// /// This method panics if `branch_stack` is empty. - fn pop_branch( + fn pop_branch<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, ) -> Result<(), StateProofError> { trace!( target: TRACE_TARGET, @@ -533,9 +533,9 @@ where /// Adds a single leaf for a key to the stack, possibly collapsing an existing branch and/or /// creating a new one depending on the path of the key. 
- fn push_leaf( + fn push_leaf<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, key: Nibbles, val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { @@ -619,10 +619,10 @@ where level = "trace", skip(self, value_encoder, targets, hashed_cursor_current), )] - fn calculate_key_range( + fn calculate_key_range<'a>( &mut self, value_encoder: &VE, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, lower_bound: Nibbles, upper_bound: Option, @@ -681,9 +681,9 @@ where /// If there is already a child at the top branch of `branch_stack` occupying this new branch's /// nibble then that child will have its short-key split with another new branch, and this /// cached branch will be a child of that splitting branch. - fn push_cached_branch( + fn push_cached_branch<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, cached_path: Nibbles, cached_branch: &BranchNodeCompact, ) -> Result<(), StateProofError> { @@ -854,9 +854,9 @@ where /// - `Some(lower, Some(upper))`: Indicates to call `push_leaf` on all keys starting at `lower`, /// up to but excluding `upper`, and then call this method once done. 
#[instrument(target = TRACE_TARGET, level = "trace", skip_all)] - fn next_uncached_key_range( + fn next_uncached_key_range<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsIter<'a, impl Iterator>, trie_cursor_state: &mut TrieCursorState, hashed_key_current_path: Option, ) -> Result)>, StateProofError> { @@ -1076,10 +1076,13 @@ where fn proof_inner( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: &mut [Target], ) -> Result, StateProofError> { trace!(target: TRACE_TARGET, "proof_inner: called"); + // TODO remove once SparseTrieTargets is used + let targets = targets.iter(); + // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] let targets = { @@ -1191,8 +1194,8 @@ where { /// Generate a proof for the given targets. /// - /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any - /// target. The returned nodes will be sorted lexicographically by path. + /// Given a set of [`Target`]s, returns nodes whose paths are a prefix of any target. The + /// returned nodes will be sorted depth-first by path. /// /// # Panics /// @@ -1201,7 +1204,7 @@ where pub fn proof( &mut self, value_encoder: &VE, - targets: impl IntoIterator, + targets: &mut [Target], ) -> Result, StateProofError> { self.trie_cursor.reset(); self.hashed_cursor.reset(); @@ -1224,8 +1227,8 @@ where /// Generate a proof for a storage trie at the given hashed address. /// - /// Given lexicographically sorted targets, returns nodes whose paths are a prefix of any - /// target. The returned nodes will be sorted lexicographically by path. + /// Given a set of [`Target`]s, returns nodes whose paths are a prefix of any target. The + /// returned nodes will be sorted depth-first by path. 
/// /// # Panics /// @@ -1234,7 +1237,7 @@ where pub fn storage_proof( &mut self, hashed_address: B256, - targets: impl IntoIterator, + targets: &mut [Target], ) -> Result, StateProofError> { /// Static storage value encoder instance used by all storage proofs. static STORAGE_VALUE_ENCODER: StorageValueEncoder = StorageValueEncoder; @@ -1263,32 +1266,32 @@ where /// `WindowIter` is a wrapper around an [`Iterator`] which allows viewing both previous and current /// items on every iteration. It is similar to `itertools::tuple_windows`, except that the final /// item returned will contain the previous item and `None` as the current. -struct WindowIter { +struct WindowIter<'a, V, I: Iterator> { iter: I, - prev: Option, + prev: Option<&'a V>, } -impl WindowIter { +impl<'a, V, I: Iterator> WindowIter<'a, V, I> { /// Wraps an iterator with a [`WindowIter`]. const fn new(iter: I) -> Self { Self { iter, prev: None } } } -impl> Iterator for WindowIter { +impl<'a, V, I: Iterator> Iterator for WindowIter<'a, I> { /// The iterator returns the previous and current items, respectively. If the underlying /// iterator is exhausted then `Some(prev, None)` is returned on the subsequent call to /// `WindowIter::next`, and `None` from the call after that. 
- type Item = (I::Item, Option); + type Item = (&'a V, Option<&'a V>); fn next(&mut self) -> Option { loop { match (self.prev, self.iter.next()) { (None, None) => return None, - (None, Some(v)) => { + (None, Some(ref v)) => { self.prev = Some(v); } - (Some(v), next) => { + (Some(ref v), next) => { self.prev = next; return Some((v, next)) } @@ -1456,7 +1459,7 @@ mod tests { &self, targets: impl IntoIterator, ) -> Result<(), StateProofError> { - let targets_vec = targets.into_iter().sorted().collect::>(); + let targets_vec = targets.into_iter().collect::>(); // Convert B256 targets to MultiProofTargets for legacy implementation // For account-only proofs, each account maps to an empty storage set @@ -1482,7 +1485,8 @@ mod tests { self.hashed_cursor_factory.clone(), ); let mut proof_calculator = ProofCalculator::new(trie_cursor, hashed_cursor); - let proof_v2_result = proof_calculator.proof(&value_encoder, targets_vec.clone())?; + let proof_v2_result = + proof_calculator.proof(&value_encoder, &mut targets_vec.clone())?; // Output metrics trace!(target: TRACE_TARGET, ?trie_cursor_metrics, "V2 trie cursor metrics"); diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index 946242b4e73..f1c14d14be0 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -1,12 +1,17 @@ use alloy_primitives::B256; use reth_trie_common::Nibbles; -/// Target describes a proof target. For every proof target given, the [`crate::ProofCalculator`] -/// will calculate and return all nodes whose path is a prefix of the target's `key`. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +use crate::proof_v2::increment_and_strip_trailing_zeros; + +/// Target describes a proof target. For every proof target given, the +/// [`crate::proof_v2::ProofCalculator`] will calculate and return all nodes whose path is a prefix +/// of the target's `key`. 
+#[derive(Debug, Copy, Clone)] pub struct Target { pub(crate) key: Nibbles, - pub(crate) min_len: u8, + /// The lower bound of the range of trie nodes which can be retained by this target. In other + /// words, the shortest trie node path which can be retained by this target. + pub(crate) lower_bound: Nibbles, } impl Target { @@ -14,15 +19,26 @@ impl Target { pub fn new(key: B256) -> Self { // SAFETY: key is a B256 and so is exactly 32-bytes. let key = unsafe { Nibbles::unpack_unchecked(key.as_slice()) }; - Self { key, min_len: 0 } + Self { key, lower_bound: Nibbles::new() } } /// Only match trie nodes whose path is at least this long. + /// + /// # Panics + /// + /// This method panics if `min_len` is greater than 64. pub fn with_min_len(mut self, min_len: u8) -> Self { debug_assert!(min_len <= 64); - self.min_len = min_len; + self.lower_bound = self.key; + self.lower_bound.truncate(64 - min_len as usize); self } + + /// Returns the exclusive upper bound of the range of possible trie nodes which can be retained + /// by this target, or None for unbounded. + fn upper_bound(&self) -> Option { + increment_and_strip_trailing_zeros(&self.lower_bound) + } } impl From for Target { @@ -30,3 +46,64 @@ impl From for Target { Self::new(key) } } + +/// Describes a set of targets which all apply to a single sub-trie, ie a section of the overall +/// trie whose nodes all share a prefix. +pub(crate) struct SubTrieTargets<'a> { + /// The lower bound of the sub-trie, ie the prefix which all nodes in the sub-trie share. + pub(crate) lower_bound: Nibbles, + /// The exclusive upper bound of the sub-trie, or None if unbounded. This will be the first + /// path in lexicographical order which is not contained by the sub-trie. + pub(crate) upper_bound: Option, + /// The targets belonging to this sub-trie. These will be sorted by their `key` field, + /// lexicographically. 
+ pub(crate) targets: &'a [Target], +} + +/// Given a set of [`Target`]s, returns an iterator over those same [`Target`]s chunked by the +/// sub-tries they apply to within the overall trie. +pub fn sub_trie_targets<'a>(targets: &'a mut [Target]) -> impl Iterator> { + // First sort lexicographically by lower bound. We will use this for chunking targets into + // contiguous sections in the next steps based on their bounds. + targets.sort_unstable_by_key(|target| target.lower_bound); + + // We now chunk targets, such that each chunk contains all targets belonging to the same + // sub-trie. We are taking advantage of the following properties: + // + // - The first target in the chunk has the lowest lower bound (see previous sorting step). + // + // - The first target in the chunk's upper bound will therefore be the highest upper bound, and + // the upper bound of the whole chunk. + // - For example, given a chunk with lower bounds [0x2, 0x2f, 0x2fa], the upper bounds will + // be [0x3, 0x3, 0x2fb]. Note that no target could match a trie node with path equal to + // or greater than 0x3. + // + // - If a target's lower bound does not lie within the bounds of the current chunk, then that + // target must be the first target of the next chunk, covering a separate sub-trie. + // - Example: given lower bounds of [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e], we would end up with + // the following chunks: + // - [0x2, 0x2a] w/ upper bound 0x3 + // - [0x4c 0x4ce] w/ upper bound 0x4d + // - [0x4e] w/ upper bound 0x4f + let mut upper_bound = targets.first().and_then(|t| t.upper_bound()); + let target_chunks = targets.chunk_by_mut(move |_, next| { + if let Some(some_upper_bound) = upper_bound { + let same_chunk = next.lower_bound < some_upper_bound; + if !same_chunk { + upper_bound = next.upper_bound(); + } + same_chunk + } else { + true + } + }); + + // Map the chunks to the return type. 
Within each chunk we want targets to be sorted by their + // key, as that will be the order they are checked by the `ProofCalculator`. + target_chunks.map(|target_chunk| { + let lower_bound = target_chunk[0].lower_bound; + let upper_bound = target_chunk[0].upper_bound(); + target_chunk.sort_unstable_by_key(|target| target.key); + SubTrieTargets { lower_bound, upper_bound, targets: target_chunk } + }) +} diff --git a/crates/trie/trie/src/proof_v2/value.rs b/crates/trie/trie/src/proof_v2/value.rs index 843eeada04b..dd330d9a879 100644 --- a/crates/trie/trie/src/proof_v2/value.rs +++ b/crates/trie/trie/src/proof_v2/value.rs @@ -123,7 +123,7 @@ where // Compute storage root by calling storage_proof with the root path as a target. // This returns just the root node of the storage trie. let storage_root = storage_proof_calculator - .storage_proof(self.hashed_address, [B256::ZERO.into()]) + .storage_proof(self.hashed_address, &mut [B256::ZERO.into()]) .map(|nodes| { // Encode the root node to RLP and hash it let root_node = From 71479a333899fb381b904797dd7c326e638a9b43 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 11 Dec 2025 16:38:01 +0100 Subject: [PATCH 49/59] WIP --- crates/trie/trie/src/proof_v2/mod.rs | 86 ++++++---- crates/trie/trie/src/proof_v2/target.rs | 201 +++++++++++++++++++++++- 2 files changed, 252 insertions(+), 35 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 7130714c74f..c6a4420fd16 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -115,7 +115,7 @@ impl ProofCalculator { } /// Helper type for the [`Iterator`] used to pass targets in from the caller. -type TargetsIter<'a, I> = Peekable>; +type TargetsIter = Peekable>; impl ProofCalculator where @@ -188,7 +188,7 @@ where /// the common-case, then this is equivalent to lexicographical order. 
fn should_retain<'a>( &self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, path: &Nibbles, ) -> bool { trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); @@ -229,7 +229,7 @@ where /// therefore can be retained as a proof node if applicable. fn commit_child<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { @@ -315,7 +315,7 @@ where /// to this method. fn commit_last_child<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { let Some(child) = self.child_stack.pop() else { return Ok(()) }; @@ -346,7 +346,7 @@ where /// - If the leaf's nibble is already set in the branch's `state_mask`. fn push_new_leaf<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, leaf_nibble: u8, leaf_short_key: Nibbles, leaf_val: VE::DeferredEncoder, @@ -453,7 +453,7 @@ where /// This method panics if `branch_stack` is empty. fn pop_branch<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, ) -> Result<(), StateProofError> { trace!( target: TRACE_TARGET, @@ -535,7 +535,7 @@ where /// creating a new one depending on the path of the key. fn push_leaf<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, key: Nibbles, val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { @@ -622,7 +622,7 @@ where fn calculate_key_range<'a>( &mut self, value_encoder: &VE, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, lower_bound: Nibbles, upper_bound: Option, @@ -683,7 +683,7 @@ where /// cached branch will be a child of that splitting branch. 
fn push_cached_branch<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, cached_path: Nibbles, cached_branch: &BranchNodeCompact, ) -> Result<(), StateProofError> { @@ -856,7 +856,7 @@ where #[instrument(target = TRACE_TARGET, level = "trace", skip_all)] fn next_uncached_key_range<'a>( &mut self, - targets: &mut TargetsIter<'a, impl Iterator>, + targets: &mut TargetsIter>, trie_cursor_state: &mut TrieCursorState, hashed_key_current_path: Option, ) -> Result)>, StateProofError> { @@ -1071,18 +1071,17 @@ where } } - /// Internal implementation of proof calculation. Assumes both cursors have already been reset. - /// See docs on [`Self::proof`] for expected behavior. - fn proof_inner( + /// Calculates trie nodes and retains proofs for targetted nodes within a sub-trie. The + /// sub-trie's bounds are denoted by the `lower_bound` and `upper_bound` arguments, + /// `upper_bound` is exclusive, None indicates unbounded. + #[instrument(target = TRACE_TARGET, level = "trace", skip(self, value_encoder, targets))] + fn proof_subtrie<'a>( &mut self, value_encoder: &VE, - targets: &mut [Target], - ) -> Result, StateProofError> { - trace!(target: TRACE_TARGET, "proof_inner: called"); - - // TODO remove once SparseTrieTargets is used - let targets = targets.iter(); - + lower_bound: Nibbles, + upper_bound: Option, + targets: impl Iterator, + ) -> Result<(), StateProofError> { // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] let targets = { @@ -1101,12 +1100,6 @@ where // Wrap targets into a `TargetsIter`. let mut targets = WindowIter::new(targets).peekable(); - // If there are no targets then nothing could be returned, return early. - if targets.peek().is_none() { - trace!(target: TRACE_TARGET, "Empty targets, returning"); - return Ok(Vec::new()) - } - // Ensure initial state is cleared. By the end of the method call these should be empty once // again. 
debug_assert!(self.branch_stack.is_empty()); @@ -1177,6 +1170,33 @@ where }; self.retained_proofs.push(root_node); + Ok(()) + } + + /// Internal implementation of proof calculation. Assumes both cursors have already been reset. + /// See docs on [`Self::proof`] for expected behavior. + fn proof_inner( + &mut self, + value_encoder: &VE, + targets: &mut [Target], + ) -> Result, StateProofError> { + trace!(target: TRACE_TARGET, "proof_inner: called"); + + // If there are no targets then nothing could be returned, return early. + if targets.is_empty() { + trace!(target: TRACE_TARGET, "Empty targets, returning"); + return Ok(Vec::new()) + } + + for sub_trie_targets in iter_sub_trie_targets(targets) { + self.proof_subtrie( + value_encoder, + sub_trie_targets.lower_bound, + sub_trie_targets.upper_bound, + sub_trie_targets.targets.iter(), + )?; + } + trace!( target: TRACE_TARGET, retained_proofs_len = ?self.retained_proofs.len(), @@ -1266,32 +1286,32 @@ where /// `WindowIter` is a wrapper around an [`Iterator`] which allows viewing both previous and current /// items on every iteration. It is similar to `itertools::tuple_windows`, except that the final /// item returned will contain the previous item and `None` as the current. -struct WindowIter<'a, V, I: Iterator> { +struct WindowIter { iter: I, - prev: Option<&'a V>, + prev: Option, } -impl<'a, V, I: Iterator> WindowIter<'a, V, I> { +impl WindowIter { /// Wraps an iterator with a [`WindowIter`]. const fn new(iter: I) -> Self { Self { iter, prev: None } } } -impl<'a, V, I: Iterator> Iterator for WindowIter<'a, I> { +impl> Iterator for WindowIter { /// The iterator returns the previous and current items, respectively. If the underlying /// iterator is exhausted then `Some(prev, None)` is returned on the subsequent call to /// `WindowIter::next`, and `None` from the call after that. 
- type Item = (&'a V, Option<&'a V>); + type Item = (I::Item, Option); fn next(&mut self) -> Option { loop { match (self.prev, self.iter.next()) { (None, None) => return None, - (None, Some(ref v)) => { + (None, Some(v)) => { self.prev = Some(v); } - (Some(ref v), next) => { + (Some(v), next) => { self.prev = next; return Some((v, next)) } diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index f1c14d14be0..3c50d3a3eb5 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -30,7 +30,7 @@ impl Target { pub fn with_min_len(mut self, min_len: u8) -> Self { debug_assert!(min_len <= 64); self.lower_bound = self.key; - self.lower_bound.truncate(64 - min_len as usize); + self.lower_bound.truncate(min_len as usize); self } @@ -62,7 +62,9 @@ pub(crate) struct SubTrieTargets<'a> { /// Given a set of [`Target`]s, returns an iterator over those same [`Target`]s chunked by the /// sub-tries they apply to within the overall trie. -pub fn sub_trie_targets<'a>(targets: &'a mut [Target]) -> impl Iterator> { +pub(crate) fn iter_sub_trie_targets<'a>( + targets: &'a mut [Target], +) -> impl Iterator> { // First sort lexicographically by lower bound. We will use this for chunking targets into // contiguous sections in the next steps based on their bounds. 
targets.sort_unstable_by_key(|target| target.lower_bound); @@ -107,3 +109,198 @@ pub fn sub_trie_targets<'a>(targets: &'a mut [Target]) -> impl Iterator Nibbles { + if hex.is_empty() { + return Nibbles::new(); + } + format!("0x{}", hex).parse().expect("valid nibbles hex string") + }; + + // Test cases: (input_targets, expected_output) + // Expected output format: Vec<(lower_bound_hex, upper_bound_hex_opt, Vec)> + let test_cases = vec![ + // Case 1: Empty targets + (vec![], vec![]), + // Case 2: Single target without min_len + // lower_bound is empty, upper_bound is None (unbounded) + ( + vec![Target::new(B256::repeat_byte(0x20))], + vec![( + "", + None, + vec!["2020202020202020202020202020202020202020202020202020202020202020"], + )], + ), + // Case 3: Multiple targets in same sub-trie (no min_len) + // Both have empty lower_bound, so they're in the same sub-trie + ( + vec![Target::new(B256::repeat_byte(0x20)), Target::new(B256::repeat_byte(0x21))], + vec![( + "", + None, + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "2121212121212121212121212121212121212121212121212121212121212121", + ], + )], + ), + // Case 4: Multiple targets in different sub-tries + // with_min_len(1) gives lower_bound with first 1 nibble + // First has lower_bound=0x2, upper_bound=0x3 + // Second has lower_bound=0x4, upper_bound=0x5 + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(1), + Target::new(B256::repeat_byte(0x40)).with_min_len(1), + ], + vec![ + ( + "2", + Some("3"), + vec!["2020202020202020202020202020202020202020202020202020202020202020"], + ), + ( + "4", + Some("5"), + vec!["4040404040404040404040404040404040404040404040404040404040404040"], + ), + ], + ), + // Case 5: Three targets, two in same sub-trie, one separate + // 0x20 and 0x2f both have lower_bound=0x2, upper_bound=0x3 + // 0x40 has lower_bound=0x4, upper_bound=0x5 + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(1), + 
Target::new(B256::repeat_byte(0x2f)).with_min_len(1), + Target::new(B256::repeat_byte(0x40)).with_min_len(1), + ], + vec![ + ( + "2", + Some("3"), + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", + ], + ), + ( + "4", + Some("5"), + vec!["4040404040404040404040404040404040404040404040404040404040404040"], + ), + ], + ), + // Case 6: Targets with different min_len values in same sub-trie + // First has min_len=1 (lower=0x2), second has min_len=2 (lower=0x2f) + // Second's lower bound (0x2f) < first's upper bound (0x3), so same sub-trie + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(1), + Target::new(B256::repeat_byte(0x2f)).with_min_len(2), + ], + vec![( + "2", + Some("3"), + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", + ], + )], + ), + // Case 7: More complex chunking with multiple sub-tries + // As described in the function comments: [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e] + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(1), // lower_bound: 0x2 + Target::new(B256::repeat_byte(0x2f)).with_min_len(3), // lower_bound: 0x2f2 + Target::new(B256::repeat_byte(0x4c)).with_min_len(2), // lower_bound: 0x4c + Target::new(B256::repeat_byte(0x4c)).with_min_len(3), // lower_bound: 0x4c4 + Target::new(B256::repeat_byte(0x4e)).with_min_len(2), // lower_bound: 0x4e + ], + vec![ + ( + "2", + Some("3"), + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", + ], + ), + ( + "4c", + Some("4d"), + vec![ + "4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c", + "4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c", + ], + ), + ( + "4e", + Some("4f"), + vec!["4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e"], + ), + ], + ), + ]; + + 
for (i, (mut input_targets, expected)) in test_cases.into_iter().enumerate() { + let sub_tries: Vec<_> = iter_sub_trie_targets(&mut input_targets).collect(); + + assert_eq!( + sub_tries.len(), + expected.len(), + "Test case {} failed: expected {} sub-tries, got {}", + i, + expected.len(), + sub_tries.len() + ); + + for (j, (sub_trie, (exp_lower_hex, exp_upper_hex_opt, exp_keys))) in + sub_tries.iter().zip(expected.iter()).enumerate() + { + let exp_lower = nibbles(exp_lower_hex); + let exp_upper = exp_upper_hex_opt.map(|hex| nibbles(hex)); + + assert_eq!( + sub_trie.lower_bound, exp_lower, + "Test case {} sub-trie {}: lower_bound mismatch", + i, j + ); + assert_eq!( + sub_trie.upper_bound, exp_upper, + "Test case {} sub-trie {}: upper_bound mismatch", + i, j + ); + assert_eq!( + sub_trie.targets.len(), + exp_keys.len(), + "Test case {} sub-trie {}: expected {} targets, got {}", + i, + j, + exp_keys.len(), + sub_trie.targets.len() + ); + + for (k, (target, exp_key_hex)) in + sub_trie.targets.iter().zip(exp_keys.iter()).enumerate() + { + let exp_key = nibbles(exp_key_hex); + assert_eq!( + target.key, exp_key, + "Test case {} sub-trie {} target {}: key mismatch", + i, j, k + ); + } + } + } + } +} From 38fb6f5974908abca2add2b15f34f36667070ffc Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 11 Dec 2025 17:51:57 +0100 Subject: [PATCH 50/59] WIP: almost done, still some minor notes --- crates/trie/trie/src/proof_v2/mod.rs | 139 ++++++++++++++++-------- crates/trie/trie/src/proof_v2/target.rs | 64 ++++------- 2 files changed, 115 insertions(+), 88 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index c6a4420fd16..13f40a8aa57 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -191,6 +191,7 @@ where targets: &mut TargetsIter>, path: &Nibbles, ) -> bool { + // TODO check target lower bound trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: 
called"); debug_assert!(self.retained_proofs.last().is_none_or( |ProofTrieNode { path: last_retained_path, .. }| { @@ -773,6 +774,7 @@ where fn try_pop_cached_branch( &mut self, trie_cursor_state: &mut TrieCursorState, + sub_trie_prefix: &Nibbles, uncalculated_lower_bound: &Option, ) -> Result { // If there is a branch on top of the stack we use that. @@ -785,14 +787,14 @@ where // attempts to find it. // If the `uncalculated_lower_bound` is None it indicates that there can be no more - // leaf data, so similarly there be no more branches. + // leaf data, so similarly there can be no more branches. let Some(uncalculated_lower_bound) = uncalculated_lower_bound else { return Ok(PopCachedBranchOutcome::Exhausted) }; // If [`TrieCursorState::path`] returns None it means that the cursor has been // exhausted, so there can be no more cached data. - let Some(trie_cursor_path) = trie_cursor_state.path() else { + let Some(mut trie_cursor_path) = trie_cursor_state.path() else { return Ok(PopCachedBranchOutcome::Exhausted) }; @@ -800,16 +802,24 @@ where // then we can't use it, instead we seek forward and try again. if trie_cursor_path < uncalculated_lower_bound { *trie_cursor_state = - TrieCursorState::new(self.trie_cursor.seek(*uncalculated_lower_bound)?); + TrieCursorState::seeked(self.trie_cursor.seek(*uncalculated_lower_bound)?); - // Having just seeked forward we need to check if the cursor is now exhausted. - if matches!(trie_cursor_state, TrieCursorState::Exhausted) { + // Having just seeked forward we need to check if the cursor is now exhausted, + // extracting the new path at the same time. + if let Some(new_trie_cursor_path) = trie_cursor_state.path() { + trie_cursor_path = new_trie_cursor_path + } else { return Ok(PopCachedBranchOutcome::Exhausted) }; } + // If the trie cursor has exceeded the sub-trie then we consider it to be exhausted. 
+ if !trie_cursor_path.starts_with(sub_trie_prefix) { + return Ok(PopCachedBranchOutcome::Exhausted) + } + // At this point we can be sure that the cursor is in an `Available` state. We know for - // sure it's not `Exhausted` because of the call to `path` above, and we know it's not + // sure it's not `Exhausted` because of the calls to `path` above, and we know it's not // `Taken` because we push all taken branches onto the `cached_branch_stack`, and the // stack is empty. // @@ -858,7 +868,8 @@ where &mut self, targets: &mut TargetsIter>, trie_cursor_state: &mut TrieCursorState, - hashed_key_current_path: Option, + sub_trie_prefix: &Nibbles, + uncalculated_lower_bound: Nibbles, ) -> Result)>, StateProofError> { // Pop any under-construction branches that are now complete. // All trie data prior to the current cached branch, if any, has been computed. Any branches @@ -874,10 +885,7 @@ where // `uncalculated_lower_bound` tracks the lower bound of node paths which have yet to be // visited, either via the hashed key cursor (`calculate_key_range`) or trie cursor (this // method). If this is None then there are no further nodes which could exist. - // - // This starts off being based on the hashed cursor's current position, which is the - // next hashed key which hasn't been processed. If that is None then we start from zero. - let mut uncalculated_lower_bound = Some(hashed_key_current_path.unwrap_or_default()); + let mut uncalculated_lower_bound = Some(uncalculated_lower_bound); loop { // Pop the currently cached branch node. @@ -885,9 +893,11 @@ where // NOTE we pop off the `cached_branch_stack` because cloning the `BranchNodeCompact` // means cloning an Arc, which incurs synchronization overhead. We have to be sure to // push the cached branch back onto the stack once done. - let (cached_path, cached_branch) = match self - .try_pop_cached_branch(trie_cursor_state, &uncalculated_lower_bound)? 
- { + let (cached_path, cached_branch) = match self.try_pop_cached_branch( + trie_cursor_state, + sub_trie_prefix, + &uncalculated_lower_bound, + )? { PopCachedBranchOutcome::Popped(cached) => cached, PopCachedBranchOutcome::Exhausted => { // If cached branches are exhausted it's possible that there is still an @@ -1029,7 +1039,7 @@ where // trie cursor to the next cached node at-or-after `child_path`. if trie_cursor_state.path().is_some_and(|path| path < &child_path) { trace!(target: TRACE_TARGET, ?child_path, "Seeking trie cursor to child path"); - *trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(child_path)?); + *trie_cursor_state = TrieCursorState::seeked(self.trie_cursor.seek(child_path)?); } // If the next cached branch node is a child of `child_path` then we can assume it is @@ -1074,19 +1084,24 @@ where /// Calculates trie nodes and retains proofs for targetted nodes within a sub-trie. The /// sub-trie's bounds are denoted by the `lower_bound` and `upper_bound` arguments, /// `upper_bound` is exclusive, None indicates unbounded. 
- #[instrument(target = TRACE_TARGET, level = "trace", skip(self, value_encoder, targets))] + #[instrument( + target = TRACE_TARGET, + level = "trace", + skip_all, + fields(prefix=?sub_trie_targets.prefix), + )] fn proof_subtrie<'a>( &mut self, value_encoder: &VE, - lower_bound: Nibbles, - upper_bound: Option, - targets: impl Iterator, + trie_cursor_state: &mut TrieCursorState, + hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, + sub_trie_targets: SubTrieTargets<'a>, ) -> Result<(), StateProofError> { // In debug builds, verify that targets are sorted #[cfg(debug_assertions)] let targets = { let mut prev: Option = None; - targets.into_iter().inspect(move |target| { + sub_trie_targets.targets.iter().inspect(move |target| { if let Some(prev) = prev { debug_assert!(prev <= target.key, "prev:{prev:?} target:{target:?}"); } @@ -1095,7 +1110,7 @@ where }; #[cfg(not(debug_assertions))] - let targets = targets.into_iter(); + let targets = sub_trie_targets.targets.into_iter(); // Wrap targets into a `TargetsIter`. let mut targets = WindowIter::new(targets).peekable(); @@ -1106,18 +1121,32 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.is_empty()); - // Initialize the hashed cursor to None to indicate it hasn't been seeked yet. - let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; - - // Initialize the `trie_cursor_state` with the node closest to root. - let mut trie_cursor_state = TrieCursorState::new(self.trie_cursor.seek(Nibbles::new())?); + // `next_uncached_key_range`, which will be called in the loop below, expects the trie + // cursor to have already been seeked. If it's never been seeked before then we seek it to + // the prefix (the first possible node) to initialize it. 
+ if matches!(trie_cursor_state, TrieCursorState::Unseeked) { + trace!(target: TRACE_TARGET, "Doing initial seek of trie cursor"); + *trie_cursor_state = + TrieCursorState::seeked(self.trie_cursor.seek(sub_trie_targets.prefix)?); + } + trace!(target: TRACE_TARGET, "Starting loop"); loop { + // Calculate the uncalculated lower bound that `next_uncached_key_range` should use. + // + // The lower bound is the higher of the sub-trie's prefix (ie its first possible node) + // and the hashed cursor's current position (which is the next key which a trie node has + // not been computed). + let uncached_lower_bound = hashed_cursor_current + .as_ref() + .map_or(sub_trie_targets.prefix, |kv| std::cmp::max(kv.0, sub_trie_targets.prefix)); + // Determine the range of keys of the overall trie which need to be re-computed. - let Some((lower_bound, upper_bound)) = self.next_uncached_key_range( + let Some((calc_lower_bound, calc_upper_bound)) = self.next_uncached_key_range( &mut targets, - &mut trie_cursor_state, - hashed_cursor_current.as_ref().map(|kv| kv.0), + trie_cursor_state, + &sub_trie_targets.prefix, + uncached_lower_bound, )? else { // If `next_uncached_key_range` determines that there can be no more keys then @@ -1129,9 +1158,9 @@ where self.calculate_key_range( value_encoder, &mut targets, - &mut hashed_cursor_current, - lower_bound, - upper_bound, + hashed_cursor_current, + calc_lower_bound, + calc_upper_bound, )?; // Once outside `calculate_key_range`, `hashed_cursor_current` will be at the first key @@ -1145,6 +1174,7 @@ where } // Once there's no more leaves we can pop the remaining branches, if any. + trace!(target: TRACE_TARGET, "Exited loop, popping remaining branches"); while !self.branch_stack.is_empty() { self.pop_branch(&mut targets)?; } @@ -1158,17 +1188,21 @@ where // All targets match the root node, so always retain it. Determine the root node based on // the child stack, and push the proof of the root node onto the result stack. 
- let root_node = if let Some(node) = self.child_stack.pop() { + // TODO fix this + if let Some(node) = self.child_stack.pop() { self.rlp_encode_buf.clear(); - node.into_proof_trie_node(Nibbles::new(), &mut self.rlp_encode_buf)? - } else { - ProofTrieNode { + let root_node = node.into_proof_trie_node(Nibbles::new(), &mut self.rlp_encode_buf)?; + self.retained_proofs.push(root_node); + } else if sub_trie_targets.prefix.is_empty() { + // Empty prefix covers the entire trie, and is therefore the only prefix which covers + // the root node itself. If the entire trie is targetted, and it has no data, then we + // retain the empty root proof. + self.retained_proofs.push(ProofTrieNode { path: Nibbles::new(), // root path node: TrieNode::EmptyRoot, masks: TrieMasks::none(), - } + }); }; - self.retained_proofs.push(root_node); Ok(()) } @@ -1180,20 +1214,25 @@ where value_encoder: &VE, targets: &mut [Target], ) -> Result, StateProofError> { - trace!(target: TRACE_TARGET, "proof_inner: called"); - // If there are no targets then nothing could be returned, return early. if targets.is_empty() { trace!(target: TRACE_TARGET, "Empty targets, returning"); return Ok(Vec::new()) } + // Initialize the variables which track the state of the two cursors. Both indicated the + // cursors are unseeked. + let mut trie_cursor_state = TrieCursorState::unseeked(); + let mut hashed_cursor_current: Option<(Nibbles, VE::DeferredEncoder)> = None; + + // Divide targets into chunks, each chunk corresponding to a different sub-trie within the + // overall trie, and handle all proofs within that sub-trie. 
for sub_trie_targets in iter_sub_trie_targets(targets) { self.proof_subtrie( value_encoder, - sub_trie_targets.lower_bound, - sub_trie_targets.upper_bound, - sub_trie_targets.targets.iter(), + &mut trie_cursor_state, + &mut hashed_cursor_current, + sub_trie_targets, )?; } @@ -1324,6 +1363,8 @@ impl> Iterator for WindowIter { /// been taken (used as a cached branch) and the cursor having been exhausted. #[derive(Debug)] enum TrieCursorState { + /// The initial state of the cursor, indicating it's never been seeked. + Unseeked, /// Cursor is seeked to this path and the node has not been used yet. Available(Nibbles, BranchNodeCompact), /// Cursor is seeked to this path, but the node has been used. @@ -1333,14 +1374,24 @@ enum TrieCursorState { } impl TrieCursorState { + /// Creates a [`Self::Unseeked`], the state of a cursor which has never been seeked. + const fn unseeked() -> Self { + Self::Unseeked + } + /// Creates a [`Self`] based on an entry returned from the cursor itself. - fn new(entry: Option<(Nibbles, BranchNodeCompact)>) -> Self { + fn seeked(entry: Option<(Nibbles, BranchNodeCompact)>) -> Self { entry.map_or(Self::Exhausted, |(path, node)| Self::Available(path, node)) } /// Returns the path the cursor is seeked to, or None if it's exhausted. + /// + /// # Panics + /// + /// Panics if the cursor is unseeked. const fn path(&self) -> Option<&Nibbles> { match self { + Self::Unseeked => panic!("cursor is unseeked"), Self::Available(path, _) | Self::Taken(path) => Some(path), Self::Exhausted => None, } diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index 3c50d3a3eb5..4840f6a8396 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -50,11 +50,9 @@ impl From for Target { /// Describes a set of targets which all apply to a single sub-trie, ie a section of the overall /// trie whose nodes all share a prefix.
pub(crate) struct SubTrieTargets<'a> { - /// The lower bound of the sub-trie, ie the prefix which all nodes in the sub-trie share. - pub(crate) lower_bound: Nibbles, - /// The exclusive upper bound of the sub-trie, or None if unbounded. This will be the first - /// path in lexicographical order which is not contained by the sub-trie. - pub(crate) upper_bound: Option, + /// The prefix which all nodes in the sub-trie share. This is also the first node in the trie + /// in lexicographic order. + pub(crate) prefix: Nibbles, /// The targets belonging to this sub-trie. These will be sorted by their `key` field, /// lexicographically. pub(crate) targets: &'a [Target], @@ -65,6 +63,10 @@ pub(crate) struct SubTrieTargets<'a> { pub(crate) fn iter_sub_trie_targets<'a>( targets: &'a mut [Target], ) -> impl Iterator> { + // TODO this isn't quite right... if the lower_bound of a target is 0xabc, then the lower_bound + // of the sub-trie is actually 0xab, because we need to calculate the 0xab sub-trie in case 0xab + // is a branch, when could then hav a leaf/extension at 0xabc. + // First sort lexicographically by lower bound. We will use this for chunking targets into // contiguous sections in the next steps based on their bounds. targets.sort_unstable_by_key(|target| target.lower_bound); @@ -104,9 +106,8 @@ pub(crate) fn iter_sub_trie_targets<'a>( // key, as that will be the order they are checked by the `ProofCalculator`. 
target_chunks.map(|target_chunk| { let lower_bound = target_chunk[0].lower_bound; - let upper_bound = target_chunk[0].upper_bound(); target_chunk.sort_unstable_by_key(|target| target.key); - SubTrieTargets { lower_bound, upper_bound, targets: target_chunk } + SubTrieTargets { prefix: lower_bound, targets: target_chunk } }) } @@ -130,12 +131,11 @@ mod tests { // Case 1: Empty targets (vec![], vec![]), // Case 2: Single target without min_len - // lower_bound is empty, upper_bound is None (unbounded) + // lower_bound is empty ( vec![Target::new(B256::repeat_byte(0x20))], vec![( "", - None, vec!["2020202020202020202020202020202020202020202020202020202020202020"], )], ), @@ -145,7 +145,6 @@ mod tests { vec![Target::new(B256::repeat_byte(0x20)), Target::new(B256::repeat_byte(0x21))], vec![( "", - None, vec![ "2020202020202020202020202020202020202020202020202020202020202020", "2121212121212121212121212121212121212121212121212121212121212121", @@ -154,29 +153,21 @@ mod tests { ), // Case 4: Multiple targets in different sub-tries // with_min_len(1) gives lower_bound with first 1 nibble - // First has lower_bound=0x2, upper_bound=0x3 - // Second has lower_bound=0x4, upper_bound=0x5 + // First has lower_bound=0x2 + // Second has lower_bound=0x4 ( vec![ Target::new(B256::repeat_byte(0x20)).with_min_len(1), Target::new(B256::repeat_byte(0x40)).with_min_len(1), ], vec![ - ( - "2", - Some("3"), - vec!["2020202020202020202020202020202020202020202020202020202020202020"], - ), - ( - "4", - Some("5"), - vec!["4040404040404040404040404040404040404040404040404040404040404040"], - ), + ("2", vec!["2020202020202020202020202020202020202020202020202020202020202020"]), + ("4", vec!["4040404040404040404040404040404040404040404040404040404040404040"]), ], ), // Case 5: Three targets, two in same sub-trie, one separate - // 0x20 and 0x2f both have lower_bound=0x2, upper_bound=0x3 - // 0x40 has lower_bound=0x4, upper_bound=0x5 + // 0x20 and 0x2f both have lower_bound=0x2 + // 0x40 has 
lower_bound=0x4 ( vec![ Target::new(B256::repeat_byte(0x20)).with_min_len(1), @@ -186,17 +177,12 @@ mod tests { vec![ ( "2", - Some("3"), vec![ "2020202020202020202020202020202020202020202020202020202020202020", "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", ], ), - ( - "4", - Some("5"), - vec!["4040404040404040404040404040404040404040404040404040404040404040"], - ), + ("4", vec!["4040404040404040404040404040404040404040404040404040404040404040"]), ], ), // Case 6: Targets with different min_len values in same sub-trie @@ -209,7 +195,6 @@ mod tests { ], vec![( "2", - Some("3"), vec![ "2020202020202020202020202020202020202020202020202020202020202020", "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", @@ -229,7 +214,6 @@ mod tests { vec![ ( "2", - Some("3"), vec![ "2020202020202020202020202020202020202020202020202020202020202020", "2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f", @@ -237,7 +221,6 @@ mod tests { ), ( "4c", - Some("4d"), vec![ "4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c", "4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c4c", @@ -245,7 +228,6 @@ mod tests { ), ( "4e", - Some("4f"), vec!["4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e4e"], ), ], @@ -264,20 +246,14 @@ mod tests { sub_tries.len() ); - for (j, (sub_trie, (exp_lower_hex, exp_upper_hex_opt, exp_keys))) in + for (j, (sub_trie, (exp_prefix_hex, exp_keys))) in sub_tries.iter().zip(expected.iter()).enumerate() { - let exp_lower = nibbles(exp_lower_hex); - let exp_upper = exp_upper_hex_opt.map(|hex| nibbles(hex)); + let exp_prefix = nibbles(exp_prefix_hex); assert_eq!( - sub_trie.lower_bound, exp_lower, - "Test case {} sub-trie {}: lower_bound mismatch", - i, j - ); - assert_eq!( - sub_trie.upper_bound, exp_upper, - "Test case {} sub-trie {}: upper_bound mismatch", + sub_trie.prefix, exp_prefix, + "Test case {} sub-trie {}: prefix mismatch", i, j ); assert_eq!( From 
c942364f4a70649f29ca20c004d34dfe2b278f82 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 14:36:23 +0100 Subject: [PATCH 51/59] WIP: just needs tests --- crates/trie/trie/src/proof_v2/mod.rs | 218 ++++++++++++------------ crates/trie/trie/src/proof_v2/target.rs | 140 ++++++++------- 2 files changed, 194 insertions(+), 164 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 13f40a8aa57..da73289003e 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -16,7 +16,7 @@ use alloy_rlp::Encodable; use alloy_trie::{BranchNodeCompact, TrieMask}; use reth_execution_errors::trie::StateProofError; use reth_trie_common::{BranchNode, Nibbles, ProofTrieNode, RlpNode, TrieMasks, TrieNode}; -use std::{cmp::Ordering, iter::Peekable}; +use std::cmp::Ordering; use tracing::{instrument, trace}; mod value; @@ -114,9 +114,6 @@ impl ProofCalculator { } } -/// Helper type for the [`Iterator`] used to pass targets in from the caller. -type TargetsIter = Peekable>; - impl ProofCalculator where TC: TrieCursor, @@ -148,18 +145,19 @@ where !self.branch_stack.is_empty() as usize } - /// Returns true if the proof of a node at the given path should be retained. - /// A node is retained if its path is a prefix of any target. - /// This may move the - /// `targets` iterator forward if the given path comes after the current target. + /// Returns true if the proof of a node at the given path should be retained. A node is retained + /// if its path is a prefix of any target. + /// + /// This may move the `targets` iterator forward if the given path comes after the current + /// target. /// - /// This method takes advantage of the [`WindowIter`] component of [`TargetsIter`] to only check - /// a single target at a time. The [`WindowIter`] allows us to look at a current target and the - /// next target simultaneously, forming an end-exclusive range. 
+ /// This method takes advantage of the [`std::slice::Iter`] component of [`TargetsCursor`] to + /// check the minimum number of targets. In general it looks at a current target and the next + /// target simultaneously, forming an end-exclusive range. /// /// ```text /// * Given targets: [ 0x012, 0x045, 0x678 ] - /// * targets.next() returns: + /// * targets.current() returns: /// - (0x012, Some(0x045)): covers (0x012..0x045) /// - (0x045, Some(0x678)): covers (0x045..0x678) /// - (0x678, None): covers (0x678..) @@ -172,27 +170,20 @@ where /// ```text /// * Given: /// - path: 0x04 - /// - targets.peek() returns (0x012, Some(0x045)) + /// - targets.current() returns (0x012, Some(0x045)) /// /// * 0x04 comes _after_ 0x045 in depth-first order, so (0x012..0x045) does not contain 0x04. /// /// * targets.next() is called. /// - /// * targets.peek() now returns (0x045, Some(0x678)). This does contain 0x04. + /// * targets.current() now returns (0x045, Some(0x678)). This does contain 0x04. /// /// * 0x04 is a prefix of 0x045, and so is retained. /// ``` - /// - /// Because paths in the trie are visited in depth-first order, it's imperative that targets are - /// given in depth-first order as well. If the targets were generated off of B256s, which is - /// the common-case, then this is equivalent to lexicographical order. - fn should_retain<'a>( - &self, - targets: &mut TargetsIter>, - path: &Nibbles, - ) -> bool { - // TODO check target lower bound - trace!(target: TRACE_TARGET, ?path, target = ?targets.peek(), "should_retain: called"); + fn should_retain<'a>(&self, targets: &mut TargetsCursor<'a>, path: &Nibbles) -> bool { + let (mut lower, mut upper) = targets.current(); + + trace!(target: TRACE_TARGET, ?path, target = ?lower, "should_retain: called"); debug_assert!(self.retained_proofs.last().is_none_or( |ProofTrieNode { path: last_retained_path, .. 
}| { depth_first::cmp(path, last_retained_path) == Ordering::Greater @@ -202,21 +193,40 @@ where self.retained_proofs.last().map(|n| n.path), ); - let &(mut lower, mut upper) = targets.peek().expect("targets is never exhausted"); - loop { - // If the node in question is a prefix of the target then we retain + // If the node in question is a prefix of the target then we do not iterate targets + // further. + // + // Even if the node is a prefix of the target's key, if the target has a `prefix` field + // it indicates that the node should only be retained if it has that prefix (ie if it + // has a minimum length). + // + // _However_ even if the node doesn't match the target due to the target's prefix, it + // may match previous targets whose keys match this node. So we search backwards for all + // targets which might match this node, and check the prefix on each. + // + // For example, given targets: + // - key: 0xabc0, prefix: 0xab + // - key: 0xabc1, prefix: 0xa + // - key: 0xabc2, prefix: 0xabc2 + // + // When the branch node at 0xabc is visited it will be after targets has iterated + // forward to 0xabc2 (because all children will have been visited already). At this + // point the target for 0xabc2 will not match the branch due to its prefix, but previous + // targets would, so we need to check those as well. if lower.key.starts_with(path) { - return true + return path.starts_with(&lower.prefix) || + targets + .rev_iter() + .take_while(|target| target.key.starts_with(path)) + .any(|target| path.starts_with(&target.prefix)) } // If the path isn't in the current range then iterate forward until it is (or until // there is no upper bound, indicating unbounded).
if upper.is_some_and(|upper| depth_first::cmp(path, &upper.key) != Ordering::Less) { - targets.next(); - trace!(target: TRACE_TARGET, target = ?targets.peek(), "upper target <= path, next target"); - let &(l, u) = targets.peek().expect("targets is never exhausted"); - (lower, upper) = (l, u); + (lower, upper) = targets.next(); + trace!(target: TRACE_TARGET, target = ?lower, "upper target <= path, next target"); } else { return false } @@ -230,7 +240,7 @@ where /// therefore can be retained as a proof node if applicable. fn commit_child<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, child_path: Nibbles, child: ProofTrieBranchChild, ) -> Result { @@ -316,7 +326,7 @@ where /// to this method. fn commit_last_child<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, ) -> Result<(), StateProofError> { let Some(child) = self.child_stack.pop() else { return Ok(()) }; @@ -347,7 +357,7 @@ where /// - If the leaf's nibble is already set in the branch's `state_mask`. fn push_new_leaf<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, leaf_nibble: u8, leaf_short_key: Nibbles, leaf_val: VE::DeferredEncoder, @@ -452,10 +462,7 @@ where /// # Panics /// /// This method panics if `branch_stack` is empty. - fn pop_branch<'a>( - &mut self, - targets: &mut TargetsIter>, - ) -> Result<(), StateProofError> { + fn pop_branch<'a>(&mut self, targets: &mut TargetsCursor<'a>) -> Result<(), StateProofError> { trace!( target: TRACE_TARGET, branch = ?self.branch_stack.last(), @@ -536,7 +543,7 @@ where /// creating a new one depending on the path of the key. 
fn push_leaf<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, key: Nibbles, val: VE::DeferredEncoder, ) -> Result<(), StateProofError> { @@ -623,7 +630,7 @@ where fn calculate_key_range<'a>( &mut self, value_encoder: &VE, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, lower_bound: Nibbles, upper_bound: Option, @@ -684,7 +691,7 @@ where /// cached branch will be a child of that splitting branch. fn push_cached_branch<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, cached_path: Nibbles, cached_branch: &BranchNodeCompact, ) -> Result<(), StateProofError> { @@ -866,7 +873,7 @@ where #[instrument(target = TRACE_TARGET, level = "trace", skip_all)] fn next_uncached_key_range<'a>( &mut self, - targets: &mut TargetsIter>, + targets: &mut TargetsCursor<'a>, trie_cursor_state: &mut TrieCursorState, sub_trie_prefix: &Nibbles, uncalculated_lower_bound: Nibbles, @@ -1097,23 +1104,8 @@ where hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, sub_trie_targets: SubTrieTargets<'a>, ) -> Result<(), StateProofError> { - // In debug builds, verify that targets are sorted - #[cfg(debug_assertions)] - let targets = { - let mut prev: Option = None; - sub_trie_targets.targets.iter().inspect(move |target| { - if let Some(prev) = prev { - debug_assert!(prev <= target.key, "prev:{prev:?} target:{target:?}"); - } - prev = Some(target.key); - }) - }; - - #[cfg(not(debug_assertions))] - let targets = sub_trie_targets.targets.into_iter(); - - // Wrap targets into a `TargetsIter`. - let mut targets = WindowIter::new(targets).peekable(); + // Wrap targets into a `TargetsCursor`. + let mut targets = TargetsCursor::new(sub_trie_targets.targets); // Ensure initial state is cleared. By the end of the method call these should be empty once // again. 
@@ -1186,23 +1178,29 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.len() < 2); - // All targets match the root node, so always retain it. Determine the root node based on - // the child stack, and push the proof of the root node onto the result stack. - // TODO fix this - if let Some(node) = self.child_stack.pop() { - self.rlp_encode_buf.clear(); - let root_node = node.into_proof_trie_node(Nibbles::new(), &mut self.rlp_encode_buf)?; - self.retained_proofs.push(root_node); - } else if sub_trie_targets.prefix.is_empty() { - // Empty prefix covers the entire trie, and is therefore the only prefix which covers - // the root node itself. If the entire trie is targetted, and it has no data, then we - // retain the empty root proof. - self.retained_proofs.push(ProofTrieNode { - path: Nibbles::new(), // root path - node: TrieNode::EmptyRoot, - masks: TrieMasks::none(), - }); - }; + // We always pop the root node off of the `child_stack` in order to empty it, however we + // might not want to retain the node unless the `SubTrieTargets` indicates it. + match (sub_trie_targets.retain_root, self.child_stack.pop()) { + (false, _) => { + // Whether the root node exists or not, we don't want it. + } + (true, None) => { + // If `child_stack` is empty it means there were no keys at all, retain an empty + // root node. + self.retained_proofs.push(ProofTrieNode { + path: Nibbles::new(), // root path + node: TrieNode::EmptyRoot, + masks: TrieMasks::none(), + }); + } + (true, Some(root_node)) => { + // Encode and retain the root node. + self.rlp_encode_buf.clear(); + let root_node = + root_node.into_proof_trie_node(Nibbles::new(), &mut self.rlp_encode_buf)?; + self.retained_proofs.push(root_node); + } + } Ok(()) } @@ -1322,40 +1320,46 @@ where } } -/// `WindowIter` is a wrapper around an [`Iterator`] which allows viewing both previous and current -/// items on every iteration.
It is similar to `itertools::tuple_windows`, except that the final -/// item returned will contain the previous item and `None` as the current. -struct WindowIter { - iter: I, - prev: Option, +/// Helper type wrapping a slice of [`Target`]s, primarily used to iterate through targets in +/// [`ProofCalculator::should_retain`]. +/// +/// It is assumed that the underlying slice is never empty, and that the iterator is never +/// exhausted. +struct TargetsCursor<'a> { + targets: &'a [Target], + i: usize, } -impl WindowIter { - /// Wraps an iterator with a [`WindowIter`]. - const fn new(iter: I) -> Self { - Self { iter, prev: None } +impl<'a> TargetsCursor<'a> { + /// Wraps a slice of [`Target`]s with the `TargetsCursor`. + /// + /// # Panics + /// + /// Will panic in debug mode if called with an empty slice. + fn new(targets: &'a [Target]) -> Self { + debug_assert!(!targets.is_empty()); + Self { targets, i: 0 } + } + + /// Returns the current and next [`Target`] that the cursor is pointed at. + fn current(&self) -> (&'a Target, Option<&'a Target>) { + (&self.targets[self.i], self.targets.get(self.i + 1)) } -} -impl> Iterator for WindowIter { - /// The iterator returns the previous and current items, respectively. If the underlying - /// iterator is exhausted then `Some(prev, None)` is returned on the subsequent call to - /// `WindowIter::next`, and `None` from the call after that. - type Item = (I::Item, Option); + /// Iterates the cursor forward. + /// + /// # Panics + /// + /// Will panic if the cursor is exhausted. 
+ fn next(&mut self) -> (&'a Target, Option<&'a Target>) { + self.i += 1; + debug_assert!(self.i < self.targets.len()); + self.current() + } - fn next(&mut self) -> Option { - loop { - match (self.prev, self.iter.next()) { - (None, None) => return None, - (None, Some(v)) => { - self.prev = Some(v); - } - (Some(v), next) => { - self.prev = next; - return Some((v, next)) - } - } - } + /// Iterates backwards over the slice, starting from the [`Target`] previous to the current. + fn rev_iter(&self) -> impl Iterator { + self.targets[..self.i].iter().rev() } } diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index 4840f6a8396..e97e6470a69 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -9,9 +9,9 @@ use crate::proof_v2::increment_and_strip_trailing_zeros; #[derive(Debug, Copy, Clone)] pub struct Target { pub(crate) key: Nibbles, - /// The lower bound of the range of trie nodes which can be retained by this target. In other - /// words, the shortest trie node path which can be retained by this target. - pub(crate) lower_bound: Nibbles, + /// The shortest trie node path which can be retained by this target. `key.starts_with(prefix)` + /// is always true. + pub(crate) prefix: Nibbles, } impl Target { @@ -19,7 +19,7 @@ impl Target { pub fn new(key: B256) -> Self { // SAFETY: key is a B256 and so is exactly 32-bytes. let key = unsafe { Nibbles::unpack_unchecked(key.as_slice()) }; - Self { key, lower_bound: Nibbles::new() } + Self { key, prefix: Nibbles::new() } } /// Only match trie nodes whose path is at least this long. @@ -29,16 +29,10 @@ impl Target { /// This method panics if `min_len` is greater than 64.
pub fn with_min_len(mut self, min_len: u8) -> Self { debug_assert!(min_len <= 64); - self.lower_bound = self.key; - self.lower_bound.truncate(min_len as usize); + self.prefix = self.key; + self.prefix.truncate(min_len as usize); self } - - /// Returns the exclusive upper bound of the range of possible trie nodes which can be retained - /// by this target, or None for unbounded. - fn upper_bound(&self) -> Option { - increment_and_strip_trailing_zeros(&self.lower_bound) - } } impl From for Target { @@ -56,6 +50,10 @@ pub(crate) struct SubTrieTargets<'a> { /// The targets belonging to this sub-trie. These will be sorted by their `key` field, /// lexicographically. pub(crate) targets: &'a [Target], + /// Will be true if at least one target in the set has an empty `prefix`. + /// + /// If this is true then `prefix.is_empty()`, though not necessarily vice-versa. + pub(crate) retain_root: bool, } /// Given a set of [`Target`]s, returns an iterator over those same [`Target`]s chunked by the @@ -63,38 +61,61 @@ pub(crate) struct SubTrieTargets<'a> { pub(crate) fn iter_sub_trie_targets<'a>( targets: &'a mut [Target], ) -> impl Iterator> { - // TODO this isn't quite right... if the lower_bound of a target is 0xabc, then the lower_bound - // of the sub-trie is actually 0xab, because we need to calculate the 0xab sub-trie in case 0xab - // is a branch, when could then hav a leaf/extension at 0xabc. - - // First sort lexicographically by lower bound. We will use this for chunking targets into + // First sort lexicographically by prefix. We will use this for chunking targets into // contiguous sections in the next steps based on their bounds. - targets.sort_unstable_by_key(|target| target.lower_bound); + targets.sort_unstable_by_key(|target| target.prefix); + + // A helper function for getting the largest prefix of the sub-trie which contains a particular + // target, based on its prefix. 
+ // + // In general the target will only match within the sub-trie with the same prefix as the + // target's. However there is an exception: + // + // Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc + // needs to match that node, but in order to know the node is at that path the branch at 0xab + // must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble + // truncated. + // + // For a target with an empty prefix we still use an empty sub-trie prefix; this will still + // construct the branch at the root node (if there is one), the only behavioral difference + // between targets with prefix lengths zero and one will be that with prefix length zero the + // root node's proof will be retained. + let sub_trie_prefix = |prefix: Nibbles| { + let mut lower_bound = prefix; + lower_bound.truncate(prefix.len().saturating_sub(1)); + lower_bound + }; + + // A helper function which returns the first path following a sub-trie in lexicographical order. + let sub_trie_upper_bound = + |sub_trie_prefix: &Nibbles| increment_and_strip_trailing_zeros(sub_trie_prefix); // We now chunk targets, such that each chunk contains all targets belonging to the same // sub-trie. We are taking advantage of the following properties: // - // - The first target in the chunk has the lowest lower bound (see previous sorting step). + // - The first target in the chunk has the shortest prefix (see previous sorting step). // - // - The first target in the chunk's upper bound will therefore be the highest upper bound, and - // the upper bound of the whole chunk. - // - For example, given a chunk with lower bounds [0x2, 0x2f, 0x2fa], the upper bounds will - // be [0x3, 0x3, 0x2fb]. Note that no target could match a trie node with path equal to - // or greater than 0x3. + // - The first target in the chunk's upper bound will therefore belong to the sub-trie with the + // highest upper bound, and the upper bound of the whole chunk. 
+ // - For example, given a chunk with sub-trie prefixes [0x2, 0x2f, 0x2fa], the upper bounds + // will be [0x3, 0x3, 0x2fb]. Note that no target could match a trie node with path equal + // to or greater than 0x3. // - // - If a target's lower bound does not lie within the bounds of the current chunk, then that - // target must be the first target of the next chunk, covering a separate sub-trie. - // - Example: given lower bounds of [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e], we would end up with - // the following chunks: + // - If a target's sub-trie's prefix does not lie within the bounds of the current chunk, then + // that target must be the first target of the next chunk, lying in a separate sub-trie. + // - Example: given sub-trie prefixes of [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e], we would end up + // with the following chunks: // - [0x2, 0x2a] w/ upper bound 0x3 // - [0x4c 0x4ce] w/ upper bound 0x4d // - [0x4e] w/ upper bound 0x4f - let mut upper_bound = targets.first().and_then(|t| t.upper_bound()); + let mut upper_bound = + targets.first().and_then(|t| sub_trie_upper_bound(&sub_trie_prefix(t.prefix))); let target_chunks = targets.chunk_by_mut(move |_, next| { if let Some(some_upper_bound) = upper_bound { - let same_chunk = next.lower_bound < some_upper_bound; + let sub_trie_prefix = sub_trie_prefix(next.prefix); + let same_chunk = sub_trie_prefix < some_upper_bound; if !same_chunk { - upper_bound = next.upper_bound(); + upper_bound = sub_trie_upper_bound(&sub_trie_prefix); } same_chunk } else { @@ -104,10 +125,11 @@ pub(crate) fn iter_sub_trie_targets<'a>( // Map the chunks to the return type. Within each chunk we want targets to be sorted by their // key, as that will be the order they are checked by the `ProofCalculator`. 
- target_chunks.map(|target_chunk| { - let lower_bound = target_chunk[0].lower_bound; - target_chunk.sort_unstable_by_key(|target| target.key); - SubTrieTargets { prefix: lower_bound, targets: target_chunk } + target_chunks.map(move |targets| { + let prefix = sub_trie_prefix(targets[0].prefix); + let retain_root = targets[0].prefix.is_empty(); + targets.sort_unstable_by_key(|target| target.key); + SubTrieTargets { prefix, targets, retain_root } }) } @@ -131,7 +153,6 @@ mod tests { // Case 1: Empty targets (vec![], vec![]), // Case 2: Single target without min_len - // lower_bound is empty ( vec![Target::new(B256::repeat_byte(0x20))], vec![( @@ -140,7 +161,6 @@ mod tests { )], ), // Case 3: Multiple targets in same sub-trie (no min_len) - // Both have empty lower_bound, so they're in the same sub-trie ( vec![Target::new(B256::repeat_byte(0x20)), Target::new(B256::repeat_byte(0x21))], vec![( @@ -152,13 +172,10 @@ mod tests { )], ), // Case 4: Multiple targets in different sub-tries - // with_min_len(1) gives lower_bound with first 1 nibble - // First has lower_bound=0x2 - // Second has lower_bound=0x4 ( vec![ - Target::new(B256::repeat_byte(0x20)).with_min_len(1), - Target::new(B256::repeat_byte(0x40)).with_min_len(1), + Target::new(B256::repeat_byte(0x20)).with_min_len(2), + Target::new(B256::repeat_byte(0x40)).with_min_len(2), ], vec![ ("2", vec!["2020202020202020202020202020202020202020202020202020202020202020"]), @@ -166,13 +183,11 @@ mod tests { ], ), // Case 5: Three targets, two in same sub-trie, one separate - // 0x20 and 0x2f both have lower_bound=0x2 - // 0x40 has lower_bound=0x4 ( vec![ - Target::new(B256::repeat_byte(0x20)).with_min_len(1), - Target::new(B256::repeat_byte(0x2f)).with_min_len(1), - Target::new(B256::repeat_byte(0x40)).with_min_len(1), + Target::new(B256::repeat_byte(0x20)).with_min_len(2), + Target::new(B256::repeat_byte(0x2f)).with_min_len(2), + Target::new(B256::repeat_byte(0x40)).with_min_len(2), ], vec![ ( @@ -186,12 +201,10 @@ mod 
tests { ], ), // Case 6: Targets with different min_len values in same sub-trie - // First has min_len=1 (lower=0x2), second has min_len=2 (lower=0x2f) - // Second's lower bound (0x2f) < first's upper bound (0x3), so same sub-trie ( vec![ - Target::new(B256::repeat_byte(0x20)).with_min_len(1), - Target::new(B256::repeat_byte(0x2f)).with_min_len(2), + Target::new(B256::repeat_byte(0x20)).with_min_len(2), + Target::new(B256::repeat_byte(0x2f)).with_min_len(3), ], vec![( "2", @@ -202,14 +215,13 @@ mod tests { )], ), // Case 7: More complex chunking with multiple sub-tries - // As described in the function comments: [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e] ( vec![ - Target::new(B256::repeat_byte(0x20)).with_min_len(1), // lower_bound: 0x2 - Target::new(B256::repeat_byte(0x2f)).with_min_len(3), // lower_bound: 0x2f2 - Target::new(B256::repeat_byte(0x4c)).with_min_len(2), // lower_bound: 0x4c - Target::new(B256::repeat_byte(0x4c)).with_min_len(3), // lower_bound: 0x4c4 - Target::new(B256::repeat_byte(0x4e)).with_min_len(2), // lower_bound: 0x4e + Target::new(B256::repeat_byte(0x20)).with_min_len(2), + Target::new(B256::repeat_byte(0x2f)).with_min_len(4), + Target::new(B256::repeat_byte(0x4c)).with_min_len(3), + Target::new(B256::repeat_byte(0x4c)).with_min_len(4), + Target::new(B256::repeat_byte(0x4e)).with_min_len(3), ], vec![ ( @@ -232,6 +244,20 @@ mod tests { ), ], ), + // Case 8: Min-len 1 should result in zero-length sub-trie prefix + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(1), + Target::new(B256::repeat_byte(0x40)).with_min_len(1), + ], + vec![( + "", + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "4040404040404040404040404040404040404040404040404040404040404040", + ], + )], + ), ]; for (i, (mut input_targets, expected)) in test_cases.into_iter().enumerate() { From 00512b705b68748fb713fd45115f947a4a427a18 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 17:13:25 +0100 Subject: [PATCH 52/59] Working! 
--- crates/trie/trie/src/proof_v2/mod.rs | 162 ++++++++++++++++-------- crates/trie/trie/src/proof_v2/target.rs | 98 +++++++++----- 2 files changed, 175 insertions(+), 85 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index da73289003e..b2b2768eca4 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -180,7 +180,12 @@ where /// /// * 0x04 is a prefix of 0x045, and so is retained. /// ``` - fn should_retain<'a>(&self, targets: &mut TargetsCursor<'a>, path: &Nibbles) -> bool { + fn should_retain<'a>( + &self, + targets: &mut TargetsCursor<'a>, + path: &Nibbles, + check_prefix: bool, + ) -> bool { let (mut lower, mut upper) = targets.current(); trace!(target: TRACE_TARGET, ?path, target = ?lower, "should_retain: called"); @@ -202,24 +207,30 @@ where // has a minimum length). // // _However_ even if the node doesn't match the target due to the target's prefix, it - // may match previous targets whose keys match this node. So we search backwards for all - // targets which might match this node, and check the prefix on each. + // may match other targets whose keys match this node. So we search forwards and + // backwards for all targets which might match this node, and check the prefix on each. // - // For example, given targets: + // For example, given a branch 0xabc, with children at 0, 1, and 2, and targets: // - key: 0xabc0, prefix: 0xab // - key: 0xabc1, prefix: 0xa - // - key: 0xabc2, prefix: 0xabc2 + // - key: 0xabc2, prefix: 0xabc2 <-- current + // - key: 0xabc3, prefix: 0xabc // // When the branch node at 0xabc is visited it will be after targets has iterated // forward to 0xabc2 (because all children will have been visited already). At this - // point the target for 0xabc2 will not match the branch due to its prefix, but previous - // targets which would, so we need to check those as well. 
+ // point the target for 0xabc2 will not match the branch due to its prefix, any of the + // other targets would, so we need to check those as well. if lower.key.starts_with(path) { - return path.starts_with(&lower.prefix) || - targets - .rev_iter() - .take_while(|target| target.key.starts_with(path)) - .any(|target| path.starts_with(&target.prefix)) + return !check_prefix || + (path.starts_with(&lower.prefix) || + targets + .skip_iter() + .take_while(|target| target.key.starts_with(path)) + .any(|target| path.starts_with(&target.prefix)) || + targets + .rev_iter() + .take_while(|target| target.key.starts_with(path)) + .any(|target| path.starts_with(&target.prefix))) } // If the path isn't in the current range then iterate forward until it is (or until @@ -250,7 +261,7 @@ where } // If we should retain the child then do so. - if self.should_retain(targets, &child_path) { + if self.should_retain(targets, &child_path, true) { trace!(target: TRACE_TARGET, ?child_path, "Retaining child"); // Convert to `ProofTrieNode`, which will be what is retained. @@ -625,7 +636,8 @@ where #[instrument( target = TRACE_TARGET, level = "trace", - skip(self, value_encoder, targets, hashed_cursor_current), + skip_all, + fields(?lower_bound, ?upper_bound), )] fn calculate_key_range<'a>( &mut self, @@ -649,6 +661,12 @@ where // If the cursor hasn't been used, or the last iterated key is prior to this range's // key range, then seek forward to at least the first key. 
if hashed_cursor_current.as_ref().is_none_or(|(key, _)| key < &lower_bound) { + trace!( + target: TRACE_TARGET, + current=?hashed_cursor_current.as_ref().map(|(k, _)| k), + "Seeking hashed cursor to meet lower bound", + ); + let lower_key = B256::right_padding_from(&lower_bound.pack()); *hashed_cursor_current = self.hashed_cursor.seek(lower_key)?.map(map_hashed_cursor_entry); @@ -664,6 +682,7 @@ where *hashed_cursor_current = self.hashed_cursor.next()?.map(map_hashed_cursor_entry); } + trace!(target: TRACE_TARGET, "No further keys within range"); Ok(()) } @@ -876,6 +895,7 @@ where targets: &mut TargetsCursor<'a>, trie_cursor_state: &mut TrieCursorState, sub_trie_prefix: &Nibbles, + sub_trie_upper_bound: Option<&Nibbles>, uncalculated_lower_bound: Nibbles, ) -> Result)>, StateProofError> { // Pop any under-construction branches that are now complete. @@ -911,7 +931,8 @@ where // unbounded range of leaves to be processed. `uncalculated_lower_bound` is // used to return that range. trace!(target: TRACE_TARGET, ?uncalculated_lower_bound, "Exhausted cached trie nodes"); - return Ok(uncalculated_lower_bound.map(|lower| (lower, None))); + return Ok(uncalculated_lower_bound + .map(|lower| (lower, sub_trie_upper_bound.copied()))); } PopCachedBranchOutcome::CalculateLeaves(range) => { return Ok(Some(range)); @@ -1003,7 +1024,7 @@ where // last child before pushing a new one onto the stack anyway. self.commit_last_child(targets)?; - if !self.should_retain(targets, &child_path) { + if !self.should_retain(targets, &child_path, false) { // Pull this child's hash out of the cached branch node. To get the hash's index // we first need to calculate the mask of which cached hashes have already been // used by this branch (if any). 
The number of set bits in that mask will be the @@ -1104,19 +1125,22 @@ where hashed_cursor_current: &mut Option<(Nibbles, VE::DeferredEncoder)>, sub_trie_targets: SubTrieTargets<'a>, ) -> Result<(), StateProofError> { + let sub_trie_upper_bound = sub_trie_targets.upper_bound(); + // Wrap targets into a `TargetsCursor`. let mut targets = TargetsCursor::new(sub_trie_targets.targets); // Ensure initial state is cleared. By the end of the method call these should be empty once // again. + debug_assert!(self.cached_branch_stack.is_empty()); debug_assert!(self.branch_stack.is_empty()); debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.is_empty()); // `next_uncached_key_range`, which will be called in the loop below, expects the trie - // cursor to have already been seeked. If it's never been seeked before then we seek it to - // the prefix (the first possible node) to initialize it. - if matches!(trie_cursor_state, TrieCursorState::Unseeked) { + // cursor to have already been seeked. If it's not yet seeked, or seeked to a prior node, + // then we seek it to the prefix (the first possible node) to initialize it. + if trie_cursor_state.before(&sub_trie_targets.prefix) { trace!(target: TRACE_TARGET, "Doing initial seek of trie cursor"); *trie_cursor_state = TrieCursorState::seeked(self.trie_cursor.seek(sub_trie_targets.prefix)?); @@ -1138,6 +1162,7 @@ where &mut targets, trie_cursor_state, &sub_trie_targets.prefix, + sub_trie_upper_bound.as_ref(), uncached_lower_bound, )? else { @@ -1158,9 +1183,13 @@ where // Once outside `calculate_key_range`, `hashed_cursor_current` will be at the first key // after the range. // - // If the `hashed_cursor_current` is None then there are no more keys at all, meaning - // the trie couldn't possibly have more data and we should complete computation. 
- if hashed_cursor_current.is_none() { + // If the `hashed_cursor_current` is None (exhausted), or not within the range of the + // sub-trie, then there are no more keys at all, meaning the trie couldn't possibly have + // more data and we should complete computation. + if hashed_cursor_current + .as_ref() + .is_none_or(|(key, _)| !key.starts_with(&sub_trie_targets.prefix)) + { break; } } @@ -1178,8 +1207,18 @@ where debug_assert!(self.branch_path.is_empty()); debug_assert!(self.child_stack.len() < 2); + // The `cached_branch_stack` may still have cached branches on it, as it's not affected by + // `pop_branch`, but it is no longer needed and should be cleared. + self.cached_branch_stack.clear(); + // We always pop the root node off of the `child_stack` in order to empty it, however we // might not want to retain the node unless the `SubTrieTargets` indicates it. + trace!( + target: TRACE_TARGET, + retain_root = ?sub_trie_targets.retain_root, + child_stack_empty = self.child_stack.is_empty(), + "Maybe retaining root", + ); match (sub_trie_targets.retain_root, self.child_stack.pop()) { (false, _) => { // Whether the root node is exists or not, we don't want it. @@ -1357,6 +1396,11 @@ impl<'a> TargetsCursor<'a> { self.current() } + // Iterate forwards over the slice, starting from the [`Target`] after the current. + fn skip_iter(&self) -> impl Iterator { + self.targets[self.i + 1..].iter() + } + /// Iterated backwards over the slice, starting from the [`Target`] previous to the current. fn rev_iter(&self) -> impl Iterator { self.targets[..self.i].iter().rev() @@ -1401,6 +1445,15 @@ impl TrieCursorState { } } + /// Returns true if the cursor is unseeked, or is seeked to a node prior to the given one. + fn before(&self, path: &Nibbles) -> bool { + match self { + Self::Unseeked => true, + Self::Available(seeked_to, _) | Self::Taken(seeked_to) => path < seeked_to, + Self::Exhausted => false, + } + } + /// Takes the path and node from a [`Self::Available`]. 
Panics if not [`Self::Available`]. fn take(&mut self) -> (Nibbles, BranchNodeCompact) { let Self::Available(path, _) = self else { @@ -1463,7 +1516,6 @@ mod tests { }; use alloy_primitives::map::{B256Map, B256Set}; use alloy_rlp::Decodable; - use assert_matches::assert_matches; use itertools::Itertools; use reth_primitives_traits::Account; use reth_trie_common::{ @@ -1536,8 +1588,9 @@ mod tests { ) -> Result<(), StateProofError> { let targets_vec = targets.into_iter().collect::>(); - // Convert B256 targets to MultiProofTargets for legacy implementation + // Convert Target keys to MultiProofTargets for legacy implementation // For account-only proofs, each account maps to an empty storage set + // Legacy implementation only uses the keys, not the prefix let legacy_targets = targets_vec .iter() .map(|target| (B256::from_slice(&target.key.pack()), B256Set::default())) @@ -1573,10 +1626,22 @@ mod tests { .with_branch_node_masks(true) .multiproof(legacy_targets)?; - // Decode and sort legacy proof nodes - let mut proof_legacy_nodes = proof_legacy_result + // Helper function to check if a node path matches at least one target + let node_matches_target = |node_path: &Nibbles| -> bool { + targets_vec.iter().any(|target| { + // Node path must be a prefix of the target's key + target.key.starts_with(node_path) && + // Node path must start with the target's prefix (minimum length requirement) + node_path.starts_with(&target.prefix) + }) + }; + + // Decode and sort legacy proof nodes, filtering to only those that match at least one + // target + let proof_legacy_nodes = proof_legacy_result .account_subtree .iter() + .filter(|(path, _)| node_matches_target(path)) .map(|(path, node_enc)| { let mut buf = node_enc.as_ref(); let node = TrieNode::decode(&mut buf) @@ -1604,17 +1669,6 @@ mod tests { .sorted_by(|a, b| depth_first::cmp(&a.path, &b.path)) .collect::>(); - // When no targets are given the legacy implementation will still produce the root node - // in the proof. 
This differs from the V2 implementation, which produces nothing when - // given no targets. - if targets_vec.is_empty() { - assert_matches!( - proof_legacy_nodes.pop(), - Some(ProofTrieNode { path, .. }) if path.is_empty() - ); - assert!(proof_legacy_nodes.is_empty()); - } - // Basic comparison: both should succeed and produce identical results pretty_assertions::assert_eq!(proof_legacy_nodes, proof_v2_result); @@ -1654,8 +1708,8 @@ mod tests { } /// Generate a strategy for proof targets that are 80% from the `HashedPostState` accounts - /// and 20% random keys. - fn proof_targets_strategy(account_keys: Vec) -> impl Strategy> { + /// and 20% random keys. Each target has a random min_len of 0..16. + fn proof_targets_strategy(account_keys: Vec) -> impl Strategy> { let num_accounts = account_keys.len(); // Generate between 0 and (num_accounts + 5) targets @@ -1664,15 +1718,19 @@ mod tests { target_count.prop_flat_map(move |count| { let account_keys = account_keys.clone(); prop::collection::vec( - prop::bool::weighted(0.8).prop_flat_map(move |from_accounts| { - if from_accounts && !account_keys.is_empty() { - // 80% chance: pick from existing account keys - prop::sample::select(account_keys.clone()).boxed() - } else { - // 20% chance: generate random B256 - any::<[u8; 32]>().prop_map(B256::from).boxed() - } - }), + ( + prop::bool::weighted(0.8).prop_flat_map(move |from_accounts| { + if from_accounts && !account_keys.is_empty() { + // 80% chance: pick from existing account keys + prop::sample::select(account_keys.clone()).boxed() + } else { + // 20% chance: generate random B256 + any::<[u8; 32]>().prop_map(B256::from).boxed() + } + }), + 0u8..16u8, // Random min_len from 0 to 15 + ) + .prop_map(|(key, min_len)| Target::new(key).with_min_len(min_len)), count, ) }) @@ -1704,7 +1762,7 @@ mod tests { let harness = ProofTestHarness::new(post_state); // Pass generated targets to both implementations - harness.assert_proof(targets.into_iter().map(Into::into)).expect("Proof 
generation failed"); + harness.assert_proof(targets).expect("Proof generation failed"); } } } @@ -1743,8 +1801,10 @@ mod tests { // Create test harness let harness = ProofTestHarness::new(post_state); - // Assert the proof - harness.assert_proof(targets.into_iter().map(Into::into)).expect("Proof generation failed"); + // Assert the proof (convert B256 to Target with no min_len for this test) + harness + .assert_proof(targets.into_iter().map(Target::new)) + .expect("Proof generation failed"); } #[test] diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index e97e6470a69..5db021fda19 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -1,8 +1,7 @@ +use crate::proof_v2::increment_and_strip_trailing_zeros; use alloy_primitives::B256; use reth_trie_common::Nibbles; -use crate::proof_v2::increment_and_strip_trailing_zeros; - /// Target describes a proof target. For every proof target given, the /// [`crate::proof_v2::ProofCalculator`] will calculate and return all nodes whose path is a prefix /// of the target's `key`. @@ -41,6 +40,34 @@ impl From for Target { } } +// A helper function for getting the largest prefix of the sub-trie which contains a particular +// target, based on its prefix. +// +// In general the target will only match within the sub-trie with the same prefix as the +// target's. However there is an exception: +// +// Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc +// needs to match that node, but in order to know the node is at that path the branch at 0xab +// must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble +// truncated. 
+// +// For a target with an empty prefix we still use an empty sub-trie prefix; this will still +// construct the branch at the root node (if there is one), the only behavioral difference +// between targets with prefix lengths zero and one will be that with prefix length zero the +// root node's proof will be retained. +#[inline] +fn sub_trie_prefix(target_prefix: Nibbles) -> Nibbles { + let mut sub_trie_prefix = target_prefix; + sub_trie_prefix.truncate(target_prefix.len().saturating_sub(1)); + sub_trie_prefix +} + +// A helper function which returns the first path following a sub-trie in lexicographical order. +#[inline] +fn sub_trie_upper_bound(sub_trie_prefix: &Nibbles) -> Option { + increment_and_strip_trailing_zeros(sub_trie_prefix) +} + /// Describes a set of targets which all apply to a single sub-trie, ie a section of the overall /// trie whose nodes all share a prefix. pub(crate) struct SubTrieTargets<'a> { @@ -56,39 +83,27 @@ pub(crate) struct SubTrieTargets<'a> { pub(crate) retain_root: bool, } +impl<'a> SubTrieTargets<'a> { + // A helper function which returns the first path following a sub-trie in lexicographical order. + #[inline] + pub(crate) fn upper_bound(&self) -> Option { + sub_trie_upper_bound(&self.prefix) + } +} + /// Given a set of [`Target`]s, returns an iterator over those same [`Target`]s chunked by the /// sub-tries they apply to within the overall trie. pub(crate) fn iter_sub_trie_targets<'a>( targets: &'a mut [Target], ) -> impl Iterator> { - // First sort lexicographically by prefix. We will use this for chunking targets into - // contiguous sections in the next steps based on their bounds. - targets.sort_unstable_by_key(|target| target.prefix); - - // A helper function for getting the largest prefix of the sub-trie which contains a particular - // target, based on its prefix. - // - // In general the target will only match within the sub-trie with the same prefix as the - // target's. 
However there is an exception: - // - // Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc - // needs to match that node, but in order to know the node is at that path the branch at 0xab - // must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble - // truncated. - // - // For a target with an empty prefix we still use an empty sub-trie prefix; this will still - // construct the branch at the root node (if there is one), the only behavioral difference - // between targets with prefix lengths zero and one will be that with prefix length zero the - // root node's proof will be retained. - let sub_trie_prefix = |prefix: Nibbles| { - let mut lower_bound = prefix; - lower_bound.truncate(prefix.len().saturating_sub(1)); - lower_bound - }; - - // A helper function which returns the first path following a sub-trie in lexicographical order. - let sub_trie_upper_bound = - |sub_trie_prefix: &Nibbles| increment_and_strip_trailing_zeros(sub_trie_prefix); + // First sort by the sub-trie prefix of each target, falling back to the actual prefix in cases + // where the sub-trie prefixes are equal (to differentiate an empty target prefix from an empty + // sub-trie prefix). + targets.sort_unstable_by(|a, b| { + let sub_trie_prefix_a = sub_trie_prefix(a.prefix); + let sub_trie_prefix_b = sub_trie_prefix(b.prefix); + sub_trie_prefix_a.cmp(&sub_trie_prefix_b).then_with(|| a.prefix.cmp(&b.prefix)) + }); // We now chunk targets, such that each chunk contains all targets belonging to the same // sub-trie. 
We are taking advantage of the following properties: @@ -258,16 +273,31 @@ mod tests { ], )], ), + // Case 9: Second target's sub-trie prefix is root + ( + vec![ + Target::new(B256::repeat_byte(0x20)).with_min_len(2), + Target::new(B256::repeat_byte(0x40)).with_min_len(1), + ], + vec![( + "", + vec![ + "2020202020202020202020202020202020202020202020202020202020202020", + "4040404040404040404040404040404040404040404040404040404040404040", + ], + )], + ), ]; for (i, (mut input_targets, expected)) in test_cases.into_iter().enumerate() { + let test_case = i + 1; let sub_tries: Vec<_> = iter_sub_trie_targets(&mut input_targets).collect(); assert_eq!( sub_tries.len(), expected.len(), "Test case {} failed: expected {} sub-tries, got {}", - i, + test_case, expected.len(), sub_tries.len() ); @@ -280,13 +310,13 @@ mod tests { assert_eq!( sub_trie.prefix, exp_prefix, "Test case {} sub-trie {}: prefix mismatch", - i, j + test_case, j ); assert_eq!( sub_trie.targets.len(), exp_keys.len(), "Test case {} sub-trie {}: expected {} targets, got {}", - i, + test_case, j, exp_keys.len(), sub_trie.targets.len() @@ -299,7 +329,7 @@ mod tests { assert_eq!( target.key, exp_key, "Test case {} sub-trie {} target {}: key mismatch", - i, j, k + test_case, j, k ); } } From 57ac62763aaeb184ad115a1c35051130c519660a Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 17:32:12 +0100 Subject: [PATCH 53/59] Cleanup Target definition --- crates/trie/trie/src/proof_v2/mod.rs | 35 +++++------ crates/trie/trie/src/proof_v2/target.rs | 80 ++++++++++++------------- 2 files changed, 55 insertions(+), 60 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index b2b2768eca4..1b0c4a85b54 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -202,35 +202,36 @@ where // If the node in question is a prefix of the target then we do not iterate targets // further. 
// - // Even if the node is a prefix of the target's key, if the target has a `prefix` field - // it indicates that the node should only be retained if it has that prefix (ie if it - // has a minimum length). + // Even if the node is a prefix of the target's key, if the target has a non-zero + // `min_len` it indicates that the node should only be retained if it is + // longer than that value. // - // _However_ even if the node doesn't match the target due to the target's prefix, it + // _However_ even if the node doesn't match the target due to the target's `min_len`, it // may match other targets whose keys match this node. So we search forwards and - // backwards for all targets which might match this node, and check the prefix on each. + // backwards for all targets which might match this node, and check against the + // `min_len` of each. // // For example, given a branch 0xabc, with children at 0, 1, and 2, and targets: - // - key: 0xabc0, prefix: 0xab - // - key: 0xabc1, prefix: 0xa - // - key: 0xabc2, prefix: 0xabc2 <-- current - // - key: 0xabc3, prefix: 0xabc + // - key: 0xabc0, min_len: 2 + // - key: 0xabc1, min_len: 1 + // - key: 0xabc2, min_len: 4 <-- current + // - key: 0xabc3, min_len: 3 // - // When the branch node at 0xabc is visited it will be after targets has iterated + // When the branch node at 0xabc is visited it will be after the targets has iterated // forward to 0xabc2 (because all children will have been visited already). At this - // point the target for 0xabc2 will not match the branch due to its prefix, any of the - // other targets would, so we need to check those as well. + // point the target for 0xabc2 will not match the branch due to its prefix, but any of + // the other targets would, so we need to check those as well. 
if lower.key.starts_with(path) { return !check_prefix || - (path.starts_with(&lower.prefix) || + (path.len() >= lower.min_len as usize || targets .skip_iter() .take_while(|target| target.key.starts_with(path)) - .any(|target| path.starts_with(&target.prefix)) || + .any(|target| path.len() >= target.min_len as usize) || targets .rev_iter() .take_while(|target| target.key.starts_with(path)) - .any(|target| path.starts_with(&target.prefix))) + .any(|target| path.len() >= target.min_len as usize)) } // If the path isn't in the current range then iterate forward until it is (or until @@ -1631,8 +1632,8 @@ mod tests { targets_vec.iter().any(|target| { // Node path must be a prefix of the target's key target.key.starts_with(node_path) && - // Node path must start with the target's prefix (minimum length requirement) - node_path.starts_with(&target.prefix) + // Node path must be at least `min_len` long + node_path.len() >= target.min_len as usize }) }; diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index 5db021fda19..b60e50078c7 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -8,9 +8,7 @@ use reth_trie_common::Nibbles; #[derive(Debug, Copy, Clone)] pub struct Target { pub(crate) key: Nibbles, - /// The shortest trie node path which can be retained by this target. `key.starts_with(prefix)` - /// is always true. - pub(crate) prefix: Nibbles, + pub(crate) min_len: u8, } impl Target { @@ -18,7 +16,7 @@ impl Target { pub fn new(key: B256) -> Self { // SAFETY: key is a B256 and so is exactly 32-bytes. let key = unsafe { Nibbles::unpack_unchecked(key.as_slice()) }; - Self { key, prefix: Nibbles::new() } + Self { key, min_len: 0 } } /// Only match trie nodes whose path is at least this long. @@ -28,10 +26,31 @@ impl Target { /// This method panics if `min_len` is greater than 64. 
pub fn with_min_len(mut self, min_len: u8) -> Self { debug_assert!(min_len <= 64); - self.prefix = self.key; - self.prefix.truncate(min_len as usize); + self.min_len = min_len; self } + + // A helper function for getting the largest prefix of the sub-trie which contains a particular + // target, based on its prefix. + // + // In general the target will only match within the sub-trie with the same prefix as the + // target's. However there is an exception: + // + // Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc + // needs to match that node, but in order to know the node is at that path the branch at 0xab + // must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble + // truncated. + // + // For a target with an empty prefix we still use an empty sub-trie prefix; this will still + // construct the branch at the root node (if there is one), the only behavioral difference + // between targets with prefix lengths zero and one will be that with prefix length zero the + // root node's proof will be retained. + #[inline] + fn sub_trie_prefix(&self) -> Nibbles { + let mut sub_trie_prefix = self.key; + sub_trie_prefix.truncate(self.min_len.saturating_sub(1) as usize); + sub_trie_prefix + } } impl From for Target { @@ -40,28 +59,6 @@ impl From for Target { } } -// A helper function for getting the largest prefix of the sub-trie which contains a particular -// target, based on its prefix. -// -// In general the target will only match within the sub-trie with the same prefix as the -// target's. However there is an exception: -// -// Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc -// needs to match that node, but in order to know the node is at that path the branch at 0xab -// must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble -// truncated. 
-// -// For a target with an empty prefix we still use an empty sub-trie prefix; this will still -// construct the branch at the root node (if there is one), the only behavioral difference -// between targets with prefix lengths zero and one will be that with prefix length zero the -// root node's proof will be retained. -#[inline] -fn sub_trie_prefix(target_prefix: Nibbles) -> Nibbles { - let mut sub_trie_prefix = target_prefix; - sub_trie_prefix.truncate(target_prefix.len().saturating_sub(1)); - sub_trie_prefix -} - // A helper function which returns the first path following a sub-trie in lexicographical order. #[inline] fn sub_trie_upper_bound(sub_trie_prefix: &Nibbles) -> Option { @@ -77,7 +74,7 @@ pub(crate) struct SubTrieTargets<'a> { /// The targets belonging to this sub-trie. These will be sorted by their `key` field, /// lexicographically. pub(crate) targets: &'a [Target], - /// Will be true if at least one target in the set has an empty `prefix`. + /// Will be true if at least one target in the set has a zero `min_len`. /// /// If this is true then `prefix.is_empty()`, though not necessarily vice-versa. pub(crate) retain_root: bool, @@ -96,22 +93,20 @@ impl<'a> SubTrieTargets<'a> { pub(crate) fn iter_sub_trie_targets<'a>( targets: &'a mut [Target], ) -> impl Iterator> { - // First sort by the sub-trie prefix of each target, falling back to the actual prefix in cases - // where the sub-trie prefixes are equal (to differentiate an empty target prefix from an empty - // sub-trie prefix). + // First sort by the sub-trie prefix of each target, falling back to the `min_len` in cases + // where the sub-trie prefixes are equal (to differentiate targets which match the root node and + // those which don't). 
targets.sort_unstable_by(|a, b| { - let sub_trie_prefix_a = sub_trie_prefix(a.prefix); - let sub_trie_prefix_b = sub_trie_prefix(b.prefix); - sub_trie_prefix_a.cmp(&sub_trie_prefix_b).then_with(|| a.prefix.cmp(&b.prefix)) + a.sub_trie_prefix().cmp(&b.sub_trie_prefix()).then_with(|| a.min_len.cmp(&b.min_len)) }); // We now chunk targets, such that each chunk contains all targets belonging to the same // sub-trie. We are taking advantage of the following properties: // - // - The first target in the chunk has the shortest prefix (see previous sorting step). + // - The first target in the chunk has the shortest sub-trie prefix (see previous sorting step). // - // - The first target in the chunk's upper bound will therefore belong to the sub-trie with the - // highest upper bound, and the upper bound of the whole chunk. + // - The upper bound of the first target in the chunk's sub-trie will therefore be the upper + // bound of the whole chunk. // - For example, given a chunk with sub-trie prefixes [0x2, 0x2f, 0x2fa], the upper bounds // will be [0x3, 0x3, 0x2fb]. Note that no target could match a trie node with path equal // to or greater than 0x3. @@ -123,11 +118,10 @@ pub(crate) fn iter_sub_trie_targets<'a>( // - [0x2, 0x2a] w/ upper bound 0x3 // - [0x4c 0x4ce] w/ upper bound 0x4d // - [0x4e] w/ upper bound 0x4f - let mut upper_bound = - targets.first().and_then(|t| sub_trie_upper_bound(&sub_trie_prefix(t.prefix))); + let mut upper_bound = targets.first().and_then(|t| sub_trie_upper_bound(&t.sub_trie_prefix())); let target_chunks = targets.chunk_by_mut(move |_, next| { if let Some(some_upper_bound) = upper_bound { - let sub_trie_prefix = sub_trie_prefix(next.prefix); + let sub_trie_prefix = next.sub_trie_prefix(); let same_chunk = sub_trie_prefix < some_upper_bound; if !same_chunk { upper_bound = sub_trie_upper_bound(&sub_trie_prefix); @@ -141,8 +135,8 @@ pub(crate) fn iter_sub_trie_targets<'a>( // Map the chunks to the return type. 
Within each chunk we want targets to be sorted by their // key, as that will be the order they are checked by the `ProofCalculator`. target_chunks.map(move |targets| { - let prefix = sub_trie_prefix(targets[0].prefix); - let retain_root = targets[0].prefix.is_empty(); + let prefix = targets[0].sub_trie_prefix(); + let retain_root = targets[0].min_len == 0; targets.sort_unstable_by_key(|target| target.key); SubTrieTargets { prefix, targets, retain_root } }) From 65497a09f0b43634f1bb0201d2da821bba8c8a24 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 17:37:58 +0100 Subject: [PATCH 54/59] doc fix --- crates/trie/trie/src/proof_v2/target.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index b60e50078c7..fb58e307b36 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -115,7 +115,7 @@ pub(crate) fn iter_sub_trie_targets<'a>( // that target must be the first target of the next chunk, lying in a separate sub-trie. 
// - Example: given sub-trie prefixes of [0x2, 0x2fa, 0x4c, 0x4ce, 0x4e], we would end up // with the following chunks: - // - [0x2, 0x2a] w/ upper bound 0x3 + // - [0x2, 0x2fa] w/ upper bound 0x3 // - [0x4c 0x4ce] w/ upper bound 0x4d // - [0x4e] w/ upper bound 0x4f let mut upper_bound = targets.first().and_then(|t| sub_trie_upper_bound(&t.sub_trie_prefix())); From 1e0f72da40ffeffc6b06e6df902bc28baf695edb Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 17:45:21 +0100 Subject: [PATCH 55/59] Clippy --- crates/trie/trie/benches/proof_v2.rs | 6 +++--- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/trie/trie/benches/proof_v2.rs b/crates/trie/trie/benches/proof_v2.rs index e5123ddc9a1..87dd20b9404 100644 --- a/crates/trie/trie/benches/proof_v2.rs +++ b/crates/trie/trie/benches/proof_v2.rs @@ -161,10 +161,10 @@ fn bench_proof_algos(c: &mut Criterion) { StorageProofCalculator::new_storage(trie_cursor, hashed_cursor); b.iter_batched( - || targets.clone(), - |targets| { + || targets.iter().copied().map(Into::into).collect::>(), + |mut targets| { proof_calculator - .storage_proof(hashed_address, targets) + .storage_proof(hashed_address, &mut targets) .expect("Proof generation failed"); }, BatchSize::SmallInput, diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 1b0c4a85b54..041c221fd48 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -1709,7 +1709,7 @@ mod tests { } /// Generate a strategy for proof targets that are 80% from the `HashedPostState` accounts - /// and 20% random keys. Each target has a random min_len of 0..16. + /// and 20% random keys. Each target has a random `min_len` of 0..16. 
fn proof_targets_strategy(account_keys: Vec) -> impl Strategy> { let num_accounts = account_keys.len(); From 02b0b18af02d8ebfe91f5543e792d447a2932963 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 17:57:15 +0100 Subject: [PATCH 56/59] fix arg naming --- crates/trie/trie/src/proof_v2/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 041c221fd48..72cfd7eaf59 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -184,7 +184,7 @@ where &self, targets: &mut TargetsCursor<'a>, path: &Nibbles, - check_prefix: bool, + check_min_len: bool, ) -> bool { let (mut lower, mut upper) = targets.current(); @@ -222,7 +222,7 @@ where // point the target for 0xabc2 will not match the branch due to its prefix, but any of // the other targets would, so we need to check those as well. if lower.key.starts_with(path) { - return !check_prefix || + return !check_min_len || (path.len() >= lower.min_len as usize || targets .skip_iter() From c444ecee997405ba71e6350ef9fba78726022f77 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Fri, 12 Dec 2025 18:05:27 +0100 Subject: [PATCH 57/59] typo --- crates/trie/trie/src/proof_v2/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 72cfd7eaf59..2b5d690566a 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -1110,7 +1110,7 @@ where } } - /// Calculates trie nodes and retains proofs for targetted nodes within a sub-trie. The + /// Calculates trie nodes and retains proofs for targeted nodes within a sub-trie. The /// sub-trie's bounds are denoted by the `lower_bound` and `upper_bound` arguments, /// `upper_bound` is exclusive, None indicates unbounded. 
#[instrument( From 6ce13f98e16891c71bc817d77b37639c983ebdd7 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Mon, 15 Dec 2025 17:04:57 +0100 Subject: [PATCH 58/59] Fix initialization of uncalculated_lower_bound --- crates/trie/trie/src/proof_v2/mod.rs | 417 ++++++++++++++++++++++++++- 1 file changed, 401 insertions(+), 16 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/mod.rs b/crates/trie/trie/src/proof_v2/mod.rs index 2b5d690566a..a9cf0b2d510 100644 --- a/crates/trie/trie/src/proof_v2/mod.rs +++ b/crates/trie/trie/src/proof_v2/mod.rs @@ -897,7 +897,7 @@ where trie_cursor_state: &mut TrieCursorState, sub_trie_prefix: &Nibbles, sub_trie_upper_bound: Option<&Nibbles>, - uncalculated_lower_bound: Nibbles, + mut uncalculated_lower_bound: Option, ) -> Result)>, StateProofError> { // Pop any under-construction branches that are now complete. // All trie data prior to the current cached branch, if any, has been computed. Any branches @@ -910,11 +910,6 @@ where } } - // `uncalculated_lower_bound` tracks the lower bound of node paths which have yet to be - // visited, either via the hashed key cursor (`calculate_key_range`) or trie cursor (this - // method). If this is None then there are no further nodes which could exist. - let mut uncalculated_lower_bound = Some(uncalculated_lower_bound); - loop { // Pop the currently cached branch node. // @@ -1147,24 +1142,21 @@ where TrieCursorState::seeked(self.trie_cursor.seek(sub_trie_targets.prefix)?); } + // `uncalculated_lower_bound` tracks the lower bound of node paths which have yet to be + // visited, either via the hashed key cursor (`calculate_key_range`) or trie cursor + // (`next_uncached_key_range`). If/when this becomes None then there are no further nodes + // which could exist. + let mut uncalculated_lower_bound = Some(sub_trie_targets.prefix); + trace!(target: TRACE_TARGET, "Starting loop"); loop { - // Calculate the uncalculated lower bound that `next_uncached_key_range` should use. 
- // - // The lower bound is the higher of the sub-trie's prefix (ie its first possible node) - // and the hashed cursor's current position (which is the next key which a trie node has - // not been computed). - let uncached_lower_bound = hashed_cursor_current - .as_ref() - .map_or(sub_trie_targets.prefix, |kv| std::cmp::max(kv.0, sub_trie_targets.prefix)); - // Determine the range of keys of the overall trie which need to be re-computed. let Some((calc_lower_bound, calc_upper_bound)) = self.next_uncached_key_range( &mut targets, trie_cursor_state, &sub_trie_targets.prefix, sub_trie_upper_bound.as_ref(), - uncached_lower_bound, + uncalculated_lower_bound, )? else { // If `next_uncached_key_range` determines that there can be no more keys then @@ -1193,6 +1185,10 @@ where { break; } + + // The upper bound of previous calculation becomes the lower bound of the uncalculated + // range, for which we'll once again check for cached data. + uncalculated_lower_bound = calc_upper_bound; } // Once there's no more leaves we can pop the remaining branches, if any. 
@@ -1831,4 +1827,393 @@ mod tests { assert_eq!(result, expected, "Failed for input: {:?}", input); } } + + #[test] + fn test_failing_proptest_case_0() { + use alloy_primitives::{hex, map::B256Map}; + + reth_tracing::init_test_tracing(); + + // Helper function to create B256 from hex string + let b256 = |s: &str| B256::from_slice(&hex::decode(s).unwrap()); + + // Create the HashedPostState from test case input + let mut accounts = B256Map::default(); + + // Define all account data from test case input + let account_data = [ + ( + "9f3a475db85ff1f5b5e82d8614ee4afc670d27aefb9a43da0bd863a54acf1fe6", + 8396790837504194281u64, + 9224366602005816983u64, + "103c5b0538f4e37944321a30f5cb1f7005d2ee70998106f34f36d7adb838c789", + ), + ( + "c736258fdfd23d73ec4c5e54b8c3b58e26726b361d438ef48670f028286b70ca", + 9193115115482903760u64, + 4515164289866465875u64, + "9f24ef3ab0b4893b0ec38d0e9b00f239da072ccf093b0b24f1ea1f99547abe55", + ), + ( + "780a3476520090f97e847181aee17515c5ea30b7607775103df16d2b6611a87a", + 8404772182417755681u64, + 16639574952778823617u64, + "214b12bee666ce8c64c6bbbcfafa0c3e55b4b05a8724ec4182b9a6caa774c56d", + ), + ( + "23ebfa849308a5d02c3048040217cd1f4b71fb01a9b54dafe541284ebec2bcce", + 17978809803974566048u64, + 11093542035392742776u64, + "5384dfda8f1935d98e463c00a96960ff24e4d4893ec21e5ece0d272df33ac7e9", + ), + ( + "348e476c24fac841b11d358431b4526db09edc9f39906e0ac8809886a04f3c5a", + 9422945522568453583u64, + 9737072818780682487u64, + "79f8f25b2cbb7485c5c7b627917c0f562f012d3d7ddd486212c90fbea0cf686e", + ), + ( + "830536ee6c8f780a1cd760457345b79fc09476018a59cf3e8fd427a793d99633", + 16497625187081138489u64, + 15143978245385012455u64, + "00ede4000cc2a16fca7e930761aaf30d1fddcc3803f0009d6a0742b4ee519342", + ), + ( + "806c74b024b2fe81f077ea93d2936c489689f7fe024febc3a0fb71a8a9f22fbc", + 8103477314050566918u64, + 1383893458340561723u64, + "690ed176136174c4f0cc442e6dcbcf6e7b577e30fc052430b6060f97af1f8e85", + ), + ( + 
"b903d962ffc520877f14e1e8328160e5b22f8086b0f7e9cba7a373a8376028a0", + 12972727566246296372u64, + 1130659127924527352u64, + "cadf1f09d8e6a0d945a58ccd2ff36e2ae99f8146f02be96873e84bef0462d64a", + ), + ( + "d36a16afff0097e06b2c28bd795b889265e2ceff9a086173113fbeb6f7a9bc42", + 15682404502571860137u64, + 2025886798818635036u64, + "c2cee70663e9ff1b521e2e1602e88723da52ccdc7a69e370cde9595af435e654", + ), + ( + "f3e8461cba0b84f5b81f8ca63d0456cb567e701ec1d6e77b1a03624c5018389b", + 5663749586038550112u64, + 7681243595728002238u64, + "072c547c3ab9744bcd2ed9dbd813bd62866a673f4ca5d46939b65e9507be0e70", + ), + ( + "40b71840b6f43a493b32f4aa755e02d572012392fd582c81a513a169447e194c", + 518207789203399614u64, + 317311275468085815u64, + "85541d48471bf639c2574600a9b637338c49729ba9e741f157cc6ebaae139da0", + ), + ( + "3f77cd91ceb7d335dd2527c29e79aaf94f14141438740051eb0163d86c35bcc9", + 16227517944662106096u64, + 12646193931088343779u64, + "54999911d82dd63d526429275115fa98f6a560bc2d8e00be24962e91e38d7182", + ), + ( + "5cd903814ba84daa6956572411cd1bf4d48a8e230003d28cc3f942697bf8debb", + 5096288383163945009u64, + 17919982845103509853u64, + "6a53c812e713f1bfe6bf21954f291140c60ec3f2ef353ecdae5dc7b263a37282", + ), + ( + "23f3602c95fd98d7fbe48a326ae1549030a2c7574099432cce5b458182f16bf2", + 11136020130962086191u64, + 12045219101880183180u64, + "ce53fb9b108a3ee90db8469e44948ba3263ca8d8a0d92a076c9516f9a3d30bd1", + ), + ( + "be86489b3594a9da83e04a9ff81c8d68d528b8b9d31f3942d1c5856a4a8c5af7", + 16293506537092575994u64, + 536238712429663046u64, + "a2af0607ade21241386ecfb3780aa90514f43595941daeff8dd599c203cde30a", + ), + ( + "97bcd85ee5d6033bdf86397e8b26f711912948a7298114be27ca5499ea99725f", + 3086656672041156193u64, + 8667446575959669532u64, + "0474377538684a991ffc9b41f970b48e65eda9e07c292e60861258ef87d45272", + ), + ( + "40065932e6c70eb907e4f2a89ec772f5382ca90a49ef44c4ae21155b9decdcc0", + 17152529399128063686u64, + 3643450822628960860u64, + 
"d5f6198c64c797f455f5b44062bb136734f508f9cdd02d8d69d24100ac8d6252", + ), + ( + "c136436c2db6b2ebd14985e2c883e73c6d8fd95ace54bfefae9eeca47b7da800", + 727585093455815585u64, + 521742371554431881u64, + "3dfad04a6eb46d175b63e96943c7d636c56d61063277e25557aace95820432da", + ), + ( + "9ea50348595593788645394eb041ac4f75ee4d6a4840b9cf1ed304e895060791", + 8654829249939415079u64, + 15623358443672184321u64, + "61bb0d6ffcd5b32d0ee34a3b7dfb1c495888059be02b255dd1fa3be02fa1ddbd", + ), + ( + "5abc714353ad6abda44a609f9b61f310f5b0a7df55ccf553dc2db3edda18ca17", + 5732104102609402825u64, + 15720007305337585794u64, + "8b55b7e9c6f54057322c5e0610b33b3137f1fcd46f7d4af1aca797c7b5fff033", + ), + ( + "e270b59e6e56100f9e2813f263884ba5f74190a1770dd88cd9603266174e0a6b", + 4728642361690813205u64, + 6762867306120182099u64, + "5e9aa1ff854504b4bfea4a7f0175866eba04e88e14e57ac08dddc63d6917bf47", + ), + ( + "78286294c6fb6823bb8b2b2ddb7a1e71ee64e05c9ba33b0eb8bb6654c64a8259", + 6032052879332640150u64, + 498315069638377858u64, + "799ef578ffb51a5ec42484e788d6ada4f13f0ff73e1b7b3e6d14d58caae9319a", + ), + ( + "af1b85cf284b0cb59a4bfb0f699194bcd6ad4538f27057d9d93dc7a95c1ff32e", + 1647153930670480138u64, + 13109595411418593026u64, + "429dcdf4748c0047b0dd94f3ad12b5e62bbadf8302525cc5d2aad9c9c746696f", + ), + ( + "0152b7a0626771a2518de84c01e52839e7821a655f9dcb9a174d8f52b64b7086", + 3915492299782594412u64, + 9550071871839879785u64, + "4d5e6ce993dfc9597585ae2b4bacd6d055fefc56ae825666c83e0770e4aa0527", + ), + ( + "9ea9b8a4f6bce1dba63290b81f4d1b88dfeac3e244856904a5c9d4086a10271b", + 8824593031424861220u64, + 15831101445348312026u64, + "a07602b4dd5cba679562061b7c5c0344b2edd6eba36aa97ca57a6fe01ed80a48", + ), + ( + "d7b26c2d8f85b74423a57a3da56c61829340f65967791bab849c90b5e1547e7a", + 12723258987146468813u64, + 10714399360315276559u64, + "3705e57b27d931188c0d2017ab62577355b0cdda4173203478a8562a0cdcae0c", + ), + ( + "da354ceca117552482e628937931870a28e9d4416f47a58ee77176d0b760c75b", + 1580954430670112951u64, + 
14920857341852745222u64, + "a13d6b0123daa2e662699ac55a2d0ed1d2e73a02ed00ee5a4dd34db8dea2a37e", + ), + ( + "53140d0c8b90b4c3c49e0604879d0dc036e914c4c4f799f1ccae357fef2613e3", + 12521658365236780592u64, + 11630410585145916252u64, + "46f06ce1435a7a0fd3476bbcffe4aac88c33a7fcf50080270b715d25c93d96d7", + ), + ( + "4b1c151815da6f18f27e98890eac1f7d43b80f3386c7c7d15ee0e43a7edfe0a6", + 9575643484508382933u64, + 3471795678079408573u64, + "a9e6a8fac46c5fc61ae07bddc223e9f105f567ad039d2312a03431d1f24d8b2c", + ), + ( + "39436357a2bcd906e58fb88238be2ddb2e43c8a5590332e3aee1d1134a0d0ba4", + 10171391804125392783u64, + 2915644784933705108u64, + "1d5db03f07137da9d3af85096ed51a4ff64bb476a79bf4294850438867fe3833", + ), + ( + "5fbe8d9d6a12b061a94a72436caec331ab1fd4e472c3bb4688215788c5e9bcd9", + 5663512925993713993u64, + 18170240962605758111u64, + "bd5d601cbcb47bd84d410bafec72f2270fceb1ed2ed11499a1e218a9f89a9f7f", + ), + ( + "f2e29a909dd31b38e9b92b2b2d214e822ebddb26183cd077d4009773854ab099", + 7512894577556564068u64, + 15905517369556068583u64, + "a36e66ce11eca7900248c518e12c6c08d659d609f4cbd98468292de7adf780f2", + ), + ( + "3eb82e6d6e964ca56b50cc54bdd55bb470c67a4932aba48d27d175d1be2542aa", + 12645567232869276853u64, + 8416544129280224452u64, + "d177f246a45cc76d39a8ee06b32d8c076c986106b9a8e0455a0b41d00fe3cbde", + ), + ( + "c903731014f6a5b4b45174ef5f9d5a2895a19d1308292f25aa323fda88acc938", + 5989992708726918818u64, + 17462460601463602125u64, + "01241c61ad1c8adc27e5a1096ab6c643af0fbb6e2818ef77272b70e5c3624abc", + ), + ( + "ef46410ab47113a78c27e100ed1b476f82a8789012bd95a047a4b23385596f53", + 11884362385049322305u64, + 619908411193297508u64, + "e9b4c929e26077ac1fd5a771ea5badc7e9ddb58a20a2a797389c63b3dd3df00d", + ), + ( + "be336bc6722bb787d542f4ef8ecb6f46a449557ca7b69b8668b6fed19dfa73b7", + 11490216175357680195u64, + 13136528075688203375u64, + "31bfd807f92e6d5dc5c534e9ad0cb29d00c6f0ae7d7b5f1e65f8e683de0bce59", + ), + ( + "39599e5828a8f102b8a6808103ae7df29b838fe739d8b73f72f8f0d282ca5a47", + 
6957481657451522177u64, + 4196708540027060724u64, + "968a12d79704b313471ece148cb4e26b8b11620db2a9ee6da0f5dc200801f555", + ), + ( + "acd99530bb14ca9a7fac3df8eebfd8cdd234b0f6f7c3893a20bc159a4fd54df5", + 9792913946138032169u64, + 9219321015500590384u64, + "db45a98128770a329c82c904ceee21d3917f6072b8bd260e46218f65656c964c", + ), + ( + "453b80a0b11f237011c57630034ed46888ad96f4300a58aea24c0fe4a5472f68", + 14407140330317286994u64, + 5783848199433986576u64, + "b8cded0b4efd6bf2282a4f8b3c353f74821714f84df9a6ab25131edc7fdad00f", + ), + ( + "23e464d1e9b413a4a6b378cee3a0405ec6ccbb4d418372d1b42d3fde558d48d1", + 1190974500816796805u64, + 1621159728666344828u64, + "d677f41d273754da3ab8080b605ae07a7193c9f35f6318b809e42a1fdf594be3", + ), + ( + "d0e590648dec459aca50edf44251627bab5a36029a0c748b1ddf86b7b887425b", + 4807164391931567365u64, + 4256042233199858200u64, + "a8677de59ab856516a03663730af54c55a79169346c3d958b564e5ee35d8622b", + ), + ( + "72387dbaaaf2c39175d8c067558b869ba7bdc6234bc63ee97a53fea1d988ff39", + 5046042574093452325u64, + 3088471405044806123u64, + "83c226621506b07073936aec3c87a8e2ef34dd42e504adc2bbab39ede49aa77f", + ), + ( + "de6874ca2b9dd8b4347c25d32b882a2a7c127b127d6c5e00d073ab3853339d0e", + 6112730660331874479u64, + 10943246617310133253u64, + "a0c96a69e5ab3e3fe1a1a2fd0e5e68035ff3c7b2985e4e6b8407d4c377600c6f", + ), + ( + "b0d8689e08b983e578d6a0c136b76952497087ee144369af653a0a1b231eeb28", + 15612408165265483596u64, + 13112504741499957010u64, + "4fc49edeff215f1d54dfd2e60a14a3de2abecbe845db2148c7aee32c65f3c91c", + ), + ( + "29d7fb6b714cbdd1be95c4a268cef7f544329642ae05fab26dc251bbc773085e", + 17509162400681223655u64, + 5075629528173950353u64, + "781ecb560ef8cf0bcfa96b8d12075f4cf87ad52d69dfb2c72801206eded135bd", + ), + ( + "85dbf7074c93a4e39b67cc504b35351ee16c1fab437a7fb9e5d9320be1d9c13c", + 17692199403267011109u64, + 7069378948726478427u64, + "a3ff0d8dee5aa0214460f5b03a70bd76ef00ac8c07f07c0b3d82c9c57e4c72a9", + ), + ( + 
"7bd5a9f3126b4a681afac9a177c6ff7f3dd80d8d7fd5a821a705221c96975ded", + 17807965607151214145u64, + 5562549152802999850u64, + "dbc3861943b7372e49698b1c5b0e4255b7c93e9fa2c13d6a4405172ab0db9a5b", + ), + ( + "496d13d45dbe7eb02fee23c914ac9fefdf86cf5c937c520719fc6a31b3fcf8d9", + 13446203348342334214u64, + 332407928246785326u64, + "d2d73f15fcdc12adce25b911aa4551dcf900e225761e254eb6392cbd414e389c", + ), + ( + "b2f0a0127fc74a35dec5515b1c7eb8a3833ca99925049c47cd109ec94678e6c5", + 9683373807753869342u64, + 7570798132195583433u64, + "e704110433e5ab17858c5fbe4f1b6d692942d5f5981cac68372d06066bee97fe", + ), + ( + "d5f65171b17d7720411905ef138e84b9d1f459e2b248521c449f1781aafd675e", + 10088287051097617949u64, + 185695341767856973u64, + "8d784c4171e242af4187f30510cd298106b7e68cd3088444a055cb1f3893ba28", + ), + ( + "7dcbec5c20fbf1d69665d4b9cdc450fea2d0098e78084bce0a864fea4ba016b0", + 13908816056510478374u64, + 17793990636863600193u64, + "18e9026372d91e116faf813ce3ba9d7fadef2bb3b779be6efeba8a4ecd9e1f38", + ), + ( + "d4f772f4bf1cfa4dad4b55962b50900da8657a4961dabbdf0664f3cd42d368f8", + 16438076732493217366u64, + 18419670900047275588u64, + "b9fd16b16b3a8fab4d9c47f452d9ce4aad530edeb06ee6830589078db2f79382", + ), + ( + "2d009535f82b1813ce2ca7236ceae7864c1e4d3644a1acd02656919ef1aa55d0", + 10206924399607440433u64, + 3986996560633257271u64, + "db49e225bd427768599a7c06d7aee432121fa3179505f9ee8c717f51c7fa8c54", + ), + ( + "b1d7a292df12e505e7433c7e850e9efc81a8931b65f3354a66402894b6d5ba76", + 8215550459234533539u64, + 10241096845089693964u64, + "5567813b312cb811909a01d14ee8f7ec4d239198ea2d37243123e1de2317e1af", + ), + ( + "85120d6f43ea9258accf6a87e49cd5461d9b3735a4dc623f9fbcc669cbdd1ce6", + 17566770568845511328u64, + 8686605711223432099u64, + "e163f4fcd17acf5714ee48278732808601e861cd4c4c24326cd24431aab1d0ce", + ), + ( + "48fe4c22080c6e702f7af0e97fb5354c1c14ff4616c6fc4ac8a4491d4b9b3473", + 14371024664575587429u64, + 15149464181957728462u64, + 
"061dec7af4b41bdd056306a8b13b71d574a49a4595884b1a77674f5150d4509d", + ), + ( + "29d14b014fa3cabbb3b4808e751e81f571de6d0e727cae627318a5fd82fef517", + 9612395342616083334u64, + 3700617080099093094u64, + "f7b33a2d2784441f77f0cc1c87930e79bea3332a921269b500e81d823108561c", + ), + ]; + + // Insert all accounts + for (addr, nonce, balance, code_hash) in &account_data { + accounts.insert( + b256(addr), + Some(Account { + nonce: *nonce, + balance: U256::from(*balance), + bytecode_hash: Some(b256(code_hash)), + }), + ); + } + + let post_state = HashedPostState { accounts, storages: Default::default() }; + + // Create test harness + let harness = ProofTestHarness::new(post_state); + + // Create targets from test case input - these are Nibbles in hex form + let targets = vec![ + Target::new(b256("0153000000000000000000000000000000000000000000000000000000000000")) + .with_min_len(2), + Target::new(b256("0000000000000000000000000000000000000000000000000000000000000000")) + .with_min_len(2), + Target::new(b256("2300000000000000000000000000000000000000000000000000000000000000")) + .with_min_len(2), + ]; + + // Test proof generation + harness.assert_proof(targets).expect("Proof generation failed"); + } } From d7a5fdffe8b44ccf9b794559d07fbb0edfce7a5e Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 16 Dec 2025 09:52:29 +0100 Subject: [PATCH 59/59] Fix out of date comments --- crates/trie/trie/src/proof_v2/target.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index fb58e307b36..c96b202e85a 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -31,20 +31,23 @@ impl Target { } // A helper function for getting the largest prefix of the sub-trie which contains a particular - // target, based on its prefix. + // target, based on its `min_len`. 
// - // In general the target will only match within the sub-trie with the same prefix as the + // A target will only match nodes which share the target's prefix, where the target's prefix is + // the first `min_len` nibbles of its key. E.g. a target with `key` 0xabcd and `min_len` 2 will + // only match nodes with prefix 0xab. + // + // In general the target will only match within the sub-trie whose prefix is identical to the // target's. However there is an exception: // // Given a trie with a node at 0xabc, there must be a branch at 0xab. A target with prefix 0xabc - // needs to match that node, but in order to know the node is at that path the branch at 0xab - // must be constructed. Therefore the sub-trie prefix is the target prefix with a nibble - // truncated. + // needs to match that node, but the branch at 0xab must be constructed order to know the node + // is at that path. Therefore the sub-trie prefix is the target prefix with a nibble truncated. // - // For a target with an empty prefix we still use an empty sub-trie prefix; this will still - // construct the branch at the root node (if there is one), the only behavioral difference - // between targets with prefix lengths zero and one will be that with prefix length zero the - // root node's proof will be retained. + // For a target with an empty prefix (`min_len` of 0) we still use an empty sub-trie prefix; + // this will still construct the branch at the root node (if there is one). Targets with + // `min_len` of both 0 and 1 will therefore construct the root node, but only those with + // `min_len` of 0 will retain it. #[inline] fn sub_trie_prefix(&self) -> Nibbles { let mut sub_trie_prefix = self.key; @@ -157,7 +160,7 @@ mod tests { }; // Test cases: (input_targets, expected_output) - // Expected output format: Vec<(lower_bound_hex, upper_bound_hex_opt, Vec)> + // Expected output format: Vec<(exp_prefix_hex, Vec)> let test_cases = vec![ // Case 1: Empty targets (vec![], vec![]),