Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
be4e8cd
refactor(chain-state): derive lazy overlay anchor from blocks
mediocregopher Apr 20, 2026
cacb69a
refactor(chain-state): address lazy overlay review feedback
mediocregopher Apr 20, 2026
ebfaa6f
refactor(chain-state): cache lazy overlays by anchor
mediocregopher Apr 20, 2026
5041d55
refactor(provider): resolve lazy overlay anchors at use time
mediocregopher Apr 20, 2026
812e479
refactor(provider): separate overlay anchors from revert state
mediocregopher Apr 20, 2026
5036eb5
refactor(provider): infer overlay anchors from sources
mediocregopher Apr 20, 2026
b5ad001
refactor(provider): thread explicit requested anchors
mediocregopher Apr 20, 2026
d92ad5a
fix(provider): anchor overlay state providers by hash
mediocregopher Apr 21, 2026
134a7f3
fix(provider): pass overlay anchors via constructor
mediocregopher Apr 21, 2026
7db14d0
fix(engine): anchor state-root test overlay factory
mediocregopher Apr 21, 2026
45db5e0
fix(trie): initialize test overlay anchors
mediocregopher Apr 21, 2026
ffb0587
fix(provider): satisfy overlay lint checks
mediocregopher Apr 21, 2026
87b5240
Merge branch 'main' into mediocregopher/lazyoverlay-refactor
mediocregopher Apr 22, 2026
d5169ed
fix(engine): update sparse trie overlay factory test
mediocregopher Apr 22, 2026
c4d0949
style(engine): format sparse trie overlay test
mediocregopher Apr 22, 2026
6e8dbe3
Merge branch 'main' into mediocregopher/lazyoverlay-refactor
mediocregopher Apr 23, 2026
b60758e
fix(trie): remove unused parallel test dependency
mediocregopher Apr 24, 2026
31d0c78
fix(ci): clean bench checkouts and lock cargo builds
mediocregopher Apr 24, 2026
b6eec2e
refactor(provider): require overlay builder anchor hash
mediocregopher Apr 24, 2026
dd7c9a8
test(trie): initialize canonical genesis in overlay root test
mediocregopher Apr 27, 2026
2065ca1
Merge remote-tracking branch 'origin/main' into mediocregopher/lazyov…
mediocregopher Apr 27, 2026
eaefe88
fix(provider): make historical state use provider primitives
mediocregopher Apr 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/chain-state/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ reth-errors.workspace = true
reth-execution-types.workspace = true
reth-metrics.workspace = true
reth-ethereum-primitives.workspace = true
reth-primitives-traits.workspace = true
reth-primitives-traits = { workspace = true, features = ["dashmap"] }
reth-storage-api.workspace = true
reth-trie.workspace = true

Expand Down
282 changes: 202 additions & 80 deletions crates/chain-state/src/lazy_overlay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,108 +4,131 @@
//! lazily on first access. This allows execution to start before the trie overlay
//! is fully computed.

use crate::DeferredTrieData;
use crate::{EthPrimitives, ExecutedBlock};
use alloy_primitives::B256;
use reth_primitives_traits::{
dashmap::{self, DashMap},
AlloyBlockHeader, NodePrimitives,
};
use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted};
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use tracing::{debug, trace};

/// Inputs captured for lazy overlay computation.
#[derive(Clone)]
struct LazyOverlayInputs {
/// The persisted ancestor hash (anchor) this overlay should be built on.
anchor_hash: B256,
/// Deferred trie data handles for all in-memory blocks (newest to oldest).
blocks: Vec<DeferredTrieData>,
struct LazyOverlayInputs<N: NodePrimitives = EthPrimitives> {
/// In-memory blocks from tip to anchor child.
///
/// Blocks must be provided in reverse chain order (newest to oldest).
blocks: Vec<ExecutedBlock<N>>,
}

/// Lazily computed trie overlay.
///
/// Captures the inputs needed to compute a [`TrieInputSorted`] and defers the actual
/// computation until first access. This is conceptually similar to [`DeferredTrieData`]
/// but for overlay computation.
/// computation until first access.
///
/// Blocks must be provided in reverse chain order (newest to oldest), so the first block is the
/// chain tip and the last block is the oldest in-memory block in the chain segment.
///
/// # Fast Path vs Slow Path
///
/// - **Fast path**: If the tip block's cached `anchored_trie_input` is ready and its `anchor_hash`
/// matches our expected anchor, we can reuse it directly (O(1)).
/// - **Slow path**: Otherwise, we merge all ancestor blocks' trie data into a new overlay.
#[derive(Clone)]
pub struct LazyOverlay {
/// Computed result, cached after first access.
inner: Arc<OnceLock<TrieInputSorted>>,
pub struct LazyOverlay<N: NodePrimitives = EthPrimitives> {
/// Computed results, cached by requested anchor hash.
inner: Arc<DashMap<B256, Arc<TrieInputSorted>>>,
/// Inputs for lazy computation.
inputs: LazyOverlayInputs,
inputs: LazyOverlayInputs<N>,
}

impl std::fmt::Debug for LazyOverlay {
impl<N: NodePrimitives> std::fmt::Debug for LazyOverlay<N> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LazyOverlay")
.field("anchor_hash", &self.inputs.anchor_hash)
.field(
"oldest_block_parent_hash",
&self.inputs.blocks.last().map(|block| block.recovered_block().parent_hash()),
)
.field("num_blocks", &self.inputs.blocks.len())
.field("computed", &self.inner.get().is_some())
.field("cached_anchors", &self.inner.len())
.finish()
}
}

impl LazyOverlay {
/// Create a new lazy overlay with the given anchor hash and block handles.
impl<N: NodePrimitives> LazyOverlay<N> {
/// Create a new lazy overlay from in-memory blocks.
///
/// # Arguments
///
/// * `anchor_hash` - The persisted ancestor hash this overlay is built on top of
/// * `blocks` - Deferred trie data handles for in-memory blocks (newest to oldest)
pub fn new(anchor_hash: B256, blocks: Vec<DeferredTrieData>) -> Self {
Self { inner: Arc::new(OnceLock::new()), inputs: LazyOverlayInputs { anchor_hash, blocks } }
}
/// * `blocks` - Executed blocks in reverse chain order (newest to oldest)
pub fn new(blocks: Vec<ExecutedBlock<N>>) -> Self {
debug_assert!(
blocks.windows(2).all(|window| {
window[0].recovered_block().parent_hash() == window[1].recovered_block().hash()
}),
"LazyOverlay blocks must be ordered newest to oldest along a single chain"
);

/// Returns the anchor hash this overlay is built on.
pub const fn anchor_hash(&self) -> B256 {
self.inputs.anchor_hash
Self { inner: Default::default(), inputs: LazyOverlayInputs { blocks } }
}

/// Returns the number of in-memory blocks this overlay covers.
pub const fn num_blocks(&self) -> usize {
self.inputs.blocks.len()
}

/// Returns true if the overlay has already been computed.
pub fn is_computed(&self) -> bool {
self.inner.get().is_some()
/// Returns true if the overlay has already been computed for the requested anchor.
pub fn is_computed(&self, anchor_hash: B256) -> bool {
Comment thread
mediocregopher marked this conversation as resolved.
self.inner.contains_key(&anchor_hash)
}

/// Returns the computed trie input, computing it if necessary.
/// Returns the computed trie input for the requested anchor, computing it if necessary.
///
/// The first call triggers computation (which may block waiting for deferred data).
/// Subsequent calls return the cached result immediately.
pub fn get(&self) -> &TrieInputSorted {
self.inner.get_or_init(|| self.compute())
/// Subsequent calls for the same anchor return the cached result immediately.
pub fn get(&self, anchor_hash: B256) -> Arc<TrieInputSorted> {
match self.inner.entry(anchor_hash) {
dashmap::Entry::Occupied(entry) => Arc::clone(entry.get()),
dashmap::Entry::Vacant(entry) => {
let input = self.compute(anchor_hash);
entry.insert(Arc::clone(&input));
input
}
}
}

/// Returns the overlay as (nodes, state) tuple for use with `OverlayStateProviderFactory`.
pub fn as_overlay(&self) -> (Arc<TrieUpdatesSorted>, Arc<HashedPostStateSorted>) {
let input = self.get();
pub fn as_overlay(
&self,
anchor_hash: B256,
) -> (Arc<TrieUpdatesSorted>, Arc<HashedPostStateSorted>) {
let input = self.get(anchor_hash);
(Arc::clone(&input.nodes), Arc::clone(&input.state))
}

/// Compute the trie input overlay.
fn compute(&self) -> TrieInputSorted {
let anchor_hash = self.inputs.anchor_hash;
fn compute(&self, anchor_hash: B256) -> Arc<TrieInputSorted> {
let blocks = &self.inputs.blocks;

if blocks.is_empty() {
debug!(target: "chain_state::lazy_overlay", "No in-memory blocks, returning empty overlay");
return TrieInputSorted::default();
}
let Some(last_index) =
blocks.iter().position(|block| block.recovered_block().parent_hash() == anchor_hash)
else {
panic!(
"LazyOverlay does not contain a block whose parent hash matches requested anchor {anchor_hash}"
);
};
let blocks = &blocks[..=last_index];

// Fast path: Check if tip block's overlay is ready and anchor matches.
// The tip block (first in list) has the cumulative overlay from all ancestors.
// The tip block (first in list) has the cumulative overlay from all ancestors up to the
// requested anchor.
if let Some(tip) = blocks.first() {
let data = tip.wait_cloned();
let data = tip.trie_data();
if let Some(anchored) = &data.anchored_trie_input {
if anchored.anchor_hash == anchor_hash {
trace!(target: "chain_state::lazy_overlay", %anchor_hash, "Reusing tip block's cached overlay (fast path)");
return (*anchored.trie_input).clone();
return Arc::clone(&anchored.trie_input);
}
debug!(
target: "chain_state::lazy_overlay",
Expand All @@ -116,23 +139,30 @@ impl LazyOverlay {
}
}

// Slow path: Merge all blocks' trie data into a new overlay.
debug!(target: "chain_state::lazy_overlay", num_blocks = blocks.len(), "Merging blocks (slow path)");
Self::merge_blocks(blocks)
// Slow path: Merge the prefix of blocks from the tip back to the requested anchor.
debug!(
target: "chain_state::lazy_overlay",
%anchor_hash,
num_blocks = blocks.len(),
"Merging blocks (slow path)"
);
Arc::new(Self::merge_blocks(blocks))
}

/// Merge all blocks' trie data into a single [`TrieInputSorted`].
///
/// Blocks are ordered newest to oldest.
fn merge_blocks(blocks: &[DeferredTrieData]) -> TrieInputSorted {
fn merge_blocks(blocks: &[ExecutedBlock<N>]) -> TrieInputSorted {
if blocks.is_empty() {
return TrieInputSorted::default();
}

let state =
HashedPostStateSorted::merge_batch(blocks.iter().map(|b| b.wait_cloned().hashed_state));
let nodes =
TrieUpdatesSorted::merge_batch(blocks.iter().map(|b| b.wait_cloned().trie_updates));
let state = HashedPostStateSorted::merge_batch(
blocks.iter().map(|block| block.trie_data().hashed_state),
);
let nodes = TrieUpdatesSorted::merge_batch(
blocks.iter().map(|block| block.trie_data().trie_updates),
);

TrieInputSorted { state, nodes, prefix_sets: Default::default() }
}
Expand All @@ -141,46 +171,138 @@ impl LazyOverlay {
#[cfg(test)]
mod tests {
use super::*;
use reth_trie::{updates::TrieUpdates, HashedPostState};

fn empty_deferred(anchor: B256) -> DeferredTrieData {
DeferredTrieData::pending(
Arc::new(HashedPostState::default()),
Arc::new(TrieUpdates::default()),
anchor,
Vec::new(),
use crate::{test_utils::TestBlockBuilder, ComputedTrieData, EthPrimitives, ExecutedBlock};
use alloy_primitives::U256;
use reth_primitives_traits::Account;
use reth_trie::{updates::TrieUpdatesSorted, HashedPostState, HashedStorage};
use std::sync::Arc;

fn with_unique_state(
block: &ExecutedBlock<EthPrimitives>,
id: u8,
) -> ExecutedBlock<EthPrimitives> {
let hashed_address = B256::with_last_byte(id);
let hashed_slot = B256::with_last_byte(id.saturating_add(32));
let hashed_state = HashedPostState::default()
.with_accounts([(hashed_address, Some(Account::default()))])
.with_storages([(
hashed_address,
HashedStorage::from_iter(false, [(hashed_slot, U256::from(id))]),
)])
.into_sorted();

ExecutedBlock::new(
Arc::clone(&block.recovered_block),
Arc::clone(&block.execution_output),
ComputedTrieData::without_trie_input(
Arc::new(hashed_state),
Arc::new(TrieUpdatesSorted::default()),
),
)
}

fn test_blocks() -> Vec<ExecutedBlock<EthPrimitives>> {
TestBlockBuilder::eth()
.get_executed_blocks(1..4)
.collect::<Vec<_>>()
.into_iter()
.rev()
.enumerate()
.map(|(index, block)| with_unique_state(&block, index as u8 + 1))
.collect()
}

#[test]
fn empty_blocks_returns_default() {
let overlay = LazyOverlay::new(B256::ZERO, vec![]);
let result = overlay.get();
assert!(result.state.is_empty());
assert!(result.nodes.is_empty());
#[should_panic(
expected = "LazyOverlay does not contain a block whose parent hash matches requested anchor"
)]
fn empty_blocks_panic_for_any_anchor() {
let overlay = LazyOverlay::<EthPrimitives>::new(vec![]);
let _ = overlay.get(B256::ZERO);
}

#[test]
fn single_block_uses_data_directly() {
let anchor = B256::random();
let deferred = empty_deferred(anchor);
let overlay = LazyOverlay::new(anchor, vec![deferred]);
let block = TestBlockBuilder::eth().get_executed_block_with_number(1, B256::random());
let anchor_hash = block.recovered_block().parent_hash();
let overlay = LazyOverlay::new(vec![block]);

assert!(!overlay.is_computed(anchor_hash));
let _ = overlay.get(anchor_hash);
assert!(overlay.is_computed(anchor_hash));
}

assert!(!overlay.is_computed());
let _ = overlay.get();
assert!(overlay.is_computed());
#[test]
fn caches_results_per_anchor() {
let blocks = test_blocks();
let prefix_anchor = blocks[2].recovered_block().hash();
let full_anchor = blocks[2].recovered_block().parent_hash();
let overlay = LazyOverlay::new(blocks);

let prefix = overlay.get(prefix_anchor);
let full = overlay.get(full_anchor);

assert!(overlay.is_computed(prefix_anchor));
assert!(overlay.is_computed(full_anchor));
assert!(!Arc::ptr_eq(&prefix, &full));
assert!(Arc::ptr_eq(&prefix, &overlay.get(prefix_anchor)));
assert!(Arc::ptr_eq(&full, &overlay.get(full_anchor)));
}

#[test]
fn requested_anchor_limits_the_merged_prefix() {
let blocks = test_blocks();
let prefix_anchor = blocks[2].recovered_block().hash();
let expected = LazyOverlay::merge_blocks(&blocks[..2]);
let overlay = LazyOverlay::new(blocks);
let actual = overlay.get(prefix_anchor);

assert_eq!(actual.nodes.as_ref(), expected.nodes.as_ref());
assert_eq!(actual.state.as_ref(), expected.state.as_ref());
}

#[test]
fn cached_after_first_access() {
let overlay = LazyOverlay::new(B256::ZERO, vec![]);
fn reuses_tip_overlay_when_anchor_matches() {
let mut blocks = test_blocks();
let prefix_anchor = blocks[2].recovered_block().hash();
let tip_overlay = Arc::new(LazyOverlay::merge_blocks(&blocks[..2]));
let tip_data = blocks[0].trie_data();

blocks[0] = ExecutedBlock::new(
Arc::clone(&blocks[0].recovered_block),
Arc::clone(&blocks[0].execution_output),
ComputedTrieData::with_trie_input(
tip_data.hashed_state,
tip_data.trie_updates,
prefix_anchor,
Arc::clone(&tip_overlay),
),
);

// First access computes
let _ = overlay.get();
assert!(overlay.is_computed());
let overlay = LazyOverlay::new(blocks);
let actual = overlay.get(prefix_anchor);

// Second access uses cache
let _ = overlay.get();
assert!(overlay.is_computed());
assert!(Arc::ptr_eq(&actual, &tip_overlay));
}

#[test]
#[should_panic(
expected = "LazyOverlay does not contain a block whose parent hash matches requested anchor"
)]
fn missing_anchor_panics() {
let blocks = test_blocks();
let missing_anchor = blocks[0].recovered_block().hash();
let overlay = LazyOverlay::new(blocks);

let _ = overlay.get(missing_anchor);
}

#[test]
#[should_panic(
expected = "LazyOverlay blocks must be ordered newest to oldest along a single chain"
)]
fn misordered_blocks_panic() {
let blocks: Vec<_> = TestBlockBuilder::eth().get_executed_blocks(1..3).collect();
let _ = LazyOverlay::new(blocks);
}
}
Loading
Loading