diff --git a/crates/storage/provider/src/providers/database/provider.rs b/crates/storage/provider/src/providers/database/provider.rs index 974f0257d44..99ad1331967 100644 --- a/crates/storage/provider/src/providers/database/provider.rs +++ b/crates/storage/provider/src/providers/database/provider.rs @@ -1791,10 +1791,10 @@ impl StateWriter fn write_hashed_state(&self, hashed_state: &HashedPostStateSorted) -> ProviderResult<()> { // Write hashed account updates. let mut hashed_accounts_cursor = self.tx_ref().cursor_write::()?; - for (hashed_address, account) in hashed_state.accounts().accounts_sorted() { + for (hashed_address, account) in hashed_state.accounts() { if let Some(account) = account { - hashed_accounts_cursor.upsert(hashed_address, &account)?; - } else if hashed_accounts_cursor.seek_exact(hashed_address)?.is_some() { + hashed_accounts_cursor.upsert(*hashed_address, account)?; + } else if hashed_accounts_cursor.seek_exact(*hashed_address)?.is_some() { hashed_accounts_cursor.delete_current()?; } } @@ -1808,8 +1808,9 @@ impl StateWriter hashed_storage_cursor.delete_current_duplicates()?; } - for (hashed_slot, value) in storage.storage_slots_sorted() { - let entry = StorageEntry { key: hashed_slot, value }; + for (hashed_slot, value) in storage.storage_slots_ref() { + let entry = StorageEntry { key: *hashed_slot, value: *value }; + if let Some(db_entry) = hashed_storage_cursor.seek_by_key_subkey(*hashed_address, entry.key)? && db_entry.key == entry.key diff --git a/crates/trie/common/src/hashed_state.rs b/crates/trie/common/src/hashed_state.rs index 22f57d9f34d..c8cf64ba020 100644 --- a/crates/trie/common/src/hashed_state.rs +++ b/crates/trie/common/src/hashed_state.rs @@ -9,7 +9,7 @@ use crate::{ use alloc::{borrow::Cow, vec::Vec}; use alloy_primitives::{ keccak256, - map::{hash_map, B256Map, B256Set, HashMap, HashSet}, + map::{hash_map, B256Map, HashMap, HashSet}, Address, B256, U256, }; use itertools::Itertools; @@ -332,26 +332,8 @@ impl HashedPostState { } /// Converts hashed post state into [`HashedPostStateSorted`]. - pub fn into_sorted(self) -> HashedPostStateSorted { - let mut updated_accounts = Vec::new(); - let mut destroyed_accounts = HashSet::default(); - for (hashed_address, info) in self.accounts { - if let Some(info) = info { - updated_accounts.push((hashed_address, info)); - } else { - destroyed_accounts.insert(hashed_address); - } - } - updated_accounts.sort_unstable_by_key(|(address, _)| *address); - let accounts = HashedAccountsSorted { accounts: updated_accounts, destroyed_accounts }; - - let storages = self - .storages - .into_iter() - .map(|(hashed_address, storage)| (hashed_address, storage.into_sorted())) - .collect(); - - HashedPostStateSorted { accounts, storages } + pub fn into_sorted(mut self) -> HashedPostStateSorted { + self.drain_into_sorted() } /// Converts hashed post state into [`HashedPostStateSorted`], but keeping the maps allocated by @@ -362,17 +344,8 @@ impl HashedPostState { /// This allows us to reuse the allocated space. This allocates new space for the sorted hashed /// post state, like `into_sorted`. pub fn drain_into_sorted(&mut self) -> HashedPostStateSorted { - let mut updated_accounts = Vec::new(); - let mut destroyed_accounts = HashSet::default(); - for (hashed_address, info) in self.accounts.drain() { - if let Some(info) = info { - updated_accounts.push((hashed_address, info)); - } else { - destroyed_accounts.insert(hashed_address); - } - } - updated_accounts.sort_unstable_by_key(|(address, _)| *address); - let accounts = HashedAccountsSorted { accounts: updated_accounts, destroyed_accounts }; + let mut accounts: Vec<_> = self.accounts.drain().collect(); + accounts.sort_unstable_by_key(|(address, _)| *address); let storages = self .storages @@ -452,41 +425,33 @@ impl HashedStorage { /// Converts hashed storage into [`HashedStorageSorted`]. pub fn into_sorted(self) -> HashedStorageSorted { - let mut non_zero_valued_slots = Vec::new(); - let mut zero_valued_slots = HashSet::default(); - for (hashed_slot, value) in self.storage { - if value.is_zero() { - zero_valued_slots.insert(hashed_slot); - } else { - non_zero_valued_slots.push((hashed_slot, value)); - } - } - non_zero_valued_slots.sort_unstable_by_key(|(key, _)| *key); + let mut storage_slots: Vec<_> = self.storage.into_iter().collect(); + storage_slots.sort_unstable_by_key(|(key, _)| *key); - HashedStorageSorted { non_zero_valued_slots, zero_valued_slots, wiped: self.wiped } + HashedStorageSorted { storage_slots, wiped: self.wiped } } } /// Sorted hashed post state optimized for iterating during state trie calculation. #[derive(PartialEq, Eq, Clone, Default, Debug)] pub struct HashedPostStateSorted { - /// Updated state of accounts. - pub accounts: HashedAccountsSorted, - /// Map of hashed addresses to hashed storage. + /// Sorted collection of account updates. `None` indicates a destroyed account. + pub accounts: Vec<(B256, Option)>, + /// Map of hashed addresses to their sorted storage updates. pub storages: B256Map, } impl HashedPostStateSorted { /// Create new instance of [`HashedPostStateSorted`] pub const fn new( - accounts: HashedAccountsSorted, + accounts: Vec<(B256, Option)>, storages: B256Map, ) -> Self { Self { accounts, storages } } /// Returns reference to hashed accounts. - pub const fn accounts(&self) -> &HashedAccountsSorted { + pub const fn accounts(&self) -> &Vec<(B256, Option)> { &self.accounts } @@ -497,23 +462,19 @@ impl HashedPostStateSorted { /// Returns `true` if there are no account or storage updates. pub fn is_empty(&self) -> bool { - self.accounts.accounts.is_empty() && - self.accounts.destroyed_accounts.is_empty() && - self.storages.is_empty() + self.accounts.is_empty() && self.storages.is_empty() } /// Returns the total number of updates including all accounts and storage updates. pub fn total_len(&self) -> usize { - self.accounts.accounts.len() + - self.accounts.destroyed_accounts.len() + - self.storages.values().map(|storage| storage.len()).sum::() + self.accounts.len() + self.storages.values().map(|s| s.len()).sum::() } /// Extends this state with contents of another sorted state. /// Entries in `other` take precedence for duplicate keys. pub fn extend_ref(&mut self, other: &Self) { // Extend accounts - self.accounts.extend_ref(&other.accounts); + extend_sorted_vec(&mut self.accounts, &other.accounts); // Extend storages for (hashed_address, other_storage) in &other.storages { @@ -531,47 +492,11 @@ impl AsRef for HashedPostStateSorted { } } -/// Sorted account state optimized for iterating during state trie calculation. -#[derive(Clone, Eq, PartialEq, Default, Debug)] -pub struct HashedAccountsSorted { - /// Sorted collection of hashed addresses and their account info. - pub accounts: Vec<(B256, Account)>, - /// Set of destroyed account keys. - pub destroyed_accounts: B256Set, -} - -impl HashedAccountsSorted { - /// Returns a sorted iterator over updated accounts. - pub fn accounts_sorted(&self) -> impl Iterator)> { - self.accounts - .iter() - .map(|(address, account)| (*address, Some(*account))) - .chain(self.destroyed_accounts.iter().map(|address| (*address, None))) - .sorted_by_key(|entry| *entry.0) - } - - /// Extends this collection with contents of another sorted collection. - /// Entries in `other` take precedence for duplicate keys. - pub fn extend_ref(&mut self, other: &Self) { - // Updates take precedence over removals, so we want removals from `other` to only apply to - // the previous accounts. - self.accounts.retain(|(addr, _)| !other.destroyed_accounts.contains(addr)); - - // Extend the sorted accounts vector - extend_sorted_vec(&mut self.accounts, &other.accounts); - - // Merge destroyed accounts sets - self.destroyed_accounts.extend(&other.destroyed_accounts); - } -} - /// Sorted hashed storage optimized for iterating during state trie calculation. #[derive(Clone, Eq, PartialEq, Debug)] pub struct HashedStorageSorted { - /// Sorted hashed storage slots with non-zero value. - pub non_zero_valued_slots: Vec<(B256, U256)>, - /// Slots that have been zero valued. - pub zero_valued_slots: B256Set, + /// Sorted collection of updated storage slots. [`U256::ZERO`] indicates a deleted value. + pub storage_slots: Vec<(B256, U256)>, /// Flag indicating whether the storage was wiped or not. pub wiped: bool, } @@ -582,45 +507,36 @@ impl HashedStorageSorted { self.wiped } - /// Returns a sorted iterator over updated storage slots. - pub fn storage_slots_sorted(&self) -> impl Iterator { - self.non_zero_valued_slots - .iter() - .map(|(hashed_slot, value)| (*hashed_slot, *value)) - .chain(self.zero_valued_slots.iter().map(|hashed_slot| (*hashed_slot, U256::ZERO))) - .sorted_by_key(|entry| *entry.0) + /// Returns reference to updated storage slots. + pub fn storage_slots_ref(&self) -> &[(B256, U256)] { + &self.storage_slots } /// Returns the total number of storage slot updates. - pub fn len(&self) -> usize { - self.non_zero_valued_slots.len() + self.zero_valued_slots.len() + pub const fn len(&self) -> usize { + self.storage_slots.len() } /// Returns `true` if there are no storage slot updates. - pub fn is_empty(&self) -> bool { - self.non_zero_valued_slots.is_empty() && self.zero_valued_slots.is_empty() + pub const fn is_empty(&self) -> bool { + self.storage_slots.is_empty() } - /// Extends this storage with contents of another sorted storage. - /// Entries in `other` take precedence for duplicate keys. + /// Extends the storage slots updates with another set of sorted updates. + /// + /// If `other` is marked as deleted, this will be marked as deleted and all slots cleared. + /// Otherwise, nodes are merged with `other`'s values taking precedence for duplicates. pub fn extend_ref(&mut self, other: &Self) { if other.wiped { // If other is wiped, clear everything and copy from other self.wiped = true; - self.non_zero_valued_slots.clear(); - self.zero_valued_slots.clear(); - self.non_zero_valued_slots.extend_from_slice(&other.non_zero_valued_slots); - self.zero_valued_slots.extend(&other.zero_valued_slots); + self.storage_slots.clear(); + self.storage_slots.extend(other.storage_slots.iter().copied()); return; } - self.non_zero_valued_slots.retain(|(slot, _)| !other.zero_valued_slots.contains(slot)); - // Extend the sorted non-zero valued slots - extend_sorted_vec(&mut self.non_zero_valued_slots, &other.non_zero_valued_slots); - - // Merge zero valued slots sets - self.zero_valued_slots.extend(&other.zero_valued_slots); + extend_sorted_vec(&mut self.storage_slots, &other.storage_slots); } } @@ -628,16 +544,11 @@ impl From for HashedStorage { fn from(sorted: HashedStorageSorted) -> Self { let mut storage = B256Map::default(); - // Add all non-zero valued slots - for (slot, value) in sorted.non_zero_valued_slots { + // Add all storage slots (including zero-valued ones which indicate deletion) + for (slot, value) in sorted.storage_slots { storage.insert(slot, value); } - // Add all zero valued slots - for slot in sorted.zero_valued_slots { - storage.insert(slot, U256::ZERO); - } - Self { wiped: sorted.wiped, storage } } } @@ -646,14 +557,9 @@ impl From for HashedPostState { fn from(sorted: HashedPostStateSorted) -> Self { let mut accounts = B256Map::default(); - // Add all updated accounts - for (address, account) in sorted.accounts.accounts { - accounts.insert(address, Some(account)); - } - - // Add all destroyed accounts - for address in sorted.accounts.destroyed_accounts { - accounts.insert(address, None); + // Add all accounts (Some for updated, None for destroyed) + for (address, account) in sorted.accounts { + accounts.insert(address, account); } // Convert storages @@ -1205,87 +1111,92 @@ mod tests { fn test_hashed_post_state_sorted_extend_ref() { // Test extending accounts let mut state1 = HashedPostStateSorted { - accounts: HashedAccountsSorted { - accounts: vec![ - (B256::from([1; 32]), Account::default()), - (B256::from([3; 32]), Account::default()), - ], - destroyed_accounts: B256Set::from_iter([B256::from([5; 32])]), - }, + accounts: vec![ + (B256::from([1; 32]), Some(Account::default())), + (B256::from([3; 32]), Some(Account::default())), + (B256::from([5; 32]), None), + ], storages: B256Map::default(), }; let state2 = HashedPostStateSorted { - accounts: HashedAccountsSorted { - accounts: vec![ - (B256::from([2; 32]), Account::default()), - (B256::from([3; 32]), Account { nonce: 1, ..Default::default() }), // Override - (B256::from([4; 32]), Account::default()), - ], - destroyed_accounts: B256Set::from_iter([B256::from([6; 32])]), - }, + accounts: vec![ + (B256::from([2; 32]), Some(Account::default())), + (B256::from([3; 32]), Some(Account { nonce: 1, ..Default::default() })), /* Override */ + (B256::from([4; 32]), Some(Account::default())), + (B256::from([6; 32]), None), + ], storages: B256Map::default(), }; state1.extend_ref(&state2); // Check accounts are merged and sorted - assert_eq!(state1.accounts.accounts.len(), 4); - assert_eq!(state1.accounts.accounts[0].0, B256::from([1; 32])); - assert_eq!(state1.accounts.accounts[1].0, B256::from([2; 32])); - assert_eq!(state1.accounts.accounts[2].0, B256::from([3; 32])); - assert_eq!(state1.accounts.accounts[2].1.nonce, 1); // Should have state2's value - assert_eq!(state1.accounts.accounts[3].0, B256::from([4; 32])); - - // Check destroyed accounts are merged - assert!(state1.accounts.destroyed_accounts.contains(&B256::from([5; 32]))); - assert!(state1.accounts.destroyed_accounts.contains(&B256::from([6; 32]))); + assert_eq!(state1.accounts.len(), 6); + assert_eq!(state1.accounts[0].0, B256::from([1; 32])); + assert_eq!(state1.accounts[1].0, B256::from([2; 32])); + assert_eq!(state1.accounts[2].0, B256::from([3; 32])); + assert_eq!(state1.accounts[2].1.unwrap().nonce, 1); // Should have state2's value + assert_eq!(state1.accounts[3].0, B256::from([4; 32])); + assert_eq!(state1.accounts[4].0, B256::from([5; 32])); + assert_eq!(state1.accounts[4].1, None); + assert_eq!(state1.accounts[5].0, B256::from([6; 32])); + assert_eq!(state1.accounts[5].1, None); } #[test] fn test_hashed_storage_sorted_extend_ref() { // Test normal extension let mut storage1 = HashedStorageSorted { - non_zero_valued_slots: vec![ + storage_slots: vec![ (B256::from([1; 32]), U256::from(10)), (B256::from([3; 32]), U256::from(30)), + (B256::from([5; 32]), U256::ZERO), ], - zero_valued_slots: B256Set::from_iter([B256::from([5; 32])]), wiped: false, }; let storage2 = HashedStorageSorted { - non_zero_valued_slots: vec![ + storage_slots: vec![ (B256::from([2; 32]), U256::from(20)), (B256::from([3; 32]), U256::from(300)), // Override (B256::from([4; 32]), U256::from(40)), + (B256::from([6; 32]), U256::ZERO), ], - zero_valued_slots: B256Set::from_iter([B256::from([6; 32])]), wiped: false, }; storage1.extend_ref(&storage2); - assert_eq!(storage1.non_zero_valued_slots.len(), 4); - assert_eq!(storage1.non_zero_valued_slots[0].0, B256::from([1; 32])); - assert_eq!(storage1.non_zero_valued_slots[1].0, B256::from([2; 32])); - assert_eq!(storage1.non_zero_valued_slots[2].0, B256::from([3; 32])); - assert_eq!(storage1.non_zero_valued_slots[2].1, U256::from(300)); // Should have storage2's value - assert_eq!(storage1.non_zero_valued_slots[3].0, B256::from([4; 32])); - assert!(storage1.zero_valued_slots.contains(&B256::from([5; 32]))); - assert!(storage1.zero_valued_slots.contains(&B256::from([6; 32]))); + assert_eq!(storage1.storage_slots.len(), 6); + assert_eq!(storage1.storage_slots[0].0, B256::from([1; 32])); + assert_eq!(storage1.storage_slots[0].1, U256::from(10)); + assert_eq!(storage1.storage_slots[1].0, B256::from([2; 32])); + assert_eq!(storage1.storage_slots[1].1, U256::from(20)); + assert_eq!(storage1.storage_slots[2].0, B256::from([3; 32])); + assert_eq!(storage1.storage_slots[2].1, U256::from(300)); // Should have storage2's value + assert_eq!(storage1.storage_slots[3].0, B256::from([4; 32])); + assert_eq!(storage1.storage_slots[3].1, U256::from(40)); + assert_eq!(storage1.storage_slots[4].0, B256::from([5; 32])); + assert_eq!(storage1.storage_slots[4].1, U256::ZERO); + assert_eq!(storage1.storage_slots[5].0, B256::from([6; 32])); + assert_eq!(storage1.storage_slots[5].1, U256::ZERO); assert!(!storage1.wiped); // Test wiped storage let mut storage3 = HashedStorageSorted { - non_zero_valued_slots: vec![(B256::from([1; 32]), U256::from(10))], - zero_valued_slots: B256Set::from_iter([B256::from([2; 32])]), + storage_slots: vec![ + (B256::from([1; 32]), U256::from(10)), + (B256::from([2; 32]), U256::ZERO), + ], wiped: false, }; let storage4 = HashedStorageSorted { - non_zero_valued_slots: vec![(B256::from([3; 32]), U256::from(30))], - zero_valued_slots: B256Set::from_iter([B256::from([4; 32])]), + storage_slots: vec![ + (B256::from([3; 32]), U256::from(30)), + (B256::from([4; 32]), U256::ZERO), + ], wiped: true, }; @@ -1293,10 +1204,11 @@ mod tests { assert!(storage3.wiped); // When wiped, should only have storage4's values - assert_eq!(storage3.non_zero_valued_slots.len(), 1); - assert_eq!(storage3.non_zero_valued_slots[0].0, B256::from([3; 32])); - assert_eq!(storage3.zero_valued_slots.len(), 1); - assert!(storage3.zero_valued_slots.contains(&B256::from([4; 32]))); + assert_eq!(storage3.storage_slots.len(), 2); + assert_eq!(storage3.storage_slots[0].0, B256::from([3; 32])); + assert_eq!(storage3.storage_slots[0].1, U256::from(30)); + assert_eq!(storage3.storage_slots[1].0, B256::from([4; 32])); + assert_eq!(storage3.storage_slots[1].1, U256::ZERO); } #[test] diff --git a/crates/trie/db/tests/post_state.rs b/crates/trie/db/tests/post_state.rs index ae59bc871ec..d3f8fe36484 100644 --- a/crates/trie/db/tests/post_state.rs +++ b/crates/trie/db/tests/post_state.rs @@ -208,6 +208,18 @@ fn fuzz_hashed_account_cursor() { ); } +/// Tests `is_storage_empty()` correctly distinguishes wiped storage from storage with zero values. +/// +/// Key distinction: +/// - `wiped = true`: Storage cleared/deleted → empty +/// - `wiped = false` with zeros: Explicit zero values → not empty +/// +/// Test cases: +/// 1. No entries → empty +/// 2. Non-zero values → not empty +/// 3. Some zero values, not wiped → not empty +/// 4. Wiped + zero post-state → empty +/// 5. Wiped + non-zero post-state → not empty #[test] fn storage_is_empty() { let address = B256::random(); @@ -244,6 +256,23 @@ fn storage_is_empty() { assert!(!cursor.is_storage_empty().unwrap()); } + // Some zero values, but not wiped + { + let wiped = false; + let mut hashed_storage = HashedStorage::new(wiped); + hashed_storage.storage.insert(B256::with_last_byte(0), U256::ZERO); + + let mut hashed_post_state = HashedPostState::default(); + hashed_post_state.storages.insert(address, hashed_storage); + + let sorted = hashed_post_state.into_sorted(); + let tx = db.tx().unwrap(); + let factory = + HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + let mut cursor = factory.hashed_storage_cursor(address).unwrap(); + assert!(!cursor.is_storage_empty().unwrap()); + } + // wiped storage, must be empty { let wiped = true; @@ -488,3 +517,80 @@ fn fuzz_hashed_storage_cursor() { assert_storage_cursor_order(&factory, expected.into_iter()); }); } + +#[test] +fn all_storage_slots_deleted_not_wiped_exact_keys() { + // This test reproduces an edge case where: + // - wiped = false + // - All post state entries are deletions (None values) + // - Database has corresponding entries + // - Expected: NO leaves should be returned (all deleted) + let address = B256::random(); + + // Generate 42 storage entries with keys distributed across the keyspace + let db_entries: Vec<(B256, u64)> = (0..42) + .map(|i| { + let mut key_bytes = [0u8; 32]; + key_bytes[0] = (i * 6) as u8; // Spread keys across keyspace + key_bytes[31] = i as u8; // Ensure uniqueness + (B256::from(key_bytes), i as u64 + 1) + }) + .collect(); + + let db = create_test_rw_db(); + db.update(|tx| { + for (key, value) in &db_entries { + tx.put::( + address, + StorageEntry { key: *key, value: U256::from(*value) }, + ) + .unwrap(); + } + }) + .unwrap(); + + // Create post state with same keys but all Zero values (deletions) + let mut hashed_storage = HashedStorage::new(false); + for (key, _) in &db_entries { + hashed_storage.storage.insert(*key, U256::ZERO); // Zero value = deletion + } + + let mut hashed_post_state = HashedPostState::default(); + hashed_post_state.storages.insert(address, hashed_storage); + + let sorted = hashed_post_state.into_sorted(); + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(&tx), &sorted); + + let mut cursor = factory.hashed_storage_cursor(address).unwrap(); + + // Seek to beginning should return None (all slots are deleted) + let result = cursor.seek(B256::ZERO).unwrap(); + assert_eq!( + result, None, + "Expected no entries when all slots are deleted, but got {:?}", + result + ); + + // Test seek operations at various positions - all should return None + // Pattern: before all, early range, mid-early range, mid-late range, late range, near end + let seek_keys = vec![ + B256::ZERO, // Before all entries + B256::right_padding_from(&[0x5d]), + B256::right_padding_from(&[0x5e]), + B256::right_padding_from(&[0x5f]), + B256::right_padding_from(&[0xc2]), + B256::right_padding_from(&[0xc5]), + B256::right_padding_from(&[0xc9]), + B256::right_padding_from(&[0xf0]), + ]; + + for seek_key in seek_keys { + let result = cursor.seek(seek_key).unwrap(); + assert_eq!(result, None, "Expected None when seeking to {} but got {:?}", seek_key, result); + } + + // next() should also always return None + let result = cursor.next().unwrap(); + assert_eq!(result, None, "Expected None from next() but got {:?}", result); +} diff --git a/crates/trie/sparse-parallel/src/trie.rs b/crates/trie/sparse-parallel/src/trie.rs index 133cdfece4c..4244f20ab11 100644 --- a/crates/trie/sparse-parallel/src/trie.rs +++ b/crates/trie/sparse-parallel/src/trie.rs @@ -2678,7 +2678,7 @@ mod tests { use reth_primitives_traits::Account; use reth_provider::{test_utils::create_test_provider_factory, TrieWriter}; use reth_trie::{ - hashed_cursor::{noop::NoopHashedAccountCursor, HashedPostStateAccountCursor}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopAccountTrieCursor, TrieCursor, TrieCursorFactory}, walker::TrieWalker, @@ -2990,8 +2990,8 @@ mod tests { .into_sorted(); let mut node_iter = TrieNodeIter::state_trie( walker, - HashedPostStateAccountCursor::new( - NoopHashedAccountCursor::default(), + HashedPostStateCursor::new( + Option::>::None, hashed_post_state.accounts(), ), ); diff --git a/crates/trie/sparse/benches/root.rs b/crates/trie/sparse/benches/root.rs index 9eaf54c2d0f..76eaac91a74 100644 --- a/crates/trie/sparse/benches/root.rs +++ b/crates/trie/sparse/benches/root.rs @@ -5,7 +5,7 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use itertools::Itertools; use proptest::{prelude::*, strategy::ValueTree, test_runner::TestRunner}; use reth_trie::{ - hashed_cursor::{noop::NoopHashedStorageCursor, HashedPostStateStorageCursor}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopStorageTrieCursor, InMemoryTrieCursor}, updates::StorageTrieUpdates, @@ -142,9 +142,9 @@ fn calculate_root_from_leaves_repeated(c: &mut Criterion) { ); let mut node_iter = TrieNodeIter::storage_trie( walker, - HashedPostStateStorageCursor::new( - NoopHashedStorageCursor::default(), - Some(&storage_sorted), + HashedPostStateCursor::new( + Option::>::None, + &storage_sorted.storage_slots, ), ); diff --git a/crates/trie/sparse/src/trie.rs b/crates/trie/sparse/src/trie.rs index 500b642cd1e..cf92942b82d 100644 --- a/crates/trie/sparse/src/trie.rs +++ b/crates/trie/sparse/src/trie.rs @@ -2356,7 +2356,7 @@ mod tests { use reth_primitives_traits::Account; use reth_provider::{test_utils::create_test_provider_factory, TrieWriter}; use reth_trie::{ - hashed_cursor::{noop::NoopHashedAccountCursor, HashedPostStateAccountCursor}, + hashed_cursor::{noop::NoopHashedCursor, HashedPostStateCursor}, node_iter::{TrieElement, TrieNodeIter}, trie_cursor::{noop::NoopAccountTrieCursor, TrieCursor, TrieCursorFactory}, walker::TrieWalker, @@ -2416,8 +2416,8 @@ mod tests { .into_sorted(); let mut node_iter = TrieNodeIter::state_trie( walker, - HashedPostStateAccountCursor::new( - NoopHashedAccountCursor::default(), + HashedPostStateCursor::new( + Option::>::None, hashed_post_state.accounts(), ), ); diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs index c99b0d049ee..53d07d52472 100644 --- a/crates/trie/trie/src/forward_cursor.rs +++ b/crates/trie/trie/src/forward_cursor.rs @@ -4,8 +4,9 @@ #[derive(Debug)] pub struct ForwardInMemoryCursor<'a, K, V> { /// The reference to the pre-sorted collection of entries. - entries: std::slice::Iter<'a, (K, V)>, - is_empty: bool, + entries: &'a [(K, V)], + /// Current index in the collection. + idx: usize, } impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { @@ -13,25 +14,36 @@ impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { /// /// The cursor expects all of the entries to have been sorted in advance. #[inline] - pub fn new(entries: &'a [(K, V)]) -> Self { - Self { entries: entries.iter(), is_empty: entries.is_empty() } + pub const fn new(entries: &'a [(K, V)]) -> Self { + Self { entries, idx: 0 } } /// Returns `true` if the cursor is empty, regardless of its position. #[inline] pub const fn is_empty(&self) -> bool { - self.is_empty + self.entries.is_empty() + } + + /// Returns `true` if any entry satisfies the predicate. + #[inline] + pub fn has_any(&self, predicate: F) -> bool + where + F: Fn(&(K, V)) -> bool, + { + self.entries.iter().any(predicate) } /// Returns the current entry pointed to be the cursor, or `None` if no entries are left. #[inline] pub fn current(&self) -> Option<&(K, V)> { - self.entries.clone().next() + self.entries.get(self.idx) } #[inline] fn next(&mut self) -> Option<&(K, V)> { - self.entries.next() + let entry = self.entries.get(self.idx)?; + self.idx += 1; + Some(entry) } } diff --git a/crates/trie/trie/src/hashed_cursor/mock.rs b/crates/trie/trie/src/hashed_cursor/mock.rs index f091ae6ffe5..334b5ba77c5 100644 --- a/crates/trie/trie/src/hashed_cursor/mock.rs +++ b/crates/trie/trie/src/hashed_cursor/mock.rs @@ -99,7 +99,11 @@ pub struct MockHashedCursor { } impl MockHashedCursor { - fn new(values: Arc>, visited_keys: Arc>>>) -> Self { + /// Creates a new mock hashed cursor with the given values and key tracking. + pub fn new( + values: Arc>, + visited_keys: Arc>>>, + ) -> Self { Self { current_key: None, values, visited_keys } } } diff --git a/crates/trie/trie/src/hashed_cursor/noop.rs b/crates/trie/trie/src/hashed_cursor/noop.rs index e5bc44f0f5c..4f59af2ed36 100644 --- a/crates/trie/trie/src/hashed_cursor/noop.rs +++ b/crates/trie/trie/src/hashed_cursor/noop.rs @@ -1,5 +1,6 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; use alloy_primitives::{B256, U256}; +use core::marker::PhantomData; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; @@ -10,50 +11,43 @@ pub struct NoopHashedCursorFactory; impl HashedCursorFactory for NoopHashedCursorFactory { type AccountCursor<'a> - = NoopHashedAccountCursor + = NoopHashedCursor where Self: 'a; type StorageCursor<'a> - = NoopHashedStorageCursor + = NoopHashedCursor where Self: 'a; fn hashed_account_cursor(&self) -> Result, DatabaseError> { - Ok(NoopHashedAccountCursor::default()) + Ok(NoopHashedCursor::default()) } fn hashed_storage_cursor( &self, _hashed_address: B256, ) -> Result, DatabaseError> { - Ok(NoopHashedStorageCursor::default()) + Ok(NoopHashedCursor::default()) } } -/// Noop account hashed cursor. -#[derive(Default, Debug)] -#[non_exhaustive] -pub struct NoopHashedAccountCursor; - -impl HashedCursor for NoopHashedAccountCursor { - type Value = Account; - - fn seek(&mut self, _key: B256) -> Result, DatabaseError> { - Ok(None) - } +/// Generic noop hashed cursor. +#[derive(Debug)] +pub struct NoopHashedCursor { + _marker: PhantomData, +} - fn next(&mut self) -> Result, DatabaseError> { - Ok(None) +impl Default for NoopHashedCursor { + fn default() -> Self { + Self { _marker: PhantomData } } } -/// Noop account hashed cursor. -#[derive(Default, Debug)] -#[non_exhaustive] -pub struct NoopHashedStorageCursor; - -impl HashedCursor for NoopHashedStorageCursor { - type Value = U256; +impl HashedCursor for NoopHashedCursor +where + V: std::fmt::Debug, +{ + type Value = V; fn seek(&mut self, _key: B256) -> Result, DatabaseError> { Ok(None) @@ -64,7 +58,7 @@ impl HashedCursor for NoopHashedStorageCursor { } } -impl HashedStorageCursor for NoopHashedStorageCursor { +impl HashedStorageCursor for NoopHashedCursor { fn is_storage_empty(&mut self) -> Result { Ok(true) } diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index 896251f3634..bf80ee63a1d 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,9 +1,9 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; use crate::forward_cursor::ForwardInMemoryCursor; -use alloy_primitives::{map::B256Set, B256, U256}; +use alloy_primitives::{B256, U256}; use reth_primitives_traits::Account; use reth_storage_errors::db::DatabaseError; -use reth_trie_common::{HashedAccountsSorted, HashedPostStateSorted, HashedStorageSorted}; +use reth_trie_common::HashedPostStateSorted; /// The hashed cursor factory for the post state. #[derive(Clone, Debug)] @@ -25,130 +25,212 @@ where T: AsRef, { type AccountCursor<'cursor> - = HashedPostStateAccountCursor<'overlay, CF::AccountCursor<'cursor>> + = HashedPostStateCursor<'overlay, CF::AccountCursor<'cursor>, Option> where Self: 'cursor; type StorageCursor<'cursor> - = HashedPostStateStorageCursor<'overlay, CF::StorageCursor<'cursor>> + = HashedPostStateCursor<'overlay, CF::StorageCursor<'cursor>, U256> where Self: 'cursor; fn hashed_account_cursor(&self) -> Result, DatabaseError> { let cursor = self.cursor_factory.hashed_account_cursor()?; - Ok(HashedPostStateAccountCursor::new(cursor, &self.post_state.as_ref().accounts)) + Ok(HashedPostStateCursor::new(Some(cursor), &self.post_state.as_ref().accounts)) } fn hashed_storage_cursor( &self, hashed_address: B256, ) -> Result, DatabaseError> { - let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; - Ok(HashedPostStateStorageCursor::new( - cursor, - self.post_state.as_ref().storages.get(&hashed_address), - )) + static EMPTY_UPDATES: Vec<(B256, U256)> = Vec::new(); + + let post_state_storage = self.post_state.as_ref().storages.get(&hashed_address); + let (storage_slots, wiped) = post_state_storage + .map(|u| (u.storage_slots_ref(), u.is_wiped())) + .unwrap_or((&EMPTY_UPDATES, false)); + + let cursor = if wiped { + None + } else { + Some(self.cursor_factory.hashed_storage_cursor(hashed_address)?) + }; + + Ok(HashedPostStateCursor::new(cursor, storage_slots)) + } +} + +/// Trait for types that can be used with [`HashedPostStateCursor`] as a value. +/// +/// This enables uniform handling of deletions across different wrapper types: +/// - `Option`: `None` indicates deletion +/// - `U256`: `U256::ZERO` indicates deletion (maps to `None`) +/// +/// This design allows us to use `U256::ZERO`, rather than an Option, to indicate deletion for +/// storage (which maps cleanly to how changesets are stored in the DB) while not requiring two +/// different cursor implementations. +pub trait HashedPostStateCursorValue: Copy { + /// The non-zero type returned by `into_option`. + /// For `Option`, this is `Account`. + /// For `U256`, this is `U256`. + type NonZero: Copy + std::fmt::Debug; + + /// Returns `Some(&NonZero)` if the value is present, `None` if deleted. + fn into_option(self) -> Option; +} + +impl HashedPostStateCursorValue for Option { + type NonZero = Account; + + fn into_option(self) -> Option { + self + } +} + +impl HashedPostStateCursorValue for U256 { + type NonZero = Self; + + fn into_option(self) -> Option { + (self != Self::ZERO).then_some(self) } } -/// The cursor to iterate over post state hashed accounts and corresponding database entries. -/// It will always give precedence to the data from the hashed post state. +/// A cursor to iterate over state updates and corresponding database entries. +/// It will always give precedence to the data from the post state updates. #[derive(Debug)] -pub struct HashedPostStateAccountCursor<'a, C> { - /// The database cursor. - cursor: C, - /// Forward-only in-memory cursor over accounts. - post_state_cursor: ForwardInMemoryCursor<'a, B256, Account>, - /// Reference to the collection of account keys that were destroyed. - destroyed_accounts: &'a B256Set, - /// The last hashed account that was returned by the cursor. +pub struct HashedPostStateCursor<'a, C, V> +where + V: HashedPostStateCursorValue, +{ + /// The underlying `database_cursor`. If None then it is assumed there is no DB data. + cursor: Option, + /// Entry that `database_cursor` is currently pointing to. + cursor_entry: Option<(B256, V::NonZero)>, + /// Forward-only in-memory cursor over underlying V. + post_state_cursor: ForwardInMemoryCursor<'a, B256, V>, + /// The last hashed key that was returned by the cursor. /// De facto, this is a current cursor position. - last_account: Option, + last_key: Option, + /// Tracks whether `seek` has been called. Used to prevent re-seeking the DB cursor + /// when it has been exhausted by iteration. + seeked: bool, } -impl<'a, C> HashedPostStateAccountCursor<'a, C> +impl<'a, C, V> HashedPostStateCursor<'a, C, V> where - C: HashedCursor, + C: HashedCursor, + V: HashedPostStateCursorValue, { - /// Create new instance of [`HashedPostStateAccountCursor`]. - pub fn new(cursor: C, post_state_accounts: &'a HashedAccountsSorted) -> Self { - let post_state_cursor = ForwardInMemoryCursor::new(&post_state_accounts.accounts); - let destroyed_accounts = &post_state_accounts.destroyed_accounts; - Self { cursor, post_state_cursor, destroyed_accounts, last_account: None } - } - - /// Returns `true` if the account has been destroyed. - /// This check is used for evicting account keys from the state trie. + /// Creates a new post state cursor which combines a DB cursor and in-memory post state updates. /// - /// This function only checks the post state, not the database, because the latter does not - /// store destroyed accounts. - fn is_account_cleared(&self, account: &B256) -> bool { - self.destroyed_accounts.contains(account) + /// # Parameters + /// - `cursor`: The database cursor. Pass `None` to indicate: + /// - For accounts: Empty database (no persisted accounts) + /// - For storage: Wiped storage (e.g., via `SELFDESTRUCT` - all previous storage destroyed) + /// - `updates`: Pre-sorted post state updates. + pub fn new(cursor: Option, updates: &'a [(B256, V)]) -> Self { + debug_assert!(updates.is_sorted_by_key(|(k, _)| k), "Overlay values must be sorted by key"); + Self { + cursor, + cursor_entry: None, + post_state_cursor: ForwardInMemoryCursor::new(updates), + last_key: None, + seeked: false, + } } - fn seek_inner(&mut self, key: B256) -> Result, DatabaseError> { - // Take the next account from the post state with the key greater than or equal to the - // sought key. - let post_state_entry = self.post_state_cursor.seek(&key); + /// Asserts that the next entry to be returned from the cursor is not previous to the last entry + /// returned. + fn set_last_key(&mut self, next_entry: &Option<(B256, V::NonZero)>) { + let next_key = next_entry.as_ref().map(|e| e.0); + debug_assert!( + self.last_key.is_none_or(|last| next_key.is_none_or(|next| next >= last)), + "Cannot return entry {:?} previous to the last returned entry at {:?}", + next_key, + self.last_key, + ); + self.last_key = next_key; + } - // It's an exact match, return the account from post state without looking up in the - // database. - if post_state_entry.is_some_and(|entry| entry.0 == key) { - return Ok(post_state_entry) - } + /// Seeks the `cursor_entry` field of the struct using the cursor. + fn cursor_seek(&mut self, key: B256) -> Result<(), DatabaseError> { + // Only seek if: + // 1. We have a cursor entry and need to seek forward (entry.0 < key), OR + // 2. We have no cursor entry and haven't seeked yet (!self.seeked) + let should_seek = match self.cursor_entry.as_ref() { + Some(entry) => entry.0 < key, + None => !self.seeked, + }; - // It's not an exact match, reposition to the first greater or equal account that wasn't - // cleared. - let mut db_entry = self.cursor.seek(key)?; - while db_entry.as_ref().is_some_and(|(address, _)| self.is_account_cleared(address)) { - db_entry = self.cursor.next()?; + if should_seek { + self.cursor_entry = self.cursor.as_mut().map(|c| c.seek(key)).transpose()?.flatten(); } - // Compare two entries and return the lowest. - Ok(Self::compare_entries(post_state_entry, db_entry)) + Ok(()) } - fn next_inner(&mut self, last_account: B256) -> Result, DatabaseError> { - // Take the next account from the post state with the key greater than the last sought key. - let post_state_entry = self.post_state_cursor.first_after(&last_account); + /// Seeks the `cursor_entry` field of the struct to the subsequent entry using the cursor. + fn cursor_next(&mut self) -> Result<(), DatabaseError> { + debug_assert!(self.seeked); - // If post state was given precedence or account was cleared, move the cursor forward. - let mut db_entry = self.cursor.seek(last_account)?; - while db_entry.as_ref().is_some_and(|(address, _)| { - address <= &last_account || self.is_account_cleared(address) - }) { - db_entry = self.cursor.next()?; + // If the previous entry is `None`, and we've done a seek previously, then the cursor is + // exhausted, and we shouldn't call `next` again. + if self.cursor_entry.is_some() { + self.cursor_entry = self.cursor.as_mut().map(|c| c.next()).transpose()?.flatten(); } - // Compare two entries and return the lowest. - Ok(Self::compare_entries(post_state_entry, db_entry)) + Ok(()) } - /// Return the account with the lowest hashed account key. + /// Compares the current in-memory entry with the current entry of the cursor, and applies the + /// in-memory entry to the cursor entry as an overlay. /// - /// Given the next post state and database entries, return the smallest of the two. - /// If the account keys are the same, the post state entry is given precedence. - fn compare_entries( - post_state_item: Option<(B256, Account)>, - db_item: Option<(B256, Account)>, - ) -> Option<(B256, Account)> { - if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) { - // If both are not empty, return the smallest of the two - // Post state is given precedence if keys are equal - Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry }) - } else { - // Return either non-empty entry - db_item.or(post_state_item) + /// This may consume and move forward the current entries when the overlay indicates a removed + /// node. + fn choose_next_entry(&mut self) -> Result, DatabaseError> { + loop { + let post_state_current = + self.post_state_cursor.current().copied().map(|(k, v)| (k, v.into_option())); + + match (post_state_current, &self.cursor_entry) { + (Some((mem_key, None)), _) + if self.cursor_entry.as_ref().is_none_or(|(db_key, _)| &mem_key < db_key) => + { + // If overlay has a removed value but DB cursor is exhausted or ahead of the + // in-memory cursor then move ahead in-memory, as there might be further + // non-removed overlay values. + self.post_state_cursor.first_after(&mem_key); + } + (Some((mem_key, None)), Some((db_key, _))) if &mem_key == db_key => { + // If overlay has a removed value which is returned from DB then move both + // cursors ahead to the next key. + self.post_state_cursor.first_after(&mem_key); + self.cursor_next()?; + } + (Some((mem_key, Some(value))), _) + if self.cursor_entry.as_ref().is_none_or(|(db_key, _)| &mem_key <= db_key) => + { + // If overlay returns a value prior to the DB's value, or the DB is exhausted, + // then we return the overlay's value. + return Ok(Some((mem_key, value))) + } + // All other cases: + // - mem_key > db_key + // - overlay is exhausted + // Return the db_entry. If DB is also exhausted then this returns None. + _ => return Ok(self.cursor_entry), + } } } } -impl HashedCursor for HashedPostStateAccountCursor<'_, C> +impl HashedCursor for HashedPostStateCursor<'_, C, V> where - C: HashedCursor, + C: HashedCursor, + V: HashedPostStateCursorValue, { - type Value = Account; + type Value = V::NonZero; - /// Seek the next entry for a given hashed account key. + /// Seek the next entry for a given hashed key. /// /// If the post state contains the exact match for the key, return it. /// Otherwise, retrieve the next entries that are greater than or equal to the key from the @@ -157,9 +239,13 @@ where /// The returned account key is memoized and the cursor remains positioned at that key until /// [`HashedCursor::seek`] or [`HashedCursor::next`] are called. fn seek(&mut self, key: B256) -> Result, DatabaseError> { - // Find the closes account. - let entry = self.seek_inner(key)?; - self.last_account = entry.as_ref().map(|entry| entry.0); + self.cursor_seek(key)?; + self.post_state_cursor.seek(&key); + + self.seeked = true; + + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); Ok(entry) } @@ -168,170 +254,249 @@ where /// If the cursor is positioned at the entry, return the entry with next greater key. /// Returns [None] if the previous memoized or the next greater entries are missing. /// - /// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been - /// called. + /// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been called. fn next(&mut self) -> Result, DatabaseError> { - let next = match self.last_account { - Some(account) => { - let entry = self.next_inner(account)?; - self.last_account = entry.as_ref().map(|entry| entry.0); - entry - } - // no previous entry was found - None => None, - }; - Ok(next) - } -} - -/// The cursor to iterate over post state hashed storages and corresponding database entries. -/// It will always give precedence to the data from the post state. -#[derive(Debug)] -pub struct HashedPostStateStorageCursor<'a, C> { - /// The database cursor. - cursor: C, - /// Forward-only in-memory cursor over non zero-valued account storage slots. - post_state_cursor: Option>, - /// Reference to the collection of storage slot keys that were cleared. - cleared_slots: Option<&'a B256Set>, - /// Flag indicating whether database storage was wiped. - storage_wiped: bool, - /// The last slot that has been returned by the cursor. - /// De facto, this is the cursor's position for the given account key. - last_slot: Option, -} - -impl<'a, C> HashedPostStateStorageCursor<'a, C> -where - C: HashedStorageCursor, -{ - /// Create new instance of [`HashedPostStateStorageCursor`] for the given hashed address. - pub fn new(cursor: C, post_state_storage: Option<&'a HashedStorageSorted>) -> Self { - let post_state_cursor = - post_state_storage.map(|s| ForwardInMemoryCursor::new(&s.non_zero_valued_slots)); - let cleared_slots = post_state_storage.map(|s| &s.zero_valued_slots); - let storage_wiped = post_state_storage.is_some_and(|s| s.wiped); - Self { cursor, post_state_cursor, cleared_slots, storage_wiped, last_slot: None } - } - - /// Check if the slot was zeroed out in the post state. - /// The database is not checked since it already has no zero-valued slots. - fn is_slot_zero_valued(&self, slot: &B256) -> bool { - self.cleared_slots.is_some_and(|s| s.contains(slot)) - } + debug_assert!(self.seeked, "Cursor must be seek'd before next is called"); - /// Find the storage entry in post state or database that's greater or equal to provided subkey. - fn seek_inner(&mut self, subkey: B256) -> Result, DatabaseError> { - // Attempt to find the account's storage in post state. - let post_state_entry = self.post_state_cursor.as_mut().and_then(|c| c.seek(&subkey)); - - // If database storage was wiped or it's an exact match, - // return the storage slot from post state without looking up in the database. - if self.storage_wiped || post_state_entry.is_some_and(|entry| entry.0 == subkey) { - return Ok(post_state_entry) - } - - // It's not an exact match and storage was not wiped, - // reposition to the first greater or equal account. - let mut db_entry = self.cursor.seek(subkey)?; - while db_entry.as_ref().is_some_and(|entry| self.is_slot_zero_valued(&entry.0)) { - db_entry = self.cursor.next()?; - } - - // Compare two entries and return the lowest. - Ok(Self::compare_entries(post_state_entry, db_entry)) - } - - /// Find the storage entry that is right after current cursor position. - fn next_inner(&mut self, last_slot: B256) -> Result, DatabaseError> { - // Attempt to find the account's storage in post state. - let post_state_entry = - self.post_state_cursor.as_mut().and_then(|c| c.first_after(&last_slot)); - - // Return post state entry immediately if database was wiped. - if self.storage_wiped { - return Ok(post_state_entry) - } + // A `last_key` of `None` indicates that the cursor is exhausted. + let Some(last_key) = self.last_key else { + return Ok(None); + }; - // If post state was given precedence, move the cursor forward. - // If the entry was already returned or is zero-valued, move to the next. - let mut db_entry = self.cursor.seek(last_slot)?; - while db_entry - .as_ref() - .is_some_and(|entry| entry.0 == last_slot || self.is_slot_zero_valued(&entry.0)) + // If either cursor is currently pointing to the last entry which was returned then consume + // that entry so that `choose_next_entry` is looking at the subsequent one. + if let Some((key, _)) = self.post_state_cursor.current() && + key == &last_key { - db_entry = self.cursor.next()?; + self.post_state_cursor.first_after(&last_key); } - // Compare two entries and return the lowest. - Ok(Self::compare_entries(post_state_entry, db_entry)) - } - - /// Return the storage entry with the lowest hashed storage key (hashed slot). - /// - /// Given the next post state and database entries, return the smallest of the two. - /// If the storage keys are the same, the post state entry is given precedence. - fn compare_entries( - post_state_item: Option<(B256, U256)>, - db_item: Option<(B256, U256)>, - ) -> Option<(B256, U256)> { - if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) { - // If both are not empty, return the smallest of the two - // Post state is given precedence if keys are equal - Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry }) - } else { - // Return either non-empty entry - db_item.or(post_state_item) + if let Some((key, _)) = &self.cursor_entry && + key == &last_key + { + self.cursor_next()?; } - } -} -impl HashedCursor for HashedPostStateStorageCursor<'_, C> -where - C: HashedStorageCursor, -{ - type Value = U256; - - /// Seek the next account storage entry for a given hashed key pair. - fn seek(&mut self, subkey: B256) -> Result, DatabaseError> { - let entry = self.seek_inner(subkey)?; - self.last_slot = entry.as_ref().map(|entry| entry.0); + let entry = self.choose_next_entry()?; + self.set_last_key(&entry); Ok(entry) } - - /// Return the next account storage entry for the current account key. - fn next(&mut self) -> Result, DatabaseError> { - let next = match self.last_slot { - Some(last_slot) => { - let entry = self.next_inner(last_slot)?; - self.last_slot = entry.as_ref().map(|entry| entry.0); - entry - } - // no previous entry was found - None => None, - }; - Ok(next) - } } -impl HashedStorageCursor for HashedPostStateStorageCursor<'_, C> +/// The cursor to iterate over post state hashed values and corresponding database entries. +/// It will always give precedence to the data from the post state. +impl HashedStorageCursor for HashedPostStateCursor<'_, C, V> where - C: HashedStorageCursor, + C: HashedStorageCursor, + V: HashedPostStateCursorValue, { /// Returns `true` if the account has no storage entries. /// /// This function should be called before attempting to call [`HashedCursor::seek`] or /// [`HashedCursor::next`]. fn is_storage_empty(&mut self) -> Result { - let is_empty = match &self.post_state_cursor { - Some(cursor) => { - // If the storage has been wiped at any point - self.storage_wiped && - // and the current storage does not contain any non-zero values - cursor.is_empty() + // Storage is not empty if it has non-zero slots. + if self.post_state_cursor.has_any(|(_, value)| value.into_option().is_some()) { + return Ok(false); + } + + // If no non-zero slots in post state, check the database. + // Returns true if cursor is None (wiped storage or empty DB). + self.cursor.as_mut().map_or(Ok(true), |c| c.is_storage_empty()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hashed_cursor::mock::MockHashedCursor; + use parking_lot::Mutex; + use std::{collections::BTreeMap, sync::Arc}; + + mod proptest_tests { + use super::*; + use itertools::Itertools; + use proptest::prelude::*; + + /// Merge `db_nodes` with `post_state_nodes`, applying the post state overlay. + /// This properly handles deletions (ZERO values for U256, None for Account). + fn merge_with_overlay( + db_nodes: Vec<(B256, V::NonZero)>, + post_state_nodes: Vec<(B256, V)>, + ) -> Vec<(B256, V::NonZero)> + where + V: HashedPostStateCursorValue, + V::NonZero: Copy, + { + db_nodes + .into_iter() + .merge_join_by(post_state_nodes, |db_entry, mem_entry| db_entry.0.cmp(&mem_entry.0)) + .filter_map(|entry| match entry { + // Only in db: keep it + itertools::EitherOrBoth::Left((key, node)) => Some((key, node)), + // Only in post state: keep if not a deletion + itertools::EitherOrBoth::Right((key, wrapped)) => { + wrapped.into_option().map(|val| (key, val)) + } + // In both: post state takes precedence (keep if not a deletion) + itertools::EitherOrBoth::Both(_, (key, wrapped)) => { + wrapped.into_option().map(|val| (key, val)) + } + }) + .collect() + } + + /// Generate a strategy for U256 values + fn u256_strategy() -> impl Strategy { + any::().prop_map(U256::from) + } + + /// Generate a sorted vector of (B256, U256) entries + fn sorted_db_nodes_strategy() -> impl Strategy> { + prop::collection::vec((any::(), u256_strategy()), 0..20).prop_map(|entries| { + let mut result: Vec<(B256, U256)> = entries + .into_iter() + .map(|(byte, value)| (B256::repeat_byte(byte), value)) + .collect(); + result.sort_by(|a, b| a.0.cmp(&b.0)); + result.dedup_by(|a, b| a.0 == b.0); + result + }) + } + + /// Generate a sorted vector of (B256, U256) entries (including deletions as ZERO) + fn sorted_post_state_nodes_strategy() -> impl Strategy> { + prop::collection::vec((any::(), u256_strategy()), 0..20).prop_map(|entries| { + let mut result: Vec<(B256, U256)> = entries + .into_iter() + .map(|(byte, value)| (B256::repeat_byte(byte), value)) + .collect(); + result.sort_by(|a, b| a.0.cmp(&b.0)); + result.dedup_by(|a, b| a.0 == b.0); + result + }) + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(1000))] + /// Tests `HashedPostStateCursor` produces identical results to a pre-merged cursor + /// across 1000 random scenarios. + /// + /// For random DB entries and post-state changes, creates two cursors: + /// - Control: pre-merged data (expected behavior) + /// - Test: `HashedPostStateCursor` (lazy overlay) + /// + /// Executes random sequences of `next()` and `seek()` operations, asserting + /// both cursors return identical results. + #[test] + fn proptest_hashed_post_state_cursor( + db_nodes in sorted_db_nodes_strategy(), + post_state_nodes in sorted_post_state_nodes_strategy(), + op_choices in prop::collection::vec(any::(), 10..500), + ) { + reth_tracing::init_test_tracing(); + use tracing::debug; + + debug!("Starting proptest!"); + + // Create the expected results by merging the two sorted vectors, + // properly handling deletions (ZERO values in post_state_nodes) + let expected_combined = merge_with_overlay(db_nodes.clone(), post_state_nodes.clone()); + + // Collect all keys for operation generation + let all_keys: Vec = expected_combined.iter().map(|(k, _)| *k).collect(); + + // Create a control cursor using the combined result with a mock cursor + let control_db_map: BTreeMap = expected_combined.into_iter().collect(); + let control_db_arc = Arc::new(control_db_map); + let control_visited_keys = Arc::new(Mutex::new(Vec::new())); + let mut control_cursor = MockHashedCursor::new(control_db_arc, control_visited_keys); + + // Create the HashedPostStateCursor being tested + let db_nodes_map: BTreeMap = db_nodes.into_iter().collect(); + let db_nodes_arc = Arc::new(db_nodes_map); + let visited_keys = Arc::new(Mutex::new(Vec::new())); + let mock_cursor = MockHashedCursor::new(db_nodes_arc, visited_keys); + let mut test_cursor = HashedPostStateCursor::new(Some(mock_cursor), &post_state_nodes); + + // Test: seek to the beginning first + let control_first = control_cursor.seek(B256::ZERO).unwrap(); + let test_first = test_cursor.seek(B256::ZERO).unwrap(); + debug!( + control=?control_first.as_ref().map(|(k, _)| k), + test=?test_first.as_ref().map(|(k, _)| k), + "Initial seek returned", + ); + assert_eq!(control_first, test_first, "Initial seek mismatch"); + + // If both cursors returned None, nothing to test + if control_first.is_none() && test_first.is_none() { + return Ok(()); + } + + // Track the last key returned from the cursor + let mut last_returned_key = control_first.as_ref().map(|(k, _)| *k); + + // Execute a sequence of random operations + for choice in op_choices { + let op_type = choice % 2; // Only 2 operation types: next and seek + + match op_type { + 0 => { + // Next operation + let control_result = control_cursor.next().unwrap(); + let test_result = test_cursor.next().unwrap(); + debug!( + control=?control_result.as_ref().map(|(k, _)| k), + test=?test_result.as_ref().map(|(k, _)| k), + "Next returned", + ); + assert_eq!(control_result, test_result, "Next operation mismatch"); + + last_returned_key = control_result.as_ref().map(|(k, _)| *k); + + // Stop if both cursors are exhausted + if control_result.is_none() && test_result.is_none() { + break; + } + } + _ => { + // Seek operation - choose a key >= last_returned_key + if all_keys.is_empty() { + continue; + } + + let valid_keys: Vec<_> = all_keys + .iter() + .filter(|k| last_returned_key.is_none_or(|last| **k >= last)) + .collect(); + + if valid_keys.is_empty() { + continue; + } + + let key = *valid_keys[(choice as usize / 2) % valid_keys.len()]; + + let control_result = control_cursor.seek(key).unwrap(); + let test_result = test_cursor.seek(key).unwrap(); + debug!( + control=?control_result.as_ref().map(|(k, _)| k), + test=?test_result.as_ref().map(|(k, _)| k), + ?key, + "Seek returned", + ); + assert_eq!(control_result, test_result, "Seek operation mismatch for key {:?}", key); + + last_returned_key = control_result.as_ref().map(|(k, _)| *k); + + // Stop if both cursors are exhausted + if control_result.is_none() && test_result.is_none() { + break; + } + } + } + } } - None => self.cursor.is_storage_empty()?, - }; - Ok(is_empty) + } } } diff --git a/crates/trie/trie/src/node_iter.rs b/crates/trie/trie/src/node_iter.rs index 862176c803a..aef668e8ffe 100644 --- a/crates/trie/trie/src/node_iter.rs +++ b/crates/trie/trie/src/node_iter.rs @@ -306,10 +306,11 @@ where #[cfg(test)] mod tests { + use super::{TrieElement, TrieNodeIter}; use crate::{ hashed_cursor::{ - mock::MockHashedCursorFactory, noop::NoopHashedAccountCursor, HashedCursorFactory, - HashedPostStateAccountCursor, + mock::MockHashedCursorFactory, noop::NoopHashedCursor, HashedCursorFactory, + HashedPostStateCursor, }, mock::{KeyVisit, KeyVisitType}, trie_cursor::{ @@ -332,8 +333,6 @@ mod tests { }; use std::collections::BTreeMap; - use super::{TrieElement, TrieNodeIter}; - /// Calculate the branch node stored in the database by feeding the provided state to the hash /// builder and taking the trie updates. fn get_hash_builder_branch_nodes( @@ -353,8 +352,8 @@ mod tests { let mut node_iter = TrieNodeIter::state_trie( walker, - HashedPostStateAccountCursor::new( - NoopHashedAccountCursor::default(), + HashedPostStateCursor::new( + Option::>::None, hashed_post_state.accounts(), ), ); diff --git a/crates/trie/trie/src/trie_cursor/in_memory.rs b/crates/trie/trie/src/trie_cursor/in_memory.rs index 2311dce82b3..b4f4bdd5fd6 100644 --- a/crates/trie/trie/src/trie_cursor/in_memory.rs +++ b/crates/trie/trie/src/trie_cursor/in_memory.rs @@ -86,6 +86,10 @@ impl<'a, C: TrieCursor> InMemoryTrieCursor<'a, C> { cursor: Option, trie_updates: &'a [(Nibbles, Option)], ) -> Self { + debug_assert!( + trie_updates.is_sorted_by_key(|(k, _)| k), + "Overlay values must be sorted by path" + ); let in_memory_cursor = ForwardInMemoryCursor::new(trie_updates); Self { cursor, cursor_entry: None, in_memory_cursor, last_key: None, seeked: false } }