diff --git a/crates/engine/tree/src/tree/cached_state.rs b/crates/engine/tree/src/tree/cached_state.rs index c1bb028cab2..bc543d067a0 100644 --- a/crates/engine/tree/src/tree/cached_state.rs +++ b/crates/engine/tree/src/tree/cached_state.rs @@ -1,8 +1,5 @@ //! Execution cache implementation for block processing. -use alloy_primitives::{ - map::{DefaultHashBuilder, HashSet}, - Address, StorageKey, StorageValue, B256, -}; +use alloy_primitives::{Address, StorageKey, StorageValue, B256}; use metrics::Gauge; use mini_moka::sync::CacheBuilder; use reth_errors::ProviderResult; @@ -17,6 +14,7 @@ use reth_trie::{ updates::TrieUpdates, AccountProof, HashedPostState, HashedStorage, MultiProof, MultiProofTargets, StorageMultiProof, StorageProof, TrieInput, }; +use revm_primitives::map::DefaultHashBuilder; use std::{sync::Arc, time::Duration}; use tracing::{debug_span, instrument, trace}; @@ -302,70 +300,65 @@ pub(crate) struct ExecutionCache { /// Cache for contract bytecode, keyed by code hash. code_cache: Cache>, - /// Flattened storage cache: composite key of (`Address`, `StorageKey`) maps directly to - /// values. - storage_cache: Cache<(Address, StorageKey), Option>, + /// Per-account storage cache: outer cache keyed by Address, inner cache tracks that account’s + /// storage slots. + storage_cache: Cache, /// Cache for basic account information (nonce, balance, code hash). account_cache: Cache>, } impl ExecutionCache { - /// Get storage value from flattened cache. + /// Get storage value from hierarchical cache. /// /// Returns a `SlotStatus` indicating whether: - /// - `NotCached`: The storage slot is not in the cache - /// - `Empty`: The slot exists in the cache but is empty + /// - `NotCached`: The account's storage cache doesn't exist + /// - `Empty`: The slot exists in the account's cache but is empty /// - `Value`: The slot exists and has a specific value pub(crate) fn get_storage(&self, address: &Address, key: &StorageKey) -> SlotStatus { - match self.storage_cache.get(&(*address, *key)) { + match self.storage_cache.get(address) { None => SlotStatus::NotCached, - Some(None) => SlotStatus::Empty, - Some(Some(value)) => SlotStatus::Value(value), + Some(account_cache) => account_cache.get_storage(key), } } - /// Insert storage value into flattened cache + /// Insert storage value into hierarchical cache pub(crate) fn insert_storage( &self, address: Address, key: StorageKey, value: Option, ) { - self.storage_cache.insert((address, key), value); + self.insert_storage_bulk(address, [(key, value)]); } - /// Insert multiple storage values into flattened cache for a single account + /// Insert multiple storage values into hierarchical cache for a single account /// - /// This method inserts multiple storage values for the same address directly - /// into the flattened cache. + /// This method is optimized for inserting multiple storage values for the same address + /// by doing the account cache lookup only once instead of for each key-value pair. pub(crate) fn insert_storage_bulk(&self, address: Address, storage_entries: I) where I: IntoIterator)>, { + let account_cache = self.storage_cache.get(&address).unwrap_or_else(|| { + let account_cache = AccountStorageCache::default(); + self.storage_cache.insert(address, account_cache.clone()); + account_cache + }); + for (key, value) in storage_entries { - self.storage_cache.insert((address, key), value); + account_cache.insert_storage(key, value); } } + /// Invalidate storage for specific account + pub(crate) fn invalidate_account_storage(&self, address: &Address) { + self.storage_cache.invalidate(address); + } + /// Returns the total number of storage slots cached across all accounts pub(crate) fn total_storage_slots(&self) -> usize { - self.storage_cache.entry_count() as usize - } - - /// Invalidates the storage for all addresses in the set - #[instrument(level = "debug", target = "engine::caching", skip_all, fields(accounts = addresses.len()))] - pub(crate) fn invalidate_storages(&self, addresses: HashSet<&Address>) { - // NOTE: this must collect because the invalidate function should not be called while we - // hold an iter for it - let storage_entries = self - .storage_cache - .iter() - .filter_map(|entry| addresses.contains(&entry.key().0).then_some(*entry.key())) - .collect::>(); - for key in storage_entries { - self.storage_cache.invalidate(&key) - } + self.storage_cache.iter().map(|addr| addr.len()).sum() } /// Inserts the post-execution state changes into the cache. @@ -405,7 +398,6 @@ impl ExecutionCache { state_updates.state.values().map(|account| account.storage.len()).sum::() ) .entered(); - let mut invalidated_accounts = HashSet::default(); for (addr, account) in &state_updates.state { // If the account was not modified, as in not changed and not destroyed, then we have // nothing to do w.r.t. this particular account and can move on @@ -418,7 +410,7 @@ impl ExecutionCache { // Invalidate the account cache entry if destroyed self.account_cache.invalidate(addr); - invalidated_accounts.insert(addr); + self.invalidate_account_storage(addr); continue } @@ -445,9 +437,6 @@ impl ExecutionCache { self.account_cache.insert(*addr, Some(Account::from(account_info))); } - // invalidate storage for all destroyed accounts - self.invalidate_storages(invalidated_accounts); - Ok(()) } } @@ -476,11 +465,11 @@ impl ExecutionCacheBuilder { const TIME_TO_IDLE: Duration = Duration::from_secs(3600); // 1 hour let storage_cache = CacheBuilder::new(self.storage_cache_entries) - .weigher(|_key: &(Address, StorageKey), _value: &Option| -> u32 { - // Size of composite key (Address + StorageKey) + Option - // Address: 20 bytes, StorageKey: 32 bytes, Option: 33 bytes - // Plus some overhead for the hash map entry - 120_u32 + .weigher(|_key: &Address, value: &AccountStorageCache| -> u32 { + // values based on results from measure_storage_cache_overhead test + let base_weight = 39_000; + let slots_weight = value.len() * 218; + (base_weight + slots_weight) as u32 }) .max_capacity(storage_cache_size) .time_to_live(EXPIRY_TIME) @@ -603,6 +592,56 @@ impl SavedCache { } } +/// Cache for an individual account's storage slots. +/// +/// This represents the second level of the hierarchical storage cache. +/// Each account gets its own `AccountStorageCache` to store accessed storage slots. +#[derive(Debug, Clone)] +pub(crate) struct AccountStorageCache { + /// Map of storage keys to their cached values. + slots: Cache>, +} + +impl AccountStorageCache { + /// Create a new [`AccountStorageCache`] + pub(crate) fn new(max_slots: u64) -> Self { + Self { + slots: CacheBuilder::new(max_slots).build_with_hasher(DefaultHashBuilder::default()), + } + } + + /// Get a storage value from this account's cache. + /// - `NotCached`: The slot is not in the cache + /// - `Empty`: The slot is empty + /// - `Value`: The slot has a specific value + pub(crate) fn get_storage(&self, key: &StorageKey) -> SlotStatus { + match self.slots.get(key) { + None => SlotStatus::NotCached, + Some(None) => SlotStatus::Empty, + Some(Some(value)) => SlotStatus::Value(value), + } + } + + /// Insert a storage value + pub(crate) fn insert_storage(&self, key: StorageKey, value: Option) { + self.slots.insert(key, value); + } + + /// Returns the number of slots in the cache + pub(crate) fn len(&self) -> usize { + self.slots.entry_count() as usize + } +} + +impl Default for AccountStorageCache { + fn default() -> Self { + // With weigher and max_capacity in place, this number represents + // the maximum number of entries that can be stored, not the actual + // memory usage which is controlled by storage cache's max_capacity. + Self::new(1_000_000) + } +} + #[cfg(test)] mod tests { use super::*; @@ -677,36 +716,32 @@ mod tests { #[test] fn measure_storage_cache_overhead() { - let (base_overhead, cache) = - measure_allocation(|| ExecutionCacheBuilder::default().build_caches(1000)); - println!("Base ExecutionCache overhead: {base_overhead} bytes"); + let (base_overhead, cache) = measure_allocation(|| AccountStorageCache::new(1000)); + println!("Base AccountStorageCache overhead: {base_overhead} bytes"); let mut rng = rand::rng(); - let address = Address::random(); let key = StorageKey::random(); let value = StorageValue::from(rng.random::()); let (first_slot, _) = measure_allocation(|| { - cache.insert_storage(address, key, Some(value)); + cache.insert_storage(key, Some(value)); }); println!("First slot insertion overhead: {first_slot} bytes"); const TOTAL_SLOTS: usize = 10_000; let (test_slots, _) = measure_allocation(|| { for _ in 0..TOTAL_SLOTS { - let addr = Address::random(); let key = StorageKey::random(); let value = StorageValue::from(rng.random::()); - cache.insert_storage(addr, key, Some(value)); + cache.insert_storage(key, Some(value)); } }); println!("Average overhead over {} slots: {} bytes", TOTAL_SLOTS, test_slots / TOTAL_SLOTS); println!("\nTheoretical sizes:"); - println!("Address size: {} bytes", size_of::
()); println!("StorageKey size: {} bytes", size_of::()); println!("StorageValue size: {} bytes", size_of::()); println!("Option size: {} bytes", size_of::>()); - println!("(Address, StorageKey) size: {} bytes", size_of::<(Address, StorageKey)>()); + println!("Option size: {} bytes", size_of::>()); } #[test]