From a3c1baafe7d7fda13f37789ff97e6e4b805a2b6e Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Mon, 18 May 2026 13:55:05 +0200 Subject: [PATCH 1/8] perf(l1): lazy BAL cursor for per-tx parallel execution Replaces eager per-tx BAL prefix materialization inside execute_block_parallel with an on-read LazyBalCursor installed on each per-tx GeneralizedDatabase. load_account consults the cursor for account info only; get_storage_value consults it per-slot. Each tx now materializes only what it actually touches instead of the full BAL prefix. The two outer sequential seed_db_from_bal callers (system-call recovery, post-tx outer seed) remain untouched. - Extract seed_one_address_info_from_bal + seed_one_storage_slot_from_bal from seed_db_from_bal as reusable helpers in ethrex-levm - Add Clone to BalAddressIndex so it can be Arc-wrapped once per block - Add lazy_bal: Option<LazyBalCursor> on GeneralizedDatabase - Hook load_account and get_storage_value with explicit borrow-ordering - Switch execute_block_parallel to set tx_db.lazy_bal instead of seeding - Drop per-tx DB capacity hint from bal_account_count to 32 Tests in test/tests/levm/bal_view_tests.rs cover: - T1 off-by-one cutoff (tx1_sees_tx0_write) - T2 no storage injection in load_account - T3 SSTORE pre-image flows through cursor - T4 partial-coverage load_account does not recurse (cursor .take() guard) --- crates/common/types/block_access_list.rs | 1 + crates/vm/backends/levm/mod.rs | 162 +++---------- crates/vm/levm/src/db/gen_db.rs | 292 +++++++++++++++++++++-- test/Cargo.toml | 2 + test/tests/levm/bal_view_tests.rs | 170 +++++++++++++ test/tests/levm/mod.rs | 1 + 6 files changed, 483 insertions(+), 145 deletions(-) create mode 100644 test/tests/levm/bal_view_tests.rs diff --git a/crates/common/types/block_access_list.rs b/crates/common/types/block_access_list.rs index fb595e30e8c..547a70ac292 100644 --- a/crates/common/types/block_access_list.rs +++ b/crates/common/types/block_access_list.rs @@ -602,6 +602,7 @@ impl 
BlockAccessList { /// Pre-computed index for fast per-tx BAL validation lookups. /// Built once per block, shared read-only across parallel tx validations. +#[derive(Clone)] pub struct BalAddressIndex { /// Maps each address in the BAL to its index in `BlockAccessList.inner`. pub addr_to_idx: FxHashMap, diff --git a/crates/vm/backends/levm/mod.rs b/crates/vm/backends/levm/mod.rs index 5e28e5a2fbc..d6bd88d1440 100644 --- a/crates/vm/backends/levm/mod.rs +++ b/crates/vm/backends/levm/mod.rs @@ -48,6 +48,10 @@ use ethrex_levm::constants::{ }; use ethrex_levm::db::gen_db::GeneralizedDatabase; #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +use ethrex_levm::db::gen_db::{ + LazyBalCursor, code_from_bal, seed_one_address_info_from_bal, seed_one_storage_slot_from_bal, +}; +#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] use ethrex_levm::db::{Database, gen_db::CacheDB}; use ethrex_levm::errors::{InternalError, TxValidationError}; #[cfg(feature = "perf_opcode_timings")] @@ -455,6 +459,7 @@ impl LEVM { // Withdrawal index is n_txs+1 in BAL; we use n_txs to avoid double-applying // withdrawal balances (process_withdrawals handles those below). let last_tx_idx = u32::try_from(block.body.transactions.len()).unwrap_or(u32::MAX); + // Eager seed retained: lazy_bal cursor is per-tx only; outer DB has no cursor. Self::seed_db_from_bal( db, bal, @@ -737,19 +742,6 @@ impl LEVM { )) } - /// Convert BAL into `Vec` for the merkleizer. - /// Compute code hash and optional `Code` object from raw bytecode in a BAL entry. 
- #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] - fn code_from_bal(new_code: &Bytes) -> (H256, Option<Code>) { - if new_code.is_empty() { - (*EMPTY_KECCACK_HASH, None) - } else { - let code_obj = Code::from_bytecode(new_code.clone(), &ethrex_crypto::NativeCrypto); - let hash = code_obj.hash; - (hash, Some(code_obj)) - } - } - /// /// For each account in the BAL, extracts the **final** post-block state /// (highest `block_access_index` entry per field) and builds an AccountUpdate. @@ -812,7 +804,7 @@ impl LEVM { // Final code: last entry or prestate let (code_hash, code) = if let Some(c) = acct_changes.code_changes.last() { - Self::code_from_bal(&c.new_code) + code_from_bal(&c.new_code) } else { (prestate.code_hash, None) }; @@ -881,6 +873,11 @@ impl LEVM { Ok(updates) } + /// Eager BAL prefix seed — used only by the outer DB path (system-call recovery + /// at `:440` and post-tx outer seed at `:463`). Per-tx parallel execution uses + /// `LazyBalCursor` in `execute_block_parallel`; see also `seed_one_address_info_from_bal` + /// and `seed_one_storage_slot_from_bal` in `ethrex_levm::db::gen_db`. + /// /// Pre-seed a GeneralizedDatabase with BAL-derived state for a specific tx. /// /// For each BAL-modified account, applies accumulated diffs with @@ -898,118 +895,38 @@ impl LEVM { max_idx: u32, accounts_by_min_index: &[(u32, usize)], ) -> Result<(), EvmError> { - // Only visit accounts whose minimum change index <= max_idx. let end = accounts_by_min_index.partition_point(|(min_idx, _)| *min_idx <= max_idx); let bal_accounts = bal.accounts(); for &(_, acct_idx) in &accounts_by_min_index[..end] { - let acct_changes = &bal_accounts[acct_idx]; - let addr = acct_changes.address; + seed_one_address_info_from_bal(db, bal, acct_idx, max_idx) + .map_err(|e| EvmError::Custom(format!("seed_db_from_bal: {e}")))?; - // Binary search (slices are sorted ascending by block_access_index): - // partition_point returns the number of elements <= max_idx. 
- let balance_pos = acct_changes - .balance_changes - .partition_point(|c| c.block_access_index <= max_idx); - let nonce_pos = acct_changes - .nonce_changes - .partition_point(|c| c.block_access_index <= max_idx); - let code_pos = acct_changes - .code_changes - .partition_point(|c| c.block_access_index <= max_idx); - // Each slot's slot_changes are sorted ascending by block_access_index, - // so if the first entry is <= max_idx, at least one change is in scope. + let acct_changes = &bal_accounts[acct_idx]; + if acct_changes.storage_changes.is_empty() { + continue; + } let any_storage = acct_changes.storage_changes.iter().any(|sc| { sc.slot_changes .first() .is_some_and(|c| c.block_access_index <= max_idx) }); - - if balance_pos == 0 && nonce_pos == 0 && !any_storage && code_pos == 0 { + if !any_storage { continue; } - - // Compute code update before borrowing acc (borrow checker: can't access - // db.codes while acc holds a mutable borrow of db) - let code_update = if code_pos > 0 { - Some(Self::code_from_bal( - &acct_changes.code_changes[code_pos - 1].new_code, - )) - } else { - None - }; - - // When BAL covers all account info fields (balance + nonce + code), insert - // a default LevmAccount directly to skip the store/shared_base lookup. - // For partial coverage, load from store to fill missing fields. - let has_all_info = balance_pos > 0 && nonce_pos > 0 && code_pos > 0; - if has_all_info { - use ethrex_common::types::AccountInfo; - let balance = acct_changes.balance_changes[balance_pos - 1].post_balance; - let nonce = acct_changes.nonce_changes[nonce_pos - 1].post_nonce; - let code_hash = code_update - .as_ref() - .map(|(h, _)| *h) - .unwrap_or(*EMPTY_KECCACK_HASH); - // NOTE: has_storage is false for newly inserted accounts. This is safe - // because this DB is only used for the parallel execution path (state - // comes from BAL, not get_state_transitions_tx). Do not reuse this DB - // for sequential fallback without fixing has_storage. 
- let acc = db - .current_accounts_state - .entry(addr) - .or_insert_with(|| LevmAccount { - info: AccountInfo::default(), - storage: FxHashMap::default(), - has_storage: false, - status: AccountStatus::Modified, - exists: true, - }); - acc.info.balance = balance; - acc.info.nonce = nonce; - acc.info.code_hash = code_hash; - acc.mark_modified(); - } else { - // Partial BAL coverage — load from store/shared_base, then overwrite - // the covered fields. get_account already caches, so get_account_mut - // will be a cache hit. + let addr = acct_changes.address; + if !db.current_accounts_state.contains_key(&addr) { db.get_account(addr) - .map_err(|e| EvmError::Custom(format!("seed_db_from_bal load: {e}")))?; - let acc = db - .get_account_mut(addr) - .map_err(|e| EvmError::Custom(format!("seed bal: {e}")))?; - - if balance_pos > 0 { - acc.info.balance = acct_changes.balance_changes[balance_pos - 1].post_balance; - } - if nonce_pos > 0 { - acc.info.nonce = acct_changes.nonce_changes[nonce_pos - 1].post_nonce; - } - if let Some((hash, _)) = &code_update { - acc.info.code_hash = *hash; - } + .map_err(|e| EvmError::Custom(format!("seed storage: {e}")))?; } - - // Apply storage changes (works for both paths since acc is now in current_accounts_state) - if any_storage { - let acc = db - .current_accounts_state - .get_mut(&addr) - .expect("account was just inserted"); - for sc in &acct_changes.storage_changes { - let pos = sc - .slot_changes - .partition_point(|c| c.block_access_index <= max_idx); - if pos > 0 { - let key = ethrex_common::utils::u256_to_h256(sc.slot); - acc.storage.insert(key, sc.slot_changes[pos - 1].post_value); - } + let acc = db + .get_account_mut(addr) + .map_err(|e| EvmError::Custom(format!("seed storage mut: {e}")))?; + for sc in &acct_changes.storage_changes { + let key = ethrex_common::utils::u256_to_h256(sc.slot); + if let Some(value) = seed_one_storage_slot_from_bal(bal, acct_idx, key, max_idx) { + acc.storage.insert(key, value); } } - - // Insert 
code object after acc borrow is released - if let Some((hash, Some(code_obj))) = code_update { - db.codes.entry(hash).or_insert(code_obj); - } } Ok(()) } @@ -1105,8 +1022,9 @@ impl LEVM { .is_some_and(|a| a.storage.contains_key(key)) }); - // Pre-compute capacity hint for per-tx DBs from BAL account count. - let bal_account_count = bal.accounts().len(); + // Small capacity hint — per-tx DBs materialize only touched accounts via lazy_bal cursor. + let arc_bal = Arc::new(bal.clone()); + let arc_idx = Arc::new(validation_index.clone()); // 2. Execute all txs in parallel (embarrassingly parallel, BAL-seeded). // BAL validation is deferred to after the gas limit check (step 3) so that @@ -1129,23 +1047,17 @@ impl LEVM { let mut tx_db = GeneralizedDatabase::new_with_shared_base_and_capacity( store.clone(), system_seed.clone(), - bal_account_count, + 32, ); + tx_db.lazy_bal = Some(LazyBalCursor { + bal: arc_bal.clone(), + bal_index: u32::try_from(tx_idx + 1).unwrap_or(u32::MAX), + index: arc_idx.clone(), + }); // Small capacity: parallel txs rarely nest >8 call frames, and // over-allocating per-tx wastes memory across many rayon tasks. let mut stack_pool = Vec::with_capacity(8); - // Pre-seed with BAL-derived intermediate state. - // BAL index: 0 = system calls, 1 = tx 0, 2 = tx 1, ... - // For tx at index i, we want state through BAL index i - // (= system calls + effects of txs 0..i-1). - Self::seed_db_from_bal( - &mut tx_db, - bal, - u32::try_from(tx_idx).unwrap_or(u32::MAX), - &validation_index.accounts_by_min_index, - )?; - // Enable accessed_accounts tracker (coarse) for `unaccessed_pure_accounts` // diagnostics. Safe to over-report: used only to REMOVE entries from a // extraneous-entry checklist. 
diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index 16f03bb0faa..eeefe5dff6e 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -6,7 +6,9 @@ use ethrex_common::U256; use ethrex_common::types::Account; use ethrex_common::types::Code; use ethrex_common::types::CodeMetadata; -use ethrex_common::types::block_access_list::{BlockAccessList, BlockAccessListRecorder}; +use ethrex_common::types::block_access_list::{ + BalAddressIndex, BlockAccessList, BlockAccessListRecorder, +}; use ethrex_common::utils::ZERO_U256; use super::Database; @@ -24,6 +26,166 @@ use std::collections::hash_map::Entry; pub type CacheDB = FxHashMap<Address, LevmAccount>; +/// Per-tx BAL cursor for lazy on-read prefix materialization. +/// `bal_index = tx_idx + 1`; cursor's effective max_idx is `bal_index - 1`, +/// matching `seed_db_from_bal`'s `max_idx = tx_idx` semantics. +#[derive(Clone)] +pub struct LazyBalCursor { + pub bal: Arc<BlockAccessList>, + pub bal_index: u32, + pub index: Arc<BalAddressIndex>, +} + +/// Apply balance, nonce, and code fields from BAL for a single account into `db`. +/// +/// Returns `true` if any info field was applied; `false` if all field positions +/// were 0 (no info changes for this account at indices <= max_idx). +/// Does NOT touch `account.storage`. 
+#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +pub fn seed_one_address_info_from_bal( + db: &mut GeneralizedDatabase, + bal: &BlockAccessList, + acct_idx: usize, + max_idx: u32, +) -> Result<bool, InternalError> { + use ethrex_common::types::AccountInfo; + + let acct_changes = bal + .accounts() + .get(acct_idx) + .ok_or(InternalError::AccountNotFound)?; + let addr = acct_changes.address; + + let balance_pos = acct_changes + .balance_changes + .partition_point(|c| c.block_access_index <= max_idx); + let nonce_pos = acct_changes + .nonce_changes + .partition_point(|c| c.block_access_index <= max_idx); + let code_pos = acct_changes + .code_changes + .partition_point(|c| c.block_access_index <= max_idx); + + if balance_pos == 0 && nonce_pos == 0 && code_pos == 0 { + return Ok(false); + } + + // Compute code update before borrowing acc (borrow checker: can't access + // db.codes while acc holds a mutable borrow of db). + let code_update = if code_pos > 0 { + let entry = acct_changes + .code_changes + .get(code_pos.saturating_sub(1)) + .ok_or(InternalError::AccountNotFound)?; + Some(code_from_bal(&entry.new_code)) + } else { + None + }; + + // When BAL covers all account info fields (balance + nonce + code), insert + // a default LevmAccount directly to skip the store/shared_base lookup. + // For partial coverage, load from store to fill missing fields. + let has_all_info = balance_pos > 0 && nonce_pos > 0 && code_pos > 0; + if has_all_info { + use ethrex_common::constants::EMPTY_KECCACK_HASH; + let balance = acct_changes + .balance_changes + .get(balance_pos.saturating_sub(1)) + .ok_or(InternalError::AccountNotFound)? + .post_balance; + let nonce = acct_changes + .nonce_changes + .get(nonce_pos.saturating_sub(1)) + .ok_or(InternalError::AccountNotFound)? 
+ .post_nonce; + let code_hash = code_update + .as_ref() + .map(|(h, _)| *h) + .unwrap_or(*EMPTY_KECCACK_HASH); + let acc = db + .current_accounts_state + .entry(addr) + .or_insert_with(|| LevmAccount { + info: AccountInfo::default(), + storage: FxHashMap::default(), + has_storage: false, + status: AccountStatus::Modified, + exists: true, + }); + acc.info.balance = balance; + acc.info.nonce = nonce; + acc.info.code_hash = code_hash; + acc.mark_modified(); + } else { + db.get_account(addr) + .map_err(|e| InternalError::Custom(format!("seed_db_from_bal load: {e}")))?; + let acc = db + .get_account_mut(addr) + .map_err(|e| InternalError::Custom(format!("seed bal: {e}")))?; + + if balance_pos > 0 + && let Some(entry) = acct_changes + .balance_changes + .get(balance_pos.saturating_sub(1)) + { + acc.info.balance = entry.post_balance; + } + if nonce_pos > 0 + && let Some(entry) = acct_changes.nonce_changes.get(nonce_pos.saturating_sub(1)) + { + acc.info.nonce = entry.post_nonce; + } + if let Some((hash, _)) = &code_update { + acc.info.code_hash = *hash; + } + } + + // Insert code object after acc borrow is released. + if let Some((hash, Some(code_obj))) = code_update { + db.codes.entry(hash).or_insert(code_obj); + } + + Ok(true) +} + +/// Read the post-value of a single storage slot from the BAL up to `max_idx`. +/// +/// Pure read; does not touch `db`. Returns `Some(value)` if a change at +/// `block_access_index <= max_idx` exists for `key`, `None` otherwise. 
+#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +pub fn seed_one_storage_slot_from_bal( + bal: &BlockAccessList, + acct_idx: usize, + key: H256, + max_idx: u32, +) -> Option<U256> { + let acct_changes = bal.accounts().get(acct_idx)?; + let sc = acct_changes + .storage_changes + .iter() + .find(|sc| ethrex_common::utils::u256_to_h256(sc.slot) == key)?; + let pos = sc + .slot_changes + .partition_point(|c| c.block_access_index <= max_idx); + sc.slot_changes + .get(pos.saturating_sub(1)) + .filter(|_| pos > 0) + .map(|c| c.post_value) +} + +/// Compute code hash and optional `Code` object from raw bytecode in a BAL entry. +#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +pub fn code_from_bal(new_code: &bytes::Bytes) -> (H256, Option<Code>) { + use ethrex_common::constants::EMPTY_KECCACK_HASH; + if new_code.is_empty() { + (*EMPTY_KECCACK_HASH, None) + } else { + let code_obj = Code::from_bytecode(new_code.clone(), &ethrex_crypto::NativeCrypto); + let hash = code_obj.hash; + (hash, Some(code_obj)) + } +} + #[derive(Clone)] pub struct GeneralizedDatabase { pub store: Arc, @@ -45,6 +207,9 @@ pub struct GeneralizedDatabase { /// Optional tracker for BAL validation: records addresses accessed via load_account. /// Enabled only during parallel execution to detect extraneous BAL pure-access entries. pub accessed_accounts: Option>, + /// Optional BAL cursor for lazy per-read prefix materialization. + /// When set, account loads and storage reads consult the BAL before hitting the store. 
+ pub lazy_bal: Option<LazyBalCursor>, } impl GeneralizedDatabase { @@ -60,6 +225,7 @@ impl GeneralizedDatabase { bal_recorder: None, skip_initial_tracking: false, accessed_accounts: None, + lazy_bal: None, } } @@ -92,6 +258,7 @@ impl GeneralizedDatabase { bal_recorder: None, skip_initial_tracking: true, accessed_accounts: None, + lazy_bal: None, } } @@ -150,6 +317,7 @@ impl GeneralizedDatabase { bal_recorder: None, skip_initial_tracking: false, accessed_accounts: None, + lazy_bal: None, } } @@ -160,29 +328,89 @@ impl GeneralizedDatabase { if let Some(tracker) = &mut self.accessed_accounts { tracker.insert(address); } - match self.current_accounts_state.entry(address) { - Entry::Occupied(entry) => Ok(entry.into_mut()), - Entry::Vacant(entry) => { - if let Some(account) = self.initial_accounts_state.get(&address) { - return Ok(entry.insert(account.clone())); - } - // Check shared_base (read-only post-system-call snapshot) before hitting store. - if let Some(ref base) = self.shared_base - && let Some(account) = base.get(&address) - { - if !self.skip_initial_tracking { - self.initial_accounts_state.insert(address, account.clone()); - } - return Ok(entry.insert(account.clone())); + + // Fast path: already cached. + if self.current_accounts_state.contains_key(&address) { + return self + .current_accounts_state + .get_mut(&address) + .ok_or(InternalError::AccountNotFound); + } + + // Initial-state fast path. + if let Some(account) = self.initial_accounts_state.get(&address) { + let clone = account.clone(); + return Ok(self.current_accounts_state.entry(address).or_insert(clone)); + } + + // Check shared_base (read-only post-system-call snapshot) before hitting store. 
+ if let Some(ref base) = self.shared_base + && let Some(account) = base.get(&address) + { + let account = account.clone(); + if !self.skip_initial_tracking { + self.initial_accounts_state.insert(address, account.clone()); + } + return Ok(self + .current_accounts_state + .entry(address) + .or_insert(account)); + } + + // Lazy-BAL hook: if the cursor finds this address, materialize info from the BAL + // before falling back to the store. + // + // IMPORTANT: we `.take()` the cursor out of `self.lazy_bal` before calling + // `seed_one_address_info_from_bal`. For partial-coverage accounts (e.g. balance-only + // change with no nonce/code) the helper calls `db.get_account(addr)` internally to + // load the base state from the store before overlaying. If `self.lazy_bal` were still + // `Some(...)` at that point, `get_account` → `load_account` would re-enter this same + // block and recurse infinitely. Taking the cursor out breaks the cycle: the inner call + // sees `lazy_bal = None` and falls straight through to the store. We restore the cursor + // unconditionally afterward (even on error) so the outer caller still sees it. + #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] + { + let cursor_opt = self.lazy_bal.take(); + let helper_result = if let Some(cursor) = cursor_opt.as_ref() { + debug_assert!( + cursor.bal_index >= 1, + "LazyBalCursor bal_index must be >= 1" + ); + let max_idx = cursor.bal_index.saturating_sub(1); + if let Some(&acct_idx) = cursor.index.addr_to_idx.get(&address) { + Some( + seed_one_address_info_from_bal(self, &cursor.bal, acct_idx, max_idx) + .map(|_| true), + ) + } else { + None } - let state = self.store.get_account_state(address)?; - let account = LevmAccount::from(state); - if !self.skip_initial_tracking { - self.initial_accounts_state.insert(address, account.clone()); + } else { + None + }; + // Restore the cursor before propagating any error or returning. 
+ self.lazy_bal = cursor_opt; + if let Some(result) = helper_result { + result.map_err(|e| InternalError::Custom(format!("lazy_bal seed: {e}")))?; + if self.current_accounts_state.contains_key(&address) { + return self + .current_accounts_state + .get_mut(&address) + .ok_or(InternalError::AccountNotFound); } - Ok(entry.insert(account)) } } + + // Store fallback. + let state = self.store.get_account_state(address)?; + let account = LevmAccount::from(state); + if !self.skip_initial_tracking { + self.initial_accounts_state.insert(address, account.clone()); + } + Ok(self + .current_accounts_state + .entry(address) + .or_insert(account)) } /// Gets reference of an account @@ -683,6 +911,7 @@ impl<'a> VM<'a> { key: H256, ) -> Result<(U256, U256, bool), InternalError> { let storage_slot_was_cold = self.substate.add_accessed_slot(address, key); + // SSTORE pre-image flows transitively through get_storage_value, which consults lazy_bal. let current_value = self.get_storage_value(address, key)?; let original_value = match self .storage_original_values @@ -725,6 +954,29 @@ impl<'a> VM<'a> { return Err(InternalError::AccountNotFound); } + // Lazy-BAL hook: copy result out BEFORE taking &mut on current_accounts_state + // so the immutable borrow of lazy_bal is released before the mutable reborrow. 
+ #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] + let bal_hit: Option<U256> = self.db.lazy_bal.as_ref().and_then(|cursor| { + debug_assert!( + cursor.bal_index >= 1, + "LazyBalCursor bal_index must be >= 1" + ); + let max_idx = cursor.bal_index.saturating_sub(1); + let &acct_idx = cursor.index.addr_to_idx.get(&address)?; + seed_one_storage_slot_from_bal(&cursor.bal, acct_idx, key, max_idx) + }); + #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] + if let Some(value) = bal_hit { + let account = self + .db + .current_accounts_state + .get_mut(&address) + .ok_or(InternalError::AccountNotFound)?; + account.storage.insert(key, value); + return Ok(value); + } + let value = self.db.get_value_from_database(address, key)?; // Cache-fill only: this is a read-path miss, not a state mutation. diff --git a/test/Cargo.toml b/test/Cargo.toml index 41fc3b14546..7015ad0810f 100644 --- a/test/Cargo.toml +++ b/test/Cargo.toml @@ -11,6 +11,8 @@ path = "src/lib.rs" rocksdb = ["ethrex-storage/rocksdb"] l2 = [] c-kzg = ["ethrex-common/c-kzg"] +rayon = ["ethrex-levm/rayon"] +eip-8025 = ["ethrex-levm/eip-8025"] [dependencies] ethrex-common.workspace = true diff --git a/test/tests/levm/bal_view_tests.rs b/test/tests/levm/bal_view_tests.rs new file mode 100644 index 00000000000..d7f36c1e85c --- /dev/null +++ b/test/tests/levm/bal_view_tests.rs @@ -0,0 +1,170 @@ +//! BAL lazy-cursor regression tests. +//! +//! Tests T1-T3 exercise the helper functions directly (unit level) because +//! `seed_one_storage_slot_from_bal` and `seed_one_address_info_from_bal` are +//! `#[cfg(all(feature = "rayon", not(feature = "eip-8025")))]`-gated; reaching +//! `execute_block_parallel` from the test crate would require enabling that +//! feature pair and wiring up a full Amsterdam chain config, block, and signed +//! transactions. The helper-level tests cover the same off-by-one boundary and +//! storage-injection invariants that the lazy cursor relies on. 
+ +#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +mod inner { + use ethereum_types::H160; + use ethrex_common::{ + Address, U256, + types::block_access_list::{ + AccountChanges, BalAddressIndex, BalanceChange, BlockAccessList, SlotChange, + StorageChange, + }, + utils::u256_to_h256, + }; + use ethrex_levm::db::gen_db::{ + GeneralizedDatabase, LazyBalCursor, seed_one_address_info_from_bal, + seed_one_storage_slot_from_bal, + }; + use std::sync::Arc; + + use crate::levm::test_db::TestDatabase; + + const CONTRACT: Address = H160([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xC0, 0xDE, + ]); + const SLOT: U256 = U256([0x10, 0, 0, 0]); + const V0: U256 = U256([0xAA, 0, 0, 0]); + const V1: U256 = U256([0xBB, 0, 0, 0]); + + /// Build a minimal BAL with one account (`CONTRACT`) that has a single + /// storage slot written at `block_access_index = 1` (= tx 0's post-value). + fn bal_single_slot_write_at_1() -> BlockAccessList { + let slot_change = SlotChange::with_changes(SLOT, vec![StorageChange::new(1, V0)]); + let acct = AccountChanges::new(CONTRACT).with_storage_changes(vec![slot_change]); + BlockAccessList::from_accounts(vec![acct]) + } + + /// T1: `tx1_sees_tx0_write` + /// + /// Unit-level test of the off-by-one boundary: `seed_one_storage_slot_from_bal` + /// with `max_idx = 1` must return `V0` (the write made by tx 0, whose + /// BAL index is 1). This is the value tx 1 observes as the pre-state of the + /// slot, mirroring what `LazyBalCursor` surfaces on cache-miss. + #[test] + fn tx1_sees_tx0_write() { + let bal = bal_single_slot_write_at_1(); + let key = u256_to_h256(SLOT); + + // tx 1 has bal_index = 2, so max_idx = 1 (same as seed_db_from_bal semantics). 
+ let result = seed_one_storage_slot_from_bal(&bal, 0, key, 1); + + assert_eq!( + result, + Some(V0), + "tx 1 should see tx 0's write (V0) as the slot pre-state" + ); + } + + /// T2: `load_account_does_not_inject_storage` + /// + /// `seed_one_address_info_from_bal` must not populate `account.storage`. + /// It handles balance/nonce/code only; storage is seeded separately (or + /// lazily via the cursor). An extraneous storage injection would corrupt + /// the initial-storage baseline used for net-zero filtering. + #[test] + fn load_account_does_not_inject_storage() { + // Build a BAL entry with a balance change AND a storage write. The + // account info seed must ignore the storage write. + let slot_change = SlotChange::with_changes(SLOT, vec![StorageChange::new(1, V0)]); + let acct = AccountChanges::new(CONTRACT) + .with_balance_changes(vec![BalanceChange::new(1, U256::from(1_000u64))]) + .with_storage_changes(vec![slot_change]); + let bal = BlockAccessList::from_accounts(vec![acct]); + + let db_backend = Arc::new(TestDatabase::new()); + let mut db = GeneralizedDatabase::new(db_backend); + + let applied = seed_one_address_info_from_bal(&mut db, &bal, 0, 1) + .expect("seed_one_address_info_from_bal should not fail"); + + assert!(applied, "balance change should have been applied"); + + // Storage must not be injected by the info seed. 
+ let acct_state = db + .current_accounts_state + .get(&CONTRACT) + .expect("account should be in cache after info seed"); + assert!( + acct_state.storage.is_empty(), + "seed_one_address_info_from_bal must not populate account.storage" + ); + } + + /// T3: `sstore_sees_prior_write` + /// + /// Verifies that when the same slot has two `slot_changes` entries (written + /// at indices 1 and 2), the cursor boundary semantics are correct: + /// - `max_idx = 1` returns `V0` (only tx 0's write is visible) + /// - `max_idx = 2` returns `V1` (tx 1's write is also visible) + /// This mirrors the pre-write value tx 1 and tx 2 would observe respectively. + #[test] + fn sstore_sees_prior_write() { + let slot_change = SlotChange::with_changes( + SLOT, + vec![ + StorageChange::new(1, V0), // tx 0 writes V0 + StorageChange::new(2, V1), // tx 1 writes V1 + ], + ); + let acct = AccountChanges::new(CONTRACT).with_storage_changes(vec![slot_change]); + let bal = BlockAccessList::from_accounts(vec![acct]); + let key = u256_to_h256(SLOT); + + // tx 1 cursor (bal_index=2, max_idx=1): should see V0 from tx 0. + let at_1 = seed_one_storage_slot_from_bal(&bal, 0, key, 1); + assert_eq!(at_1, Some(V0), "at max_idx=1 should see V0 (tx 0's write)"); + + // tx 2 cursor (bal_index=3, max_idx=2): should see V1 from tx 1. + let at_2 = seed_one_storage_slot_from_bal(&bal, 0, key, 2); + assert_eq!(at_2, Some(V1), "at max_idx=2 should see V1 (tx 1's write)"); + } + + /// T4: `lazy_load_account_partial_coverage_does_not_recurse` + /// + /// A BAL with a partial-coverage account (balance change only, no nonce, + /// no code, no storage) triggers the `else` branch in + /// `seed_one_address_info_from_bal`, which calls `db.get_account(addr)` to + /// load the base state from the store before overlaying. Without the `.take()` + /// fix in `load_account`, that inner `get_account` call would re-enter the + /// lazy-BAL hook and recurse infinitely (stack overflow). 
This test verifies + /// the fix: `load_account` on a per-tx DB with `lazy_bal = Some(...)` must + /// complete successfully and apply the balance overlay. + #[test] + fn lazy_load_account_partial_coverage_does_not_recurse() { + // Build a BAL with balance-only change at index 1 for CONTRACT. + // No nonce, no code, no storage — this is the partial-coverage case. + let balance_val = U256::from(42_000u64); + let acct = AccountChanges::new(CONTRACT) + .with_balance_changes(vec![BalanceChange::new(1, balance_val)]); + let bal = BlockAccessList::from_accounts(vec![acct]); + let arc_bal = Arc::new(bal); + let index: BalAddressIndex = arc_bal.build_validation_index(); + let arc_idx = Arc::new(index); + + let mut db = GeneralizedDatabase::new(Arc::new(TestDatabase::new())); + db.lazy_bal = Some(LazyBalCursor { + bal: arc_bal, + bal_index: 2, // tx 1's cursor: effective max_idx = 1 + index: arc_idx, + }); + + // This must NOT stack-overflow. The .take() fix in load_account ensures + // the inner db.get_account call inside seed_one_address_info_from_bal + // sees lazy_bal = None and falls straight to the store. 
+ let acc = db + .get_account(CONTRACT) + .expect("partial-coverage load_account must not recurse"); + assert_eq!( + acc.info.balance, balance_val, + "balance overlay from BAL should have been applied" + ); + } +} diff --git a/test/tests/levm/mod.rs b/test/tests/levm/mod.rs index e22daed7550..55783647a00 100644 --- a/test/tests/levm/mod.rs +++ b/test/tests/levm/mod.rs @@ -1,5 +1,6 @@ mod test_db; +mod bal_view_tests; mod bls12_tests; mod eip7702_tests; mod eip7708_tests; From 5b2815b76593c0ff2e082535336f0864b6afa348 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Tue, 19 May 2026 10:37:24 +0200 Subject: [PATCH 2/8] docs(changelog): add lazy BAL cursor perf entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c90b3563968..32aee4082bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2026-05-19 + +- Lazy BAL cursor for per-tx parallel execution [#6669](https://github.com/lambdaclass/ethrex/pull/6669) + ### 2026-05-15 - Replace synchronous disk I/O with async operations in snap sync [#6113](https://github.com/lambdaclass/ethrex/pull/6113) From 2be9ef7ec35aaaa199b12f7ff8f24e2b6e96c8b8 Mon Sep 17 00:00:00 2001 From: Edgar Date: Tue, 19 May 2026 21:31:27 +0200 Subject: [PATCH 3/8] fix(l1): lazy BAL cursor must precede shared_base in load_account The per-tx GeneralizedDatabase in execute_block_parallel is configured with both a shared_base (pre-block snapshot of system-touched addresses, captured from initial_accounts_state after prepare_block) and a LazyBalCursor that materialises the BAL prefix on cache-miss. load_account previously consulted shared_base before the cursor, so any address present in both would short- circuit to the pre-block balance / nonce / code and miss the BAL overlay. For a predeploy touched by prepare_block (e.g. 
the withdrawal / consolidation request contracts) whose info is then mutated by a prior tx in the same block, a later tx reading that info via BALANCE / EXTCODE* would observe the stale pre-block value. Storage reads are unaffected because shared_base accounts are cloned with empty .storage and slot reads go through the lazy_bal hook in get_storage_value. Reorder load_account: lazy_bal hook runs first, falling back to shared_base only when the cursor has no entry for the address. The .take() guard already prevents the partial-coverage recursion through db.get_account; the inner call now lands on shared_base (or store), then the outer overlays BAL info. Regression test in test/tests/levm/bal_view_tests.rs constructs a per-tx db with a shared_base balance of 0 and a BAL balance_change of 42_000 at block_access_index 1, and asserts load_account returns the BAL value. Verified clean: full blockchain ef-tests (8721 + 93 = 8814 tests, 0 failed) on a freshly downloaded amsterdam fixtures bundle. --- crates/vm/levm/src/db/gen_db.rs | 55 ++++++++++++++++------------- test/tests/levm/bal_view_tests.rs | 57 +++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 23 deletions(-) diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index eeefe5dff6e..1d55b0f64d9 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -191,9 +191,11 @@ pub struct GeneralizedDatabase { pub store: Arc, pub current_accounts_state: CacheDB, pub initial_accounts_state: CacheDB, - /// Shared read-only base state (e.g. post-system-call snapshot for parallel groups). - /// Checked on load_account between initial_accounts_state and store lookups. - /// Accounts are cloned into initial_accounts_state on first access (lazy, per-account). + /// Shared read-only base state (pre-block snapshot of system-touched addresses for + /// parallel groups, captured from `initial_accounts_state` after `prepare_block`). 
+ /// Checked on `load_account` AFTER the `lazy_bal` hook so the BAL overlay (which + /// includes system-call effects at idx 0) takes precedence for any address the BAL + /// covers. Accounts are cloned into `initial_accounts_state` on first access. pub shared_base: Option>, pub codes: FxHashMap, pub code_metadata: FxHashMap, @@ -343,30 +345,23 @@ impl GeneralizedDatabase { return Ok(self.current_accounts_state.entry(address).or_insert(clone)); } - // Check shared_base (read-only post-system-call snapshot) before hitting store. - if let Some(ref base) = self.shared_base - && let Some(account) = base.get(&address) - { - let account = account.clone(); - if !self.skip_initial_tracking { - self.initial_accounts_state.insert(address, account.clone()); - } - return Ok(self - .current_accounts_state - .entry(address) - .or_insert(account)); - } - // Lazy-BAL hook: if the cursor finds this address, materialize info from the BAL - // before falling back to the store. + // before consulting `shared_base` or the store. + // + // Ordering matters: `shared_base` holds the pre-block snapshot of system-touched + // addresses, but the canonical pre-state for tx N is the BAL prefix up to its + // `bal_index` (= system-call effects at idx 0 plus all prior txs). If `shared_base` + // were consulted first for an address it covers, the BAL overlay would be skipped + // and tx N would observe stale balance/nonce/code (consensus bug for system-touched + // predeploys mutated by a prior tx in the same block). // - // IMPORTANT: we `.take()` the cursor out of `self.lazy_bal` before calling + // We `.take()` the cursor out of `self.lazy_bal` before calling // `seed_one_address_info_from_bal`. For partial-coverage accounts (e.g. balance-only // change with no nonce/code) the helper calls `db.get_account(addr)` internally to - // load the base state from the store before overlaying. 
If `self.lazy_bal` were still - // `Some(...)` at that point, `get_account` → `load_account` would re-enter this same - // block and recurse infinitely. Taking the cursor out breaks the cycle: the inner call - // sees `lazy_bal = None` and falls straight through to the store. We restore the cursor + // load the base state before overlaying. If `self.lazy_bal` were still `Some(...)` + // at that point, `get_account` → `load_account` would re-enter this same block and + // recurse infinitely. Taking the cursor out breaks the cycle: the inner call sees + // `lazy_bal = None` and falls through to `shared_base`/store. We restore the cursor // unconditionally afterward (even on error) so the outer caller still sees it. #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] { @@ -401,6 +396,20 @@ impl GeneralizedDatabase { } } + // Check shared_base (read-only pre-block snapshot) before hitting store. + if let Some(ref base) = self.shared_base + && let Some(account) = base.get(&address) + { + let account = account.clone(); + if !self.skip_initial_tracking { + self.initial_accounts_state.insert(address, account.clone()); + } + return Ok(self + .current_accounts_state + .entry(address) + .or_insert(account)); + } + // Store fallback. let state = self.store.get_account_state(address)?; let account = LevmAccount::from(state); diff --git a/test/tests/levm/bal_view_tests.rs b/test/tests/levm/bal_view_tests.rs index d7f36c1e85c..66215b68815 100644 --- a/test/tests/levm/bal_view_tests.rs +++ b/test/tests/levm/bal_view_tests.rs @@ -127,6 +127,63 @@ mod inner { assert_eq!(at_2, Some(V1), "at max_idx=2 should see V1 (tx 1's write)"); } + /// T4b: `lazy_bal_takes_precedence_over_shared_base` + /// + /// Regression test for the consensus issue flagged in PR #6669 review: when an + /// address is present in BOTH `shared_base` (pre-block snapshot of system-touched + /// addresses) AND the BAL prefix (e.g. 
a system-contract predeploy mutated by a + /// prior tx in the same block), `load_account` must surface the BAL-overlaid value, + /// not the stale `shared_base` value. + /// + /// Setup mirrors `execute_block_parallel`: + /// - `shared_base` holds `CONTRACT` with `balance = 0` (pre-block state). + /// - BAL has a balance change for `CONTRACT` at `block_access_index = 1` + /// (= post-tx-0 state). + /// - Per-tx DB for tx 1 is constructed with both `shared_base` and a + /// `LazyBalCursor` at `bal_index = 2` (so `max_idx = 1`). + /// + /// Expected: `load_account(CONTRACT)` returns the BAL post-balance (42_000), + /// not the `shared_base` pre-balance (0). Before the fix, `shared_base` short- + /// circuited the lazy hook and tx 1 saw the stale value. + #[test] + fn lazy_bal_takes_precedence_over_shared_base() { + use ethrex_common::types::AccountInfo; + use ethrex_levm::account::LevmAccount; + use rustc_hash::FxHashMap; + + let post_balance = U256::from(42_000u64); + + let mut shared = FxHashMap::default(); + shared.insert( + CONTRACT, + LevmAccount { + info: AccountInfo::default(), + ..Default::default() + }, + ); + let shared_base = Arc::new(shared); + + let acct = AccountChanges::new(CONTRACT) + .with_balance_changes(vec![BalanceChange::new(1, post_balance)]); + let bal = BlockAccessList::from_accounts(vec![acct]); + let arc_bal = Arc::new(bal); + let arc_idx = Arc::new(arc_bal.build_validation_index()); + + let mut db = + GeneralizedDatabase::new_with_shared_base(Arc::new(TestDatabase::new()), shared_base); + db.lazy_bal = Some(LazyBalCursor { + bal: arc_bal, + bal_index: 2, + index: arc_idx, + }); + + let acc = db.get_account(CONTRACT).expect("load_account must succeed"); + assert_eq!( + acc.info.balance, post_balance, + "lazy_bal overlay must take precedence over shared_base; saw stale shared_base value" + ); + } + /// T4: `lazy_load_account_partial_coverage_does_not_recurse` /// /// A BAL with a partial-coverage account (balance change only, no nonce, From 
f7caa969abde82f14925ad8c6abc28bf5158d69d Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 20 May 2026 13:20:16 +0200 Subject: [PATCH 4/8] perf(l1): O(1) BAL slot lookup in eager+lazy seeding Address greptile findings on PR #6669: - seed_db_from_bal eager loop walked storage_changes, then seed_one_storage_slot_from_bal re-found the same sc by slot key. Use the outer sc directly via a new post_value_at_or_before helper. - seed_one_storage_slot_from_bal (lazy cursor) did iter().find() over storage_changes on every cache miss. Resolve slot in O(1) via a new per-account slot_idx_by_account map on BalAddressIndex, built once per block in build_validation_index. Safe under EIP-7928: canonical-ordering validation enforces strictly ascending unique slots per account, so map insert order matches the former find() semantics. Verified clean: 8721 + 93 ef-tests pass on a clean vectors checkout. --- crates/common/types/block_access_list.rs | 19 +++++++++++- crates/vm/backends/levm/mod.rs | 8 ++--- crates/vm/levm/src/db/gen_db.rs | 39 +++++++++++++++--------- test/tests/levm/bal_view_tests.rs | 8 +++-- 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/crates/common/types/block_access_list.rs b/crates/common/types/block_access_list.rs index 547a70ac292..68cf1d666e3 100644 --- a/crates/common/types/block_access_list.rs +++ b/crates/common/types/block_access_list.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; use crate::constants::{EMPTY_BLOCK_ACCESS_LIST_HASH, SYSTEM_ADDRESS}; -use crate::utils::keccak; +use crate::utils::{keccak, u256_to_h256}; /// Encode a slice of items in sorted order without cloning. 
fn encode_sorted_by(items: &[T], buf: &mut dyn BufMut, key_fn: F) @@ -560,6 +560,8 @@ impl BlockAccessList { FxHashMap::with_capacity_and_hasher(self.inner.len(), Default::default()); let mut tx_to_accounts: FxHashMap> = FxHashMap::default(); let mut accounts_by_min_index: Vec<(u32, usize)> = Vec::new(); + let mut slot_idx_by_account: Vec> = + Vec::with_capacity(self.inner.len()); for (i, acct) in self.inner.iter().enumerate() { addr_to_idx.insert(acct.address, i); @@ -588,6 +590,15 @@ impl BlockAccessList { for idx in seen_indices { tx_to_accounts.entry(idx).or_default().push(i); } + + // Per-account slot → storage_changes index map for O(1) lookup on + // lazy-cursor cache miss. Empty for accounts with no storage writes. + let mut slot_map: FxHashMap = + FxHashMap::with_capacity_and_hasher(acct.storage_changes.len(), Default::default()); + for (sc_idx, sc) in acct.storage_changes.iter().enumerate() { + slot_map.insert(u256_to_h256(sc.slot), sc_idx); + } + slot_idx_by_account.push(slot_map); } accounts_by_min_index.sort_unstable_by_key(|(min_idx, _)| *min_idx); @@ -596,6 +607,7 @@ impl BlockAccessList { addr_to_idx, tx_to_accounts, accounts_by_min_index, + slot_idx_by_account, } } } @@ -612,6 +624,11 @@ pub struct BalAddressIndex { /// Used by `seed_db_from_bal` to skip accounts with no changes at indices <= max_idx. /// Only includes accounts that have at least one mutation (balance/nonce/code/storage write). pub accounts_by_min_index: Vec<(u32, usize)>, + /// Per-account slot → `storage_changes` index map. Lets `seed_one_storage_slot_from_bal` + /// resolve a slot key to its `SlotChange` in O(1) instead of a linear scan. Indexed by + /// the same `acct_idx` used by `addr_to_idx`; empty inner map for accounts with no + /// storage writes. Slot uniqueness is enforced by canonical-ordering validation. + pub slot_idx_by_account: Vec>, } /// Binary search for exact match at `idx` in balance changes (sorted by block_access_index). 
diff --git a/crates/vm/backends/levm/mod.rs b/crates/vm/backends/levm/mod.rs index d6bd88d1440..41cdf3801e3 100644 --- a/crates/vm/backends/levm/mod.rs +++ b/crates/vm/backends/levm/mod.rs @@ -49,7 +49,7 @@ use ethrex_levm::constants::{ use ethrex_levm::db::gen_db::GeneralizedDatabase; #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] use ethrex_levm::db::gen_db::{ - LazyBalCursor, code_from_bal, seed_one_address_info_from_bal, seed_one_storage_slot_from_bal, + LazyBalCursor, code_from_bal, post_value_at_or_before, seed_one_address_info_from_bal, }; #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] use ethrex_levm::db::{Database, gen_db::CacheDB}; @@ -922,9 +922,9 @@ impl LEVM { .get_account_mut(addr) .map_err(|e| EvmError::Custom(format!("seed storage mut: {e}")))?; for sc in &acct_changes.storage_changes { - let key = ethrex_common::utils::u256_to_h256(sc.slot); - if let Some(value) = seed_one_storage_slot_from_bal(bal, acct_idx, key, max_idx) { - acc.storage.insert(key, value); + if let Some(value) = post_value_at_or_before(sc, max_idx) { + acc.storage + .insert(ethrex_common::utils::u256_to_h256(sc.slot), value); } } } diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index 1d55b0f64d9..ddc3beef20e 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -7,7 +7,7 @@ use ethrex_common::types::Account; use ethrex_common::types::Code; use ethrex_common::types::CodeMetadata; use ethrex_common::types::block_access_list::{ - BalAddressIndex, BlockAccessList, BlockAccessListRecorder, + BalAddressIndex, BlockAccessList, BlockAccessListRecorder, SlotChange, }; use ethrex_common::utils::ZERO_U256; @@ -148,29 +148,38 @@ pub fn seed_one_address_info_from_bal( Ok(true) } +/// Select the post-value of a single `SlotChange` up to `max_idx`. +/// +/// Pure read; returns `Some(value)` if any `slot_changes` entry has +/// `block_access_index <= max_idx`, `None` otherwise. 
+#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +pub fn post_value_at_or_before(sc: &SlotChange, max_idx: u32) -> Option { + let pos = sc + .slot_changes + .partition_point(|c| c.block_access_index <= max_idx); + sc.slot_changes + .get(pos.saturating_sub(1)) + .filter(|_| pos > 0) + .map(|c| c.post_value) +} + /// Read the post-value of a single storage slot from the BAL up to `max_idx`. /// -/// Pure read; does not touch `db`. Returns `Some(value)` if a change at -/// `block_access_index <= max_idx` exists for `key`, `None` otherwise. +/// O(1) slot resolution via the precomputed `slot_idx_by_account` map in +/// `BalAddressIndex`. Pure read; does not touch `db`. #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] pub fn seed_one_storage_slot_from_bal( bal: &BlockAccessList, + index: &BalAddressIndex, acct_idx: usize, key: H256, max_idx: u32, ) -> Option { let acct_changes = bal.accounts().get(acct_idx)?; - let sc = acct_changes - .storage_changes - .iter() - .find(|sc| ethrex_common::utils::u256_to_h256(sc.slot) == key)?; - let pos = sc - .slot_changes - .partition_point(|c| c.block_access_index <= max_idx); - sc.slot_changes - .get(pos.saturating_sub(1)) - .filter(|_| pos > 0) - .map(|c| c.post_value) + let slot_map = index.slot_idx_by_account.get(acct_idx)?; + let sc_idx = *slot_map.get(&key)?; + let sc = acct_changes.storage_changes.get(sc_idx)?; + post_value_at_or_before(sc, max_idx) } /// Compute code hash and optional `Code` object from raw bytecode in a BAL entry. 
@@ -973,7 +982,7 @@ impl<'a> VM<'a> { ); let max_idx = cursor.bal_index.saturating_sub(1); let &acct_idx = cursor.index.addr_to_idx.get(&address)?; - seed_one_storage_slot_from_bal(&cursor.bal, acct_idx, key, max_idx) + seed_one_storage_slot_from_bal(&cursor.bal, &cursor.index, acct_idx, key, max_idx) }); #[cfg(all(feature = "rayon", not(feature = "eip-8025")))] if let Some(value) = bal_hit { diff --git a/test/tests/levm/bal_view_tests.rs b/test/tests/levm/bal_view_tests.rs index 66215b68815..6e6175a77aa 100644 --- a/test/tests/levm/bal_view_tests.rs +++ b/test/tests/levm/bal_view_tests.rs @@ -51,10 +51,11 @@ mod inner { #[test] fn tx1_sees_tx0_write() { let bal = bal_single_slot_write_at_1(); + let index = bal.build_validation_index(); let key = u256_to_h256(SLOT); // tx 1 has bal_index = 2, so max_idx = 1 (same as seed_db_from_bal semantics). - let result = seed_one_storage_slot_from_bal(&bal, 0, key, 1); + let result = seed_one_storage_slot_from_bal(&bal, &index, 0, key, 1); assert_eq!( result, @@ -116,14 +117,15 @@ mod inner { ); let acct = AccountChanges::new(CONTRACT).with_storage_changes(vec![slot_change]); let bal = BlockAccessList::from_accounts(vec![acct]); + let index = bal.build_validation_index(); let key = u256_to_h256(SLOT); // tx 1 cursor (bal_index=2, max_idx=1): should see V0 from tx 0. - let at_1 = seed_one_storage_slot_from_bal(&bal, 0, key, 1); + let at_1 = seed_one_storage_slot_from_bal(&bal, &index, 0, key, 1); assert_eq!(at_1, Some(V0), "at max_idx=1 should see V0 (tx 0's write)"); // tx 2 cursor (bal_index=3, max_idx=2): should see V1 from tx 1. 
- let at_2 = seed_one_storage_slot_from_bal(&bal, 0, key, 2); + let at_2 = seed_one_storage_slot_from_bal(&bal, &index, 0, key, 2); assert_eq!(at_2, Some(V1), "at max_idx=2 should see V1 (tx 1's write)"); } From df05e95566d4228cd509d833cead3a7af090eb1a Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 20 May 2026 13:41:12 +0200 Subject: [PATCH 5/8] fix(l1): cfg-gate SlotChange import in gen_db L2 lint (no rayon feature) flagged unused import: SlotChange, since post_value_at_or_before is rayon-gated. --- crates/vm/levm/src/db/gen_db.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index ddc3beef20e..c57300841ae 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -7,8 +7,10 @@ use ethrex_common::types::Account; use ethrex_common::types::Code; use ethrex_common::types::CodeMetadata; use ethrex_common::types::block_access_list::{ - BalAddressIndex, BlockAccessList, BlockAccessListRecorder, SlotChange, + BalAddressIndex, BlockAccessList, BlockAccessListRecorder, }; +#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +use ethrex_common::types::block_access_list::SlotChange; use ethrex_common::utils::ZERO_U256; use super::Database; From 7efae28627930d7179bfbca4d089de06ab488bb0 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Wed, 20 May 2026 14:12:04 +0200 Subject: [PATCH 6/8] style(l1): cargo fmt SlotChange import ordering --- crates/vm/levm/src/db/gen_db.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index c57300841ae..66806fa51e0 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -6,11 +6,11 @@ use ethrex_common::U256; use ethrex_common::types::Account; use ethrex_common::types::Code; use ethrex_common::types::CodeMetadata; +#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] +use 
ethrex_common::types::block_access_list::SlotChange; use ethrex_common::types::block_access_list::{ BalAddressIndex, BlockAccessList, BlockAccessListRecorder, }; -#[cfg(all(feature = "rayon", not(feature = "eip-8025")))] -use ethrex_common::types::block_access_list::SlotChange; use ethrex_common::utils::ZERO_U256; use super::Database; From 8c5a2208db4ddea197ac95cf8736f1bbcb638cc5 Mon Sep 17 00:00:00 2001 From: Edgar Date: Wed, 20 May 2026 16:28:21 +0200 Subject: [PATCH 7/8] docs(levm): note storage-lazy invariant on BAL has_all_info shortcut --- crates/vm/levm/src/db/gen_db.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index 66806fa51e0..0325e8a6090 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -87,6 +87,11 @@ pub fn seed_one_address_info_from_bal( // When BAL covers all account info fields (balance + nonce + code), insert // a default LevmAccount directly to skip the store/shared_base lookup. // For partial coverage, load from store to fill missing fields. + // + // Invariant: `account.storage` is left empty here. Storage is materialized + // lazily through `get_storage_value` (which also consults the cursor). + // Callers must NOT assume `account.storage` is fully populated after this + // path — iterate-all-keys / bulk-read patterns will see an empty map. let has_all_info = balance_pos > 0 && nonce_pos > 0 && code_pos > 0; if has_all_info { use ethrex_common::constants::EMPTY_KECCACK_HASH; From df9a356c2c24ded4e8fee3739ff3142c05efd409 Mon Sep 17 00:00:00 2001 From: Edgar Date: Thu, 21 May 2026 16:07:17 +0200 Subject: [PATCH 8/8] refactor(l1): address iovoid review on bal-lazy-cursor Replace fragile line-number references in seed_db_from_bal doc with descriptive context. 
--- crates/vm/backends/levm/mod.rs | 9 +++++---- crates/vm/levm/src/db/gen_db.rs | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/vm/backends/levm/mod.rs b/crates/vm/backends/levm/mod.rs index 41cdf3801e3..352196b029e 100644 --- a/crates/vm/backends/levm/mod.rs +++ b/crates/vm/backends/levm/mod.rs @@ -873,10 +873,11 @@ impl LEVM { Ok(updates) } - /// Eager BAL prefix seed — used only by the outer DB path (system-call recovery - /// at `:440` and post-tx outer seed at `:463`). Per-tx parallel execution uses - /// `LazyBalCursor` in `execute_block_parallel`; see also `seed_one_address_info_from_bal` - /// and `seed_one_storage_slot_from_bal` in `ethrex_levm::db::gen_db`. + /// Eager BAL prefix seed — used only by the outer DB path (parallel-execution + /// fallback recovery and post-tx outer seed before request extraction). + /// Per-tx parallel execution uses `LazyBalCursor` in `execute_block_parallel`; + /// see also `seed_one_address_info_from_bal` and `seed_one_storage_slot_from_bal` + /// in `ethrex_levm::db::gen_db`. /// /// Pre-seed a GeneralizedDatabase with BAL-derived state for a specific tx. /// diff --git a/crates/vm/levm/src/db/gen_db.rs b/crates/vm/levm/src/db/gen_db.rs index 0325e8a6090..1cf69a8e20f 100644 --- a/crates/vm/levm/src/db/gen_db.rs +++ b/crates/vm/levm/src/db/gen_db.rs @@ -347,7 +347,6 @@ impl GeneralizedDatabase { tracker.insert(address); } - // Fast path: already cached. if self.current_accounts_state.contains_key(&address) { return self .current_accounts_state