lambdaclass · ilitteri · Mar 2, 2026 · Mar 2, 2026 · Mar 3, 2026 · Mar 3, 2026
@@ -63,6 +63,7 @@ KNOWN_FLAKY_TESTS=(
   "Invalid Missing Ancestor Syncing ReOrg, Timestamp, EmptyTxs=False, CanonicalReOrg=False, Invalid P8"
   "Invalid Missing Ancestor Syncing ReOrg, Timestamp, EmptyTxs=False, CanonicalReOrg=True, Invalid P8"
   "Invalid Missing Ancestor Syncing ReOrg, Transaction Value, EmptyTxs=False, CanonicalReOrg=False, Invalid P9"
+  "Invalid Missing Ancestor Syncing ReOrg, Transaction Nonce, EmptyTxs=False, CanonicalReOrg=True, Invalid P9"
 )
 
 # Build a jq filter that excludes known-flaky tests.

@@ -2,6 +2,10 @@
 
 ## Perf
 
+### 2026-03-03
+
+- Add bloom filter to skip trie seeks for non-existent storage slots [#6288](https://github.com/lambdaclass/ethrex/pull/6288)
+
 ### 2026-03-02
 
 - SIMD-accelerate trie nibble operations for block execution [#6286](https://github.com/lambdaclass/ethrex/pull/6286)

@@ -0,0 +1,137 @@
+use std::fmt;
+use std::sync::OnceLock;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use ethrex_common::{Address, H256};
+use fastbloom::AtomicBloomFilter;
+use rustc_hash::FxBuildHasher;
+
+/// Target false-positive probability (1%) handed to the bloom filter builder
+/// when the filter is allocated.
+const FALSE_POSITIVE_RATE: f64 = 0.01;
+
+/// Bloom filter that tracks which (address, storage_key) pairs have non-zero
+/// storage values. Used to skip expensive trie lookups for slots that were
+/// never written to.
+///
+/// The filter is allocated lazily on first `insert()` to avoid ~240MB of
+/// upfront memory when the bloom is never used (e.g., dev mode, testnets).
+/// NOTE(review): the ~240MB figure presumably corresponds to one specific
+/// `capacity`; the real footprint follows from `capacity` and
+/// `FALSE_POSITIVE_RATE`, which are the two inputs given to the builder.
+pub struct StorageBloomFilter {
+    /// Underlying bloom; stays empty (unallocated) until first needed.
+    filter: OnceLock<AtomicBloomFilter<FxBuildHasher>>,
+    /// Expected number of items, used to size the bloom when it is allocated.
+    capacity: usize,
+    /// While `false`, `might_contain` is a pass-through that returns `true`.
+    /// Starts as `false` and is set by `enable()`.
+    enabled: AtomicBool,
+}
+
+/// Diagnostic formatting for [`StorageBloomFilter`].
+///
+/// Prints the configured capacity, whether the filter is enabled, and whether
+/// the underlying bloom has been allocated yet. The bloom's bit array itself
+/// is deliberately left out (hence the trailing `..` in the output).
+impl fmt::Debug for StorageBloomFilter {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("StorageBloomFilter")
+            .field("capacity", &self.capacity)
+            // Diagnostic snapshot only; nothing is synchronized through this read.
+            .field("enabled", &self.enabled.load(Ordering::Relaxed))
+            // `get()` never triggers the lazy allocation.
+            .field("allocated", &self.filter.get().is_some())
+            .finish_non_exhaustive()
+    }
+}
+
+impl StorageBloomFilter {
+    /// Creates a disabled filter sized for `capacity` expected items.
+    ///
+    /// The underlying bloom is not allocated here; that happens on the first
+    /// call to `insert()`.
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            filter: OnceLock::new(),
+            capacity,
+            enabled: AtomicBool::new(false),
+        }
+    }
+
+    /// Activate the bloom filter after it has been populated.
+    /// Before this is called, `might_contain` always returns `true` (pass-through).
+    ///
+    /// # Precondition
+    ///
+    /// The filter MUST have been fully populated (via `insert`) for ALL
+    /// (address, storage_key) pairs that exist in the trie before this is
+    /// called. This includes genesis slots, snap-synced data, and all slots
+    /// written during block processing. Calling `enable()` prematurely will
+    /// cause false negatives that silently corrupt storage reads.
+    // NOTE(review): presumably nothing outside the unit tests calls this yet,
+    // which is why the lint is silenced; drop the `allow` once a caller exists.
+    #[allow(dead_code)]
+    pub fn enable(&self) {
+        self.enabled.store(true, Ordering::Release);
+    }
+
+    /// Record that a non-zero value exists at (address, key).
+    ///
+    /// Called unconditionally on every non-zero storage write, even while the
+    /// filter is disabled. This is intentional warm-up: the filter is populated
+    /// in the background so it is ready when `enable()` is eventually called.
+    ///
+    /// The first call allocates the underlying bloom.
+    pub fn insert(&self, address: Address, key: H256) {
+        let bloom_key = Self::make_key(address, key);
+        self.filter().insert(&bloom_key);
+    }
+
+    /// Returns `true` if the slot *might* contain a non-zero value.
+    /// Returns `false` if the slot was definitely never written.
+    /// When the filter is not yet enabled, always returns `true` (pass-through).
+    ///
+    /// This never allocates the underlying bloom: if nothing has been inserted
+    /// yet, an enabled filter answers `false` directly, which is exactly what
+    /// an empty bloom would answer.
+    pub fn might_contain(&self, address: Address, key: H256) -> bool {
+        if !self.enabled.load(Ordering::Acquire) {
+            return true;
+        }
+        let bloom_key = Self::make_key(address, key);
+        // Read through `get()` rather than `filter()` so a lookup cannot
+        // trigger the lazy allocation that is reserved for `insert()`.
+        self.filter
+            .get()
+            .is_some_and(|filter| filter.contains(&bloom_key))
+    }
+
+    /// Returns the underlying bloom, allocating it on first use with
+    /// `FALSE_POSITIVE_RATE` and the configured `capacity`.
+    fn filter(&self) -> &AtomicBloomFilter<FxBuildHasher> {
+        self.filter.get_or_init(|| {
+            AtomicBloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
+                .hasher(FxBuildHasher)
+                .expected_items(self.capacity)
+        })
+    }
+
+    /// Builds the 52-byte bloom key: the address bytes in `[0, 20)` followed
+    /// by the storage-key bytes in `[20, 52)`.
+    fn make_key(address: Address, key: H256) -> [u8; 52] {
+        let mut buf = [0u8; 52];
+        buf[..20].copy_from_slice(address.as_bytes());
+        buf[20..].copy_from_slice(key.as_bytes());
+        buf
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Address whose 20 bytes are all `n`.
+    fn addr(n: u8) -> Address {
+        Address::from([n; 20])
+    }
+
+    /// Storage key whose 32 bytes are all `n`.
+    fn key(n: u8) -> H256 {
+        H256::from([n; 32])
+    }
+
+    #[test]
+    fn disabled_is_pass_through() {
+        let bloom = StorageBloomFilter::new(1000);
+        // Before enable, might_contain always returns true
+        assert!(bloom.might_contain(addr(1), key(1)));
+        assert!(bloom.might_contain(addr(99), key(255)));
+    }
+
+    #[test]
+    fn no_false_negatives_after_enable() {
+        let bloom = StorageBloomFilter::new(1000);
+        bloom.insert(addr(1), key(10));
+        bloom.insert(addr(2), key(20));
+        bloom.enable();
+        // Inserted keys must always return true
+        assert!(bloom.might_contain(addr(1), key(10)));
+        assert!(bloom.might_contain(addr(2), key(20)));
+    }
+
+    #[test]
+    fn rejects_unknown_after_enable() {
+        let bloom = StorageBloomFilter::new(1000);
+        bloom.insert(addr(1), key(10));
+        bloom.enable();
+        // A never-inserted key should return false (with high probability)
+        assert!(!bloom.might_contain(addr(99), key(99)));
+    }
+
+    #[test]
+    fn enabled_without_inserts_rejects_everything() {
+        // Edge case: enabling a filter that never saw an insert. Nothing was
+        // recorded, so every lookup must be reported as "definitely absent".
+        let bloom = StorageBloomFilter::new(1000);
+        bloom.enable();
+        assert!(!bloom.might_contain(addr(1), key(1)));
+        assert!(!bloom.might_contain(addr(99), key(255)));
+    }
+
+    #[test]
+    fn make_key_distinctness() {
+        // Different (address, key) pairs must produce different bloom keys
+        let k1 = StorageBloomFilter::make_key(addr(1), key(2));
+        let k2 = StorageBloomFilter::make_key(addr(2), key(1));
+        assert_ne!(k1, k2);
+        // Layout: address bytes first, storage-key bytes after.
+        assert_eq!(&k1[..20], addr(1).as_bytes());
+        assert_eq!(&k1[20..], key(2).as_bytes());
+    }
+}
@@ -66,6 +66,7 @@
 
 pub mod api;
 pub mod backend;
+mod bloom;
 pub mod error;
 mod layering;
 pub mod rlp;

@@ -1,5 +1,6 @@
 #[cfg(feature = "rocksdb")]
 use crate::backend::rocksdb::RocksDBBackend;
+use crate::bloom::StorageBloomFilter;
 use crate::{
     STORE_METADATA_FILENAME, STORE_SCHEMA_VERSION,
     api::{
@@ -187,6 +188,10 @@ pub struct Store {
     /// Uses FxHashMap for efficient lookups, much smaller than code cache.
     code_metadata_cache: Arc<Mutex<rustc_hash::FxHashMap<H256, CodeMetadata>>>,
 
+    /// Bloom filter tracking (address, storage_key) pairs with non-zero values.
+    /// Used to skip trie lookups for storage slots that were never written.
+    storage_bloom: Arc<StorageBloomFilter>,
+
     background_threads: Arc<ThreadList>,
 }
 
@@ -1164,6 +1169,10 @@ impl Store {
 
     /// CAUTION: This method writes directly to the underlying database, bypassing any caching layer.
     /// For updating the state after block execution, use [`Self::store_block_updates`].
+    ///
+    /// NOTE: This method does not update the storage bloom filter. Slots written
+    /// through this path (e.g., snap sync) will be invisible to `might_contain`
+    /// after `enable()`. A backfill step is needed before enabling the bloom.
     pub async fn write_storage_trie_nodes_batch(
         &self,
         storage_trie_nodes: StorageUpdates,
@@ -1494,6 +1503,7 @@ impl Store {
             last_computed_flatkeyvalue: Arc::new(RwLock::new(last_written)),
             account_code_cache: Arc::new(Mutex::new(CodeCache::default())),
             code_metadata_cache: Arc::new(Mutex::new(rustc_hash::FxHashMap::default())),
+            storage_bloom: Arc::new(StorageBloomFilter::new(200_000_000)),
-            storage_bloom: Arc::new(StorageBloomFilter::new(200_000_000)),
+            storage_bloom: Arc::new(StorageBloomFilter::new(2_000_000)),
-            storage_bloom: Arc::new(StorageBloomFilter::new(200_000_000)),
+            storage_bloom: Arc::new(StorageBloomFilter::new(2_000_000)),
             background_threads: Default::default(),
         };
         let backend_clone = store.backend.clone();
@@ -1740,6 +1750,7 @@ impl Store {
                     if storage_value.is_zero() {
                         storage_trie.remove(&hashed_key)?;
                     } else {
+                        self.storage_bloom.insert(update.address, *storage_key);
-                        self.storage_bloom.insert(update.address, *storage_key);
+                        if self.storage_bloom.is_enabled() {
+                            self.storage_bloom.insert(update.address, *storage_key);
+                        }
-                        self.storage_bloom.insert(update.address, *storage_key);
+                        if self.storage_bloom.is_enabled() {
+                            self.storage_bloom.insert(update.address, *storage_key);
+                        }
                         storage_trie.insert(hashed_key, storage_value.encode_to_vec())?;
                     }
                 }
@@ -1831,6 +1842,7 @@ impl Store {
                     if storage_value.is_zero() {
                         storage_trie.remove(&hashed_key)?;
                     } else {
+                        self.storage_bloom.insert(update.address, *storage_key);
                         storage_trie.insert(hashed_key, storage_value.encode_to_vec())?;
                     }
                 }
@@ -1881,6 +1893,8 @@ impl Store {
                 if !storage_value.is_zero() {
                     let hashed_key = hash_key(&H256(storage_key.to_big_endian()));
                     storage_trie.insert(hashed_key, storage_value.encode_to_vec())?;
+                    // TODO: call storage_bloom.insert(address, storage_key) here when
+                    // bloom is wired up, otherwise genesis-only slots become false negatives.
                 }
             }
 
@@ -2118,6 +2132,15 @@ impl Store {
         address: Address,
         storage_key: H256,
     ) -> Result<Option<U256>, StoreError> {
+        // Fast path: if the bloom filter says this slot was never written, skip the trie.
+        // NOTE: The bloom only tracks writes during the current process lifetime.
+        // For historical state_root queries (RPC), a slot that was non-zero in older
+        // states but later zeroed won't be in the filter. When the bloom is enabled,
+        // this check may need to be limited to latest-state lookups only.
+        if !self.storage_bloom.might_contain(address, storage_key) {
+            return Ok(None);
+        }
+
         let account_hash = hash_address_fixed(&address);
 
         // Pre-acquire shared resources once for both trie opens