From ba98c823d86eca558a4f9c4b8adc260446d95f00 Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 23 May 2025 17:09:50 -0400 Subject: [PATCH 1/3] Do not insert into accounts lt hash cache if freeze has started --- runtime/src/bank.rs | 12 +++++++++ runtime/src/bank/accounts_lt_hash.rs | 37 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 44f9ec5498b972..3dcc8186135830 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -583,6 +583,7 @@ impl PartialEq for Bank { transaction_account_lock_limit: _, fee_structure: _, cache_for_accounts_lt_hash: _, + freeze_started_for_accounts_lt_hash: _, stats_for_accounts_lt_hash: _, block_id, bank_hash_stats: _, @@ -934,6 +935,12 @@ pub struct Bank { /// and not an intermediate state within this slot. cache_for_accounts_lt_hash: DashMap, + /// A flag to indicate when freeze has started, for the accounts lt hash cache + /// + /// This is similar to `freeze_started`, but needs to be raised *before* any + /// deferred changes to account state is started (instead of afterwards). + freeze_started_for_accounts_lt_hash: AtomicBool, + /// Stats related to the accounts lt hash stats_for_accounts_lt_hash: AccountsLtHashStats, @@ -1140,6 +1147,7 @@ impl Bank { hash_overrides: Arc::new(Mutex::new(HashOverrides::default())), accounts_lt_hash: Mutex::new(AccountsLtHash(LtHash::identity())), cache_for_accounts_lt_hash: DashMap::default(), + freeze_started_for_accounts_lt_hash: AtomicBool::default(), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::default(), @@ -1396,6 +1404,7 @@ impl Bank { hash_overrides: parent.hash_overrides.clone(), accounts_lt_hash: Mutex::new(parent.accounts_lt_hash.lock().unwrap().clone()), cache_for_accounts_lt_hash: DashMap::default(), + freeze_started_for_accounts_lt_hash: AtomicBool::new(false), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::default(), @@ -1873,6 +1882,7 @@ impl Bank { hash_overrides: Arc::new(Mutex::new(HashOverrides::default())), accounts_lt_hash: Mutex::new(AccountsLtHash(LtHash([0xBAD1; LtHash::NUM_ELEMENTS]))), cache_for_accounts_lt_hash: DashMap::default(), + freeze_started_for_accounts_lt_hash: AtomicBool::new(fields.hash != Hash::default()), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::new(&fields.bank_hash_stats), @@ -2641,6 +2651,8 @@ impl Bank { // committed before this write lock can be obtained here. let mut hash = self.hash.write().unwrap(); if *hash == Hash::default() { + self.freeze_started_for_accounts_lt_hash + .store(true, Ordering::Release); // finish up any deferred changes to account state self.collect_rent_eagerly(); self.distribute_transaction_fee_details(); diff --git a/runtime/src/bank/accounts_lt_hash.rs b/runtime/src/bank/accounts_lt_hash.rs index 301965b7c7470b..18450401eb9899 100644 --- a/runtime/src/bank/accounts_lt_hash.rs +++ b/runtime/src/bank/accounts_lt_hash.rs @@ -289,6 +289,13 @@ impl Bank { .load(Ordering::Relaxed), i64 ), + ( + "num_inspect_account_after_freeze_started", + self.stats_for_accounts_lt_hash + .num_inspect_account_after_freeze_started + .load(Ordering::Relaxed), + i64 + ), ( "inspect_account_lookup_ns", self.stats_for_accounts_lt_hash @@ -325,6 +332,22 @@ impl Bank { return; } + if self + .freeze_started_for_accounts_lt_hash + .load(Ordering::Acquire) + { + // If freezing the bank has started, do not add this account to the cache. + // It is possible for the leader to be executing transactions after freeze has started, + // i.e. while any deferred changes to account state is finishing up. This means the + // transaction could load an account *after* it was modified by the deferred changes, + // which would be the wrong initial state of the account. Inserting the wrong initial + // state of an account into the cache will end up producing the wrong accounts lt hash. + self.stats_for_accounts_lt_hash + .num_inspect_account_after_freeze_started + .fetch_add(1, Ordering::Relaxed); + return; + } + // Only insert the account the *first* time we see it. // We want to capture the value of the account *before* any modifications during this slot. let (is_in_cache, lookup_time) = @@ -372,6 +395,8 @@ pub struct Stats { num_inspect_account_hits: AtomicU64, /// the number of times the cache *did not* already contain the account being inspected num_inspect_account_misses: AtomicU64, + /// the number of times an account was inspected after freeze had started + num_inspect_account_after_freeze_started: AtomicU64, /// time spent checking if accounts are in the cache inspect_account_lookup_time_ns: AtomicU64, /// time spent inserting accounts into the cache @@ -777,6 +802,18 @@ mod tests { _ => panic!("wrong initial state for account"), }; } + + // ensure accounts are *not* added to the cache if freeze has started + bank.freeze_started_for_accounts_lt_hash + .store(true, Ordering::Release); + let address = Pubkey::new_unique(); + let num_cache_entries_prev = bank.cache_for_accounts_lt_hash.len(); + bank.inspect_account_for_accounts_lt_hash(&address, &AccountState::Dead, true); + let num_cache_entries_curr = bank.cache_for_accounts_lt_hash.len(); + assert_eq!(num_cache_entries_curr, num_cache_entries_prev); + assert!(!bank.cache_for_accounts_lt_hash.contains_key(&address)); + bank.freeze_started_for_accounts_lt_hash + .store(false, Ordering::Release); } #[test_case(Features::None; "no features")] From 013298a0b15f05da54085ea3298434f58ff620e9 Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 23 May 2025 19:03:40 -0400 Subject: [PATCH 2/3] grab freeze lock before checking if freeze has started --- runtime/src/bank.rs | 2 +- runtime/src/bank/accounts_lt_hash.rs | 42 +++++++++++++++------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 3dcc8186135830..01c50b35a43f7a 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -2652,7 +2652,7 @@ impl Bank { let mut hash = self.hash.write().unwrap(); if *hash == Hash::default() { self.freeze_started_for_accounts_lt_hash - .store(true, Ordering::Release); + .store(true, Relaxed); // finish up any deferred changes to account state self.collect_rent_eagerly(); self.distribute_transaction_fee_details(); diff --git a/runtime/src/bank/accounts_lt_hash.rs b/runtime/src/bank/accounts_lt_hash.rs index 18450401eb9899..aaf6a76189440e 100644 --- a/runtime/src/bank/accounts_lt_hash.rs +++ b/runtime/src/bank/accounts_lt_hash.rs @@ -332,27 +332,30 @@ impl Bank { return; } - if self - .freeze_started_for_accounts_lt_hash - .load(Ordering::Acquire) - { - // If freezing the bank has started, do not add this account to the cache. - // It is possible for the leader to be executing transactions after freeze has started, - // i.e. while any deferred changes to account state is finishing up. This means the - // transaction could load an account *after* it was modified by the deferred changes, - // which would be the wrong initial state of the account. Inserting the wrong initial - // state of an account into the cache will end up producing the wrong accounts lt hash. - self.stats_for_accounts_lt_hash - .num_inspect_account_after_freeze_started - .fetch_add(1, Ordering::Relaxed); - return; - } - // Only insert the account the *first* time we see it. // We want to capture the value of the account *before* any modifications during this slot. let (is_in_cache, lookup_time) = meas_dur!(self.cache_for_accounts_lt_hash.contains_key(address)); if !is_in_cache { + // We need to check if the bank has started freezing. In order to do that safely, we + // must take a read lock on Bank::hash before checking the freeze state. + let freeze_guard = self.freeze_lock(); + let has_freeze_started = self + .freeze_started_for_accounts_lt_hash + .load(Ordering::Relaxed); + if has_freeze_started { + // If freezing the bank has started, do not add this account to the cache. + // It is possible for the leader to be executing transactions after freeze has + // started, i.e. while any deferred changes to account state is finishing up. + // This means the transaction could load an account *after* it was modified by the + // deferred changes, which would be the wrong initial state of the account. + // Inserting the wrong initial state of an account into the cache will end up + // producing the wrong accounts lt hash. + self.stats_for_accounts_lt_hash + .num_inspect_account_after_freeze_started + .fetch_add(1, Ordering::Relaxed); + return; + } let (_, insert_time) = meas_dur!({ self.cache_for_accounts_lt_hash .entry(*address) @@ -366,6 +369,7 @@ impl Bank { CacheValue::InspectAccount(initial_state_of_account) }); }); + drop(freeze_guard); self.stats_for_accounts_lt_hash .num_inspect_account_misses @@ -804,16 +808,14 @@ mod tests { } // ensure accounts are *not* added to the cache if freeze has started - bank.freeze_started_for_accounts_lt_hash - .store(true, Ordering::Release); + // N.B. this test should remain *last*, as Bank::freeze() is not meant to be undone + bank.freeze(); let address = Pubkey::new_unique(); let num_cache_entries_prev = bank.cache_for_accounts_lt_hash.len(); bank.inspect_account_for_accounts_lt_hash(&address, &AccountState::Dead, true); let num_cache_entries_curr = bank.cache_for_accounts_lt_hash.len(); assert_eq!(num_cache_entries_curr, num_cache_entries_prev); assert!(!bank.cache_for_accounts_lt_hash.contains_key(&address)); - bank.freeze_started_for_accounts_lt_hash - .store(false, Ordering::Release); } #[test_case(Features::None; "no features")] From 532b5477238268674f9ce887cacd830d0c8f1099 Mon Sep 17 00:00:00 2001 From: brooks Date: Mon, 26 May 2025 17:27:21 -0400 Subject: [PATCH 3/3] pr: remove freeze_started_for_accounts_lt_hash --- runtime/src/bank.rs | 12 ------------ runtime/src/bank/accounts_lt_hash.rs | 25 ++++++++++++------------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 01c50b35a43f7a..44f9ec5498b972 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -583,7 +583,6 @@ impl PartialEq for Bank { transaction_account_lock_limit: _, fee_structure: _, cache_for_accounts_lt_hash: _, - freeze_started_for_accounts_lt_hash: _, stats_for_accounts_lt_hash: _, block_id, bank_hash_stats: _, @@ -935,12 +934,6 @@ pub struct Bank { /// and not an intermediate state within this slot. cache_for_accounts_lt_hash: DashMap, - /// A flag to indicate when freeze has started, for the accounts lt hash cache - /// - /// This is similar to `freeze_started`, but needs to be raised *before* any - /// deferred changes to account state is started (instead of afterwards). - freeze_started_for_accounts_lt_hash: AtomicBool, - /// Stats related to the accounts lt hash stats_for_accounts_lt_hash: AccountsLtHashStats, @@ -1147,7 +1140,6 @@ impl Bank { hash_overrides: Arc::new(Mutex::new(HashOverrides::default())), accounts_lt_hash: Mutex::new(AccountsLtHash(LtHash::identity())), cache_for_accounts_lt_hash: DashMap::default(), - freeze_started_for_accounts_lt_hash: AtomicBool::default(), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::default(), @@ -1404,7 +1396,6 @@ impl Bank { hash_overrides: parent.hash_overrides.clone(), accounts_lt_hash: Mutex::new(parent.accounts_lt_hash.lock().unwrap().clone()), cache_for_accounts_lt_hash: DashMap::default(), - freeze_started_for_accounts_lt_hash: AtomicBool::new(false), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::default(), @@ -1882,7 +1873,6 @@ impl Bank { hash_overrides: Arc::new(Mutex::new(HashOverrides::default())), accounts_lt_hash: Mutex::new(AccountsLtHash(LtHash([0xBAD1; LtHash::NUM_ELEMENTS]))), cache_for_accounts_lt_hash: DashMap::default(), - freeze_started_for_accounts_lt_hash: AtomicBool::new(fields.hash != Hash::default()), stats_for_accounts_lt_hash: AccountsLtHashStats::default(), block_id: RwLock::new(None), bank_hash_stats: AtomicBankHashStats::new(&fields.bank_hash_stats), @@ -2651,8 +2641,6 @@ impl Bank { // committed before this write lock can be obtained here. let mut hash = self.hash.write().unwrap(); if *hash == Hash::default() { - self.freeze_started_for_accounts_lt_hash - .store(true, Relaxed); // finish up any deferred changes to account state self.collect_rent_eagerly(); self.distribute_transaction_fee_details(); diff --git a/runtime/src/bank/accounts_lt_hash.rs b/runtime/src/bank/accounts_lt_hash.rs index aaf6a76189440e..ad87254cc00b55 100644 --- a/runtime/src/bank/accounts_lt_hash.rs +++ b/runtime/src/bank/accounts_lt_hash.rs @@ -4,6 +4,7 @@ use { rayon::prelude::*, solana_account::{accounts_equal, AccountSharedData}, solana_accounts_db::accounts_db::AccountsDb, + solana_hash::Hash, solana_lattice_hash::lt_hash::LtHash, solana_measure::{meas_dur, measure::Measure}, solana_pubkey::Pubkey, @@ -290,9 +291,9 @@ impl Bank { i64 ), ( - "num_inspect_account_after_freeze_started", + "num_inspect_account_after_frozen", self.stats_for_accounts_lt_hash - .num_inspect_account_after_freeze_started + .num_inspect_account_after_frozen .load(Ordering::Relaxed), i64 ), @@ -337,14 +338,12 @@ impl Bank { let (is_in_cache, lookup_time) = meas_dur!(self.cache_for_accounts_lt_hash.contains_key(address)); if !is_in_cache { - // We need to check if the bank has started freezing. In order to do that safely, we - // must take a read lock on Bank::hash before checking the freeze state. + // We need to check if the bank is frozen. In order to do that safely, we + // must hold a read lock on Bank::hash to read the frozen state. let freeze_guard = self.freeze_lock(); - let has_freeze_started = self - .freeze_started_for_accounts_lt_hash - .load(Ordering::Relaxed); - if has_freeze_started { - // If freezing the bank has started, do not add this account to the cache. + let is_frozen = *freeze_guard != Hash::default(); + if is_frozen { + // If the bank is frozen, do not add this account to the cache. // It is possible for the leader to be executing transactions after freeze has // started, i.e. while any deferred changes to account state is finishing up. // This means the transaction could load an account *after* it was modified by the @@ -352,7 +351,7 @@ impl Bank { // Inserting the wrong initial state of an account into the cache will end up // producing the wrong accounts lt hash. self.stats_for_accounts_lt_hash - .num_inspect_account_after_freeze_started + .num_inspect_account_after_frozen .fetch_add(1, Ordering::Relaxed); return; } @@ -399,8 +398,8 @@ pub struct Stats { num_inspect_account_hits: AtomicU64, /// the number of times the cache *did not* already contain the account being inspected num_inspect_account_misses: AtomicU64, - /// the number of times an account was inspected after freeze had started - num_inspect_account_after_freeze_started: AtomicU64, + /// the number of times an account was inspected after the bank was frozen + num_inspect_account_after_frozen: AtomicU64, /// time spent checking if accounts are in the cache inspect_account_lookup_time_ns: AtomicU64, /// time spent inserting accounts into the cache @@ -807,7 +806,7 @@ mod tests { }; } - // ensure accounts are *not* added to the cache if freeze has started + // ensure accounts are *not* added to the cache if the bank is frozen // N.B. this test should remain *last*, as Bank::freeze() is not meant to be undone bank.freeze(); let address = Pubkey::new_unique();