From f97fc2aa8813a7730bbc6ecd8576ccefbe5448fc Mon Sep 17 00:00:00 2001 From: jeff washington Date: Thu, 10 Jul 2025 18:46:49 -0500 Subject: [PATCH 1/4] accounts index iter returns pubkeys --- accounts-db/src/accounts_db.rs | 9 ++--- accounts-db/src/accounts_index.rs | 34 +++++++++++-------- .../accounts_index/in_mem_accounts_index.rs | 6 ++-- accounts-db/src/accounts_index/iter.rs | 20 ++++++----- accounts-db/src/append_vec.rs | 10 ------ 5 files changed, 37 insertions(+), 42 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index e840010e1987ff..610d6f5a416e51 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -8527,12 +8527,9 @@ impl AccountsDb { let full_pubkey_range = Pubkey::from([0; 32])..=Pubkey::from([0xff; 32]); self.accounts_index.account_maps.iter().for_each(|map| { - for (pubkey, account_entry) in map.items(&full_pubkey_range) { - info!(" key: {} ref_count: {}", pubkey, account_entry.ref_count(),); - info!( - " slots: {:?}", - *account_entry.slot_list.read().unwrap() - ); + for (pubkey, slot_list) in map.items(&full_pubkey_range) { + info!(" key: {}", pubkey); + info!(" slots: {:?}", slot_list); } }); } diff --git a/accounts-db/src/accounts_index.rs b/accounts-db/src/accounts_index.rs index a34ab351302d3b..023de58292e6ae 100644 --- a/accounts-db/src/accounts_index.rs +++ b/accounts-db/src/accounts_index.rs @@ -730,21 +730,27 @@ impl + Into> AccountsIndex { for pubkey_list in self.iter(range.as_ref(), returns_items) { iterator_timer.stop(); iterator_elapsed += iterator_timer.as_us(); - for (pubkey, list) in pubkey_list { + for pubkey in pubkey_list { num_keys_iterated += 1; - let mut read_lock_timer = Measure::start("read_lock"); - let list_r = &list.slot_list.read().unwrap(); - read_lock_timer.stop(); - read_lock_elapsed += read_lock_timer.as_us(); - let mut latest_slot_timer = Measure::start("latest_slot"); - if let Some(index) = self.latest_slot(Some(ancestors), list_r, max_root) { - latest_slot_timer.stop(); - latest_slot_elapsed += latest_slot_timer.as_us(); - let mut load_account_timer = Measure::start("load_account"); - func(&pubkey, (&list_r[index].1, list_r[index].0)); - load_account_timer.stop(); - load_account_elapsed += load_account_timer.as_us(); - } + self.get_and_then(&pubkey, |entry| { + if let Some(list) = entry { + let mut read_lock_timer = Measure::start("read_lock"); + let list_r = &list.slot_list.read().unwrap(); + read_lock_timer.stop(); + read_lock_elapsed += read_lock_timer.as_us(); + let mut latest_slot_timer = Measure::start("latest_slot"); + if let Some(index) = self.latest_slot(Some(ancestors), list_r, max_root) { + latest_slot_timer.stop(); + latest_slot_elapsed += latest_slot_timer.as_us(); + let mut load_account_timer = Measure::start("load_account"); + func(&pubkey, (&list_r[index].1, list_r[index].0)); + load_account_timer.stop(); + load_account_elapsed += load_account_timer.as_us(); + } + } + let add_to_in_mem_cache = false; + (add_to_in_mem_cache, ()) + }); if config.is_aborted() { return; } diff --git a/accounts-db/src/accounts_index/in_mem_accounts_index.rs b/accounts-db/src/accounts_index/in_mem_accounts_index.rs index 96059575dad204..291f16d4be5d33 100644 --- a/accounts-db/src/accounts_index/in_mem_accounts_index.rs +++ b/accounts-db/src/accounts_index/in_mem_accounts_index.rs @@ -250,7 +250,7 @@ impl + Into> InMemAccountsIndex(&self, range: &R) -> Vec<(Pubkey, Arc>)> + pub fn items(&self, range: &R) -> Vec<(Pubkey, SlotList)> where R: RangeBounds + std::fmt::Debug, { @@ -288,7 +288,7 @@ impl + Into> InMemAccountsIndex + Into> InMemAccountsIndex Vec { Self::update_stat(&self.stats().keys, 1); // easiest implementation is to load everything from disk into cache and return the keys diff --git a/accounts-db/src/accounts_index/iter.rs b/accounts-db/src/accounts_index/iter.rs index 43ded4762e2a7b..aadda27bc2ade6 100644 --- a/accounts-db/src/accounts_index/iter.rs +++ b/accounts-db/src/accounts_index/iter.rs @@ -1,8 +1,5 @@ use { - super::{ - account_map_entry::AccountMapEntry, in_mem_accounts_index::InMemAccountsIndex, - AccountsIndex, DiskIndexValue, IndexValue, - }, + super::{in_mem_accounts_index::InMemAccountsIndex, AccountsIndex, DiskIndexValue, IndexValue}, solana_pubkey::Pubkey, std::{ ops::{Bound, RangeBounds}, @@ -18,7 +15,7 @@ pub struct AccountsIndexIterator<'a, T: IndexValue, U: DiskIndexValue + From end_bound: Bound<&'a Pubkey>, start_bin: usize, end_bin_inclusive: usize, - items: Vec<(Pubkey, Arc>)>, + items: Vec, returns_items: AccountsIndexIteratorReturnsItems, } @@ -61,8 +58,9 @@ impl<'a, T: IndexValue, U: DiskIndexValue + From + Into> AccountsIndexIter impl + Into> Iterator for AccountsIndexIterator<'_, T, U> { - type Item = Vec<(Pubkey, Arc>)>; + type Item = Vec; fn next(&mut self) -> Option { + let range = (self.start_bound, self.end_bound); while self.items.len() < ITER_BATCH_SIZE { if self.start_bin > self.end_bin_inclusive { break; @@ -70,9 +68,13 @@ impl + Into> Iterator let bin = self.start_bin; let map = &self.account_maps[bin]; - let mut items = map.items(&(self.start_bound, self.end_bound)); + let mut items = map + .keys() + .into_iter() + .filter(|k| range.contains(&k)) + .collect::>(); if self.returns_items == AccountsIndexIteratorReturnsItems::Sorted { - items.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + items.sort_unstable(); } self.items.append(&mut items); self.start_bin += 1; @@ -141,7 +143,7 @@ mod tests { let x = iter.next().unwrap(); assert_eq!(x.len(), 2 * ITER_BATCH_SIZE); assert_eq!( - x.is_sorted_by(|a, b| a.0 < b.0), + x.is_sorted(), returns_items == AccountsIndexIteratorReturnsItems::Sorted ); assert_eq!(iter.items.len(), 0); // should be empty. diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 3b4d6d7181f6f1..74f46b3b3b58cb 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -169,7 +169,6 @@ enum AppendVecFileBacking { /// A file-backed block of memory that is used to store the data for each appended item. Mmap(MmapMut), /// This was opened as a read only file - #[cfg_attr(not(unix), allow(dead_code))] File(File), } @@ -368,13 +367,7 @@ impl AppendVec { /// when we can use file i/o as opposed to mmap, this is the trigger to tell us /// that no more appending will occur and we can close the initial mmap. - #[cfg_attr(not(unix), allow(dead_code))] pub(crate) fn reopen_as_readonly(&self) -> Option { - #[cfg(not(unix))] - // must open as mmmap on non-unix - return None; - - #[cfg(unix)] match &self.backing { AppendVecFileBacking::File(_file) => { // already a file, so already read-only @@ -477,7 +470,6 @@ impl AppendVec { } /// Creates an appendvec from file without performing sanitize checks or counting the number of accounts - #[cfg_attr(not(unix), allow(unused_variables))] pub fn new_from_file_unchecked( path: impl Into, current_len: usize, @@ -493,8 +485,6 @@ impl AppendVec { .create(false) .open(&path)?; - #[cfg(unix)] - // we must use mmap on non-linux if storage_access == StorageAccess::File { APPEND_VEC_STATS.files_open.fetch_add(1, Ordering::Relaxed); From ce399a2ad208699c08b588147982f3948859e0cb Mon Sep 17 00:00:00 2001 From: jeff washington Date: Thu, 10 Jul 2025 21:20:35 -0500 Subject: [PATCH 2/4] remove items() from accounts_index --- accounts-db/src/accounts_db.rs | 15 ++++-- .../accounts_index/in_mem_accounts_index.rs | 46 ------------------- 2 files changed, 10 insertions(+), 51 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 610d6f5a416e51..139018a0191ae8 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -8524,12 +8524,17 @@ impl AccountsDb { #[allow(clippy::stable_sort_primitive)] alive_roots.sort(); info!("{}: accounts_index alive_roots: {:?}", label, alive_roots,); - let full_pubkey_range = Pubkey::from([0; 32])..=Pubkey::from([0xff; 32]); - self.accounts_index.account_maps.iter().for_each(|map| { - for (pubkey, slot_list) in map.items(&full_pubkey_range) { - info!(" key: {}", pubkey); - info!(" slots: {:?}", slot_list); + for pubkey in map.keys() { + self.accounts_index.get_and_then(&pubkey, |account_entry| { + if let Some(account_entry) = account_entry { + let list_r = &account_entry.slot_list.read().unwrap(); + info!(" key: {} ref_count: {}", pubkey, account_entry.ref_count(),); + info!(" slots: {:?}", list_r); + } + let add_to_in_mem_cache = false; + (add_to_in_mem_cache, ()) + }); } }); } diff --git a/accounts-db/src/accounts_index/in_mem_accounts_index.rs b/accounts-db/src/accounts_index/in_mem_accounts_index.rs index 291f16d4be5d33..47d8da0a4ea874 100644 --- a/accounts-db/src/accounts_index/in_mem_accounts_index.rs +++ b/accounts-db/src/accounts_index/in_mem_accounts_index.rs @@ -250,52 +250,6 @@ impl + Into> InMemAccountsIndex(&self, range: &R) -> Vec<(Pubkey, SlotList)> - where - R: RangeBounds + std::fmt::Debug, - { - let start = match range.start_bound() { - Bound::Included(bound) | Bound::Excluded(bound) => bound, - Bound::Unbounded => &Pubkey::from([0; 32]), - }; - - let end = match range.end_bound() { - Bound::Included(bound) | Bound::Excluded(bound) => bound, - Bound::Unbounded => &Pubkey::from([0xff; 32]), - }; - - if start > &self.highest_pubkey || end < &self.lowest_pubkey { - // range does not contain any of the keys in this bin. No need to - // load and scan the map. - // Example: - // |-------------------| |-------------------| |-------------------| - // start end low high start end - return vec![]; - } - - let m = Measure::start("items"); - - // For simplicity, we check the range for every pubkey in the map. This - // can be further optimized for case, such as the range contains lowest - // and highest pubkey for this bin. In such case, we can return all - // items in the map without range check on item's pubkey. Since the - // check is cheap when compared with the cost of reading from disk, we - // are not optimizing it for now. - self.hold_range_in_memory(range, true); - let result = self - .map_internal - .read() - .unwrap() - .iter() - .filter(|&(k, _v)| range.contains(k)) - .map(|(k, v)| (*k, v.slot_list.read().unwrap().clone())) - .collect(); - self.hold_range_in_memory(range, false); - Self::update_stat(&self.stats().items, 1); - Self::update_time_stat(&self.stats().items_us, m); - result - } - /// return all keys in this bin pub fn keys(&self) -> Vec { Self::update_stat(&self.stats().keys, 1); From 1345445654d67329bfc7320aa0496a4ee34446b8 Mon Sep 17 00:00:00 2001 From: jeff washington Date: Fri, 11 Jul 2025 12:16:31 -0500 Subject: [PATCH 3/4] revert unintended file --- accounts-db/src/append_vec.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 74f46b3b3b58cb..3b4d6d7181f6f1 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -169,6 +169,7 @@ enum AppendVecFileBacking { /// A file-backed block of memory that is used to store the data for each appended item. Mmap(MmapMut), /// This was opened as a read only file + #[cfg_attr(not(unix), allow(dead_code))] File(File), } @@ -367,7 +368,13 @@ impl AppendVec { /// when we can use file i/o as opposed to mmap, this is the trigger to tell us /// that no more appending will occur and we can close the initial mmap. + #[cfg_attr(not(unix), allow(dead_code))] pub(crate) fn reopen_as_readonly(&self) -> Option { + #[cfg(not(unix))] + // must open as mmmap on non-unix + return None; + + #[cfg(unix)] match &self.backing { AppendVecFileBacking::File(_file) => { // already a file, so already read-only @@ -470,6 +477,7 @@ impl AppendVec { } /// Creates an appendvec from file without performing sanitize checks or counting the number of accounts + #[cfg_attr(not(unix), allow(unused_variables))] pub fn new_from_file_unchecked( path: impl Into, current_len: usize, @@ -485,6 +493,8 @@ impl AppendVec { .create(false) .open(&path)?; + #[cfg(unix)] + // we must use mmap on non-linux if storage_access == StorageAccess::File { APPEND_VEC_STATS.files_open.fetch_add(1, Ordering::Relaxed); From 44b913882e7eb102ea24665cba5dfc18d5b2210e Mon Sep 17 00:00:00 2001 From: jeff washington Date: Fri, 11 Jul 2025 12:18:28 -0500 Subject: [PATCH 4/4] rename --- accounts-db/src/accounts_index.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/accounts-db/src/accounts_index.rs b/accounts-db/src/accounts_index.rs index 023de58292e6ae..5b6235bc6b538d 100644 --- a/accounts-db/src/accounts_index.rs +++ b/accounts-db/src/accounts_index.rs @@ -727,10 +727,10 @@ impl + Into> AccountsIndex { let mut iterator_elapsed = 0; let mut iterator_timer = Measure::start("iterator_elapsed"); - for pubkey_list in self.iter(range.as_ref(), returns_items) { + for pubkeys in self.iter(range.as_ref(), returns_items) { iterator_timer.stop(); iterator_elapsed += iterator_timer.as_us(); - for pubkey in pubkey_list { + for pubkey in pubkeys { num_keys_iterated += 1; self.get_and_then(&pubkey, |entry| { if let Some(list) = entry {