From 5960397fc1960a50aaf2fa9ec965cc21c44f5fd6 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Sat, 26 Jul 2025 19:50:12 +0200 Subject: [PATCH 01/11] Define constant. --- runtime/src/snapshot_utils.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 258ac529290..55fcb8cc74f 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -1125,6 +1125,11 @@ fn archive_snapshot( .append_dir_all(SNAPSHOTS_DIR, &staging_snapshots_dir) .map_err(E::ArchiveSnapshotsDir)?; + // Balance large and small files with bias towards small (4 small + 1 large), such + // that during unpacking large writes are mixed with file metadata operations + // and towards the end of archive (sizes equalize) writes are >256KiB / file. + const FILE_BALANCING_RATES: (usize, usize) = (4, 1); // (small_files_per_cycle, large_files_per_cycle) + let mut sorted_storage_indices = (0..snapshot_storages.len()).collect::>(); sorted_storage_indices.sort_by_key(|&i| snapshot_storages[i].accounts.len()); for i in 0..sorted_storage_indices.len() { From b2641159f4823578808f5fc40ab258c08b3461a6 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Mon, 28 Jul 2025 08:27:49 +0200 Subject: [PATCH 02/11] Rename and move const --- runtime/src/snapshot_utils.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 55fcb8cc74f..258ac529290 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -1125,11 +1125,6 @@ fn archive_snapshot( .append_dir_all(SNAPSHOTS_DIR, &staging_snapshots_dir) .map_err(E::ArchiveSnapshotsDir)?; - // Balance large and small files with bias towards small (4 small + 1 large), such - // that during unpacking large writes are mixed with file metadata operations - // and towards the end of archive (sizes equalize) writes are >256KiB / file. - const FILE_BALANCING_RATES: (usize, usize) = (4, 1); // (small_files_per_cycle, large_files_per_cycle) - let mut sorted_storage_indices = (0..snapshot_storages.len()).collect::>(); sorted_storage_indices.sort_by_key(|&i| snapshot_storages[i].accounts.len()); for i in 0..sorted_storage_indices.len() { From 7658e5e99f3c4a09d6d7c71fda11467c895c1ebd Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Mon, 28 Jul 2025 12:28:56 +0200 Subject: [PATCH 03/11] Use reshuffle in lt verify. --- accounts-db/src/account_storage.rs | 66 +++++++++++++++++++++++++++++- accounts-db/src/accounts_db.rs | 4 +- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index 6f1f4a23304..cb4dd8f7265 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -3,9 +3,13 @@ use { crate::accounts_db::{AccountStorageEntry, AccountsFileId}, dashmap::DashMap, + rayon::iter::{IntoParallelIterator, ParallelIterator}, solana_clock::Slot, solana_nohash_hasher::{BuildNoHashHasher, IntMap}, - std::sync::{Arc, RwLock}, + std::{ + ops::Range, + sync::{Arc, RwLock}, + }, }; pub mod stored_account_info; @@ -293,6 +297,66 @@ impl Default for AccountStorageStatus { } } +pub struct AccountStoragesReshuffler<'a> { + storages: &'a [Arc], + indices: Vec, +} + +impl<'a> AccountStoragesReshuffler<'a> { + pub fn new(storages: &'a [Arc]) -> Self { + let indices = (0..storages.len()).collect(); + Self { storages, indices } + } + + pub fn into_balanced(mut self) -> impl ExactSizeIterator + 'a { + self.indices + .sort_unstable_by_key(|i| self.storages[*i].accounts.len()); + let range = 0..self.indices.len(); + self.indices.into_iter().map(move |i| { + let index = select_from_range_with_start_end_rates(range.clone(), i, (4, 1)); + + self.storages[index].as_ref() + }) + } + + pub fn into_par_balanced( + mut self, + ) -> impl ParallelIterator + 'a { + self.indices + .sort_unstable_by_key(|i| self.storages[*i].accounts.len()); + let range = 0..self.indices.len(); + self.indices.into_par_iter().map(move |i| { + let index = select_from_range_with_start_end_rates(range.clone(), i, (4, 1)); + self.storages[index].as_ref() + }) + } +} + +/// Select the `nth` (`0 <= nth < range.len()`) value from a `range`, choosing values alternately +/// from its start or end according to a `start_rate : end_rate` ratio. +/// +/// For every `start_rate` values selected from the start, `end_rate` values are selected from the end. +/// The resulting sequence alternates in a balanced and interleaved fashion between the range's start and end. +/// ``` +fn select_from_range_with_start_end_rates( + range: Range, + nth: usize, + (start_rate, end_rate): (usize, usize), +) -> usize { + let range_len = range.len(); + let cycle = start_rate + end_rate; + let cycle_index = nth % cycle; + let cycle_num = nth.checked_div(cycle).expect("rates sum must be positive"); + + let index = if cycle_index < start_rate { + cycle_num * start_rate + cycle_index + } else { + let end_index = cycle_num * end_rate + cycle_index - start_rate; + range_len - end_index - 1 + }; + range.start + index +} + #[cfg(test)] pub(crate) mod tests { use { diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 8215d645a55..c738d423cc3 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -22,6 +22,7 @@ mod geyser_plugin_utils; pub mod stats; pub mod tests; +use crate::account_storage::AccountStoragesReshuffler; #[cfg(test)] use crate::append_vec::StoredAccountMeta; #[cfg(feature = "dev-context-only-utils")] @@ -5714,8 +5715,9 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { + let storages = AccountStoragesReshuffler::new(storages).into_par_balanced(); let mut lt_hash = storages - .par_iter() + // .par_iter() .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage From 97d6b717796db0eb00b19fb5d67cba985dfd597c Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Mon, 28 Jul 2025 14:12:47 +0200 Subject: [PATCH 04/11] Refactor to common code. --- accounts-db/src/account_storage.rs | 79 +++++++++++++++++++++--------- accounts-db/src/accounts_db.rs | 6 +-- runtime/src/snapshot_utils.rs | 73 +++------------------------ 3 files changed, 67 insertions(+), 91 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index cb4dd8f7265..ab95992da25 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -297,38 +297,43 @@ impl Default for AccountStorageStatus { } } -pub struct AccountStoragesReshuffler<'a> { +pub struct AccountStoragesOrderBalancer<'a> { + small_to_large_ratio: (usize, usize), storages: &'a [Arc], indices: Vec, } -impl<'a> AccountStoragesReshuffler<'a> { - pub fn new(storages: &'a [Arc]) -> Self { - let indices = (0..storages.len()).collect(); - Self { storages, indices } +impl<'a> AccountStoragesOrderBalancer<'a> { + pub fn new( + storages: &'a [Arc], + small_to_large_ratio: (usize, usize), + ) -> Self { + let mut indices: Vec = (0..storages.len()).collect(); + indices.sort_unstable_by_key(|i| storages[*i].written_bytes()); + Self { + storages, + indices, + small_to_large_ratio, + } } - pub fn into_balanced(mut self) -> impl ExactSizeIterator + 'a { - self.indices - .sort_unstable_by_key(|i| self.storages[*i].accounts.len()); - let range = 0..self.indices.len(); - self.indices.into_iter().map(move |i| { - let index = select_from_range_with_start_end_rates(range.clone(), i, (4, 1)); + pub fn into_iter(self) -> impl ExactSizeIterator + 'a { + (0..self.indices.len()).map(move |i| self.nth_storage(i)) + } - self.storages[index].as_ref() - }) + pub fn into_par_iter(self) -> impl ParallelIterator + 'a { + (0..self.indices.len()) + .into_par_iter() + .map(move |i| self.nth_storage(i)) } - pub fn into_par_balanced( - mut self, - ) -> impl ParallelIterator + 'a { - self.indices - .sort_unstable_by_key(|i| self.storages[*i].accounts.len()); - let range = 0..self.indices.len(); - self.indices.into_par_iter().map(move |i| { - let index = select_from_range_with_start_end_rates(range.clone(), i, (4, 1)); - self.storages[index].as_ref() - }) + fn nth_storage(&self, nth: usize) -> &'a AccountStorageEntry { + let range_index = select_from_range_with_start_end_rates( + 0..self.storages.len(), + nth, + self.small_to_large_ratio.clone(), + ); + self.storages[self.indices[range_index]].as_ref() } } @@ -686,4 +691,32 @@ pub(crate) mod tests { .insert(0, storage.get_test_storage()); storage.get_if(|_, _| true); } + + #[test] + fn test_select_range_with_start_end_rates() { + let interleaved: Vec<_> = (0..10) + .map(|i| select_from_range_with_start_end_rates(1..11, i, (2, 1))) + .collect(); + assert_eq!(interleaved, vec![1, 2, 10, 3, 4, 9, 5, 6, 8, 7]); + + let interleaved: Vec<_> = (0..10) + .map(|i| select_from_range_with_start_end_rates(1..11, i, (1, 1))) + .collect(); + assert_eq!(interleaved, vec![1, 10, 2, 9, 3, 8, 4, 7, 5, 6]); + + let interleaved: Vec<_> = (0..9) + .map(|i| select_from_range_with_start_end_rates(1..10, i, (2, 1))) + .collect(); + assert_eq!(interleaved, vec![1, 2, 9, 3, 4, 8, 5, 6, 7]); + + let interleaved: Vec<_> = (0..9) + .map(|i| select_from_range_with_start_end_rates(1..10, i, (1, 2))) + .collect(); + assert_eq!(interleaved, vec![1, 9, 8, 2, 7, 6, 3, 5, 4]); + + let interleaved: Vec<_> = (0..13) + .map(|i| select_from_range_with_start_end_rates(1..14, i, (2, 3))) + .collect(); + assert_eq!(interleaved, vec![1, 2, 13, 12, 11, 3, 4, 10, 9, 8, 5, 6, 7]); + } } diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index c738d423cc3..3bc4d9f232d 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -22,7 +22,7 @@ mod geyser_plugin_utils; pub mod stats; pub mod tests; -use crate::account_storage::AccountStoragesReshuffler; +use crate::account_storage::AccountStoragesOrderBalancer; #[cfg(test)] use crate::append_vec::StoredAccountMeta; #[cfg(feature = "dev-context-only-utils")] @@ -5715,9 +5715,9 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { - let storages = AccountStoragesReshuffler::new(storages).into_par_balanced(); + let storages = AccountStoragesOrderBalancer::new(storages, (1, 1)); let mut lt_hash = storages - // .par_iter() + .into_par_iter() .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 258ac529290..e0d7cf7041f 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -21,7 +21,7 @@ use { log::*, regex::Regex, solana_accounts_db::{ - account_storage::AccountStorageMap, + account_storage::{AccountStorageMap, AccountStoragesOrderBalancer}, account_storage_reader::AccountStorageReader, accounts_db::{AccountStorageEntry, AtomicAccountsFileId}, accounts_file::{AccountsFile, AccountsFileError, StorageAccess}, @@ -38,7 +38,7 @@ use { io::{self, BufRead, BufReader, BufWriter, Error as IoError, Read, Seek, Write}, mem, num::{NonZeroU64, NonZeroUsize}, - ops::{Range, RangeInclusive}, + ops::RangeInclusive, path::{Path, PathBuf}, process::ExitStatus, str::FromStr, @@ -87,7 +87,7 @@ pub const INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^incremental-sna // Balance large and small files order in snapshot tar with bias towards small (4 small + 1 large), // such that during unpacking large writes are mixed with file metadata operations // and towards the end of archive (sizes equalize) writes are >256KiB / file. -const INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO: (usize, usize) = (4, 1); +const INTERLEAVED_SMALL_TO_LARGE_RATIO: (usize, usize) = (4, 1); #[derive(Copy, Clone, Default, Eq, PartialEq, Debug)] pub enum SnapshotVersion { @@ -1125,15 +1125,11 @@ fn archive_snapshot( .append_dir_all(SNAPSHOTS_DIR, &staging_snapshots_dir) .map_err(E::ArchiveSnapshotsDir)?; - let mut sorted_storage_indices = (0..snapshot_storages.len()).collect::>(); - sorted_storage_indices.sort_by_key(|&i| snapshot_storages[i].accounts.len()); - for i in 0..sorted_storage_indices.len() { - let index = select_from_range_with_start_end_rates( - 0..sorted_storage_indices.len(), - i, - INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO, - ); - let storage = &snapshot_storages[sorted_storage_indices[index]]; + let storages_orderer = AccountStoragesOrderBalancer::new( + snapshot_storages, + INTERLEAVED_SMALL_TO_LARGE_RATIO, + ); + for storage in storages_orderer.into_iter() { let path_in_archive = Path::new(ACCOUNTS_DIR) .join(AccountsFile::file_name(storage.slot(), storage.id())); @@ -1215,31 +1211,6 @@ fn archive_snapshot( }) } -/// Select the `nth` (`0 <= nth < range.len()`) value from a `range`, choosing values alternately -/// from its start or end according to a `start_rate : end_rate` ratio. -/// -/// For every `start_rate` values selected from the start, `end_rate` values are selected from the end. -/// The resulting sequence alternates in a balanced and interleaved fashion between the range's start and end. -/// ``` -fn select_from_range_with_start_end_rates( - range: Range, - nth: usize, - (start_rate, end_rate): (usize, usize), -) -> usize { - let range_len = range.len(); - let cycle = start_rate + end_rate; - let cycle_index = nth % cycle; - let cycle_num = nth.checked_div(cycle).expect("rates sum must be positive"); - - let index = if cycle_index < start_rate { - cycle_num * start_rate + cycle_index - } else { - let end_index = cycle_num * end_rate + cycle_index - start_rate; - range_len - end_index - 1 - }; - range.start + index -} - /// Get the bank snapshots in a directory pub fn get_bank_snapshots(bank_snapshots_dir: impl AsRef) -> Vec { let mut bank_snapshots = Vec::default(); @@ -3699,32 +3670,4 @@ mod tests { .starts_with("invalid full snapshot slot file size")); } } - - #[test] - fn test_select_from_start_or_end_index_by_ratio() { - let interleaved: Vec<_> = (0..10) - .map(|i| select_from_range_with_start_end_rates(1..11, i, (2, 1))) - .collect(); - assert_eq!(interleaved, vec![1, 2, 10, 3, 4, 9, 5, 6, 8, 7]); - - let interleaved: Vec<_> = (0..10) - .map(|i| select_from_range_with_start_end_rates(1..11, i, (1, 1))) - .collect(); - assert_eq!(interleaved, vec![1, 10, 2, 9, 3, 8, 4, 7, 5, 6]); - - let interleaved: Vec<_> = (0..9) - .map(|i| select_from_range_with_start_end_rates(1..10, i, (2, 1))) - .collect(); - assert_eq!(interleaved, vec![1, 2, 9, 3, 4, 8, 5, 6, 7]); - - let interleaved: Vec<_> = (0..9) - .map(|i| select_from_range_with_start_end_rates(1..10, i, (1, 2))) - .collect(); - assert_eq!(interleaved, vec![1, 9, 8, 2, 7, 6, 3, 5, 4]); - - let interleaved: Vec<_> = (0..13) - .map(|i| select_from_range_with_start_end_rates(1..14, i, (2, 3))) - .collect(); - assert_eq!(interleaved, vec![1, 2, 13, 12, 11, 3, 4, 10, 9, 8, 5, 6, 7]); - } } From ff061840c68959c85926906cae52572a0d488088 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Mon, 28 Jul 2025 14:57:45 +0200 Subject: [PATCH 05/11] Adjust rates and constaints on rayon iter. --- accounts-db/src/account_storage.rs | 8 +++++--- accounts-db/src/accounts_db.rs | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index ab95992da25..95e515ecc36 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -3,7 +3,7 @@ use { crate::accounts_db::{AccountStorageEntry, AccountsFileId}, dashmap::DashMap, - rayon::iter::{IntoParallelIterator, ParallelIterator}, + rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, solana_clock::Slot, solana_nohash_hasher::{BuildNoHashHasher, IntMap}, std::{ @@ -321,7 +321,9 @@ impl<'a> AccountStoragesOrderBalancer<'a> { (0..self.indices.len()).map(move |i| self.nth_storage(i)) } - pub fn into_par_iter(self) -> impl ParallelIterator + 'a { + pub fn into_par_iter( + self, + ) -> impl IndexedParallelIterator + 'a { (0..self.indices.len()) .into_par_iter() .map(move |i| self.nth_storage(i)) @@ -329,7 +331,7 @@ impl<'a> AccountStoragesOrderBalancer<'a> { fn nth_storage(&self, nth: usize) -> &'a AccountStorageEntry { let range_index = select_from_range_with_start_end_rates( - 0..self.storages.len(), + 0..self.indices.len(), nth, self.small_to_large_ratio.clone(), ); diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 3bc4d9f232d..0b35b6f48be 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -5715,9 +5715,11 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { - let storages = AccountStoragesOrderBalancer::new(storages, (1, 1)); + let storages = AccountStoragesOrderBalancer::new(storages, (7, 1)); let mut lt_hash = storages .into_par_iter() + .with_min_len(16) + .with_max_len(64) .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage From 2aa56f948d3c5e31434f437874c20d95e8f3ff59 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 09:50:46 +0200 Subject: [PATCH 06/11] Tweaks. --- accounts-db/src/accounts_db.rs | 5 ++--- runtime/src/snapshot_utils.rs | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 0b35b6f48be..0db7bf5e50f 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -5715,11 +5715,10 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { - let storages = AccountStoragesOrderBalancer::new(storages, (7, 1)); + let storages = AccountStoragesOrderBalancer::new(storages, (4, 1)); let mut lt_hash = storages .into_par_iter() - .with_min_len(16) - .with_max_len(64) + .by_uniform_blocks(100) .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index e0d7cf7041f..f7564486db6 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -87,7 +87,7 @@ pub const INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^incremental-sna // Balance large and small files order in snapshot tar with bias towards small (4 small + 1 large), // such that during unpacking large writes are mixed with file metadata operations // and towards the end of archive (sizes equalize) writes are >256KiB / file. -const INTERLEAVED_SMALL_TO_LARGE_RATIO: (usize, usize) = (4, 1); +const INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO: (usize, usize) = (4, 1); #[derive(Copy, Clone, Default, Eq, PartialEq, Debug)] pub enum SnapshotVersion { @@ -1127,7 +1127,7 @@ fn archive_snapshot( let storages_orderer = AccountStoragesOrderBalancer::new( snapshot_storages, - INTERLEAVED_SMALL_TO_LARGE_RATIO, + INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO, ); for storage in storages_orderer.into_iter() { let path_in_archive = Path::new(ACCOUNTS_DIR) From b38a8dd30a418fbd7b51c452c0c4308bbd77e6c0 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 11:31:39 +0200 Subject: [PATCH 07/11] Rearrange index. Allow randomized. --- accounts-db/src/account_storage.rs | 41 +++++++++++++++--------------- accounts-db/src/accounts_db.rs | 10 ++++++-- runtime/src/snapshot_utils.rs | 2 +- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index 95e515ecc36..fd1ec27f9cd 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -3,6 +3,7 @@ use { crate::accounts_db::{AccountStorageEntry, AccountsFileId}, dashmap::DashMap, + rand::seq::SliceRandom, rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, solana_clock::Slot, solana_nohash_hasher::{BuildNoHashHasher, IntMap}, @@ -298,44 +299,44 @@ impl Default for AccountStorageStatus { } pub struct AccountStoragesOrderBalancer<'a> { - small_to_large_ratio: (usize, usize), storages: &'a [Arc], indices: Vec, } impl<'a> AccountStoragesOrderBalancer<'a> { - pub fn new( + pub fn with_small_to_large_ratio( storages: &'a [Arc], small_to_large_ratio: (usize, usize), ) -> Self { - let mut indices: Vec = (0..storages.len()).collect(); + let len_range = 0..storages.len(); + let mut indices: Vec<_> = len_range.clone().collect(); indices.sort_unstable_by_key(|i| storages[*i].written_bytes()); - Self { - storages, - indices, - small_to_large_ratio, - } + indices.iter_mut().for_each(|i| { + *i = select_from_range_with_start_end_rates( + len_range.clone(), + *i, + small_to_large_ratio.clone(), + ) + }); + Self { storages, indices } + } + + pub fn randomized(storages: &'a [Arc]) -> Self { + let mut indices: Vec = (0..storages.len()).collect(); + indices.shuffle(&mut rand::thread_rng()); + Self { storages, indices } } pub fn into_iter(self) -> impl ExactSizeIterator + 'a { - (0..self.indices.len()).map(move |i| self.nth_storage(i)) + self.indices.into_iter().map(|i| self.storages[i].as_ref()) } pub fn into_par_iter( self, ) -> impl IndexedParallelIterator + 'a { - (0..self.indices.len()) + self.indices .into_par_iter() - .map(move |i| self.nth_storage(i)) - } - - fn nth_storage(&self, nth: usize) -> &'a AccountStorageEntry { - let range_index = select_from_range_with_start_end_rates( - 0..self.indices.len(), - nth, - self.small_to_large_ratio.clone(), - ); - self.storages[self.indices[range_index]].as_ref() + .map(|i| self.storages[i].as_ref()) } } diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 0db7bf5e50f..40f5bea28e8 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -5715,10 +5715,16 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { - let storages = AccountStoragesOrderBalancer::new(storages, (4, 1)); + const INTERLEAVE_SCAN_ACCOUNTS_SMALL_TO_LARGE_RATIO: (usize, usize) = (9, 1); + const SCAN_ACCOUNTS_BATCH_CYCLES_COUNT: usize = 1000; + let ordering_ratio = INTERLEAVE_SCAN_ACCOUNTS_SMALL_TO_LARGE_RATIO; + let storages = + AccountStoragesOrderBalancer::with_small_to_large_ratio(storages, ordering_ratio); let mut lt_hash = storages .into_par_iter() - .by_uniform_blocks(100) + .by_uniform_blocks( + (ordering_ratio.0 + ordering_ratio.1) * SCAN_ACCOUNTS_BATCH_CYCLES_COUNT, + ) .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index f7564486db6..4dc1fd0820b 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -1125,7 +1125,7 @@ fn archive_snapshot( .append_dir_all(SNAPSHOTS_DIR, &staging_snapshots_dir) .map_err(E::ArchiveSnapshotsDir)?; - let storages_orderer = AccountStoragesOrderBalancer::new( + let storages_orderer = AccountStoragesOrderBalancer::with_small_to_large_ratio( snapshot_storages, INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO, ); From eae5b5bac28bc79cdc115539a710652ae68535cd Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 12:40:09 +0200 Subject: [PATCH 08/11] Switch to randomized. Renames and comments. --- accounts-db/src/account_storage.rs | 16 +++++++++++++--- accounts-db/src/accounts_db.rs | 15 +++++---------- runtime/src/snapshot_utils.rs | 4 ++-- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index fd1ec27f9cd..67ff1f5ce6e 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -298,12 +298,21 @@ impl Default for AccountStorageStatus { } } -pub struct AccountStoragesOrderBalancer<'a> { +/// Wrapper over slice of `Arc` that provides an ordered access to storages. +/// +/// A few strategies are available for ordering storages: +/// - `with_small_to_large_ratio`: interleaving small and large storage written bytes +/// - `with_random_order`: orders storages randomly +pub struct AccountStoragesOrderer<'a> { storages: &'a [Arc], indices: Vec, } -impl<'a> AccountStoragesOrderBalancer<'a> { +impl<'a> AccountStoragesOrderer<'a> { + /// Create balaning orderer that interleaves storages with small and large written bytes. + /// + /// Storages are returned in cycles based on `small_to_large_ratio` - `ratio.0` small storages + /// preceding `ratio.1` large storages. pub fn with_small_to_large_ratio( storages: &'a [Arc], small_to_large_ratio: (usize, usize), @@ -321,7 +330,8 @@ impl<'a> AccountStoragesOrderBalancer<'a> { Self { storages, indices } } - pub fn randomized(storages: &'a [Arc]) -> Self { + /// Create randomizing orderer. + pub fn with_random_order(storages: &'a [Arc]) -> Self { let mut indices: Vec = (0..storages.len()).collect(); indices.shuffle(&mut rand::thread_rng()); Self { storages, indices } diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 40f5bea28e8..387cbd49d5d 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -22,7 +22,6 @@ mod geyser_plugin_utils; pub mod stats; pub mod tests; -use crate::account_storage::AccountStoragesOrderBalancer; #[cfg(test)] use crate::append_vec::StoredAccountMeta; #[cfg(feature = "dev-context-only-utils")] @@ -32,7 +31,7 @@ use { account_info::{AccountInfo, Offset, StorageLocation}, account_storage::{ stored_account_info::{StoredAccountInfo, StoredAccountInfoWithoutData}, - AccountStorage, AccountStorageStatus, ShrinkInProgress, + AccountStorage, AccountStorageStatus, AccountStoragesOrderer, ShrinkInProgress, }, accounts_cache::{AccountsCache, CachedAccount, SlotCache}, accounts_db::stats::{ @@ -5715,16 +5714,12 @@ impl AccountsDb { storages: &[Arc], duplicates_lt_hash: &DuplicatesLtHash, ) -> AccountsLtHash { - const INTERLEAVE_SCAN_ACCOUNTS_SMALL_TO_LARGE_RATIO: (usize, usize) = (9, 1); - const SCAN_ACCOUNTS_BATCH_CYCLES_COUNT: usize = 1000; - let ordering_ratio = INTERLEAVE_SCAN_ACCOUNTS_SMALL_TO_LARGE_RATIO; - let storages = - AccountStoragesOrderBalancer::with_small_to_large_ratio(storages, ordering_ratio); + // Randomized order works well with rayon work splitting, since we only care about + // uniform distribution of total work size per batch (other ordering strategies might be + // useful for optimizing disk read sizes and buffers usage in a single IO queue). + let storages = AccountStoragesOrderer::with_random_order(storages); let mut lt_hash = storages .into_par_iter() - .by_uniform_blocks( - (ordering_ratio.0 + ordering_ratio.1) * SCAN_ACCOUNTS_BATCH_CYCLES_COUNT, - ) .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 4dc1fd0820b..48c84f43120 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -21,7 +21,7 @@ use { log::*, regex::Regex, solana_accounts_db::{ - account_storage::{AccountStorageMap, AccountStoragesOrderBalancer}, + account_storage::{AccountStorageMap, AccountStoragesOrderer}, account_storage_reader::AccountStorageReader, accounts_db::{AccountStorageEntry, AtomicAccountsFileId}, accounts_file::{AccountsFile, AccountsFileError, StorageAccess}, @@ -1125,7 +1125,7 @@ fn archive_snapshot( .append_dir_all(SNAPSHOTS_DIR, &staging_snapshots_dir) .map_err(E::ArchiveSnapshotsDir)?; - let storages_orderer = AccountStoragesOrderBalancer::with_small_to_large_ratio( + let storages_orderer = AccountStoragesOrderer::with_small_to_large_ratio( snapshot_storages, INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO, ); From 582062e2a9ede702494076a4663c33dad8f5f3e9 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 18:05:46 +0200 Subject: [PATCH 09/11] Checks. --- accounts-db/src/account_storage.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index 67ff1f5ce6e..1ff9482f1ce 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -321,11 +321,7 @@ impl<'a> AccountStoragesOrderer<'a> { let mut indices: Vec<_> = len_range.clone().collect(); indices.sort_unstable_by_key(|i| storages[*i].written_bytes()); indices.iter_mut().for_each(|i| { - *i = select_from_range_with_start_end_rates( - len_range.clone(), - *i, - small_to_large_ratio.clone(), - ) + *i = select_from_range_with_start_end_rates(len_range.clone(), *i, small_to_large_ratio) }); Self { storages, indices } } @@ -337,10 +333,6 @@ impl<'a> AccountStoragesOrderer<'a> { Self { storages, indices } } - pub fn into_iter(self) -> impl ExactSizeIterator + 'a { - self.indices.into_iter().map(|i| self.storages[i].as_ref()) - } - pub fn into_par_iter( self, ) -> impl IndexedParallelIterator + 'a { @@ -350,6 +342,15 @@ impl<'a> AccountStoragesOrderer<'a> { } } +impl<'a> IntoIterator for AccountStoragesOrderer<'a> { + type Item = &'a AccountStorageEntry; + type IntoIter = Box + 'a>; + + fn into_iter(self) -> Self::IntoIter { + Box::new(self.indices.into_iter().map(|i| self.storages[i].as_ref())) + } +} + /// Select the `nth` (`0 <= nth < range.len()`) value from a `range`, choosing values alternately /// from its start or end according to a `start_rate : end_rate` ratio. /// From e97e74f50d518803c10f8681b608bebfa57a7a38 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 21:01:44 +0200 Subject: [PATCH 10/11] Adjustments for PR comments. --- accounts-db/src/account_storage.rs | 37 ++++++++++++++---------------- accounts-db/src/accounts_db.rs | 2 +- runtime/src/snapshot_utils.rs | 2 +- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index 1ff9482f1ce..addfd89ec02 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -4,7 +4,7 @@ use { crate::accounts_db::{AccountStorageEntry, AccountsFileId}, dashmap::DashMap, rand::seq::SliceRandom, - rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, + rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}, solana_clock::Slot, solana_nohash_hasher::{BuildNoHashHasher, IntMap}, std::{ @@ -301,15 +301,15 @@ impl Default for AccountStorageStatus { /// Wrapper over slice of `Arc` that provides an ordered access to storages. /// /// A few strategies are available for ordering storages: -/// - `with_small_to_large_ratio`: interleaving small and large storage written bytes +/// - `with_small_to_large_ratio`: interleaving small and large storage file sizes /// - `with_random_order`: orders storages randomly pub struct AccountStoragesOrderer<'a> { storages: &'a [Arc], - indices: Vec, + indices: Box<[usize]>, } impl<'a> AccountStoragesOrderer<'a> { - /// Create balaning orderer that interleaves storages with small and large written bytes. + /// Create balancing orderer that interleaves storages with small and large file sizes. /// /// Storages are returned in cycles based on `small_to_large_ratio` - `ratio.0` small storages /// preceding `ratio.1` large storages. @@ -319,35 +319,32 @@ impl<'a> AccountStoragesOrderer<'a> { ) -> Self { let len_range = 0..storages.len(); let mut indices: Vec<_> = len_range.clone().collect(); - indices.sort_unstable_by_key(|i| storages[*i].written_bytes()); + indices.sort_unstable_by_key(|i| storages[*i].capacity()); indices.iter_mut().for_each(|i| { *i = select_from_range_with_start_end_rates(len_range.clone(), *i, small_to_large_ratio) }); - Self { storages, indices } + Self { + storages, + indices: indices.into_boxed_slice(), + } } /// Create randomizing orderer. pub fn with_random_order(storages: &'a [Arc]) -> Self { let mut indices: Vec = (0..storages.len()).collect(); indices.shuffle(&mut rand::thread_rng()); - Self { storages, indices } + Self { + storages, + indices: indices.into_boxed_slice(), + } } - pub fn into_par_iter( - self, - ) -> impl IndexedParallelIterator + 'a { - self.indices - .into_par_iter() - .map(|i| self.storages[i].as_ref()) + pub fn iter(&'a self) -> impl Iterator + 'a { + self.indices.iter().map(|i| self.storages[*i].as_ref()) } -} - -impl<'a> IntoIterator for AccountStoragesOrderer<'a> { - type Item = &'a AccountStorageEntry; - type IntoIter = Box + 'a>; - fn into_iter(self) -> Self::IntoIter { - Box::new(self.indices.into_iter().map(|i| self.storages[i].as_ref())) + pub fn par_iter(&'a self) -> impl IndexedParallelIterator + 'a { + self.indices.par_iter().map(|i| self.storages[*i].as_ref()) } } diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 387cbd49d5d..b93f3c4af9e 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -5719,7 +5719,7 @@ impl AccountsDb { // useful for optimizing disk read sizes and buffers usage in a single IO queue). let storages = AccountStoragesOrderer::with_random_order(storages); let mut lt_hash = storages - .into_par_iter() + .par_iter() .fold(LtHash::identity, |mut accum, storage| { let obsolete_accounts = storage.get_obsolete_accounts(None); storage diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 48c84f43120..0ad669ac979 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -1129,7 +1129,7 @@ fn archive_snapshot( snapshot_storages, INTERLEAVE_TAR_ENTRIES_SMALL_TO_LARGE_RATIO, ); - for storage in storages_orderer.into_iter() { + for storage in storages_orderer.iter() { let path_in_archive = Path::new(ACCOUNTS_DIR) .join(AccountsFile::file_name(storage.slot(), storage.id())); From 6c0a181fb96a75d22d9bb374e368ac757f3120c1 Mon Sep 17 00:00:00 2001 From: Kamil Skalski Date: Tue, 29 Jul 2025 21:12:24 +0200 Subject: [PATCH 11/11] Switch to ExactSizeIterator. --- accounts-db/src/account_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts-db/src/account_storage.rs b/accounts-db/src/account_storage.rs index addfd89ec02..c48ca81b7cc 100644 --- a/accounts-db/src/account_storage.rs +++ b/accounts-db/src/account_storage.rs @@ -339,7 +339,7 @@ impl<'a> AccountStoragesOrderer<'a> { } } - pub fn iter(&'a self) -> impl Iterator + 'a { + pub fn iter(&'a self) -> impl ExactSizeIterator + 'a { self.indices.iter().map(|i| self.storages[*i].as_ref()) }