From 0601dde74489c7350ca861deb1ebc1446c87193e Mon Sep 17 00:00:00 2001 From: Brooks Prumo Date: Thu, 9 Dec 2021 11:18:46 -0600 Subject: [PATCH 1/5] Compute accounts data len during generate_index() --- runtime/src/accounts_db.rs | 75 ++++++++++++++++++++++++++++- runtime/src/serde_snapshot.rs | 24 +++++++-- runtime/src/serde_snapshot/tests.rs | 1 + 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index e7e3b60d967e82..2bee029e9ab26c 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -220,11 +220,17 @@ pub struct ErrorCounters { pub invalid_writable_account: usize, } +#[derive(Debug, Default, Clone, Copy)] +pub struct IndexGenerationInfo { + pub accounts_data_len: u64, +} + #[derive(Debug, Default, Clone, Copy)] struct SlotIndexGenerationInfo { insert_time_us: u64, num_accounts: u64, num_accounts_rent_exempt: u64, + accounts_data_len: u64, } #[derive(Default, Debug)] @@ -6676,6 +6682,7 @@ impl AccountsDb { let secondary = !self.account_indexes.is_empty(); + let mut accounts_data_len = 0; let mut num_accounts_rent_exempt = 0; let num_accounts = accounts_map.len(); let items = accounts_map.into_iter().map( @@ -6695,6 +6702,7 @@ impl AccountsDb { &self.account_indexes, ); } + accounts_data_len += stored_account.data().len() as u64; if !rent_collector.should_collect_rent(&pubkey, &stored_account, false) || { let (_rent_due, exempt) = rent_collector.get_rent_due(&stored_account); @@ -6729,6 +6737,7 @@ impl AccountsDb { insert_time_us, num_accounts: num_accounts as u64, num_accounts_rent_exempt, + accounts_data_len, } } @@ -6863,7 +6872,7 @@ impl AccountsDb { limit_load_slot_count_from_snapshot: Option, verify: bool, genesis_config: &GenesisConfig, - ) { + ) -> IndexGenerationInfo { let mut slots = self.storage.all_slots(); #[allow(clippy::stable_sort_primitive)] slots.sort(); @@ -6878,6 +6887,7 @@ impl AccountsDb { genesis_config.slots_per_year(), &genesis_config.rent, ); + let accounts_data_len = AtomicU64::new(0); // pass == 0 always runs and generates the index // pass == 1 only runs if verify == true. @@ -6934,9 +6944,12 @@ impl AccountsDb { insert_time_us: insert_us, num_accounts: total_this_slot, num_accounts_rent_exempt: rent_exempt_this_slot, + accounts_data_len: accounts_data_len_this_slot, } = self.generate_index_for_slot(accounts_map, slot, &rent_collector); rent_exempt.fetch_add(rent_exempt_this_slot, Ordering::Relaxed); total_duplicates.fetch_add(total_this_slot, Ordering::Relaxed); + accounts_data_len + .fetch_add(accounts_data_len_this_slot, Ordering::Relaxed); insert_us } else { // verify index matches expected and measure the time to get all items @@ -6990,6 +7003,62 @@ impl AccountsDb { }) .sum(); + // subtract data.len() from accounts_data_len for all old accounts that are in the index twice + if pass == 0 { + let pubkeys_to_accounts_data_len = |pubkeys: &[Pubkey]| { + let mut accounts_data_len_from_duplicates = 0; + pubkeys.iter().for_each(|pubkey| { + if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) { + let slot_list = entry.slot_list(); + if slot_list.len() < 2 { + return; + } + let mut slot_list = slot_list.clone(); + slot_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + assert!(slot_list[0].0 < slot_list[1].0); + slot_list + .into_iter() + .rev() + .skip(1) + .for_each(|(slot, account_info)| { + let maybe_storage_entry = self + .storage + .get_account_storage_entry(slot, account_info.store_id); + let mut accessor = LoadedAccountAccessor::Stored( + maybe_storage_entry + .map(|entry| (entry, account_info.offset)), + ); + let loaded_account = accessor.check_and_get_loaded_account(); + let account = loaded_account.take_account(); + accounts_data_len_from_duplicates += account.data().len(); + }); + } + }); + accounts_data_len_from_duplicates as u64 + }; + + let mut timer = Measure::start("handle accounts data len duplicates"); + let mut unique_pubkeys = HashSet::::default(); + self.uncleaned_pubkeys.iter().for_each(|entry| { + entry.value().iter().for_each(|pubkey| { + unique_pubkeys.insert(*pubkey); + }) + }); + let accounts_data_len_from_duplicates = unique_pubkeys + .into_iter() + .collect::>() + .par_chunks(4096) + .map(pubkeys_to_accounts_data_len) + .sum(); + accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); + timer.stop(); + info!( + "accounts data len: {}, {}", + accounts_data_len.load(Ordering::Relaxed), + timer + ); + } + let storage_info_timings = storage_info_timings.into_inner().unwrap(); let mut index_flush_us = 0; @@ -7027,6 +7096,10 @@ impl AccountsDb { } timings.report(); } + + IndexGenerationInfo { + accounts_data_len: accounts_data_len.load(Ordering::Relaxed), + } } fn update_storage_info( diff --git a/runtime/src/serde_snapshot.rs b/runtime/src/serde_snapshot.rs index 0f95d6e473adeb..ed91d276ca5944 100644 --- a/runtime/src/serde_snapshot.rs +++ b/runtime/src/serde_snapshot.rs @@ -5,7 +5,7 @@ use { accounts::Accounts, accounts_db::{ AccountShrinkThreshold, AccountStorageEntry, AccountsDb, AccountsDbConfig, AppendVecId, - BankHashInfo, + BankHashInfo, IndexGenerationInfo, }, accounts_index::AccountSecondaryIndexes, accounts_update_notifier_interface::AccountsUpdateNotifier, @@ -334,7 +334,7 @@ fn reconstruct_bank_from_fields( where E: SerializableStorage + std::marker::Sync, { - let accounts_db = reconstruct_accountsdb_from_fields( + let (accounts_db, reconstructed_accounts_db_info) = reconstruct_accountsdb_from_fields( snapshot_accounts_db_fields, account_paths, unpacked_append_vec_map, @@ -347,6 +347,10 @@ where accounts_db_config, accounts_update_notifier, )?; + debug!( + "accounts data len: {}", + reconstructed_accounts_db_info.accounts_data_len + ); let bank_rc = BankRc::new(Accounts::new_empty(accounts_db), bank_fields.slot); @@ -386,6 +390,12 @@ where Ok(()) } +/// This struct contains side-info while reconstructing the accounts DB from fields. +#[derive(Debug, Default, Copy, Clone)] +struct ReconstructedAccountsDbInfo { + accounts_data_len: u64, +} + #[allow(clippy::too_many_arguments)] fn reconstruct_accountsdb_from_fields( snapshot_accounts_db_fields: SnapshotAccountsDbFields, @@ -399,7 +409,7 @@ fn reconstruct_accountsdb_from_fields( verify_index: bool, accounts_db_config: Option, accounts_update_notifier: Option, -) -> Result +) -> Result<(AccountsDb, ReconstructedAccountsDbInfo), Error> where E: SerializableStorage + std::marker::Sync, { @@ -536,11 +546,12 @@ where }) .unwrap(); - let _ = accounts_db.generate_index( + let IndexGenerationInfo { accounts_data_len } = accounts_db.generate_index( limit_load_slot_count_from_snapshot, verify_index, genesis_config, ); + accounts_db.maybe_add_filler_accounts(&genesis_config.epoch_schedule); handle.join().unwrap(); @@ -557,5 +568,8 @@ where ("accountsdb-notify-at-start-us", measure_notify.as_us(), i64), ); - Ok(Arc::try_unwrap(accounts_db).unwrap()) + Ok(( + Arc::try_unwrap(accounts_db).unwrap(), + ReconstructedAccountsDbInfo { accounts_data_len }, + )) } diff --git a/runtime/src/serde_snapshot/tests.rs b/runtime/src/serde_snapshot/tests.rs index 5de81e824280c1..1ec3a97a7960cb 100644 --- a/runtime/src/serde_snapshot/tests.rs +++ b/runtime/src/serde_snapshot/tests.rs @@ -89,6 +89,7 @@ where Some(crate::accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING), None, ) + .map(|(accounts_db, _)| accounts_db) } #[cfg(test)] From 8387bcd46ec883659f2fa7218aaa87590b6fa8dc Mon Sep 17 00:00:00 2001 From: Brooks Prumo Date: Fri, 10 Dec 2021 08:43:20 -0600 Subject: [PATCH 2/5] pr: move accounts data len dedup timer into GenerateIndexTimings --- runtime/src/accounts_db.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 2bee029e9ab26c..ada5467611eef2 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -247,6 +247,7 @@ struct GenerateIndexTimings { pub index_flush_us: u64, pub rent_exempt: u64, pub total_duplicates: u64, + pub accounts_data_len_dedup_time_us: u64, } #[derive(Default, Debug, PartialEq)] @@ -293,6 +294,11 @@ impl GenerateIndexTimings { i64 ), ("total_items", self.total_items as i64, i64), + ( + "accounts_data_len_dedup_time_us", + self.accounts_data_len_dedup_time_us as i64, + i64 + ), ); } } @@ -7004,6 +7010,8 @@ impl AccountsDb { .sum(); // subtract data.len() from accounts_data_len for all old accounts that are in the index twice + let mut accounts_data_len_dedup_timer = + Measure::start("handle accounts data len duplicates"); if pass == 0 { let pubkeys_to_accounts_data_len = |pubkeys: &[Pubkey]| { let mut accounts_data_len_from_duplicates = 0; @@ -7037,7 +7045,6 @@ impl AccountsDb { accounts_data_len_from_duplicates as u64 }; - let mut timer = Measure::start("handle accounts data len duplicates"); let mut unique_pubkeys = HashSet::::default(); self.uncleaned_pubkeys.iter().for_each(|entry| { entry.value().iter().for_each(|pubkey| { @@ -7051,13 +7058,12 @@ impl AccountsDb { .map(pubkeys_to_accounts_data_len) .sum(); accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); - timer.stop(); info!( - "accounts data len: {}, {}", - accounts_data_len.load(Ordering::Relaxed), - timer + "accounts data len: {}", + accounts_data_len.load(Ordering::Relaxed) ); } + accounts_data_len_dedup_timer.stop(); let storage_info_timings = storage_info_timings.into_inner().unwrap(); @@ -7083,6 +7089,7 @@ impl AccountsDb { storage_size_accounts_map_us: storage_info_timings.storage_size_accounts_map_us, storage_size_accounts_map_flatten_us: storage_info_timings .storage_size_accounts_map_flatten_us, + accounts_data_len_dedup_time_us: accounts_data_len_dedup_timer.as_us(), ..GenerateIndexTimings::default() }; From 77b890be78dd91030d9bcf4112703207f003158b Mon Sep 17 00:00:00 2001 From: Brooks Prumo Date: Fri, 10 Dec 2021 08:49:10 -0600 Subject: [PATCH 3/5] pr: move closure to fn --- runtime/src/accounts_db.rs | 66 +++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index ada5467611eef2..49c51760a380b9 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -7013,38 +7013,6 @@ impl AccountsDb { let mut accounts_data_len_dedup_timer = Measure::start("handle accounts data len duplicates"); if pass == 0 { - let pubkeys_to_accounts_data_len = |pubkeys: &[Pubkey]| { - let mut accounts_data_len_from_duplicates = 0; - pubkeys.iter().for_each(|pubkey| { - if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) { - let slot_list = entry.slot_list(); - if slot_list.len() < 2 { - return; - } - let mut slot_list = slot_list.clone(); - slot_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - assert!(slot_list[0].0 < slot_list[1].0); - slot_list - .into_iter() - .rev() - .skip(1) - .for_each(|(slot, account_info)| { - let maybe_storage_entry = self - .storage - .get_account_storage_entry(slot, account_info.store_id); - let mut accessor = LoadedAccountAccessor::Stored( - maybe_storage_entry - .map(|entry| (entry, account_info.offset)), - ); - let loaded_account = accessor.check_and_get_loaded_account(); - let account = loaded_account.take_account(); - accounts_data_len_from_duplicates += account.data().len(); - }); - } - }); - accounts_data_len_from_duplicates as u64 - }; - let mut unique_pubkeys = HashSet::::default(); self.uncleaned_pubkeys.iter().for_each(|entry| { entry.value().iter().for_each(|pubkey| { @@ -7055,7 +7023,7 @@ impl AccountsDb { .into_iter() .collect::>() .par_chunks(4096) - .map(pubkeys_to_accounts_data_len) + .map(|pubkeys| self.pubkeys_to_accounts_data_len(pubkeys)) .sum(); accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); info!( @@ -7109,6 +7077,38 @@ impl AccountsDb { } } + /// Used during generate_index() to get the accounts data len from the given pubkeys + fn pubkeys_to_accounts_data_len(&self, pubkeys: &[Pubkey]) -> u64 { + let mut accounts_data_len_from_duplicates = 0; + pubkeys.iter().for_each(|pubkey| { + if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) { + let slot_list = entry.slot_list(); + if slot_list.len() < 2 { + return; + } + let mut slot_list = slot_list.clone(); + slot_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + assert!(slot_list[0].0 < slot_list[1].0); + slot_list + .into_iter() + .rev() + .skip(1) + .for_each(|(slot, account_info)| { + let maybe_storage_entry = self + .storage + .get_account_storage_entry(slot, account_info.store_id); + let mut accessor = LoadedAccountAccessor::Stored( + maybe_storage_entry.map(|entry| (entry, account_info.offset)), + ); + let loaded_account = accessor.check_and_get_loaded_account(); + let account = loaded_account.take_account(); + accounts_data_len_from_duplicates += account.data().len(); + }); + } + }); + accounts_data_len_from_duplicates as u64 + } + fn update_storage_info( storage_info: &StorageSizeAndCountMap, accounts_map: &GenerateIndexAccountsMap<'_>, From 57d88733b5eae88b4cbbcf41068fb1a1d016d75b Mon Sep 17 00:00:00 2001 From: Brooks Prumo Date: Fri, 10 Dec 2021 09:07:35 -0600 Subject: [PATCH 4/5] pr: sort descending --- runtime/src/accounts_db.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 49c51760a380b9..1a320dd1fc50a8 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -7023,7 +7023,7 @@ impl AccountsDb { .into_iter() .collect::>() .par_chunks(4096) - .map(|pubkeys| self.pubkeys_to_accounts_data_len(pubkeys)) + .map(|pubkeys| self.pubkeys_to_duplicate_accounts_data_len(pubkeys)) .sum(); accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); info!( @@ -7077,8 +7077,8 @@ impl AccountsDb { } } - /// Used during generate_index() to get the accounts data len from the given pubkeys - fn pubkeys_to_accounts_data_len(&self, pubkeys: &[Pubkey]) -> u64 { + /// Used during generate_index() to get the _duplicate_ accounts data len from the given pubkeys + fn pubkeys_to_duplicate_accounts_data_len(&self, pubkeys: &[Pubkey]) -> u64 { let mut accounts_data_len_from_duplicates = 0; pubkeys.iter().for_each(|pubkey| { if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) { @@ -7086,12 +7086,13 @@ impl AccountsDb { if slot_list.len() < 2 { return; } + // Only the account data len in the highest slot should be used, and the rest are + // duplicates. So sort the slot list in descending slot order, skip the first + // item, then sum up the remaining data len, which are the duplicates. let mut slot_list = slot_list.clone(); - slot_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - assert!(slot_list[0].0 < slot_list[1].0); + slot_list.sort_unstable_by(|a, b| b.0.cmp(&a.0)); slot_list .into_iter() - .rev() .skip(1) .for_each(|(slot, account_info)| { let maybe_storage_entry = self From 88a73daeffb4a7e90121180886558e89cadb70bb Mon Sep 17 00:00:00 2001 From: Brooks Prumo Date: Fri, 10 Dec 2021 09:22:17 -0600 Subject: [PATCH 5/5] fixup: use select_nth_unstable() --- runtime/src/accounts_db.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 1a320dd1fc50a8..6a8f91f6678759 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -7090,14 +7090,14 @@ impl AccountsDb { // duplicates. So sort the slot list in descending slot order, skip the first // item, then sum up the remaining data len, which are the duplicates. let mut slot_list = slot_list.clone(); - slot_list.sort_unstable_by(|a, b| b.0.cmp(&a.0)); slot_list - .into_iter() - .skip(1) + .select_nth_unstable_by(0, |a, b| b.0.cmp(&a.0)) + .2 + .iter() .for_each(|(slot, account_info)| { let maybe_storage_entry = self .storage - .get_account_storage_entry(slot, account_info.store_id); + .get_account_storage_entry(*slot, account_info.store_id); let mut accessor = LoadedAccountAccessor::Stored( maybe_storage_entry.map(|entry| (entry, account_info.offset)), );