forked from solana-labs/solana
-
Notifications
You must be signed in to change notification settings - Fork 1k
runtime: bench stakes cache #10760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
runtime: bench stakes cache #10760
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,200 @@ | ||
| #![allow(clippy::arithmetic_side_effects)] | ||
|
|
||
| use { | ||
| criterion::{Criterion, criterion_group, criterion_main}, | ||
| itertools::iproduct, | ||
| solana_account::{Account, AccountSharedData, ReadableAccount, state_traits::StateMut}, | ||
| solana_native_token::LAMPORTS_PER_SOL, | ||
| solana_pubkey::Pubkey, | ||
| solana_runtime::{ | ||
| bank::Bank, | ||
| genesis_utils::{ | ||
| GenesisConfigInfo, ValidatorVoteKeypairs, create_genesis_config_with_vote_accounts, | ||
| }, | ||
| }, | ||
| solana_sdk_ids::stake as stake_program, | ||
| solana_signer::Signer, | ||
| solana_stake_interface::{ | ||
| stake_flags::StakeFlags, | ||
| state::{Delegation, Meta, Stake, StakeStateV2}, | ||
| }, | ||
| solana_sysvar::epoch_rewards::{self, EpochRewards}, | ||
| solana_vote_interface::state::{MAX_LOCKOUT_HISTORY, VoteStateV4, VoteStateVersions}, | ||
| solana_vote_program::vote_state::process_slot_vote_unchecked, | ||
| std::{hint::black_box, sync::Arc, time::Duration}, | ||
| }; | ||
|
|
||
| #[cfg(not(any(target_env = "msvc", target_os = "freebsd")))] | ||
| #[global_allocator] | ||
| static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||
|
|
||
| const VOTE_ACCOUNTS: [usize; 2] = [10, 1_000]; | ||
| const STAKE_ACCOUNTS: [usize; 2] = [1_000, 1_000_000]; | ||
| const DELEGATED_STAKE_LAMPORTS: u64 = 1_000 * LAMPORTS_PER_SOL; | ||
| const VALIDATOR_STAKE_LAMPORTS: u64 = 1_000 * LAMPORTS_PER_SOL; | ||
| const GENESIS_MINT_LAMPORTS: u64 = 1_000_000 * LAMPORTS_PER_SOL; | ||
| const SYNTHETIC_VOTE_SLOTS: u64 = (MAX_LOCKOUT_HISTORY as u64) + 42; | ||
|
|
||
| /// Builds a stake account delegated to `vote_pubkey`. | ||
| /// | ||
| /// The account is owned by the stake program, is sized to `StakeStateV2::size_of()`, and | ||
| /// holds `rent_exempt_reserve + DELEGATED_STAKE_LAMPORTS` lamports. Its state is a | ||
| /// `StakeStateV2::Stake` delegating `DELEGATED_STAKE_LAMPORTS`; every field that is not set | ||
| /// explicitly keeps its `Default` value. | ||
| /// | ||
| /// Panics if the stake state cannot be written into the account. | ||
| fn create_stake_account(vote_pubkey: &Pubkey, rent_exempt_reserve: u64) -> Account { | ||
|     let stake_state = StakeStateV2::Stake( | ||
|         Meta { | ||
|             rent_exempt_reserve, | ||
|             ..Meta::default() | ||
|         }, | ||
|         Stake { | ||
|             delegation: Delegation { | ||
|                 voter_pubkey: *vote_pubkey, | ||
|                 stake: DELEGATED_STAKE_LAMPORTS, | ||
|                 ..Delegation::default() | ||
|             }, | ||
|             credits_observed: 0, | ||
|         }, | ||
|         StakeFlags::empty(), | ||
|     ); | ||
|
|
||
|     // The balance covers the rent-exempt reserve plus the delegated amount. | ||
|     let mut shared = AccountSharedData::new( | ||
|         rent_exempt_reserve + DELEGATED_STAKE_LAMPORTS, | ||
|         StakeStateV2::size_of(), | ||
|         &stake_program::id(), | ||
|     ); | ||
|     shared.set_state(&stake_state).unwrap(); | ||
|
|
||
|     Account::from(shared) | ||
| } | ||
|
|
||
| /// Replays synthetic votes into every listed vote account stored in `bank`. | ||
| /// | ||
| /// For each pubkey the account is loaded from the bank, its `VoteStateV4` is deserialized, | ||
| /// slots `0..SYNTHETIC_VOTE_SLOTS` are voted on in order, and the updated state is written | ||
| /// back to the bank as `VoteStateVersions::V4`. | ||
| /// | ||
| /// Panics if an account is missing, or if its state cannot be deserialized or re-serialized. | ||
| fn populate_vote_accounts(bank: &Bank, vote_pubkeys: Vec<Pubkey>) { | ||
|     for key in vote_pubkeys { | ||
|         let mut account = bank.get_account(&key).unwrap(); | ||
|         let mut state = VoteStateV4::deserialize(account.data(), &key).unwrap(); | ||
|
|
||
|         // SYNTHETIC_VOTE_SLOTS is MAX_LOCKOUT_HISTORY + 42, i.e. more votes than the | ||
|         // lockout history limit. | ||
|         (0..SYNTHETIC_VOTE_SLOTS).for_each(|slot| process_slot_vote_unchecked(&mut state, slot)); | ||
|
|
||
|         account | ||
|             .set_state(&VoteStateVersions::V4(Box::new(state))) | ||
|             .unwrap(); | ||
|         bank.store_account(&key, &account); | ||
|     } | ||
| } | ||
|
|
||
| /// Creates a bank positioned at the last slot of epoch 0. | ||
| /// | ||
| /// Genesis is built with `vote_accounts` random validators, each staked with | ||
| /// `VALIDATOR_STAKE_LAMPORTS`, plus `stake_accounts / vote_accounts` (integer division) | ||
| /// extra stake accounts delegated to each validator's vote account. The vote accounts are | ||
| /// then filled with synthetic votes before the returned child bank is created. | ||
| /// | ||
| /// Panics if `vote_accounts` is zero (division by zero). | ||
| fn setup_bank(vote_accounts: usize, stake_accounts: usize) -> Arc<Bank> { | ||
|     let validators: Vec<_> = std::iter::repeat_with(ValidatorVoteKeypairs::new_rand) | ||
|         .take(vote_accounts) | ||
|         .collect(); | ||
|     let validator_refs: Vec<_> = validators.iter().collect(); | ||
|
|
||
|     let GenesisConfigInfo { | ||
|         mut genesis_config, .. | ||
|     } = create_genesis_config_with_vote_accounts( | ||
|         GENESIS_MINT_LAMPORTS, | ||
|         &validator_refs, | ||
|         vec![VALIDATOR_STAKE_LAMPORTS; vote_accounts], | ||
|     ); | ||
|
|
||
|     let vote_pubkeys: Vec<Pubkey> = validators | ||
|         .iter() | ||
|         .map(|keys| keys.vote_keypair.pubkey()) | ||
|         .collect(); | ||
|
|
||
|     let stakes_per_vote = stake_accounts / vote_accounts; | ||
|     let reserve = genesis_config.rent.minimum_balance(StakeStateV2::size_of()); | ||
|
|
||
|     for vote_pubkey in &vote_pubkeys { | ||
|         // Every delegator of one vote account has identical contents, so build the account | ||
|         // once and clone it under fresh unique addresses. | ||
|         let template = create_stake_account(vote_pubkey, reserve); | ||
|         genesis_config | ||
|             .accounts | ||
|             .extend((0..stakes_per_vote).map(|_| (Pubkey::new_unique(), template.clone()))); | ||
|     } | ||
|
|
||
|     let genesis_bank = Arc::new(Bank::new_for_tests(&genesis_config)); | ||
|     populate_vote_accounts(&genesis_bank, vote_pubkeys); | ||
|
|
||
|     let last_slot_in_epoch = genesis_bank.get_slots_in_epoch(0).checked_sub(1).unwrap(); | ||
|     let bank = Bank::new_from_parent(genesis_bank, &Pubkey::default(), last_slot_in_epoch); | ||
|     Arc::new(bank) | ||
| } | ||
|
|
||
| /// Benchmarks crossing an epoch boundary. | ||
| /// | ||
| /// For every combination of `VOTE_ACCOUNTS` and `STAKE_ACCOUNTS`, a parent bank is set up | ||
| /// at the last slot of an epoch; each iteration measures creating its child bank at the | ||
| /// following slot. | ||
| fn bench_epoch_turnover(c: &mut Criterion) { | ||
|     let mut group = c.benchmark_group("bench_epoch_turnover"); | ||
|
|
||
|     for (vote_accounts, stake_accounts) in iproduct!(VOTE_ACCOUNTS, STAKE_ACCOUNTS) { | ||
|         let name = format!("{vote_accounts}_votes_{stake_accounts}_stakes"); | ||
|
|
||
|         // Setup happens outside `b.iter`, so it is not part of the measurement. | ||
|         let initial_bank = setup_bank(vote_accounts, stake_accounts); | ||
|         let first_epoch_slot = initial_bank.slot() + 1; | ||
|
|
||
|         group.bench_function(name.as_str(), move |b| { | ||
|             b.iter(|| { | ||
|                 // Only the `Arc` handle is cloned; every iteration shares the same parent. | ||
|                 let bank = Bank::new_from_parent( | ||
|                     Arc::clone(&initial_bank), | ||
|                     &Pubkey::default(), | ||
|                     first_epoch_slot, | ||
|                 ); | ||
|
|
||
|                 black_box(bank); | ||
|             }) | ||
|         }); | ||
|     } | ||
|
|
||
|     // Finish the group explicitly, as Criterion recommends, instead of relying on drop. | ||
|     group.finish(); | ||
| } | ||
|
|
||
| /// Benchmarks the rewards period that follows an epoch boundary. | ||
| /// | ||
| /// For every combination of `VOTE_ACCOUNTS` and `STAKE_ACCOUNTS`, a bank is created at the | ||
| /// first slot of a new epoch. Each iteration then measures creating one child bank per | ||
| /// subsequent slot, for as many slots as the `EpochRewards` sysvar reports partitions. | ||
| /// | ||
| /// Panics if the `EpochRewards` sysvar account is missing or cannot be deserialized. | ||
| fn bench_epoch_rewards_period(c: &mut Criterion) { | ||
|     let mut group = c.benchmark_group("bench_epoch_rewards_period"); | ||
|
|
||
|     for (vote_accounts, stake_accounts) in iproduct!(VOTE_ACCOUNTS, STAKE_ACCOUNTS) { | ||
|         let name = format!("{vote_accounts}_votes_{stake_accounts}_stakes"); | ||
|
|
||
|         let initial_bank = setup_bank(vote_accounts, stake_accounts); | ||
|         let first_epoch_slot = initial_bank.slot() + 1; | ||
|
|
||
|         // The epoch boundary itself is crossed here, outside the measured closure. | ||
|         let bank = Arc::new(Bank::new_from_parent( | ||
|             initial_bank, | ||
|             &Pubkey::default(), | ||
|             first_epoch_slot, | ||
|         )); | ||
|
|
||
|         let rewards_steps = bank | ||
|             .get_account(&epoch_rewards::id()) | ||
|             .and_then(|account| bincode::deserialize::<EpochRewards>(account.data()).ok()) | ||
|             .expect("EpochRewards sysvar must exist and deserialize at the first slot of the epoch") | ||
|             .num_partitions; | ||
|
|
||
|         let final_rewards_slot = first_epoch_slot + rewards_steps; | ||
|
|
||
|         group.bench_function(name.as_str(), move |b| { | ||
|             b.iter(|| { | ||
|                 let mut bank = Arc::clone(&bank); | ||
|
|
||
|                 // Advance one slot at a time, each new bank parented on the previous one. | ||
|                 for slot in (first_epoch_slot + 1)..=final_rewards_slot { | ||
|                     bank = Arc::new(Bank::new_from_parent(bank, &Pubkey::default(), slot)); | ||
|                 } | ||
|
|
||
|                 black_box(bank); | ||
|             }) | ||
|         }); | ||
|     } | ||
|
|
||
|     // Finish the group explicitly, as Criterion recommends, instead of relying on drop. | ||
|     group.finish(); | ||
| } | ||
|
|
||
| /// Criterion settings shared by all benchmarks in this file: 10 samples, a 1 second | ||
| /// warm-up and a 10 second measurement window. | ||
| fn config() -> Criterion { | ||
|     let warm_up = Duration::from_secs(1); | ||
|     let measurement = Duration::from_secs(10); | ||
|
|
||
|     Criterion::default() | ||
|         .sample_size(10) | ||
|         .warm_up_time(warm_up) | ||
|         .measurement_time(measurement) | ||
| } | ||
|
|
||
| criterion_group! { name = benches; config = config(); targets = bench_epoch_turnover, bench_epoch_rewards_period } | ||
| criterion_main!(benches); | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we enable jemalloc in this bench?
You'll also need to add the following to `Cargo.toml` of the local crate:
Without this line, the bench will use the glibc allocator, which is way slower in such scenarios. Usually when I profile benches without jemalloc, all I see is page faults and drops taking most of the time. 😅
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ive added jemalloc and changed the benches to use the product of trivial/full votes and trivial/full stakes. this makes it easy to add bigger cases when testing locally. tbh the complete case is already slow as hell tho, if anything jemalloc may have made it slightly worse
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for following up late!
Thanks!
Yeah, the unfortunate thing with criterion is that there is no way to set less than 10 samples and then there is no way to control how many iterations are done per sample. TBH I think we shouldn't use criterion for this bench - the approach of "statistical correctness" and hammering with lots of samples and iterations is correct for operations that take nano/microseconds (not milliseconds/seconds like epoch boundary) and are very CPU-bound (while epoch boundary is very heavy on memory operations).
I think our bench should be runnable with exactly one iteration, or a very small number like 5-10 (but the difference between iterations should be negligible). I see two options:
`bin` or `example`, like bench-tps or account-cluster-bench.
I'm trying approach 2) on my branch right now:
https://github.com/vadorovsky/agave/tree/20260221_stakescachebench
vadorovsky@f2a8b8c
The initial, literal rewrite (your bench rewritten as an "example" bin with a single run + some printlns) still took a lot of time to execute, and most of that time (over 300s) goes into setting up the bank:
The time spent on bank setup is absurd, and I was able to confirm that around 304s (out of 306s) goes into `Bank::new_for_tests`. I'm definitely going to spend some time profiling and figuring out how we can improve it.
I didn't believe it at first, but indeed it seems like jemalloc speeds up the bank initialization by 6s, but slows down the epoch boundary by 25ms:
jemalloc (as above):
glibc:
🤔
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm dumb and I forgot about a very important thing - running these benchmarks with `--profile release-with-debug`, so the binaries get optimized. Of course with the `debug` profile they take longer to execute.
The times look much better after that.
with jemalloc:
without jemalloc:
Still doesn't change the fact that the bank setup is unacceptably slow and that somehow jemalloc slows epoch turnover (but rewards period is faster). Sticking to jemalloc is still better IMO, since: a) that's what we're using in production; b) the setup is faster by seconds, and the turnover is slower by milliseconds.
The epoch turnover time of 184ms is faster than the one on mainnet - last time I profiled it it was 337ms. I think that's because in this benchmark, we are hard coding the account sizes, while on mainnet there are some larger accounts. We could think of applying some size heuristics like I did in the read-only cache benchmarks:
agave/accounts-db/benches/read_only_accounts_cache.rs
Lines 23 to 36 in 9491071
I guess these times are now somewhat OK for occasional runs, but I will still check if I can come up with some quick fixes for `Bank::new_for_tests`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i actually have an unrelated project that will need me to gather stats on all mainnet stake accounts, so ill look at the actual sizes while im at it!
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meh, I can't, at least for now...
I was able to narrow down that this loop takes pretty much the entire ~148s:
agave/runtime/src/bank.rs
Lines 2764 to 2773 in 045045d
Two things that look pretty sus to me are:
1) `create_account_shared_data`, which clones the data of the account.
2) `Bank::store_account` for each account separately (that involves refreshing bank hashes separately N times) instead of calling `Bank::store_accounts` for all accounts at once (that would refresh just once).
Point 1) could be fixed if we had an owned `GenesisConfig` instead of a `&GenesisConfig` reference available there - we could simply convert the owned `Account`s into `AccountSharedData`s without any cloning. But my attempt to do so wasted a solid few hours of my day, with the conclusion that I would need much more time. Not sure if it really makes sense at this point, unless there would be some perf gain for loading snapshots from such work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok i checked and im skeptical the perf difference is from nonstandard-size stakes. mainnet has 1.2m active stake accounts, 133 of which are 4008 bytes, with no other outliers