Discard pre hard fork persisted tower if hard-forking #13527
Changes from all commits
```diff
@@ -720,11 +720,38 @@ fn post_process_restored_tower(
     ledger_path: &Path,
     bank_forks: &BankForks,
 ) -> Tower {
+    let mut should_require_tower = config.require_tower;
+
     restored_tower
         .and_then(|tower| {
            let root_bank = bank_forks.root_bank();
            let slot_history = root_bank.get_slot_history();
-           tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history)
+           let tower = tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history);
+
+           if let Some(wait_slot_for_supermajority) = config.wait_for_supermajority {
+               if root_bank.slot() == wait_slot_for_supermajority {
+                   // intentionally fail to restore tower; we're supposedly in a new hard fork; past
+                   // out-of-chain vote state doesn't make sense at all
```
> **Contributor (author):** hmm. we need to check last_voted_slot?
>
> **Contributor:** This is probably fine for now, but how about storing the shred version in the persistent vote file? I think that might be more robust; we can just discard it if the shred version has a mismatch.
>
> **Contributor (author):** yeah, makes sense.
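A rough sketch of that reviewer suggestion, purely illustrative — the names (`SavedTowerFile`, `tower_bytes`, `restore_tower`) are hypothetical, not the PR's or the crate's actual API:

```rust
// Hypothetical on-disk wrapper for the persisted tower that also records the
// shred version it was saved under (the reviewer's proposed extra field).
struct SavedTowerFile {
    shred_version: u16,   // shred version at save time
    tower_bytes: Vec<u8>, // the serialized tower itself
}

// On restore, discard the tower outright if it was written under a different
// shred version, i.e. on the other side of a hard fork.
fn restore_tower(saved: SavedTowerFile, current_shred_version: u16) -> Option<Vec<u8>> {
    if saved.shred_version != current_shred_version {
        return None; // pre-fork tower: drop it instead of adjusting lockouts
    }
    Some(saved.tower_bytes)
}

fn main() {
    let saved = SavedTowerFile { shred_version: 1111, tower_bytes: vec![0u8; 4] };
    // the hard fork changed the current shred version => the stale tower is dropped
    assert!(restore_tower(saved, 2222).is_none());
}
```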
```diff
+                   // what if --wait-for-supermajority again if the validator restarted?
+                   let message = format!("Hardfork is detected; discarding tower restoration result: {:?}", tower);
```
> **Contributor (author):** oops!
>
> **Contributor (author):** nits: include …
```diff
+                   datapoint_error!(
+                       "tower_error",
+                       (
+                           "error",
+                           message,
+                           String
+                       ),
+                   );
+                   error!("{}", message);
+
+                   // unconditionally relax tower requirement so that we can always restore tower
+                   // from root bank.
+                   should_require_tower = false;
+                   return Err(crate::consensus::TowerError::HardFork(wait_slot_for_supermajority));
+               }
+           }
+
+           tower
        })
        .unwrap_or_else(|err| {
            let voting_has_been_active =
@@ -739,7 +766,7 @@ fn post_process_restored_tower(
                    ),
                );
            }
-           if config.require_tower && voting_has_been_active {
+           if should_require_tower && voting_has_been_active {
                error!("Requested mandatory tower restore failed: {}", err);
                error!(
                    "And there is an existing vote_account containing actual votes. \
```
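Read together, the two hunks implement one pattern: adjust lockouts as before, but if the restored root is exactly the `--wait-for-supermajority` slot, convert the restored tower into a `HardFork` error while also relaxing the mandatory-tower requirement, so startup can fall back to a freshly built tower. A minimal, self-contained sketch of that control flow, with simplified stand-in types rather than the validator's real signatures:

```rust
#[derive(Debug)]
enum TowerError {
    HardFork(u64),
}

// `u64` stands in for a restored `Tower`; the point is the shape of the flow:
// bind the adjusted tower, then deliberately turn it into an error when the
// root slot equals the --wait-for-supermajority (hard fork) slot, while
// relaxing `require_tower` so the fallback path is never treated as fatal.
fn post_process(
    restored: Result<u64, TowerError>,
    root_slot: u64,
    wait_for_supermajority: Option<u64>,
    require_tower: bool,
) -> (Result<u64, TowerError>, bool) {
    let mut should_require_tower = require_tower;
    let result = restored.and_then(|tower| {
        if Some(root_slot) == wait_for_supermajority {
            // hard fork detected: discard the pre-fork tower on purpose
            should_require_tower = false;
            return Err(TowerError::HardFork(root_slot));
        }
        Ok(tower)
    });
    (result, should_require_tower)
}

fn main() {
    // the restored tower is discarded because root == wait-for-supermajority slot
    let (result, must_have_tower) = post_process(Ok(42), 10, Some(10), true);
    assert!(result.is_err());
    assert!(!must_have_tower);
}
```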
```diff
@@ -40,5 +40,15 @@ pub trait Cluster {
     fn get_contact_info(&self, pubkey: &Pubkey) -> Option<&ContactInfo>;
     fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo;
     fn restart_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo);
+    fn create_restart_context(
+        &mut self,
+        pubkey: &Pubkey,
+        cluster_validator_info: &mut ClusterValidatorInfo,
+    ) -> (solana_core::cluster_info::Node, Option<ContactInfo>);
+    fn restart_node_with_context(
+        cluster_validator_info: ClusterValidatorInfo,
+        restart_context: (solana_core::cluster_info::Node, Option<ContactInfo>),
+    ) -> ClusterValidatorInfo;
+    fn add_restarted_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo);
```
> **Contributor (author)** on lines +43 to +52: some ugly api is needed to work around blocking …
```diff
     fn exit_restart_node(&mut self, pubkey: &Pubkey, config: ValidatorConfig);
 }
```
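One plausible wiring of these three methods (a sketch of how `restart_node` could be expressed in terms of the split API, not the PR's actual `LocalCluster` implementation), shown as it might appear inside an impl of this trait:

```rust
// Sketch only. The split exists because restart blocks inside Validator::new()
// when --wait-for-supermajority is set, so the blocking middle step must be
// callable without holding a lock on the cluster.
fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) {
    // step 1: under the lock, tear down and collect what the restart needs
    let restart_context = self.create_restart_context(pubkey, &mut cluster_validator_info);
    // step 2: the potentially blocking restart, requiring no &self
    let restarted = Self::restart_node_with_context(cluster_validator_info, restart_context);
    // step 3: back under the lock, re-register the node
    self.add_restarted_node(pubkey, restarted);
}
```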
```diff
@@ -1984,6 +1984,122 @@ fn test_future_tower_master_slave() {
     do_test_future_tower(ClusterMode::MasterSlave);
 }
 
+#[test]
+#[serial]
+fn test_hard_fork() {
```
> **Contributor (author):** there was no test or whatever for hard fork code.. hence this generic test name can be justified.
```diff
+    solana_logger::setup();
+
+    // First set up the cluster with 2 nodes
+    let slots_per_epoch = 2048;
+    let node_stakes = vec![60, 40];
+
+    let validator_keys = vec![
+        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
+        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
+    ]
+    .iter()
+    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
+    .take(node_stakes.len())
+    .collect::<Vec<_>>();
+    let validators = validator_keys
+        .iter()
+        .map(|(kp, _)| kp.pubkey())
+        .collect::<Vec<_>>();
+
+    let validator_a_pubkey = validators[0];
+    let validator_b_pubkey = validators[1];
+
+    let config = ClusterConfig {
+        cluster_lamports: 100_000,
+        node_stakes: node_stakes.clone(),
+        validator_configs: vec![ValidatorConfig::default(); node_stakes.len()],
+        validator_keys: Some(validator_keys),
+        slots_per_epoch,
+        stakers_slot_offset: slots_per_epoch,
+        skip_warmup_slots: true,
+        ..ClusterConfig::default()
+    };
+    let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(&config)));
+
+    let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
+
+    loop {
+        sleep(Duration::from_millis(100));
+
+        if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
+            if root >= 15 {
```
> **Contributor:** Let's call this root …
```diff
+                break;
+            }
+        }
+    }
+
+    let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
+    let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
+
+    // setup hard fork!
+    let hard_fork_slot = 10;
+    let hard_fork_slots = Some(vec![hard_fork_slot]);
+    let mut hard_forks = solana_sdk::hard_forks::HardForks::default();
+    hard_forks.register(hard_fork_slot);
+
+    let expected_shred_version = solana_sdk::shred_version::compute_shred_version(
+        &cluster.lock().unwrap().genesis_config.hash(),
+        Some(&hard_forks),
+    );
+
+    validator_a_info.config.new_hard_forks = hard_fork_slots.clone();
+    validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_a_info.config.expected_shred_version = Some(expected_shred_version);
+
+    validator_b_info.config.new_hard_forks = hard_fork_slots;
+    validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_b_info.config.expected_shred_version = Some(expected_shred_version);
+
+    // restart validator A first
+    let cluster_for_a = cluster.clone();
+    // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
+    let thread = std::thread::spawn(move || {
+        let restart_context = cluster_for_a
+            .lock()
+            .unwrap()
+            .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
+        let restarted_validator_info =
+            LocalCluster::restart_node_with_context(validator_a_info, restart_context);
+        cluster_for_a
+            .lock()
+            .unwrap()
+            .add_restarted_node(&validator_a_pubkey, restarted_validator_info);
+    });
+
+    // test that validator A actually waits for supermajority
+    let mut last_vote = None;
+    for _ in 0..10 {
+        sleep(Duration::from_millis(1000));
+
+        let new_last_vote = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
+        if let Some(last_vote) = last_vote {
+            assert_eq!(last_vote, new_last_vote);
+        } else {
+            last_vote = Some(new_last_vote);
+        }
+    }
+
+    // restart validator B normally
+    cluster
+        .lock()
+        .unwrap()
+        .restart_node(&validator_b_pubkey, validator_b_info);
+
+    // validator A should now start, so join its thread here
+    thread.join().unwrap();
+
+    // new slots should be rooted after the hard-fork cluster relaunch
+    cluster
+        .lock()
+        .unwrap()
+        .check_for_new_roots(16, &"hard fork");
+}
+
 #[test]
 #[serial]
 fn test_no_optimistic_confirmation_violation_with_tower() {
```
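For context on why both nodes also get `expected_shred_version`: registering a hard fork perturbs the computed shred version, which is what fences un-forked nodes out of the relaunched cluster. A standalone sketch of that computation, where `Hash::default()` is a stand-in for a real genesis hash:

```rust
use solana_sdk::{hard_forks::HardForks, hash::Hash, shred_version::compute_shred_version};

fn main() {
    let genesis_hash = Hash::default(); // stand-in for the cluster's real genesis hash

    // shred version with no hard fork registered
    let before = compute_shred_version(&genesis_hash, None);

    // register the same fork slot the test uses; this changes the computed
    // shred version, so pre-fork nodes no longer match the relaunched cluster
    let mut hard_forks = HardForks::default();
    hard_forks.register(10);
    let after = compute_shred_version(&genesis_hash, Some(&hard_forks));

    println!("shred version before fork: {}, after fork: {}", before, after);
}
```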
> Investigating why this is necessary, hopefully just a test issue 🙏

> @ryoqun, I commented this check out, but haven't been able to repro the failure you described. The test seems to be passing fine on several different machines.