From f2f7dcb771b69e42168d5685ca878591e9d1c75e Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Sat, 19 Sep 2020 14:03:54 +0900 Subject: [PATCH 1/9] Persistent tower (#10718) * Save/restore Tower * Avoid unwrap() * Rebase cleanups * Forcibly pass test * Correct reconcilation of votes after validator resume * d b g * Add more tests * fsync and fix test * Add test * Fix fmt * Debug * Fix tests... * save * Clarify error message and code cleaning around it * Move most of code out of tower save hot codepath * Proper comment for the lack of fsync on tower * Clean up * Clean up * Simpler type alias * Manage tower-restored ancestor slots without banks * Add comment * Extract long code blocks... * Add comment * Simplify returned tuple... * Tweak too aggresive log * Fix typo... * Add test * Update comment * Improve test to require non-empty stray restored slots * Measure tower save and dump all tower contents * Log adjust and add threshold related assertions * cleanup adjust * Properly lower stray restored slots priority... * Rust fmt * Fix test.... * Clarify comments a bit and add TowerError::TooNew * Further clean-up arround TowerError * Truly create ancestors by excluding last vote slot * Add comment for stray_restored_slots * Add comment for stray_restored_slots * Use BTreeSet * Consider root_slot into post-replay adjustment * Tweak logging * Add test for stray_restored_ancestors * Reorder some code * Better names for unit tests * Add frozen_abi to SavedTower * Fold long lines * Tweak stray ancestors and too old slot history * Re-adjust error conditon of too old slot history * Test normal ancestors is checked before stray ones * Fix conflict, update tests, adjust behavior a bit * Fix test * Address review comments * Last touch! * Immediately after creating cleaning pr * Revert stray slots * Revert comment... * Report error as metrics * Revert not to panic! and ignore unfixable test... * Normalize lockouts.root_slot more strictly * Add comments for panic! and more assertions * Proper initialize root without vote account * Clarify code and comments based on review feedback * Fix rebase * Further simplify based on assured tower root * Reorder code for more readability Co-authored-by: Michael Vines --- core/benches/consensus.rs | 36 + core/src/consensus.rs | 1149 +++++++++++++++++++++- core/src/heaviest_subtree_fork_choice.rs | 108 +- core/src/replay_stage.rs | 127 ++- core/src/tvu.rs | 5 + core/src/validator.rs | 108 +- ledger/src/ancestor_iterator.rs | 36 + local-cluster/tests/local_cluster.rs | 233 ++++- multinode-demo/bootstrap-validator.sh | 1 + multinode-demo/validator.sh | 1 + programs/vote/src/vote_state/mod.rs | 3 + run.sh | 1 + runtime/src/bank.rs | 4 + sdk/src/slot_history.rs | 6 +- validator/src/main.rs | 3 +- 15 files changed, 1714 insertions(+), 107 deletions(-) create mode 100644 core/benches/consensus.rs diff --git a/core/benches/consensus.rs b/core/benches/consensus.rs new file mode 100644 index 00000000000..64035f4c3af --- /dev/null +++ b/core/benches/consensus.rs @@ -0,0 +1,36 @@ +#![feature(test)] + +extern crate solana_core; +extern crate test; + +use solana_core::consensus::Tower; +use solana_runtime::bank::Bank; +use solana_runtime::bank_forks::BankForks; +use solana_sdk::{ + pubkey::Pubkey, + signature::{Keypair, Signer}, +}; +use std::sync::Arc; +use tempfile::TempDir; +use test::Bencher; + +#[bench] +fn bench_save_tower(bench: &mut Bencher) { + let dir = TempDir::new().unwrap(); + let path = dir.path(); + + let vote_account_pubkey = &Pubkey::default(); + let node_keypair = Arc::new(Keypair::new()); + let heaviest_bank = BankForks::new(Bank::default()).working_bank(); + let tower = Tower::new( + &node_keypair.pubkey(), + &vote_account_pubkey, + 0, + &heaviest_bank, + &path, + ); + + bench.iter(move || { + tower.save(&node_keypair).unwrap(); + }); +} diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 5554fc2edfc..475ae873e83 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -3,6 +3,8 @@ use crate::{ pubkey_references::PubkeyReferences, }; use chrono::prelude::*; +use solana_ledger::{ancestor_iterator::AncestorIterator, blockstore::Blockstore, blockstore_db}; +use solana_measure::measure::Measure; use solana_runtime::{bank::Bank, bank_forks::BankForks, commitment::VOTE_THRESHOLD_SIZE}; use solana_sdk::{ account::Account, @@ -10,16 +12,23 @@ use solana_sdk::{ hash::Hash, instruction::Instruction, pubkey::Pubkey, + signature::{Keypair, Signature, Signer}, + slot_history::{Check, SlotHistory}, }; use solana_vote_program::{ vote_instruction, vote_state::{BlockTimestamp, Lockout, Vote, VoteState, MAX_LOCKOUT_HISTORY}, }; use std::{ + cmp::Ordering, collections::{HashMap, HashSet}, + fs::{self, File}, + io::BufReader, ops::Bound::{Included, Unbounded}, - sync::{Arc, RwLock}, + path::{Path, PathBuf}, + sync::Arc, }; +use thiserror::Error; #[derive(PartialEq, Clone, Debug)] pub enum SwitchForkDecision { @@ -57,6 +66,8 @@ impl SwitchForkDecision { pub const VOTE_THRESHOLD_DEPTH: usize = 8; pub const SWITCH_FORK_THRESHOLD: f64 = 0.38; +pub type Result = std::result::Result; + pub type Stake = u64; pub type VotedStakes = HashMap; pub type PubkeyVotes = Vec<(Pubkey, Slot)>; @@ -72,7 +83,7 @@ pub(crate) struct ComputedBankState { } #[frozen_abi(digest = "2ZUeCLMVQxmHYbeqMH7M97ifVSKoVErGvRHzyxcQRjgU")] -#[derive(Serialize, AbiExample)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)] pub struct Tower { node_pubkey: Pubkey, threshold_depth: usize, @@ -80,18 +91,34 @@ pub struct Tower { lockouts: VoteState, last_vote: Vote, last_timestamp: BlockTimestamp, + #[serde(skip)] + path: PathBuf, + #[serde(skip)] + tmp_path: PathBuf, // used before atomic fs::rename() + #[serde(skip)] + // Restored last voted slot which cannot be found in SlotHistory at replayed root + // (This is a special field for slashing-free validator restart with edge cases). + // This could be emptied after some time; but left intact indefinitely for easier + // implementation + stray_restored_slot: Option, } impl Default for Tower { fn default() -> Self { - Self { + let mut tower = Self { node_pubkey: Pubkey::default(), threshold_depth: VOTE_THRESHOLD_DEPTH, threshold_size: VOTE_THRESHOLD_SIZE, lockouts: VoteState::default(), last_vote: Vote::default(), last_timestamp: BlockTimestamp::default(), - } + path: PathBuf::default(), + tmp_path: PathBuf::default(), + stray_restored_slot: Option::default(), + }; + // VoteState::root_slot is ensured to be Some in Tower + tower.lockouts.root_slot = Some(Slot::default()); + tower } } @@ -100,14 +127,23 @@ impl Tower { node_pubkey: &Pubkey, vote_account_pubkey: &Pubkey, root: Slot, - heaviest_bank: &Bank, + bank: &Bank, + path: &Path, ) -> Self { - let mut tower = Self::new_with_key(node_pubkey); - tower.initialize_lockouts_from_bank_forks(vote_account_pubkey, root, heaviest_bank); + let path = Self::get_filename(&path, node_pubkey); + let tmp_path = Self::get_tmp_filename(&path); + let mut tower = Self { + node_pubkey: *node_pubkey, + path, + tmp_path, + ..Tower::default() + }; + tower.initialize_lockouts_from_bank(vote_account_pubkey, root, bank); tower } + #[cfg(test)] pub fn new_with_key(node_pubkey: &Pubkey) -> Self { Self { node_pubkey: *node_pubkey, @@ -124,6 +160,40 @@ impl Tower { } } + pub fn new_from_bankforks( + bank_forks: &BankForks, + ledger_path: &Path, + my_pubkey: &Pubkey, + vote_account: &Pubkey, + ) -> Self { + let root_bank = bank_forks.root_bank(); + let (_progress, heaviest_subtree_fork_choice, unlock_heaviest_subtree_fork_choice_slot) = + crate::replay_stage::ReplayStage::initialize_progress_and_fork_choice( + root_bank, + bank_forks.frozen_banks().values().cloned().collect(), + &my_pubkey, + &vote_account, + ); + let root = root_bank.slot(); + + let heaviest_bank = if root > unlock_heaviest_subtree_fork_choice_slot { + bank_forks + .get(heaviest_subtree_fork_choice.best_overall_slot()) + .expect("The best overall slot must be one of `frozen_banks` which all exist in bank_forks") + .clone() + } else { + Tower::find_heaviest_bank(&bank_forks, &my_pubkey).unwrap_or_else(|| root_bank.clone()) + }; + + Self::new( + &my_pubkey, + &vote_account, + root, + &heaviest_bank, + &ledger_path, + ) + } + pub(crate) fn collect_vote_lockouts( node_pubkey: &Pubkey, bank_slot: Slot, @@ -324,17 +394,17 @@ impl Tower { } #[cfg(test)] - fn record_vote(&mut self, slot: Slot, hash: Hash) -> Option { + pub fn record_vote(&mut self, slot: Slot, hash: Hash) -> Option { let vote = Vote::new(vec![slot], hash); self.record_bank_vote(vote) } - pub fn last_vote(&self) -> &Vote { - &self.last_vote + pub fn last_voted_slot(&self) -> Option { + self.last_vote.last_voted_slot() } - pub fn last_voted_slot(&self) -> Option { - self.last_vote().last_voted_slot() + pub fn stray_restored_slot(&self) -> Option { + self.stray_restored_slot } pub fn last_vote_and_timestamp(&mut self) -> Vote { @@ -359,6 +429,13 @@ impl Tower { None } + // root may be forcibly set by arbitrary replay root slot, for example from a root + // after replaying a snapshot. + // Also, tower.root() couldn't be None; do_initialize_lockouts() ensures that. + // Conceptually, every tower must have been constructed from a concrete starting point, + // which establishes the origin of trust (i.e. root) whether booting from genesis (slot 0) or + // snapshot (slot N). In other words, there should be no possibility a Tower doesn't have + // root, unlike young vote accounts. pub fn root(&self) -> Option { self.lockouts.root_slot } @@ -403,7 +480,13 @@ impl Tower { // This case should never happen because bank forks purges all // non-descendants of the root every time root is set if slot != root_slot { - assert!(ancestors[&slot].contains(&root_slot)); + assert!( + ancestors[&slot].contains(&root_slot), + "ancestors: {:?}, slot: {} root: {}", + ancestors[&slot], + slot, + root_slot + ); } } @@ -419,10 +502,31 @@ impl Tower { total_stake: u64, epoch_vote_accounts: &HashMap, ) -> SwitchForkDecision { - let root = self.lockouts.root_slot.unwrap_or(0); self.last_voted_slot() .map(|last_voted_slot| { - let last_vote_ancestors = ancestors.get(&last_voted_slot).unwrap(); + let root = self.lockouts.root_slot.unwrap_or(0); + let empty_ancestors = HashSet::default(); + + let last_vote_ancestors = + ancestors.get(&last_voted_slot).unwrap_or_else(|| { + if !self.is_stray_last_vote() { + // Unless last vote is stray, ancestors.get(last_voted_slot) must + // return Some(_), justifying to panic! here. + // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, + // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be + // touched in that case as well. + // In other words, except being stray, all other slots have been voted on while + // this validator has been running, so we must be able to fetch ancestors for + // all of them. + panic!("no ancestors found with slot: {}", last_voted_slot); + } else { + // bank_forks doesn't have corresponding data for the stray restored last vote, + // meaning some inconsistency between saved tower and ledger. + // (newer snapshot, or only a saved tower is moved over to new setup?) + &empty_ancestors + } + }); + let switch_slot_ancestors = ancestors.get(&switch_slot).unwrap(); if switch_slot == last_voted_slot || switch_slot_ancestors.contains(&last_voted_slot) { @@ -579,13 +683,11 @@ impl Tower { } pub(crate) fn find_heaviest_bank( - bank_forks: &RwLock, + bank_forks: &BankForks, node_pubkey: &Pubkey, ) -> Option> { - let ancestors = bank_forks.read().unwrap().ancestors(); + let ancestors = bank_forks.ancestors(); let mut bank_weights: Vec<_> = bank_forks - .read() - .unwrap() .frozen_banks() .values() .map(|b| { @@ -637,36 +739,367 @@ impl Tower { bank_weight } - fn initialize_lockouts_from_bank_forks( + fn voted_slots(&self) -> Vec { + self.lockouts + .votes + .iter() + .map(|lockout| lockout.slot) + .collect() + } + + pub fn is_stray_last_vote(&self) -> bool { + if let Some(last_voted_slot) = self.last_voted_slot() { + if let Some(stray_restored_slot) = self.stray_restored_slot { + return stray_restored_slot == last_voted_slot; + } + } + + false + } + + // The tower root can be older/newer if the validator booted from a newer/older snapshot, so + // tower lockouts may need adjustment + pub fn adjust_lockouts_after_replay( + self, + replayed_root: Slot, + slot_history: &SlotHistory, + ) -> Result { + info!( + "adjusting lockouts (after replay up to {}): {:?}", + replayed_root, + self.voted_slots() + ); + + // sanity assertions for roots + assert_eq!(slot_history.check(replayed_root), Check::Found); + assert!(self.root().is_some()); + let tower_root = self.root().unwrap(); + // reconcile_blockstore_roots_with_tower() should already have aligned these. + assert!( + tower_root <= replayed_root, + format!( + "tower root: {:?} >= replayed root slot: {}", + tower_root, replayed_root + ) + ); + assert!( + self.last_vote == Vote::default() && self.lockouts.votes.is_empty() + || self.last_vote != Vote::default() && !self.lockouts.votes.is_empty(), + format!( + "last vote: {:?} lockouts.votes: {:?}", + self.last_vote, self.lockouts.votes + ) + ); + + // return immediately if votes are empty... + if self.lockouts.votes.is_empty() { + return Ok(self); + } + + let last_voted_slot = self.last_voted_slot().unwrap(); + if slot_history.check(last_voted_slot) == Check::TooOld { + // We could try hard to anchor with other older votes, but opt to simplify the + // following logic + return Err(TowerError::TooOldTower( + last_voted_slot, + slot_history.oldest(), + )); + } + + self.do_adjust_lockouts_after_replay(tower_root, replayed_root, slot_history) + } + + fn do_adjust_lockouts_after_replay( + mut self, + tower_root: Slot, + replayed_root: Slot, + slot_history: &SlotHistory, + ) -> Result { + // retained slots will be consisted only from divergent slots + let mut retain_flags_for_each_vote_in_reverse: Vec<_> = + Vec::with_capacity(self.lockouts.votes.len()); + + let mut still_in_future = true; + let mut past_outside_history = false; + let mut checked_slot = None; + let mut anchored_slot = None; + + let mut slots_in_tower = vec![tower_root]; + slots_in_tower.extend(self.voted_slots()); + + // iterate over votes + root (if any) in the newest => oldest order + // bail out early if bad condition is found + for slot_in_tower in slots_in_tower.iter().rev() { + let check = slot_history.check(*slot_in_tower); + + if anchored_slot.is_none() && check == Check::Found { + anchored_slot = Some(*slot_in_tower); + } else if anchored_slot.is_some() && check == Check::NotFound { + // this can't happen unless we're fed with bogus snapshot + return Err(TowerError::FatallyInconsistent("diverged ancestor?")); + } + + if still_in_future && check != Check::Future { + still_in_future = false; + } else if !still_in_future && check == Check::Future { + // really odd cases: bad ordered votes? + return Err(TowerError::FatallyInconsistent("time warped?")); + } + if !past_outside_history && check == Check::TooOld { + past_outside_history = true; + } else if past_outside_history && check != Check::TooOld { + // really odd cases: bad ordered votes? + return Err(TowerError::FatallyInconsistent( + "not too old once after got too old?", + )); + } + + if let Some(checked_slot) = checked_slot { + // This is really special, only if tower is initialized (root = slot 0) for genesis and contains + // a vote (= slot 0) for the genesis, the slot 0 can repeat only once + let voting_from_genesis = *slot_in_tower == checked_slot && *slot_in_tower == 0; + + if !voting_from_genesis { + // Unless we're voting since genesis, slots_in_tower must always be older than last checked_slot + // including all vote slot and the root slot. + assert!(*slot_in_tower < checked_slot) + } + } + + checked_slot = Some(*slot_in_tower); + + retain_flags_for_each_vote_in_reverse.push(anchored_slot.is_none()); + } + + // Check for errors if not anchored + info!("adjusted tower's anchored slot: {:?}", anchored_slot); + if anchored_slot.is_none() { + // this error really shouldn't happen unless ledger/tower is corrupted + return Err(TowerError::FatallyInconsistent( + "no common slot for rooted tower", + )); + } + + assert_eq!( + slots_in_tower.len(), + retain_flags_for_each_vote_in_reverse.len() + ); + // pop for the tower root + retain_flags_for_each_vote_in_reverse.pop(); + let mut retain_flags_for_each_vote = + retain_flags_for_each_vote_in_reverse.into_iter().rev(); + + let original_votes_len = self.lockouts.votes.len(); + self.do_initialize_lockouts(replayed_root, move |_| { + retain_flags_for_each_vote.next().unwrap() + }); + + if self.lockouts.votes.is_empty() { + info!( + "All restored votes were behind replayed_root({}); resetting root_slot and last_vote in tower!", + replayed_root + ); + // we might not have banks for those votes so just reset. + // That's because the votes may well past replayed_root + self.last_vote = Vote::default(); + } else { + info!( + "{} restored votes (out of {}) were on different fork or are upcoming votes on unrooted slots: {:?}!", + self.voted_slots().len(), + original_votes_len, + self.voted_slots() + ); + + assert_eq!( + self.last_vote.last_voted_slot().unwrap(), + *self.voted_slots().last().unwrap() + ); + self.stray_restored_slot = Some(self.last_vote.last_voted_slot().unwrap()); + } + + Ok(self) + } + + fn initialize_lockouts_from_bank( &mut self, vote_account_pubkey: &Pubkey, root: Slot, - heaviest_bank: &Bank, + bank: &Bank, ) { - if let Some((_stake, vote_account)) = heaviest_bank.vote_accounts().get(vote_account_pubkey) - { - let mut vote_state = VoteState::deserialize(&vote_account.data) + if let Some((_stake, vote_account)) = bank.vote_accounts().get(vote_account_pubkey) { + let vote_state = VoteState::deserialize(&vote_account.data) .expect("vote_account isn't a VoteState?"); - vote_state.root_slot = Some(root); - vote_state.votes.retain(|v| v.slot > root); + self.lockouts = vote_state; + self.do_initialize_lockouts(root, |v| v.slot > root); trace!( "{} lockouts initialized to {:?}", self.node_pubkey, - vote_state + self.lockouts ); assert_eq!( - vote_state.node_pubkey, self.node_pubkey, + self.lockouts.node_pubkey, self.node_pubkey, "vote account's node_pubkey doesn't match", ); - self.lockouts = vote_state; } else { + self.do_initialize_lockouts(root, |_| true); info!( - "vote account({}) not found in heaviest bank (slot={})", + "vote account({}) not found in bank (slot={})", vote_account_pubkey, - heaviest_bank.slot() + bank.slot() + ); + } + } + + fn do_initialize_lockouts bool>(&mut self, root: Slot, should_retain: F) { + // Updating root is needed to correctly restore from newly-saved tower for the next + // boot + self.lockouts.root_slot = Some(root); + self.lockouts.votes.retain(should_retain); + } + + pub fn get_filename(path: &Path, node_pubkey: &Pubkey) -> PathBuf { + path.join(format!("tower-{}", node_pubkey)) + .with_extension("bin") + } + + pub fn get_tmp_filename(path: &Path) -> PathBuf { + path.with_extension("bin.new") + } + + pub fn save(&self, node_keypair: &Arc) -> Result<()> { + let mut measure = Measure::start("tower_save-ms"); + + if self.node_pubkey != node_keypair.pubkey() { + return Err(TowerError::WrongTower(format!( + "node_pubkey is {:?} but found tower for {:?}", + node_keypair.pubkey(), + self.node_pubkey + ))); + } + + let filename = &self.path; + let new_filename = &self.tmp_path; + { + // overwrite anything if exists + let mut file = File::create(&new_filename)?; + let saved_tower = SavedTower::new(self, node_keypair)?; + bincode::serialize_into(&mut file, &saved_tower)?; + // file.sync_all() hurts performance; pipeline sync-ing and submitting votes to the cluster! + } + fs::rename(&new_filename, &filename)?; + // self.path.parent().sync_all() hurts performance same as the above sync + + measure.stop(); + inc_new_counter_info!("tower_save-ms", measure.as_ms() as usize); + + Ok(()) + } + + pub fn restore(path: &Path, node_pubkey: &Pubkey) -> Result { + let filename = Self::get_filename(path, node_pubkey); + + // Ensure to create parent dir here, because restore() precedes save() always + fs::create_dir_all(&filename.parent().unwrap())?; + + let file = File::open(&filename)?; + let mut stream = BufReader::new(file); + + let saved_tower: SavedTower = bincode::deserialize_from(&mut stream)?; + if !saved_tower.verify(node_pubkey) { + return Err(TowerError::InvalidSignature); + } + let mut tower = saved_tower.deserialize()?; + tower.path = filename; + tower.tmp_path = Self::get_tmp_filename(&tower.path); + + // check that the tower actually belongs to this node + if &tower.node_pubkey != node_pubkey { + return Err(TowerError::WrongTower(format!( + "node_pubkey is {:?} but found tower for {:?}", + node_pubkey, tower.node_pubkey + ))); + } + Ok(tower) + } +} + +#[derive(Error, Debug)] +pub enum TowerError { + #[error("IO Error: {0}")] + IOError(#[from] std::io::Error), + + #[error("Serialization Error: {0}")] + SerializeError(#[from] bincode::Error), + + #[error("The signature on the saved tower is invalid")] + InvalidSignature, + + #[error("The tower does not match this validator: {0}")] + WrongTower(String), + + #[error( + "The tower is too old: \ + newest slot in tower ({0}) << oldest slot in available history ({1})" + )] + TooOldTower(Slot, Slot), + + #[error("The tower is fatally inconsistent with blockstore: {0}")] + FatallyInconsistent(&'static str), +} + +#[frozen_abi(digest = "Gaxfwvx5MArn52mKZQgzHmDCyn5YfCuTHvp5Et3rFfpp")] +#[derive(Default, Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)] +pub struct SavedTower { + signature: Signature, + data: Vec, +} + +impl SavedTower { + pub fn new(tower: &Tower, keypair: &Arc) -> Result { + let data = bincode::serialize(tower)?; + let signature = keypair.sign_message(&data); + Ok(Self { data, signature }) + } + + pub fn verify(&self, pubkey: &Pubkey) -> bool { + self.signature.verify(pubkey.as_ref(), &self.data) + } + + pub fn deserialize(&self) -> Result { + bincode::deserialize(&self.data).map_err(|e| e.into()) + } +} + +// Given an untimely crash, tower may have roots that are not reflected in blockstore because +// `ReplayState::handle_votable_bank()` saves tower before setting blockstore roots +pub fn reconcile_blockstore_roots_with_tower( + tower: &Tower, + blockstore: &Blockstore, +) -> blockstore_db::Result<()> { + if let Some(tower_root) = tower.root() { + let last_blockstore_root = blockstore.last_root(); + if last_blockstore_root < tower_root { + // Ensure tower_root itself to exist and be marked as rooted in the blockstore + // in addition to its ancestors. + let new_roots: Vec<_> = AncestorIterator::new_inclusive(tower_root, &blockstore) + .take_while(|current| match current.cmp(&last_blockstore_root) { + Ordering::Greater => true, + Ordering::Equal => false, + Ordering::Less => panic!( + "couldn't find a last_blockstore_root upwards from: {}!?", + tower_root + ), + }) + .collect(); + assert!( + !new_roots.is_empty(), + "at least 1 parent slot must be found" ); + + blockstore.set_roots(&new_roots)? } } + Ok(()) } #[cfg(test)] @@ -681,6 +1114,7 @@ pub mod test { progress_map::ForkProgress, replay_stage::{HeaviestForkFailures, ReplayStage}, }; + use solana_ledger::{blockstore::make_slot_entries, get_tmp_ledger_path}; use solana_runtime::{ bank::Bank, bank_forks::BankForks, @@ -688,12 +1122,21 @@ pub mod test { create_genesis_config_with_vote_accounts, GenesisConfigInfo, ValidatorVoteKeypairs, }, }; - use solana_sdk::{clock::Slot, hash::Hash, pubkey::Pubkey, signature::Signer}; + use solana_sdk::{ + clock::Slot, hash::Hash, pubkey::Pubkey, signature::Signer, slot_history::SlotHistory, + }; use solana_vote_program::{ vote_state::{Vote, VoteStateVersions, MAX_LOCKOUT_HISTORY}, vote_transaction, }; - use std::{collections::HashMap, rc::Rc, sync::RwLock}; + use std::{ + collections::HashMap, + fs::{remove_file, OpenOptions}, + io::{Read, Seek, SeekFrom, Write}, + rc::Rc, + sync::RwLock, + }; + use tempfile::TempDir; use trees::{tr, Tree, TreeWalk}; pub(crate) struct VoteSimulator { @@ -1837,4 +2280,644 @@ pub mod test { tower.last_timestamp.timestamp += 1_000_000; // Move last_timestamp well into the future assert!(tower.maybe_timestamp(3).is_none()); // slot 3 gets no timestamp } + + fn run_test_load_tower_snapshot( + modify_original: F, + modify_serialized: G, + ) -> (Tower, Result) + where + F: Fn(&mut Tower, &Pubkey), + G: Fn(&PathBuf), + { + let dir = TempDir::new().unwrap(); + let identity_keypair = Arc::new(Keypair::new()); + + // Use values that will not match the default derived from BankForks + let mut tower = Tower::new_for_tests(10, 0.9); + tower.path = Tower::get_filename(&dir.path().to_path_buf(), &identity_keypair.pubkey()); + tower.tmp_path = Tower::get_tmp_filename(&tower.path); + + modify_original(&mut tower, &identity_keypair.pubkey()); + + tower.save(&identity_keypair).unwrap(); + modify_serialized(&tower.path); + let loaded = Tower::restore(&dir.path(), &identity_keypair.pubkey()); + + (tower, loaded) + } + + #[test] + fn test_switch_threshold_across_tower_reload() { + solana_logger::setup(); + // Init state + let mut vote_simulator = VoteSimulator::new(2); + let my_pubkey = vote_simulator.node_pubkeys[0]; + let other_vote_account = vote_simulator.vote_pubkeys[1]; + let bank0 = vote_simulator + .bank_forks + .read() + .unwrap() + .get(0) + .unwrap() + .clone(); + let total_stake = bank0.total_epoch_stake(); + assert_eq!( + total_stake, + vote_simulator.validator_keypairs.len() as u64 * 10_000 + ); + + // Create the tree of banks + let forks = tr(0) + / (tr(1) + / (tr(2) + / tr(10) + / (tr(43) + / (tr(44) + // Minor fork 2 + / (tr(45) / (tr(46) / (tr(47) / (tr(48) / (tr(49) / (tr(50))))))) + / (tr(110) / tr(111)))))); + + // Fill the BankForks according to the above fork structure + vote_simulator.fill_bank_forks(forks, &HashMap::new()); + for (_, fork_progress) in vote_simulator.progress.iter_mut() { + fork_progress.fork_stats.computed = true; + } + + let ancestors = vote_simulator.bank_forks.read().unwrap().ancestors(); + let descendants = vote_simulator.bank_forks.read().unwrap().descendants(); + let mut tower = Tower::new_with_key(&my_pubkey); + + tower.record_vote(43, Hash::default()); + tower.record_vote(44, Hash::default()); + tower.record_vote(45, Hash::default()); + tower.record_vote(46, Hash::default()); + tower.record_vote(47, Hash::default()); + tower.record_vote(48, Hash::default()); + tower.record_vote(49, Hash::default()); + + // Trying to switch to a descendant of last vote should always work + assert_eq!( + tower.check_switch_threshold( + 50, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::NoSwitch + ); + + // Trying to switch to another fork at 110 should fail + assert_eq!( + tower.check_switch_threshold( + 110, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::FailedSwitchThreshold + ); + + vote_simulator.simulate_lockout_interval(111, (10, 49), &other_vote_account); + + assert_eq!( + tower.check_switch_threshold( + 110, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::SwitchProof(Hash::default()) + ); + + assert_eq!(tower.voted_slots(), vec![43, 44, 45, 46, 47, 48, 49]); + { + let mut tower = tower.clone(); + tower.record_vote(110, Hash::default()); + tower.record_vote(111, Hash::default()); + assert_eq!(tower.voted_slots(), vec![43, 110, 111]); + assert_eq!(tower.lockouts.root_slot, Some(0)); + } + + // Prepare simulated validator restart! + let mut vote_simulator = VoteSimulator::new(2); + let other_vote_account = vote_simulator.vote_pubkeys[1]; + let bank0 = vote_simulator + .bank_forks + .read() + .unwrap() + .get(0) + .unwrap() + .clone(); + let total_stake = bank0.total_epoch_stake(); + let forks = tr(0) + / (tr(1) + / (tr(2) + / tr(10) + / (tr(43) + / (tr(44) + // Minor fork 2 + / (tr(45) / (tr(46) / (tr(47) / (tr(48) / (tr(49) / (tr(50))))))) + / (tr(110) / tr(111)))))); + let replayed_root_slot = 44; + + // Fill the BankForks according to the above fork structure + vote_simulator.fill_bank_forks(forks, &HashMap::new()); + for (_, fork_progress) in vote_simulator.progress.iter_mut() { + fork_progress.fork_stats.computed = true; + } + + // prepend tower restart! + let mut slot_history = SlotHistory::default(); + vote_simulator.set_root(replayed_root_slot); + let ancestors = vote_simulator.bank_forks.read().unwrap().ancestors(); + let descendants = vote_simulator.bank_forks.read().unwrap().descendants(); + for slot in &[0, 1, 2, 43, replayed_root_slot] { + slot_history.add(*slot); + } + let mut tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![45, 46, 47, 48, 49]); + + // Trying to switch to another fork at 110 should fail + assert_eq!( + tower.check_switch_threshold( + 110, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::FailedSwitchThreshold + ); + + // Add lockout_interval which should be excluded + vote_simulator.simulate_lockout_interval(111, (45, 50), &other_vote_account); + assert_eq!( + tower.check_switch_threshold( + 110, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::FailedSwitchThreshold + ); + + // Add lockout_interval which should not be excluded + vote_simulator.simulate_lockout_interval(111, (110, 200), &other_vote_account); + assert_eq!( + tower.check_switch_threshold( + 110, + &ancestors, + &descendants, + &vote_simulator.progress, + total_stake, + bank0.epoch_vote_accounts(0).unwrap(), + ), + SwitchForkDecision::SwitchProof(Hash::default()) + ); + + tower.record_vote(110, Hash::default()); + tower.record_vote(111, Hash::default()); + assert_eq!(tower.voted_slots(), vec![110, 111]); + assert_eq!(tower.lockouts.root_slot, Some(replayed_root_slot)); + } + + #[test] + fn test_load_tower_ok() { + let (tower, loaded) = + run_test_load_tower_snapshot(|tower, pubkey| tower.node_pubkey = *pubkey, |_| ()); + let loaded = loaded.unwrap(); + assert_eq!(loaded, tower); + assert_eq!(tower.threshold_depth, 10); + assert!((tower.threshold_size - 0.9_f64).abs() < f64::EPSILON); + assert_eq!(loaded.threshold_depth, 10); + assert!((loaded.threshold_size - 0.9_f64).abs() < f64::EPSILON); + } + + #[test] + fn test_load_tower_wrong_identity() { + let identity_keypair = Arc::new(Keypair::new()); + let tower = Tower::new_with_key(&Pubkey::default()); + assert_matches!( + tower.save(&identity_keypair), + Err(TowerError::WrongTower(_)) + ) + } + + #[test] + fn test_load_tower_invalid_signature() { + let (_, loaded) = run_test_load_tower_snapshot( + |tower, pubkey| tower.node_pubkey = *pubkey, + |path| { + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(path) + .unwrap(); + let mut buf = [0u8]; + assert_eq!(file.read(&mut buf).unwrap(), 1); + buf[0] = !buf[0]; + assert_eq!(file.seek(SeekFrom::Start(0)).unwrap(), 0); + assert_eq!(file.write(&buf).unwrap(), 1); + }, + ); + assert_matches!(loaded, Err(TowerError::InvalidSignature)) + } + + #[test] + fn test_load_tower_deser_failure() { + let (_, loaded) = run_test_load_tower_snapshot( + |tower, pubkey| tower.node_pubkey = *pubkey, + |path| { + OpenOptions::new() + .write(true) + .truncate(true) + .open(&path) + .unwrap_or_else(|_| panic!("Failed to truncate file: {:?}", path)); + }, + ); + assert_matches!(loaded, Err(TowerError::SerializeError(_))) + } + + #[test] + fn test_load_tower_missing() { + let (_, loaded) = run_test_load_tower_snapshot( + |tower, pubkey| tower.node_pubkey = *pubkey, + |path| { + remove_file(path).unwrap(); + }, + ); + assert_matches!(loaded, Err(TowerError::IOError(_))) + } + + #[test] + fn test_reconcile_blockstore_roots_with_tower_normal() { + solana_logger::setup(); + let blockstore_path = get_tmp_ledger_path!(); + { + let blockstore = Blockstore::open(&blockstore_path).unwrap(); + + let (shreds, _) = make_slot_entries(1, 0, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(3, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(4, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + assert!(!blockstore.is_root(0)); + assert!(!blockstore.is_root(1)); + assert!(!blockstore.is_root(3)); + assert!(!blockstore.is_root(4)); + + let mut tower = Tower::new_with_key(&Pubkey::default()); + tower.lockouts.root_slot = Some(4); + reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap(); + + assert!(!blockstore.is_root(0)); + assert!(blockstore.is_root(1)); + assert!(!blockstore.is_root(3)); + assert!(blockstore.is_root(4)); + } + Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction"); + } + + #[test] + #[should_panic(expected = "couldn't find a last_blockstore_root upwards from: 4!?")] + fn test_reconcile_blockstore_roots_with_tower_panic_no_common_root() { + solana_logger::setup(); + let blockstore_path = get_tmp_ledger_path!(); + { + let blockstore = Blockstore::open(&blockstore_path).unwrap(); + + let (shreds, _) = make_slot_entries(1, 0, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(3, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(4, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + blockstore.set_roots(&[3]).unwrap(); + assert!(!blockstore.is_root(0)); + assert!(!blockstore.is_root(1)); + assert!(blockstore.is_root(3)); + assert!(!blockstore.is_root(4)); + + let mut tower = Tower::new_with_key(&Pubkey::default()); + tower.lockouts.root_slot = Some(4); + reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap(); + } + Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction"); + } + + #[test] + #[should_panic(expected = "at least 1 parent slot must be found")] + fn test_reconcile_blockstore_roots_with_tower_panic_no_parent() { + solana_logger::setup(); + let blockstore_path = get_tmp_ledger_path!(); + { + let blockstore = Blockstore::open(&blockstore_path).unwrap(); + + let (shreds, _) = make_slot_entries(1, 0, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(3, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + assert!(!blockstore.is_root(0)); + assert!(!blockstore.is_root(1)); + assert!(!blockstore.is_root(3)); + + let mut tower = Tower::new_with_key(&Pubkey::default()); + tower.lockouts.root_slot = Some(4); + reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap(); + } + Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction"); + } + + #[test] + fn test_adjust_lockouts_after_replay_future_slots() { + solana_logger::setup(); + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + tower.record_vote(1, Hash::default()); + tower.record_vote(2, Hash::default()); + tower.record_vote(3, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + + let replayed_root_slot = 1; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![2, 3]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + assert_eq!(tower.voted_slots(), vec![2, 3]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_not_found_slots() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + tower.record_vote(1, Hash::default()); + tower.record_vote(2, Hash::default()); + tower.record_vote(3, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(4); + + let replayed_root_slot = 4; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![2, 3]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_all_rooted_with_no_too_old() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + tower.record_vote(1, Hash::default()); + tower.record_vote(2, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + slot_history.add(3); + slot_history.add(4); + slot_history.add(5); + + let replayed_root_slot = 5; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![] as Vec); + assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.stray_restored_slot, None); + } + + #[test] + fn test_adjust_lockouts_after_relay_all_rooted_with_too_old() { + use solana_sdk::slot_history::MAX_ENTRIES; + + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + tower.record_vote(1, Hash::default()); + tower.record_vote(2, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + slot_history.add(MAX_ENTRIES); + + tower = tower + .adjust_lockouts_after_replay(MAX_ENTRIES, &slot_history) + .unwrap(); + assert_eq!(tower.voted_slots(), vec![] as Vec); + assert_eq!(tower.root(), Some(MAX_ENTRIES)); + } + + #[test] + fn test_adjust_lockouts_after_replay_anchored_future_slots() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + tower.record_vote(1, Hash::default()); + tower.record_vote(2, Hash::default()); + tower.record_vote(3, Hash::default()); + tower.record_vote(4, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + + let replayed_root_slot = 2; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![3, 4]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_all_not_found() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(5, Hash::default()); + tower.record_vote(6, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + slot_history.add(7); + + let replayed_root_slot = 7; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![5, 6]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_all_not_found_even_if_rooted() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.root_slot = Some(4); + tower.record_vote(5, Hash::default()); + tower.record_vote(6, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + slot_history.add(7); + + let replayed_root_slot = 7; + let result = tower.adjust_lockouts_after_replay(replayed_root_slot, &slot_history); + + assert_eq!( + format!("{}", result.unwrap_err()), + "The tower is fatally inconsistent with blockstore: no common slot for rooted tower" + ); + } + + #[test] + fn test_adjust_lockouts_after_replay_all_future_votes_only_root_found() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.root_slot = Some(2); + tower.record_vote(3, Hash::default()); + tower.record_vote(4, Hash::default()); + tower.record_vote(5, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(1); + slot_history.add(2); + + let replayed_root_slot = 2; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![3, 4, 5]); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_empty() { + let mut tower = Tower::new_for_tests(10, 0.9); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + + let replayed_root_slot = 0; + tower = tower + .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) + .unwrap(); + + assert_eq!(tower.voted_slots(), vec![] as Vec); + assert_eq!(tower.root(), Some(replayed_root_slot)); + } + + #[test] + fn test_adjust_lockouts_after_replay_too_old_tower() { + use solana_sdk::slot_history::MAX_ENTRIES; + + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(0, Hash::default()); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(MAX_ENTRIES); + + let result = tower.adjust_lockouts_after_replay(MAX_ENTRIES, &slot_history); + assert_eq!( + format!("{}", result.unwrap_err()), + "The tower is too old: newest slot in tower (0) << oldest slot in available history (1)" + ); + } + + #[test] + fn test_adjust_lockouts_after_replay_time_warped() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(1)); + tower.lockouts.votes.push_back(Lockout::new(0)); + let vote = Vote::new(vec![0], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + + let result = tower.adjust_lockouts_after_replay(0, &slot_history); + assert_eq!( + format!("{}", result.unwrap_err()), + "The tower is fatally inconsistent with blockstore: time warped?" + ); + } + + #[test] + fn test_adjust_lockouts_after_replay_diverged_ancestor() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(1)); + tower.lockouts.votes.push_back(Lockout::new(2)); + let vote = Vote::new(vec![2], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(2); + + let result = tower.adjust_lockouts_after_replay(2, &slot_history); + assert_eq!( + format!("{}", result.unwrap_err()), + "The tower is fatally inconsistent with blockstore: diverged ancestor?" + ); + } + + #[test] + fn test_adjust_lockouts_after_replay_out_of_order() { + use solana_sdk::slot_history::MAX_ENTRIES; + + let mut tower = Tower::new_for_tests(10, 0.9); + tower + .lockouts + .votes + .push_back(Lockout::new(MAX_ENTRIES - 1)); + tower.lockouts.votes.push_back(Lockout::new(0)); + tower.lockouts.votes.push_back(Lockout::new(1)); + let vote = Vote::new(vec![1], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(MAX_ENTRIES); + + let result = tower.adjust_lockouts_after_replay(MAX_ENTRIES, &slot_history); + assert_eq!( + format!("{}", result.unwrap_err()), + "The tower is fatally inconsistent with blockstore: not too old once after got too old?" + ); + } } diff --git a/core/src/heaviest_subtree_fork_choice.rs b/core/src/heaviest_subtree_fork_choice.rs index d34941533d9..fc77d762c7d 100644 --- a/core/src/heaviest_subtree_fork_choice.rs +++ b/core/src/heaviest_subtree_fork_choice.rs @@ -492,6 +492,44 @@ impl HeaviestSubtreeForkChoice { ); } + fn heaviest_slot_on_same_voted_fork(&self, tower: &Tower) -> Option { + tower + .last_voted_slot() + .map(|last_voted_slot| { + let heaviest_slot_on_same_voted_fork = self.best_slot(last_voted_slot); + if heaviest_slot_on_same_voted_fork.is_none() { + if !tower.is_stray_last_vote() { + // Unless last vote is stray, self.bast_slot(last_voted_slot) must return + // Some(_), justifying to panic! here. + // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, + // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be + // touched in that case as well. + // In other words, except being stray, all other slots have been voted on while this + // validator has been running, so we must be able to fetch best_slots for all of + // them. + panic!( + "a bank at last_voted_slot({}) is a frozen bank so must have been\ + added to heaviest_subtree_fork_choice at time of freezing", + last_voted_slot, + ) + } else { + // fork_infos doesn't have corresponding data for the stray restored last vote, + // meaning some inconsistency between saved tower and ledger. + // (newer snapshot, or only a saved tower is moved over to new setup?) + return None; + } + } + let heaviest_slot_on_same_voted_fork = heaviest_slot_on_same_voted_fork.unwrap(); + + if heaviest_slot_on_same_voted_fork == last_voted_slot { + None + } else { + Some(heaviest_slot_on_same_voted_fork) + } + }) + .unwrap_or(None) + } + #[cfg(test)] fn set_stake_voted_at(&mut self, slot: Slot, stake_voted_at: u64) { self.fork_infos.get_mut(&slot).unwrap().stake_voted_at = stake_voted_at; @@ -551,26 +589,17 @@ impl ForkChoice for HeaviestSubtreeForkChoice { _ancestors: &HashMap>, bank_forks: &RwLock, ) -> (Arc, Option>) { - let last_voted_slot = tower.last_voted_slot(); - let heaviest_slot_on_same_voted_fork = last_voted_slot.map(|last_voted_slot| { - let heaviest_slot_on_same_voted_fork = - self.best_slot(last_voted_slot).expect("a bank at last_voted_slot is a frozen bank so must have been added to heaviest_subtree_fork_choice at time of freezing"); - if heaviest_slot_on_same_voted_fork == last_voted_slot { - None - } else { - Some(heaviest_slot_on_same_voted_fork) - } - }).unwrap_or(None); - let heaviest_slot = self.best_overall_slot(); let r_bank_forks = bank_forks.read().unwrap(); + ( - r_bank_forks.get(heaviest_slot).unwrap().clone(), - heaviest_slot_on_same_voted_fork.map(|heaviest_slot_on_same_voted_fork| { - r_bank_forks - .get(heaviest_slot_on_same_voted_fork) - .unwrap() - .clone() - }), + r_bank_forks.get(self.best_overall_slot()).unwrap().clone(), + self.heaviest_slot_on_same_voted_fork(tower) + .map(|heaviest_slot_on_same_voted_fork| { + r_bank_forks + .get(heaviest_slot_on_same_voted_fork) + .unwrap() + .clone() + }), ) } } @@ -612,6 +641,7 @@ mod test { use super::*; use crate::consensus::test::VoteSimulator; use solana_runtime::{bank::Bank, bank_utils}; + use solana_sdk::{hash::Hash, slot_history::SlotHistory}; use std::{collections::HashSet, ops::Range}; use trees::tr; @@ -1491,6 +1521,48 @@ mod test { assert!(heaviest_subtree_fork_choice.subtree_diff(0, 6).is_empty()); } + #[test] + fn test_stray_restored_slot() { + let forks = tr(0) / (tr(1) / tr(2)); + let heaviest_subtree_fork_choice = HeaviestSubtreeForkChoice::new_from_tree(forks); + + let mut tower = Tower::new_for_tests(10, 0.9); + tower.record_vote(1, Hash::default()); + + assert_eq!(tower.is_stray_last_vote(), false); + assert_eq!( + heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower), + Some(2) + ); + + // Make slot 1 (existing in bank_forks) a restored stray slot + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + // Work around TooOldSlotHistory + slot_history.add(999); + tower = tower + .adjust_lockouts_after_replay(0, &slot_history) + .unwrap(); + + assert_eq!(tower.is_stray_last_vote(), true); + assert_eq!( + heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower), + Some(2) + ); + + // Make slot 3 (NOT existing in bank_forks) a restored stray slot + tower.record_vote(3, Hash::default()); + tower = tower + .adjust_lockouts_after_replay(0, &slot_history) + .unwrap(); + + assert_eq!(tower.is_stray_last_vote(), true); + assert_eq!( + heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower), + None + ); + } + fn setup_forks() -> HeaviestSubtreeForkChoice { /* Build fork structure: diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 34a346d73a6..bf69d4c3ab2 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -221,6 +221,7 @@ impl ReplayStage { cluster_info: Arc, ledger_signal_receiver: Receiver, poh_recorder: Arc>, + mut tower: Tower, vote_tracker: Arc, cluster_slots: Arc, retransmit_slots_sender: RetransmitSlotsSender, @@ -258,53 +259,16 @@ impl ReplayStage { let mut all_pubkeys = PubkeyReferences::default(); let verify_recyclers = VerifyRecyclers::default(); let _exit = Finalizer::new(exit.clone()); - let mut progress = ProgressMap::default(); - let mut frozen_banks: Vec<_> = bank_forks - .read() - .unwrap() - .frozen_banks() - .values() - .cloned() - .collect(); - - frozen_banks.sort_by_key(|bank| bank.slot()); - - // Initialize progress map with any root banks - for bank in &frozen_banks { - let prev_leader_slot = progress.get_bank_prev_leader_slot(bank); - progress.insert( - bank.slot(), - ForkProgress::new_from_bank( - bank, - &my_pubkey, - &vote_account, - prev_leader_slot, - 0, - 0, - ), - ); - } - let root_bank = bank_forks.read().unwrap().root_bank().clone(); - let root = root_bank.slot(); - let unlock_heaviest_subtree_fork_choice_slot = - Self::get_unlock_heaviest_subtree_fork_choice(root_bank.cluster_type()); - let mut heaviest_subtree_fork_choice = - HeaviestSubtreeForkChoice::new_from_frozen_banks(root, &frozen_banks); + let ( + mut progress, + mut heaviest_subtree_fork_choice, + unlock_heaviest_subtree_fork_choice_slot, + ) = Self::initialize_progress_and_fork_choice_with_locked_bank_forks( + &bank_forks, + &my_pubkey, + &vote_account, + ); let mut bank_weight_fork_choice = BankWeightForkChoice::default(); - let heaviest_bank = if root > unlock_heaviest_subtree_fork_choice_slot { - bank_forks - .read() - .unwrap() - .get(heaviest_subtree_fork_choice.best_overall_slot()) - .expect( - "The best overall slot must be one of `frozen_banks` which all - exist in bank_forks", - ) - .clone() - } else { - Tower::find_heaviest_bank(&bank_forks, &my_pubkey).unwrap_or(root_bank) - }; - let mut tower = Tower::new(&my_pubkey, &vote_account, root, &heaviest_bank); let mut current_leader = None; let mut last_reset = Hash::default(); let mut partition_exists = false; @@ -655,6 +619,65 @@ impl ReplayStage { .unwrap_or(true) } + fn initialize_progress_and_fork_choice_with_locked_bank_forks( + bank_forks: &RwLock, + my_pubkey: &Pubkey, + vote_account: &Pubkey, + ) -> (ProgressMap, HeaviestSubtreeForkChoice, Slot) { + let (root_bank, frozen_banks) = { + let bank_forks = bank_forks.read().unwrap(); + ( + bank_forks.root_bank().clone(), + bank_forks.frozen_banks().values().cloned().collect(), + ) + }; + + Self::initialize_progress_and_fork_choice( + &root_bank, + frozen_banks, + &my_pubkey, + &vote_account, + ) + } + + pub(crate) fn initialize_progress_and_fork_choice( + root_bank: &Arc, + mut frozen_banks: Vec>, + my_pubkey: &Pubkey, + vote_account: &Pubkey, + ) -> (ProgressMap, HeaviestSubtreeForkChoice, Slot) { + let mut progress = ProgressMap::default(); + + frozen_banks.sort_by_key(|bank| bank.slot()); + + // Initialize progress map with any root banks + for bank in &frozen_banks { + let prev_leader_slot = progress.get_bank_prev_leader_slot(bank); + progress.insert( + bank.slot(), + ForkProgress::new_from_bank( + bank, + &my_pubkey, + &vote_account, + prev_leader_slot, + 0, + 0, + ), + ); + } + let root = root_bank.slot(); + let unlock_heaviest_subtree_fork_choice_slot = + Self::get_unlock_heaviest_subtree_fork_choice(root_bank.cluster_type()); + let heaviest_subtree_fork_choice = + HeaviestSubtreeForkChoice::new_from_frozen_banks(root, &frozen_banks); + + ( + progress, + heaviest_subtree_fork_choice, + unlock_heaviest_subtree_fork_choice_slot, + ) + } + fn report_memory( allocated: &solana_measure::thread_mem_usage::Allocatedp, name: &'static str, @@ -1017,7 +1040,15 @@ impl ReplayStage { } trace!("handle votable bank {}", bank.slot()); let (vote, tower_index) = tower.new_vote_from_bank(bank, vote_account_pubkey); - if let Some(new_root) = tower.record_bank_vote(vote) { + let new_root = tower.record_bank_vote(vote); + let last_vote = tower.last_vote_and_timestamp(); + + if let Err(err) = tower.save(&cluster_info.keypair) { + error!("Unable to save tower: {:?}", err); + std::process::exit(1); + } + + if let Some(new_root) = new_root { // get the root bank before squash let root_bank = bank_forks .read() @@ -1082,7 +1113,7 @@ impl ReplayStage { bank, vote_account_pubkey, authorized_voter_keypairs, - tower.last_vote_and_timestamp(), + last_vote, tower_index, switch_fork_decision, ); diff --git a/core/src/tvu.rs b/core/src/tvu.rs index 369e070b4e5..33d1a5635c6 100644 --- a/core/src/tvu.rs +++ b/core/src/tvu.rs @@ -10,6 +10,7 @@ use crate::{ cluster_info_vote_listener::{VerifiedVoteReceiver, VoteTracker}, cluster_slots::ClusterSlots, completed_data_sets_service::CompletedDataSetsSender, + consensus::Tower, ledger_cleanup_service::LedgerCleanupService, optimistically_confirmed_bank_tracker::BankNotificationSender, poh_recorder::PohRecorder, @@ -91,6 +92,7 @@ impl Tvu { ledger_signal_receiver: Receiver, subscriptions: &Arc, poh_recorder: &Arc>, + tower: Tower, leader_schedule_cache: &Arc, exit: &Arc, completed_slots_receiver: CompletedSlotsReceiver, @@ -206,6 +208,7 @@ impl Tvu { cluster_info.clone(), ledger_signal_receiver, poh_recorder.clone(), + tower, vote_tracker, cluster_slots, retransmit_slots_sender, @@ -305,6 +308,7 @@ pub mod tests { let (replay_vote_sender, _replay_vote_receiver) = unbounded(); let (completed_data_sets_sender, _completed_data_sets_receiver) = unbounded(); let bank_forks = Arc::new(RwLock::new(bank_forks)); + let tower = Tower::new_with_key(&target1_keypair.pubkey()); let tvu = Tvu::new( &vote_keypair.pubkey(), vec![Arc::new(vote_keypair)], @@ -327,6 +331,7 @@ pub mod tests { OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks), )), &poh_recorder, + tower, &leader_schedule_cache, &exit, completed_slots_receiver, diff --git a/core/src/validator.rs b/core/src/validator.rs index 7740970a25e..6f541b29300 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -6,6 +6,7 @@ use crate::{ cluster_info::{ClusterInfo, Node}, cluster_info_vote_listener::VoteTracker, completed_data_sets_service::CompletedDataSetsService, + consensus::{reconcile_blockstore_roots_with_tower, Tower, TowerError}, contact_info::ContactInfo, gossip_service::GossipService, optimistically_confirmed_bank_tracker::{ @@ -102,6 +103,7 @@ pub struct ValidatorConfig { pub poh_verify: bool, // Perform PoH verification during blockstore processing at boo pub cuda: bool, pub debug_keys: Option>>, + pub require_tower: bool, } impl Default for ValidatorConfig { @@ -136,6 +138,7 @@ impl Default for ValidatorConfig { poh_verify: true, cuda: false, debug_keys: None, + require_tower: false, } } } @@ -278,7 +281,15 @@ impl Validator { cache_block_time_sender, cache_block_time_service, }, - ) = new_banks_from_ledger(config, ledger_path, config.poh_verify, &exit); + tower, + ) = new_banks_from_ledger( + &id, + vote_account, + config, + ledger_path, + config.poh_verify, + &exit, + ); let leader_schedule_cache = Arc::new(leader_schedule_cache); let bank = bank_forks.working_bank(); @@ -537,6 +548,7 @@ impl Validator { ledger_signal_receiver, &subscriptions, &poh_recorder, + tower, &leader_schedule_cache, &exit, completed_slots_receiver, @@ -689,8 +701,81 @@ impl Validator { } } +fn active_vote_account_exists_in_bank(bank: &Arc, vote_account: &Pubkey) -> bool { + if let Some(account) = &bank.get_account(vote_account) { + if let Some(vote_state) = VoteState::from(&account) { + return !vote_state.votes.is_empty(); + } + } + false +} + +fn post_process_restored_tower( + restored_tower: crate::consensus::Result, + validator_identity: &Pubkey, + vote_account: &Pubkey, + config: &ValidatorConfig, + ledger_path: &Path, + bank_forks: &BankForks, +) -> Tower { + restored_tower + .and_then(|tower| { + let root_bank = bank_forks.root_bank(); + let slot_history = root_bank.get_slot_history(); + tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history) + }) + .unwrap_or_else(|err| { + let voting_has_been_active = + active_vote_account_exists_in_bank(&bank_forks.working_bank(), &vote_account); + let saved_tower_is_missing = if let TowerError::IOError(io_err) = &err { + io_err.kind() == std::io::ErrorKind::NotFound + } else { + false + }; + if !saved_tower_is_missing { + datapoint_error!( + "tower_error", + ( + "error", + format!("Unable to restore tower: {}", err), + String + ), + ); + } + if config.require_tower && voting_has_been_active { + error!("Requested mandatory tower restore failed: {}", err); + error!( + "And there is an existing vote_account containing actual votes. \ + Aborting due to possible conflicting duplicate votes" + ); + process::exit(1); + } + if saved_tower_is_missing && !voting_has_been_active { + // Currently, don't protect against spoofed snapshots with no tower at all + info!( + "Ignoring expected failed tower restore because this is the initial \ + validator start with the vote account..." + ); + } else { + error!( + "Rebuilding a new tower from the latest vote account due to failed tower restore: {}", + err + ); + } + + Tower::new_from_bankforks( + &bank_forks, + &ledger_path, + &validator_identity, + &vote_account, + ) + }) +} + #[allow(clippy::type_complexity)] fn new_banks_from_ledger( + validator_identity: &Pubkey, + vote_account: &Pubkey, config: &ValidatorConfig, ledger_path: &Path, poh_verify: bool, @@ -704,6 +789,7 @@ fn new_banks_from_ledger( LeaderScheduleCache, Option<(Slot, Hash)>, TransactionHistoryServices, + Tower, ) { info!("loading ledger from {:?}...", ledger_path); let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size); @@ -735,6 +821,14 @@ fn new_banks_from_ledger( .expect("Failed to open ledger database"); blockstore.set_no_compaction(config.no_rocksdb_compaction); + let restored_tower = Tower::restore(ledger_path, &validator_identity); + if let Ok(tower) = &restored_tower { + reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap_or_else(|err| { + error!("Failed to reconcile blockstore with tower: {:?}", err); + std::process::exit(1); + }); + } + let process_options = blockstore_processor::ProcessOptions { poh_verify, dev_halt_at_slot: config.dev_halt_at_slot, @@ -767,6 +861,17 @@ fn new_banks_from_ledger( process::exit(1); }); + let tower = post_process_restored_tower( + restored_tower, + &validator_identity, + &vote_account, + &config, + &ledger_path, + &bank_forks, + ); + + info!("Tower state: {:?}", tower); + leader_schedule_cache.set_fixed_leader_schedule(config.fixed_leader_schedule.clone()); bank_forks.set_snapshot_config(config.snapshot_config.clone()); @@ -781,6 +886,7 @@ fn new_banks_from_ledger( leader_schedule_cache, snapshot_hash, transaction_history_services, + tower, ) } diff --git a/ledger/src/ancestor_iterator.rs b/ledger/src/ancestor_iterator.rs index 0c01757d6ed..6c8099ce98f 100644 --- a/ledger/src/ancestor_iterator.rs +++ b/ledger/src/ancestor_iterator.rs @@ -20,6 +20,13 @@ impl<'a> AncestorIterator<'a> { blockstore, } } + + pub fn new_inclusive(start_slot: Slot, blockstore: &'a Blockstore) -> Self { + Self { + current: blockstore.meta(start_slot).unwrap().map(|_| start_slot), + blockstore, + } + } } impl<'a> Iterator for AncestorIterator<'a> { type Item = Slot; @@ -111,4 +118,33 @@ mod tests { vec![2, 1, 0] ); } + + #[test] + fn test_ancestor_iterator_inclusive() { + let blockstore_path = get_tmp_ledger_path!(); + let blockstore = Blockstore::open(&blockstore_path).unwrap(); + + let (shreds, _) = make_slot_entries(0, 0, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(1, 0, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + let (shreds, _) = make_slot_entries(2, 1, 42); + blockstore.insert_shreds(shreds, None, false).unwrap(); + + assert_eq!( + AncestorIterator::new(2, &blockstore).collect::>(), + vec![1, 0] + ); + // existing start_slot + assert_eq!( + AncestorIterator::new_inclusive(2, &blockstore).collect::>(), + vec![2, 1, 0] + ); + + // non-existing start_slot + assert_eq!( + AncestorIterator::new_inclusive(3, &blockstore).collect::>(), + vec![] as Vec + ); + } } diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 6cc2b75f6d4..957eb31ba79 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -9,7 +9,7 @@ use solana_client::{ use solana_core::{ broadcast_stage::BroadcastStageType, cluster_info::VALIDATOR_PORT_RANGE, - consensus::{SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH}, + consensus::{Tower, SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH}, gossip_service::discover_cluster, optimistic_confirmation_verifier::OptimisticConfirmationVerifier, validator::ValidatorConfig, @@ -1375,20 +1375,21 @@ fn test_no_voting() { } #[test] -fn test_optimistic_confirmation_violation() { +#[serial] +fn test_optimistic_confirmation_violation_with_no_tower() { solana_logger::setup(); let buf = std::env::var("OPTIMISTIC_CONF_TEST_DUMP_LOG") .err() .map(|_| BufferRedirect::stderr().unwrap()); // First set up the cluster with 2 nodes let slots_per_epoch = 2048; - let node_stakes = vec![50, 51]; + let node_stakes = vec![51, 50]; let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true)) .take(node_stakes.len()) .collect(); let config = ClusterConfig { cluster_lamports: 100_000, - node_stakes: vec![51, 50], + node_stakes: node_stakes.clone(), validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], validator_keys: Some(validator_keys), slots_per_epoch, @@ -1422,7 +1423,9 @@ fn test_optimistic_confirmation_violation() { // Mark fork as dead on the heavier validator, this should make the fork effectively // dead, even though it was optimistically confirmed. The smaller validator should - // jump over to the new fork + // create and jump over to a new fork + // Also, remove saved tower to intentionally make the restarted validator to violate the + // optimistic confirmation { let blockstore = Blockstore::open_with_access_type( &exited_validator_info.info.ledger_path, @@ -1440,6 +1443,12 @@ fn test_optimistic_confirmation_violation() { prev_voted_slot ); blockstore.set_dead_slot(prev_voted_slot).unwrap(); + + std::fs::remove_file(Tower::get_filename( + &exited_validator_info.info.ledger_path, + &entry_point_id, + )) + .unwrap(); } cluster.restart_node(&entry_point_id, exited_validator_info); @@ -1479,6 +1488,220 @@ fn test_optimistic_confirmation_violation() { } } +#[test] +#[serial] +#[ignore] +fn test_no_optimistic_confirmation_violation_with_tower() { + solana_logger::setup(); + let mut buf = BufferRedirect::stderr().unwrap(); + + // First set up the cluster with 2 nodes + let slots_per_epoch = 2048; + let node_stakes = vec![51, 50]; + let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true)) + .take(node_stakes.len()) + .collect(); + let config = ClusterConfig { + cluster_lamports: 100_000, + node_stakes: node_stakes.clone(), + validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], + validator_keys: Some(validator_keys), + slots_per_epoch, + stakers_slot_offset: slots_per_epoch, + skip_warmup_slots: true, + ..ClusterConfig::default() + }; + let mut cluster = LocalCluster::new(&config); + let entry_point_id = cluster.entry_point_info.id; + // Let the nodes run for a while. Wait for validators to vote on slot `S` + // so that the vote on `S-1` is definitely in gossip and optimistic confirmation is + // detected on slot `S-1` for sure, then stop the heavier of the two + // validators + let client = cluster.get_validator_client(&entry_point_id).unwrap(); + let mut prev_voted_slot = 0; + loop { + let last_voted_slot = client + .get_slot_with_commitment(CommitmentConfig::recent()) + .unwrap(); + if last_voted_slot > 50 { + if prev_voted_slot == 0 { + prev_voted_slot = last_voted_slot; + } else { + break; + } + } + sleep(Duration::from_millis(100)); + } + + let exited_validator_info = cluster.exit_node(&entry_point_id); + + // Mark fork as dead on the heavier validator, this should make the fork effectively + // dead, even though it was optimistically confirmed. The smaller validator should + // create and jump over to a new fork + { + let blockstore = Blockstore::open_with_access_type( + &exited_validator_info.info.ledger_path, + AccessType::PrimaryOnly, + None, + ) + .unwrap_or_else(|e| { + panic!( + "Failed to open ledger at {:?}, err: {}", + exited_validator_info.info.ledger_path, e + ); + }); + info!( + "Setting slot: {} on main fork as dead, should cause fork", + prev_voted_slot + ); + blockstore.set_dead_slot(prev_voted_slot).unwrap(); + } + cluster.restart_node(&entry_point_id, exited_validator_info); + + cluster.check_no_new_roots(400, "test_no_optimistic_confirmation_violation_with_tower"); + + // Check to see that validator didn't detected optimistic confirmation for + // `prev_voted_slot` failed + let expected_log = format!("Optimistic slot {} was not rooted", prev_voted_slot); + let mut output = String::new(); + buf.read_to_string(&mut output).unwrap(); + assert!(!output.contains(&expected_log)); +} + +#[test] +#[serial] +fn test_validator_saves_tower() { + solana_logger::setup(); + + let validator_config = ValidatorConfig { + require_tower: true, + ..ValidatorConfig::default() + }; + let validator_identity_keypair = Arc::new(Keypair::new()); + let validator_id = validator_identity_keypair.pubkey(); + let config = ClusterConfig { + cluster_lamports: 10_000, + node_stakes: vec![100], + validator_configs: vec![validator_config], + validator_keys: Some(vec![(validator_identity_keypair.clone(), true)]), + ..ClusterConfig::default() + }; + let mut cluster = LocalCluster::new(&config); + + let validator_client = cluster.get_validator_client(&validator_id).unwrap(); + + let ledger_path = cluster + .validators + .get(&validator_id) + .unwrap() + .info + .ledger_path + .clone(); + + // Wait for some votes to be generated + let mut last_replayed_root; + loop { + if let Ok(slot) = validator_client.get_slot_with_commitment(CommitmentConfig::recent()) { + trace!("current slot: {}", slot); + if slot > 2 { + // this will be the root next time a validator starts + last_replayed_root = slot; + break; + } + } + sleep(Duration::from_millis(10)); + } + + // Stop validator and check saved tower + let validator_info = cluster.exit_node(&validator_id); + let tower1 = Tower::restore(&ledger_path, &validator_id).unwrap(); + trace!("tower1: {:?}", tower1); + assert_eq!(tower1.root(), Some(0)); + + // Restart the validator and wait for a new root + cluster.restart_node(&validator_id, validator_info); + let validator_client = cluster.get_validator_client(&validator_id).unwrap(); + + // Wait for the first root + loop { + if let Ok(root) = validator_client.get_slot_with_commitment(CommitmentConfig::root()) { + trace!("current root: {}", root); + if root > last_replayed_root + 1 { + last_replayed_root = root; + break; + } + } + sleep(Duration::from_millis(50)); + } + + // Stop validator, and check saved tower + let recent_slot = validator_client + .get_slot_with_commitment(CommitmentConfig::recent()) + .unwrap(); + let validator_info = cluster.exit_node(&validator_id); + let tower2 = Tower::restore(&ledger_path, &validator_id).unwrap(); + trace!("tower2: {:?}", tower2); + assert_eq!(tower2.root(), Some(last_replayed_root)); + last_replayed_root = recent_slot; + + // Rollback saved tower to `tower1` to simulate a validator starting from a newer snapshot + // without having to wait for that snapshot to be generated in this test + tower1.save(&validator_identity_keypair).unwrap(); + + cluster.restart_node(&validator_id, validator_info); + let validator_client = cluster.get_validator_client(&validator_id).unwrap(); + + // Wait for a new root, demonstrating the validator was able to make progress from the older `tower1` + loop { + if let Ok(root) = validator_client.get_slot_with_commitment(CommitmentConfig::root()) { + trace!( + "current root: {}, last_replayed_root: {}", + root, + last_replayed_root + ); + if root > last_replayed_root { + break; + } + } + sleep(Duration::from_millis(50)); + } + + // Check the new root is reflected in the saved tower state + let mut validator_info = cluster.exit_node(&validator_id); + let tower3 = Tower::restore(&ledger_path, &validator_id).unwrap(); + trace!("tower3: {:?}", tower3); + assert!(tower3.root().unwrap() > last_replayed_root); + + // Remove the tower file entirely and allow the validator to start without a tower. It will + // rebuild tower from its vote account contents + fs::remove_file(Tower::get_filename(&ledger_path, &validator_id)).unwrap(); + validator_info.config.require_tower = false; + + cluster.restart_node(&validator_id, validator_info); + let validator_client = cluster.get_validator_client(&validator_id).unwrap(); + + // Wait for a couple more slots to pass so another vote occurs + let current_slot = validator_client + .get_slot_with_commitment(CommitmentConfig::recent()) + .unwrap(); + loop { + if let Ok(slot) = validator_client.get_slot_with_commitment(CommitmentConfig::recent()) { + trace!("current_slot: {}, slot: {}", current_slot, slot); + if slot > current_slot + 1 { + break; + } + } + sleep(Duration::from_millis(50)); + } + + cluster.close_preserve_ledgers(); + + let tower4 = Tower::restore(&ledger_path, &validator_id).unwrap(); + trace!("tower4: {:?}", tower4); + // should tower4 advance 1 slot compared to tower3???? + assert_eq!(tower4.root(), tower3.root().map(|s| s + 1)); +} + fn wait_for_next_snapshot( cluster: &LocalCluster, snapshot_package_output_path: &Path, diff --git a/multinode-demo/bootstrap-validator.sh b/multinode-demo/bootstrap-validator.sh index b11c0418926..486aec9bc67 100755 --- a/multinode-demo/bootstrap-validator.sh +++ b/multinode-demo/bootstrap-validator.sh @@ -90,6 +90,7 @@ ledger_dir="$SOLANA_CONFIG_DIR"/bootstrap-validator args+=( --enable-rpc-exit --enable-rpc-set-log-filter + --require-tower --ledger "$ledger_dir" --rpc-port 8899 --snapshot-interval-slots 200 diff --git a/multinode-demo/validator.sh b/multinode-demo/validator.sh index b9b15663512..5536386d312 100755 --- a/multinode-demo/validator.sh +++ b/multinode-demo/validator.sh @@ -225,6 +225,7 @@ default_arg --ledger "$ledger_dir" default_arg --log - default_arg --enable-rpc-exit default_arg --enable-rpc-set-log-filter +default_arg --require-tower if [[ -n $SOLANA_CUDA ]]; then program=$solana_validator_cuda diff --git a/programs/vote/src/vote_state/mod.rs b/programs/vote/src/vote_state/mod.rs index 34d76e0e92f..3773143aa57 100644 --- a/programs/vote/src/vote_state/mod.rs +++ b/programs/vote/src/vote_state/mod.rs @@ -162,6 +162,9 @@ pub struct VoteState { pub commission: u8, pub votes: VecDeque, + + // This usually the last Lockout which was popped from self.votes. + // However, it can be arbitrary slot, when being used inside Tower pub root_slot: Option, /// the signer for vote transactions diff --git a/run.sh b/run.sh index f7547f6370c..1745ebb5a34 100755 --- a/run.sh +++ b/run.sh @@ -106,6 +106,7 @@ args=( --enable-rpc-exit --enable-rpc-transaction-history --init-complete-file "$dataDir"/init-completed + --require-tower ) solana-validator "${args[@]}" & validator=$! diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 44175271999..fff025d904d 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -1170,6 +1170,10 @@ impl Bank { }); } + pub fn get_slot_history(&self) -> SlotHistory { + SlotHistory::from_account(&self.get_account(&sysvar::slot_history::id()).unwrap()).unwrap() + } + fn update_epoch_stakes(&mut self, leader_schedule_epoch: Epoch) { // update epoch_stakes cache // if my parent didn't populate for this staker's epoch, we've diff --git a/sdk/src/slot_history.rs b/sdk/src/slot_history.rs index 4765fab7049..293ae38e1b1 100644 --- a/sdk/src/slot_history.rs +++ b/sdk/src/slot_history.rs @@ -63,7 +63,7 @@ impl SlotHistory { } pub fn check(&self, slot: Slot) -> Check { - if slot >= self.next_slot { + if slot > self.newest() { Check::Future } else if slot < self.oldest() { Check::TooOld @@ -77,6 +77,10 @@ impl SlotHistory { pub fn oldest(&self) -> Slot { self.next_slot.saturating_sub(MAX_ENTRIES) } + + pub fn newest(&self) -> Slot { + self.next_slot - 1 + } } #[cfg(test)] diff --git a/validator/src/main.rs b/validator/src/main.rs index c86c05bc751..f5ee822faef 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1130,7 +1130,7 @@ pub fn main() { Arg::with_name("require_tower") .long("require-tower") .takes_value(false) - .hidden(true) + .help("Refuse to start if saved tower state is not found"), ) .arg( Arg::with_name("expected_genesis_hash") @@ -1424,6 +1424,7 @@ pub fn main() { let restricted_repair_only_mode = matches.is_present("restricted_repair_only_mode"); let mut validator_config = ValidatorConfig { + require_tower: matches.is_present("require_tower"), dev_halt_at_slot: value_t!(matches, "dev_halt_at_slot", Slot).ok(), cuda: matches.is_present("cuda"), expected_genesis_hash: matches From 7650b053bf3bbafd20a6920faaf5a34ebfc41f92 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Thu, 15 Oct 2020 18:30:33 +0900 Subject: [PATCH 2/9] Better tower logs for SwitchForkDecision and etc (#12875) * Better tower logs for SwitchForkDecision and etc * nits * Update comment --- core/src/consensus.rs | 137 ++++++++++++++++++++++++++++++--------- core/src/replay_stage.rs | 8 +-- core/src/validator.rs | 11 +--- 3 files changed, 115 insertions(+), 41 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 475ae873e83..2384ef37202 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -30,11 +30,11 @@ use std::{ }; use thiserror::Error; -#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Clone, Debug, AbiExample)] pub enum SwitchForkDecision { SwitchProof(Hash), - NoSwitch, - FailedSwitchThreshold, + SameFork, + FailedSwitchThreshold(u64, u64), } impl SwitchForkDecision { @@ -45,8 +45,11 @@ impl SwitchForkDecision { authorized_voter_pubkey: &Pubkey, ) -> Option { match self { - SwitchForkDecision::FailedSwitchThreshold => None, - SwitchForkDecision::NoSwitch => Some(vote_instruction::vote( + SwitchForkDecision::FailedSwitchThreshold(_, total_stake) => { + assert_ne!(*total_stake, 0); + None + } + SwitchForkDecision::SameFork => Some(vote_instruction::vote( vote_account_pubkey, authorized_voter_pubkey, vote, @@ -61,6 +64,10 @@ impl SwitchForkDecision { } } } + + pub fn can_vote(&self) -> bool { + !matches!(self, SwitchForkDecision::FailedSwitchThreshold(_, _)) + } } pub const VOTE_THRESHOLD_DEPTH: usize = 8; @@ -101,6 +108,8 @@ pub struct Tower { // This could be emptied after some time; but left intact indefinitely for easier // implementation stray_restored_slot: Option, + #[serde(skip)] + pub last_switch_threshold_check: Option<(Slot, SwitchForkDecision)>, } impl Default for Tower { @@ -115,6 +124,7 @@ impl Default for Tower { path: PathBuf::default(), tmp_path: PathBuf::default(), stray_restored_slot: Option::default(), + last_switch_threshold_check: Option::default(), }; // VoteState::root_slot is ensured to be Some in Tower tower.lockouts.root_slot = Some(Slot::default()); @@ -493,7 +503,7 @@ impl Tower { false } - pub(crate) fn check_switch_threshold( + fn make_check_switch_threshold_decision( &self, switch_slot: u64, ancestors: &HashMap>, @@ -520,9 +530,34 @@ impl Tower { // all of them. panic!("no ancestors found with slot: {}", last_voted_slot); } else { - // bank_forks doesn't have corresponding data for the stray restored last vote, - // meaning some inconsistency between saved tower and ledger. - // (newer snapshot, or only a saved tower is moved over to new setup?) + // This condition shouldn't occur under normal validator operation, indicating + // something unusual happened. + // Possible causes include: OS/HW crash, validator process crash, only saved tower + // is moved over to a new setup, etc... + + // However, returning empty ancestors as a fallback here shouldn't result in + // slashing by itself (Note that we couldn't fully preclude any kind of slashing if + // the failure was OS or HW level). + + // Firstly, lockout is ensured elsewhere. + + // Also, there is no risk of optimistic conf. violation. Although empty ancestors + // could result in incorrect (= more than actual) locked_out_stake and + // false-positive SwitchProof later in this function, there should be no such a + // heavier fork candidate, first of all, if the last vote (or any of its + // unavailable ancestors) were already optimistically confirmed. + // The only exception is that other validator is already violating it... + if self.is_first_switch_check() && switch_slot < last_voted_slot { + // `switch < last` is needed not to warn! this message just because of using + // newer snapshots on validator restart + let message = format!( + "bank_forks doesn't have corresponding data for the stray restored \ + last vote({}), meaning some inconsistency between saved tower and ledger.", + last_voted_slot + ); + warn!("{}", message); + datapoint_warn!("tower_warn", ("warn", message, String)); + } &empty_ancestors } }); @@ -532,7 +567,7 @@ impl Tower { if switch_slot == last_voted_slot || switch_slot_ancestors.contains(&last_voted_slot) { // If the `switch_slot is a descendant of the last vote, // no switching proof is necessary - return SwitchForkDecision::NoSwitch; + return SwitchForkDecision::SameFork; } // Should never consider switching to an ancestor @@ -598,7 +633,7 @@ impl Tower { } // Only count lockouts on slots that are: - // 1) Not ancestors of `last_vote` + // 1) Not ancestors of `last_vote`, meaning being on different fork // 2) Not from before the current root as we can't determine if // anything before the root was an ancestor of `last_vote` or not if !last_vote_ancestors.contains(lockout_interval_start) @@ -622,10 +657,43 @@ impl Tower { if (locked_out_stake as f64 / total_stake as f64) > SWITCH_FORK_THRESHOLD { SwitchForkDecision::SwitchProof(switch_proof) } else { - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(locked_out_stake, total_stake) } }) - .unwrap_or(SwitchForkDecision::NoSwitch) + .unwrap_or(SwitchForkDecision::SameFork) + } + + pub(crate) fn check_switch_threshold( + &mut self, + switch_slot: u64, + ancestors: &HashMap>, + descendants: &HashMap>, + progress: &ProgressMap, + total_stake: u64, + epoch_vote_accounts: &HashMap, + ) -> SwitchForkDecision { + let decision = self.make_check_switch_threshold_decision( + switch_slot, + ancestors, + descendants, + progress, + total_stake, + epoch_vote_accounts, + ); + let new_check = Some((switch_slot, decision.clone())); + if new_check != self.last_switch_threshold_check { + trace!( + "new switch threshold check: slot {}: {:?}", + switch_slot, + decision, + ); + self.last_switch_threshold_check = new_check; + } + decision + } + + fn is_first_switch_check(&self) -> bool { + self.last_switch_threshold_check.is_none() } pub fn check_vote_stake_threshold( @@ -932,9 +1000,9 @@ impl Tower { self.lockouts = vote_state; self.do_initialize_lockouts(root, |v| v.slot > root); trace!( - "{} lockouts initialized to {:?}", + "Lockouts in tower for {} is initialized using bank {}", self.node_pubkey, - self.lockouts + bank.slot(), ); assert_eq!( self.lockouts.node_pubkey, self.node_pubkey, @@ -986,6 +1054,7 @@ impl Tower { bincode::serialize_into(&mut file, &saved_tower)?; // file.sync_all() hurts performance; pipeline sync-ing and submitting votes to the cluster! } + trace!("persisted votes: {:?}", self.voted_slots()); fs::rename(&new_filename, &filename)?; // self.path.parent().sync_all() hurts performance same as the above sync @@ -1047,6 +1116,16 @@ pub enum TowerError { FatallyInconsistent(&'static str), } +impl TowerError { + pub fn is_file_missing(&self) -> bool { + if let TowerError::IOError(io_err) = &self { + io_err.kind() == std::io::ErrorKind::NotFound + } else { + false + } + } +} + #[frozen_abi(digest = "Gaxfwvx5MArn52mKZQgzHmDCyn5YfCuTHvp5Et3rFfpp")] #[derive(Default, Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)] pub struct SavedTower { @@ -1267,7 +1346,7 @@ pub mod test { &ancestors, &descendants, &self.progress, - &tower, + tower, ); // Make sure this slot isn't locked out or failing threshold @@ -1464,11 +1543,11 @@ pub mod test { #[test] fn test_to_vote_instruction() { let vote = Vote::default(); - let mut decision = SwitchForkDecision::FailedSwitchThreshold; + let mut decision = SwitchForkDecision::FailedSwitchThreshold(0, 1); assert!(decision .to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()) .is_none()); - decision = SwitchForkDecision::NoSwitch; + decision = SwitchForkDecision::SameFork; assert_eq!( decision.to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()), Some(vote_instruction::vote( @@ -1571,7 +1650,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::NoSwitch + SwitchForkDecision::SameFork ); // Trying to switch to another fork at 110 should fail @@ -1584,7 +1663,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a descendant of last vote should @@ -1599,7 +1678,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on an ancestor of last vote should @@ -1614,7 +1693,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, but the lockout @@ -1629,7 +1708,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, and the lockout @@ -1646,7 +1725,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, and the lockout @@ -1697,7 +1776,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); } @@ -2365,7 +2444,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::NoSwitch + SwitchForkDecision::SameFork ); // Trying to switch to another fork at 110 should fail @@ -2378,7 +2457,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); vote_simulator.simulate_lockout_interval(111, (10, 49), &other_vote_account); @@ -2456,7 +2535,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Add lockout_interval which should be excluded @@ -2470,7 +2549,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Add lockout_interval which should not be excluded diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index bf69d4c3ab2..c3b51717513 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -408,7 +408,7 @@ impl ReplayStage { &ancestors, &descendants, &progress, - &tower, + &mut tower, ); select_vote_and_reset_forks_time.stop(); @@ -1525,7 +1525,7 @@ impl ReplayStage { ancestors: &HashMap>, descendants: &HashMap>, progress: &ProgressMap, - tower: &Tower, + tower: &mut Tower, ) -> SelectVoteAndResetForkResult { // Try to vote on the actual heaviest fork. If the heaviest bank is // locked out or fails the threshold check, the validator will: @@ -1552,7 +1552,7 @@ impl ReplayStage { .epoch_vote_accounts(heaviest_bank.epoch()) .expect("Bank epoch vote accounts must contain entry for the bank's own epoch"), ); - if switch_fork_decision == SwitchForkDecision::FailedSwitchThreshold { + if let SwitchForkDecision::FailedSwitchThreshold(_, _) = switch_fork_decision { // If we can't switch, then reset to the the next votable // bank on the same fork as our last vote, but don't vote info!( @@ -1601,7 +1601,7 @@ impl ReplayStage { if !is_locked_out && vote_threshold && propagation_confirmed - && switch_fork_decision != SwitchForkDecision::FailedSwitchThreshold + && switch_fork_decision.can_vote() { info!("voting: {} {}", bank.slot(), fork_weight); SelectVoteAndResetForkResult { diff --git a/core/src/validator.rs b/core/src/validator.rs index 6f541b29300..ca3161d46bb 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -6,7 +6,7 @@ use crate::{ cluster_info::{ClusterInfo, Node}, cluster_info_vote_listener::VoteTracker, completed_data_sets_service::CompletedDataSetsService, - consensus::{reconcile_blockstore_roots_with_tower, Tower, TowerError}, + consensus::{reconcile_blockstore_roots_with_tower, Tower}, contact_info::ContactInfo, gossip_service::GossipService, optimistically_confirmed_bank_tracker::{ @@ -727,12 +727,7 @@ fn post_process_restored_tower( .unwrap_or_else(|err| { let voting_has_been_active = active_vote_account_exists_in_bank(&bank_forks.working_bank(), &vote_account); - let saved_tower_is_missing = if let TowerError::IOError(io_err) = &err { - io_err.kind() == std::io::ErrorKind::NotFound - } else { - false - }; - if !saved_tower_is_missing { + if !err.is_file_missing() { datapoint_error!( "tower_error", ( @@ -750,7 +745,7 @@ fn post_process_restored_tower( ); process::exit(1); } - if saved_tower_is_missing && !voting_has_been_active { + if err.is_file_missing() && !voting_has_been_active { // Currently, don't protect against spoofed snapshots with no tower at all info!( "Ignoring expected failed tower restore because this is the initial \ From 0edc057981b72e883492561227485120090f07b3 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Fri, 16 Oct 2020 14:44:07 +0900 Subject: [PATCH 3/9] Another some tower logging improvements (#12940) --- core/src/consensus.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 2384ef37202..9d3c6e86d3b 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -570,9 +570,13 @@ impl Tower { return SwitchForkDecision::SameFork; } - // Should never consider switching to an ancestor - // of your last vote - assert!(!last_vote_ancestors.contains(&switch_slot)); + assert!( + !last_vote_ancestors.contains(&switch_slot), + "Should never consider switching to slot ({}), which is ancestors({:?}) of last vote: {}", + switch_slot, + last_vote_ancestors, + last_voted_slot + ); // By this point, we know the `switch_slot` is on a different fork // (is neither an ancestor nor descendant of `last_vote`), so a @@ -832,16 +836,17 @@ impl Tower { replayed_root: Slot, slot_history: &SlotHistory, ) -> Result { + // sanity assertions for roots + assert!(self.root().is_some()); + let tower_root = self.root().unwrap(); info!( - "adjusting lockouts (after replay up to {}): {:?}", + "adjusting lockouts (after replay up to {}): {:?} tower root: {}", replayed_root, - self.voted_slots() + self.voted_slots(), + tower_root, ); - - // sanity assertions for roots assert_eq!(slot_history.check(replayed_root), Check::Found); - assert!(self.root().is_some()); - let tower_root = self.root().unwrap(); + // reconcile_blockstore_roots_with_tower() should already have aligned these. assert!( tower_root <= replayed_root, From 76e3e36cda89cca4bc138d8cd8dbae2ba08a7dbd Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Mon, 19 Oct 2020 16:37:03 +0900 Subject: [PATCH 4/9] Follow up to persistent tower with tests and API cleaning (#12350) * Follow up to persistent tower * Ignore for now... * Hard-code validator identities for easy reasoning * Add a test for opt. conf violation without tower * Fix compile with rust < 1.47 * Remove unused method * More move of assert tweak to the asser pr * Add comments * Clean up * Clean the test addressing various review comments * Clean up a bit --- Cargo.lock | 1 + core/src/bank_weight_fork_choice.rs | 2 +- core/src/consensus.rs | 81 +++--- local-cluster/Cargo.toml | 1 + local-cluster/src/local_cluster.rs | 9 + local-cluster/tests/local_cluster.rs | 374 +++++++++++++++++++-------- sdk/src/signature.rs | 9 + 7 files changed, 326 insertions(+), 151 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d459bbf5ba..eed3f03d681 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4128,6 +4128,7 @@ name = "solana-local-cluster" version = "1.3.24" dependencies = [ "assert_matches", + "fs_extra", "gag", "itertools 0.9.0", "log 0.4.8", diff --git a/core/src/bank_weight_fork_choice.rs b/core/src/bank_weight_fork_choice.rs index 124f40cde91..a8efbcb69c4 100644 --- a/core/src/bank_weight_fork_choice.rs +++ b/core/src/bank_weight_fork_choice.rs @@ -60,7 +60,7 @@ impl ForkChoice for BankWeightForkChoice { trace!("frozen_banks {}", frozen_banks.len()); let num_old_banks = frozen_banks .iter() - .filter(|b| b.slot() < tower.root().unwrap_or(0)) + .filter(|b| b.slot() < tower.root()) .count(); let last_voted_slot = tower.last_voted_slot(); diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 9d3c6e86d3b..92b5edc3a42 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -387,17 +387,14 @@ impl Tower { pub fn record_bank_vote(&mut self, vote: Vote) -> Option { let slot = vote.last_voted_slot().unwrap_or(0); trace!("{} record_vote for {}", self.node_pubkey, slot); - let root_slot = self.lockouts.root_slot; + let old_root = self.root(); self.lockouts.process_vote_unchecked(&vote); self.last_vote = vote; + let new_root = self.root(); - datapoint_info!( - "tower-vote", - ("latest", slot, i64), - ("root", self.lockouts.root_slot.unwrap_or(0), i64) - ); - if root_slot != self.lockouts.root_slot { - Some(self.lockouts.root_slot.unwrap()) + datapoint_info!("tower-vote", ("latest", slot, i64), ("root", new_root, i64)); + if old_root != new_root { + Some(new_root) } else { None } @@ -446,8 +443,8 @@ impl Tower { // which establishes the origin of trust (i.e. root) whether booting from genesis (slot 0) or // snapshot (slot N). In other words, there should be no possibility a Tower doesn't have // root, unlike young vote accounts. - pub fn root(&self) -> Option { - self.lockouts.root_slot + pub fn root(&self) -> Slot { + self.lockouts.root_slot.unwrap() } // a slot is recent if it's newer than the last vote we have @@ -514,7 +511,7 @@ impl Tower { ) -> SwitchForkDecision { self.last_voted_slot() .map(|last_voted_slot| { - let root = self.lockouts.root_slot.unwrap_or(0); + let root = self.root(); let empty_ancestors = HashSet::default(); let last_vote_ancestors = @@ -837,8 +834,7 @@ impl Tower { slot_history: &SlotHistory, ) -> Result { // sanity assertions for roots - assert!(self.root().is_some()); - let tower_root = self.root().unwrap(); + let tower_root = self.root(); info!( "adjusting lockouts (after replay up to {}): {:?} tower root: {}", replayed_root, @@ -1160,28 +1156,27 @@ pub fn reconcile_blockstore_roots_with_tower( tower: &Tower, blockstore: &Blockstore, ) -> blockstore_db::Result<()> { - if let Some(tower_root) = tower.root() { - let last_blockstore_root = blockstore.last_root(); - if last_blockstore_root < tower_root { - // Ensure tower_root itself to exist and be marked as rooted in the blockstore - // in addition to its ancestors. - let new_roots: Vec<_> = AncestorIterator::new_inclusive(tower_root, &blockstore) - .take_while(|current| match current.cmp(&last_blockstore_root) { - Ordering::Greater => true, - Ordering::Equal => false, - Ordering::Less => panic!( - "couldn't find a last_blockstore_root upwards from: {}!?", - tower_root - ), - }) - .collect(); - assert!( - !new_roots.is_empty(), - "at least 1 parent slot must be found" - ); + let tower_root = tower.root(); + let last_blockstore_root = blockstore.last_root(); + if last_blockstore_root < tower_root { + // Ensure tower_root itself to exist and be marked as rooted in the blockstore + // in addition to its ancestors. + let new_roots: Vec<_> = AncestorIterator::new_inclusive(tower_root, &blockstore) + .take_while(|current| match current.cmp(&last_blockstore_root) { + Ordering::Greater => true, + Ordering::Equal => false, + Ordering::Less => panic!( + "couldn't find a last_blockstore_root upwards from: {}!?", + tower_root + ), + }) + .collect(); + assert!( + !new_roots.is_empty(), + "at least 1 parent slot must be found" + ); - blockstore.set_roots(&new_roots)? - } + blockstore.set_roots(&new_roots)? } Ok(()) } @@ -2744,13 +2739,13 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![2, 3]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); tower = tower .adjust_lockouts_after_replay(replayed_root_slot, &slot_history) .unwrap(); assert_eq!(tower.voted_slots(), vec![2, 3]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] @@ -2772,7 +2767,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![2, 3]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] @@ -2796,7 +2791,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![] as Vec); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); assert_eq!(tower.stray_restored_slot, None); } @@ -2819,7 +2814,7 @@ pub mod test { .adjust_lockouts_after_replay(MAX_ENTRIES, &slot_history) .unwrap(); assert_eq!(tower.voted_slots(), vec![] as Vec); - assert_eq!(tower.root(), Some(MAX_ENTRIES)); + assert_eq!(tower.root(), MAX_ENTRIES); } #[test] @@ -2842,7 +2837,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![3, 4]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] @@ -2863,7 +2858,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![5, 6]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] @@ -2907,7 +2902,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![3, 4, 5]); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] @@ -2923,7 +2918,7 @@ pub mod test { .unwrap(); assert_eq!(tower.voted_slots(), vec![] as Vec); - assert_eq!(tower.root(), Some(replayed_root_slot)); + assert_eq!(tower.root(), replayed_root_slot); } #[test] diff --git a/local-cluster/Cargo.toml b/local-cluster/Cargo.toml index 75584dd844e..5de7aba9d1e 100644 --- a/local-cluster/Cargo.toml +++ b/local-cluster/Cargo.toml @@ -11,6 +11,7 @@ homepage = "https://solana.com/" [dependencies] itertools = "0.9.0" gag = "0.1.10" +fs_extra = "1.1.0" log = "0.4.8" rand = "0.7.0" solana-config-program = { path = "../programs/config", version = "1.3.24" } diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs index 53dd74b921d..ecc9847f47a 100644 --- a/local-cluster/src/local_cluster.rs +++ b/local-cluster/src/local_cluster.rs @@ -370,6 +370,15 @@ impl LocalCluster { validator_pubkey } + pub fn ledger_path(&self, validator_pubkey: &Pubkey) -> std::path::PathBuf { + self.validators + .get(validator_pubkey) + .unwrap() + .info + .ledger_path + .clone() + } + fn close(&mut self) { self.close_preserve_ledgers(); } diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 957eb31ba79..8adabdc5b62 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -16,7 +16,9 @@ use solana_core::{ }; use solana_download_utils::download_snapshot; use solana_ledger::{ - blockstore::Blockstore, blockstore_db::AccessType, leader_schedule::FixedSchedule, + blockstore::{Blockstore, PurgeType}, + blockstore_db::AccessType, + leader_schedule::FixedSchedule, leader_schedule::LeaderSchedule, }; use solana_local_cluster::{ @@ -36,6 +38,7 @@ use solana_sdk::{ genesis_config::ClusterType, hash::Hash, poh_config::PohConfig, + pubkey::Pubkey, signature::{Keypair, Signer}, system_transaction, }; @@ -1376,7 +1379,7 @@ fn test_no_voting() { #[test] #[serial] -fn test_optimistic_confirmation_violation_with_no_tower() { +fn test_optimistic_confirmation_violation_detection() { solana_logger::setup(); let buf = std::env::var("OPTIMISTIC_CONF_TEST_DUMP_LOG") .err() @@ -1384,9 +1387,14 @@ fn test_optimistic_confirmation_violation_with_no_tower() { // First set up the cluster with 2 nodes let slots_per_epoch = 2048; let node_stakes = vec![51, 50]; - let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true)) - .take(node_stakes.len()) - .collect(); + let validator_keys: Vec<_> = vec![ + "4qhhXNTbKD1a5vxDDLZcHKj7ELNeiivtUBxn3wUK1F5VRsQVP89VUhfXqSfgiFB14GfuBgtrQ96n9NvWQADVkcCg", + "3kHBzVwie5vTEaY6nFCPeFT8qDpoXzn7dCEioGRNBTnUDpvwnG85w8Wq63gVWpVTP8k2a8cgcWRjSXyUkEygpXWS", + ] + .iter() + .map(|s| (Arc::new(Keypair::from_base58_string(s)), true)) + .take(node_stakes.len()) + .collect(); let config = ClusterConfig { cluster_lamports: 100_000, node_stakes: node_stakes.clone(), @@ -1427,28 +1435,15 @@ fn test_optimistic_confirmation_violation_with_no_tower() { // Also, remove saved tower to intentionally make the restarted validator to violate the // optimistic confirmation { - let blockstore = Blockstore::open_with_access_type( - &exited_validator_info.info.ledger_path, - AccessType::PrimaryOnly, - None, - ) - .unwrap_or_else(|e| { - panic!( - "Failed to open ledger at {:?}, err: {}", - exited_validator_info.info.ledger_path, e - ); - }); + let blockstore = open_blockstore(&exited_validator_info.info.ledger_path); info!( "Setting slot: {} on main fork as dead, should cause fork", prev_voted_slot ); + // marking this voted slot as dead makes the saved tower garbage + // effectively. That's because its stray last vote becomes stale (= no + // ancestor in bank forks). blockstore.set_dead_slot(prev_voted_slot).unwrap(); - - std::fs::remove_file(Tower::get_filename( - &exited_validator_info.info.ledger_path, - &entry_point_id, - )) - .unwrap(); } cluster.restart_node(&entry_point_id, exited_validator_info); @@ -1488,86 +1483,6 @@ fn test_optimistic_confirmation_violation_with_no_tower() { } } -#[test] -#[serial] -#[ignore] -fn test_no_optimistic_confirmation_violation_with_tower() { - solana_logger::setup(); - let mut buf = BufferRedirect::stderr().unwrap(); - - // First set up the cluster with 2 nodes - let slots_per_epoch = 2048; - let node_stakes = vec![51, 50]; - let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true)) - .take(node_stakes.len()) - .collect(); - let config = ClusterConfig { - cluster_lamports: 100_000, - node_stakes: node_stakes.clone(), - validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], - validator_keys: Some(validator_keys), - slots_per_epoch, - stakers_slot_offset: slots_per_epoch, - skip_warmup_slots: true, - ..ClusterConfig::default() - }; - let mut cluster = LocalCluster::new(&config); - let entry_point_id = cluster.entry_point_info.id; - // Let the nodes run for a while. Wait for validators to vote on slot `S` - // so that the vote on `S-1` is definitely in gossip and optimistic confirmation is - // detected on slot `S-1` for sure, then stop the heavier of the two - // validators - let client = cluster.get_validator_client(&entry_point_id).unwrap(); - let mut prev_voted_slot = 0; - loop { - let last_voted_slot = client - .get_slot_with_commitment(CommitmentConfig::recent()) - .unwrap(); - if last_voted_slot > 50 { - if prev_voted_slot == 0 { - prev_voted_slot = last_voted_slot; - } else { - break; - } - } - sleep(Duration::from_millis(100)); - } - - let exited_validator_info = cluster.exit_node(&entry_point_id); - - // Mark fork as dead on the heavier validator, this should make the fork effectively - // dead, even though it was optimistically confirmed. The smaller validator should - // create and jump over to a new fork - { - let blockstore = Blockstore::open_with_access_type( - &exited_validator_info.info.ledger_path, - AccessType::PrimaryOnly, - None, - ) - .unwrap_or_else(|e| { - panic!( - "Failed to open ledger at {:?}, err: {}", - exited_validator_info.info.ledger_path, e - ); - }); - info!( - "Setting slot: {} on main fork as dead, should cause fork", - prev_voted_slot - ); - blockstore.set_dead_slot(prev_voted_slot).unwrap(); - } - cluster.restart_node(&entry_point_id, exited_validator_info); - - cluster.check_no_new_roots(400, "test_no_optimistic_confirmation_violation_with_tower"); - - // Check to see that validator didn't detected optimistic confirmation for - // `prev_voted_slot` failed - let expected_log = format!("Optimistic slot {} was not rooted", prev_voted_slot); - let mut output = String::new(); - buf.read_to_string(&mut output).unwrap(); - assert!(!output.contains(&expected_log)); -} - #[test] #[serial] fn test_validator_saves_tower() { @@ -1616,7 +1531,7 @@ fn test_validator_saves_tower() { let validator_info = cluster.exit_node(&validator_id); let tower1 = Tower::restore(&ledger_path, &validator_id).unwrap(); trace!("tower1: {:?}", tower1); - assert_eq!(tower1.root(), Some(0)); + assert_eq!(tower1.root(), 0); // Restart the validator and wait for a new root cluster.restart_node(&validator_id, validator_info); @@ -1641,7 +1556,7 @@ fn test_validator_saves_tower() { let validator_info = cluster.exit_node(&validator_id); let tower2 = Tower::restore(&ledger_path, &validator_id).unwrap(); trace!("tower2: {:?}", tower2); - assert_eq!(tower2.root(), Some(last_replayed_root)); + assert_eq!(tower2.root(), last_replayed_root); last_replayed_root = recent_slot; // Rollback saved tower to `tower1` to simulate a validator starting from a newer snapshot @@ -1670,11 +1585,11 @@ fn test_validator_saves_tower() { let mut validator_info = cluster.exit_node(&validator_id); let tower3 = Tower::restore(&ledger_path, &validator_id).unwrap(); trace!("tower3: {:?}", tower3); - assert!(tower3.root().unwrap() > last_replayed_root); + assert!(tower3.root() > last_replayed_root); // Remove the tower file entirely and allow the validator to start without a tower. It will // rebuild tower from its vote account contents - fs::remove_file(Tower::get_filename(&ledger_path, &validator_id)).unwrap(); + remove_tower(&ledger_path, &validator_id); validator_info.config.require_tower = false; cluster.restart_node(&validator_id, validator_info); @@ -1699,7 +1614,252 @@ fn test_validator_saves_tower() { let tower4 = Tower::restore(&ledger_path, &validator_id).unwrap(); trace!("tower4: {:?}", tower4); // should tower4 advance 1 slot compared to tower3???? - assert_eq!(tower4.root(), tower3.root().map(|s| s + 1)); + assert_eq!(tower4.root(), tower3.root() + 1); +} + +fn open_blockstore(ledger_path: &Path) -> Blockstore { + Blockstore::open_with_access_type(ledger_path, AccessType::PrimaryOnly, None).unwrap_or_else( + |e| { + panic!("Failed to open ledger at {:?}, err: {}", ledger_path, e); + }, + ) +} + +fn purge_slots(blockstore: &Blockstore, start_slot: Slot, slot_count: Slot) { + blockstore.purge_from_next_slots(start_slot, start_slot + slot_count); + blockstore.purge_slots(start_slot, start_slot + slot_count, PurgeType::Exact); +} + +fn last_vote_in_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option { + let tower = Tower::restore(&ledger_path, &node_pubkey); + if let Err(tower_err) = tower { + if tower_err.is_file_missing() { + return None; + } else { + panic!("tower restore failed...: {:?}", tower_err); + } + } + // actually saved tower must have at least one vote. + let last_vote = Tower::restore(&ledger_path, &node_pubkey) + .unwrap() + .last_voted_slot() + .unwrap(); + Some(last_vote) +} + +fn remove_tower(ledger_path: &Path, node_pubkey: &Pubkey) { + fs::remove_file(Tower::get_filename(&ledger_path, &node_pubkey)).unwrap(); +} + +// A bit convoluted test case; but this roughly follows this test theoretical scenario: +// +// Step 1: You have validator A + B with 31% and 36% of the stake: +// +// S0 -> S1 -> S2 -> S3 (A + B vote, optimistically confirmed) +// +// Step 2: Turn off A + B, and truncate the ledger after slot `S3` (simulate votes not +// landing in next slot). +// Start validator C with 33% of the stake with same ledger, but only up to slot S2. +// Have `C` generate some blocks like: +// +// S0 -> S1 -> S2 -> S4 +// +// Step 3: Then restart `A` which had 31% of the stake. With the tower, from `A`'s +// perspective it sees: +// +// S0 -> S1 -> S2 -> S3 (voted) +// | +// -> S4 -> S5 (C's vote for S4) +// +// The fork choice rule weights look like: +// +// S0 -> S1 -> S2 (ABC) -> S3 +// | +// -> S4 (C) -> S5 +// +// Step 4: +// Without the persisted tower: +// `A` would choose to vote on the fork with `S4 -> S5`. This is true even if `A` +// generates a new fork starting at slot `S3` because `C` has more stake than `A` +// so `A` will eventually pick the fork `C` is on. +// +// Furthermore `B`'s vote on `S3` is not observable because there are no +// descendants of slot `S3`, so that fork will not be chosen over `C`'s fork +// +// With the persisted tower: +// `A` should not be able to generate a switching proof. +// +fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: bool) { + solana_logger::setup(); + + // First set up the cluster with 4 nodes + let slots_per_epoch = 2048; + let node_stakes = vec![31, 36, 33, 0]; + + // Each pubkeys are prefixed with A, B, C and D. + // D is needed to avoid NoPropagatedConfirmation erorrs + let validator_keys = vec![ + "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4", + "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8", + "4mx9yoFBeYasDKBGDWCTWGJdWuJCKbgqmuP8bN9umybCh5Jzngw7KQxe99Rf5uzfyzgba1i65rJW4Wqk7Ab5S8ye", + "3zsEPEDsjfEay7te9XqNjRTCE7vwuT6u4DHzBJC19yp7GS8BuNRMRjnpVrKCBzb3d44kxc4KPGSHkCmk6tEfswCg", + ] + .iter() + .map(|s| (Arc::new(Keypair::from_base58_string(s)), true)) + .take(node_stakes.len()) + .collect::>(); + let validators = validator_keys + .iter() + .map(|(kp, _)| kp.pubkey()) + .collect::>(); + let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) = + (validators[0], validators[1], validators[2]); + + let config = ClusterConfig { + cluster_lamports: 100_000, + node_stakes: node_stakes.clone(), + validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], + validator_keys: Some(validator_keys), + slots_per_epoch, + stakers_slot_offset: slots_per_epoch, + skip_warmup_slots: true, + ..ClusterConfig::default() + }; + let mut cluster = LocalCluster::new(&config); + + let base_slot = 26; // S2 + let next_slot_on_a = 27; // S3 + let truncated_slots = 100; // just enough to purge all following slots after the S2 and S3 + + let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey); + let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey); + let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey); + + // Immediately kill validator C + let validator_c_info = cluster.exit_node(&validator_c_pubkey); + + // Step 1: + // Let validator A, B, (D) run for a while. + let (mut validator_a_finished, mut validator_b_finished) = (false, false); + while !(validator_a_finished && validator_b_finished) { + sleep(Duration::from_millis(100)); + + if let Some(last_vote) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) { + if !validator_a_finished && last_vote >= next_slot_on_a { + validator_a_finished = true; + } + } + if let Some(last_vote) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey) { + if !validator_b_finished && last_vote >= next_slot_on_a { + validator_b_finished = true; + } + } + } + // kill them at once after the above loop; otherwise one might stall the other! + let validator_a_info = cluster.exit_node(&validator_a_pubkey); + let _validator_b_info = cluster.exit_node(&validator_b_pubkey); + + // Step 2: + // Stop validator and truncate ledger + info!("truncate validator C's ledger"); + { + // first copy from validator A's ledger + std::fs::remove_dir_all(&validator_c_info.info.ledger_path).unwrap(); + let mut opt = fs_extra::dir::CopyOptions::new(); + opt.copy_inside = true; + fs_extra::dir::copy(&val_a_ledger_path, &val_c_ledger_path, &opt).unwrap(); + // Remove A's tower in the C's new copied ledger + remove_tower(&validator_c_info.info.ledger_path, &validator_a_pubkey); + + let blockstore = open_blockstore(&validator_c_info.info.ledger_path); + purge_slots(&blockstore, base_slot + 1, truncated_slots); + } + info!("truncate validator A's ledger"); + { + let blockstore = open_blockstore(&val_a_ledger_path); + purge_slots(&blockstore, next_slot_on_a + 1, truncated_slots); + if !with_tower { + info!("Removing tower!"); + remove_tower(&val_a_ledger_path, &validator_a_pubkey); + + // Remove next_slot_on_a from ledger to force validator A to select + // votes_on_c_fork. Otherwise the validator A will immediately vote + // for 27 on restart, because it hasn't gotten the heavier fork from + // validator C yet. + // Then it will be stuck on 27 unable to switch because C doesn't + // have enough stake to generate a switching proof + purge_slots(&blockstore, next_slot_on_a, truncated_slots); + } else { + info!("Not removing tower!"); + } + } + + // Step 3: + // Run validator C only to make it produce and vote on its own fork. + info!("Restart validator C again!!!"); + let val_c_ledger_path = validator_c_info.info.ledger_path.clone(); + cluster.restart_node(&validator_c_pubkey, validator_c_info); + + let mut votes_on_c_fork = std::collections::BTreeSet::new(); // S4 and S5 + for _ in 0..100 { + sleep(Duration::from_millis(100)); + + if let Some(last_vote) = last_vote_in_tower(&val_c_ledger_path, &validator_c_pubkey) { + if last_vote != base_slot { + votes_on_c_fork.insert(last_vote); + // Collect 4 votes + if votes_on_c_fork.len() >= 4 { + break; + } + } + } + } + assert!(!votes_on_c_fork.is_empty()); + info!("collected validator C's votes: {:?}", votes_on_c_fork); + + // Step 4: + // verify whether there was violation or not + info!("Restart validator A again!!!"); + cluster.restart_node(&validator_a_pubkey, validator_a_info); + + // monitor for actual votes from validator A + let mut bad_vote_detected = false; + for _ in 0..100 { + sleep(Duration::from_millis(100)); + + if let Some(last_vote) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) { + if votes_on_c_fork.contains(&last_vote) { + bad_vote_detected = true; + break; + } + } + } + + // an elaborate way of assert!(with_tower && !bad_vote_detected || ...) + let expects_optimistic_confirmation_violation = !with_tower; + if bad_vote_detected != expects_optimistic_confirmation_violation { + if bad_vote_detected { + panic!("No violation expected because of persisted tower!"); + } else { + panic!("Violation expected because of removed persisted tower!"); + } + } else if bad_vote_detected { + info!("THIS TEST expected violations. And indeed, there was some, because of removed persisted tower."); + } else { + info!("THIS TEST expected no violation. And indeed, there was none, thanks to persisted tower."); + } +} + +#[test] +#[serial] +fn test_no_optimistic_confirmation_violation_with_tower() { + do_test_optimistic_confirmation_violation_with_or_without_tower(true); +} + +#[test] +#[serial] +fn test_optimistic_confirmation_violation_without_tower() { + do_test_optimistic_confirmation_violation_with_or_without_tower(false); } fn wait_for_next_snapshot( diff --git a/sdk/src/signature.rs b/sdk/src/signature.rs index 7a254217854..0a8985a23e0 100644 --- a/sdk/src/signature.rs +++ b/sdk/src/signature.rs @@ -43,6 +43,15 @@ impl Keypair { self.0.to_bytes() } + pub fn from_base58_string(s: &str) -> Self { + Self::from_bytes(&bs58::decode(s).into_vec().unwrap()).unwrap() + } + + pub fn to_base58_string(&self) -> String { + // Remove .iter() once we're rust 1.47+ + bs58::encode(&self.0.to_bytes().iter()).into_string() + } + pub fn secret(&self) -> &ed25519_dalek::SecretKey { &self.0.secret } From d3c085bc39d3ddf9c2ff20d7556d73b62944308e Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Wed, 21 Oct 2020 10:26:20 +0900 Subject: [PATCH 5/9] Various clean-ups before assert adjustment (#13006) * Various clean-ups before assert adjustment * oops --- core/src/consensus.rs | 137 ++++++++++++++++------- core/src/heaviest_subtree_fork_choice.rs | 6 +- core/src/validator.rs | 2 +- 3 files changed, 98 insertions(+), 47 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 92b5edc3a42..ebd91cd8168 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -107,6 +107,8 @@ pub struct Tower { // (This is a special field for slashing-free validator restart with edge cases). // This could be emptied after some time; but left intact indefinitely for easier // implementation + // Further, stray slot can be stale or not. `Stale` here means whether given + // bank_forks (=~ ledger) lacks the slot or not. stray_restored_slot: Option, #[serde(skip)] pub last_switch_threshold_check: Option<(Slot, SwitchForkDecision)>, @@ -438,7 +440,7 @@ impl Tower { // root may be forcibly set by arbitrary replay root slot, for example from a root // after replaying a snapshot. - // Also, tower.root() couldn't be None; do_initialize_lockouts() ensures that. + // Also, tower.root() couldn't be None; initialize_lockouts() ensures that. // Conceptually, every tower must have been constructed from a concrete starting point, // which establishes the origin of trust (i.e. root) whether booting from genesis (slot 0) or // snapshot (slot N). In other words, there should be no possibility a Tower doesn't have @@ -517,7 +519,7 @@ impl Tower { let last_vote_ancestors = ancestors.get(&last_voted_slot).unwrap_or_else(|| { if !self.is_stray_last_vote() { - // Unless last vote is stray, ancestors.get(last_voted_slot) must + // Unless last vote is stray and stale, ancestors.get(last_voted_slot) must // return Some(_), justifying to panic! here. // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be @@ -527,8 +529,8 @@ impl Tower { // all of them. panic!("no ancestors found with slot: {}", last_voted_slot); } else { - // This condition shouldn't occur under normal validator operation, indicating - // something unusual happened. + // This condition (stale stray last vote) shouldn't occur under normal validator + // operation, indicating something unusual happened. // Possible causes include: OS/HW crash, validator process crash, only saved tower // is moved over to a new setup, etc... @@ -829,17 +831,18 @@ impl Tower { // The tower root can be older/newer if the validator booted from a newer/older snapshot, so // tower lockouts may need adjustment pub fn adjust_lockouts_after_replay( - self, + mut self, replayed_root: Slot, slot_history: &SlotHistory, ) -> Result { // sanity assertions for roots let tower_root = self.root(); info!( - "adjusting lockouts (after replay up to {}): {:?} tower root: {}", + "adjusting lockouts (after replay up to {}): {:?} tower root: {} replayed root: {}", replayed_root, self.voted_slots(), tower_root, + replayed_root, ); assert_eq!(slot_history.check(replayed_root), Check::Found); @@ -860,30 +863,24 @@ impl Tower { ) ); - // return immediately if votes are empty... - if self.lockouts.votes.is_empty() { - return Ok(self); - } - - let last_voted_slot = self.last_voted_slot().unwrap(); - if slot_history.check(last_voted_slot) == Check::TooOld { - // We could try hard to anchor with other older votes, but opt to simplify the - // following logic - return Err(TowerError::TooOldTower( - last_voted_slot, - slot_history.oldest(), - )); + if let Some(last_voted_slot) = self.last_voted_slot() { + if slot_history.check(last_voted_slot) == Check::TooOld { + // We could try hard to anchor with other older votes, but opt to simplify the + // following logic + return Err(TowerError::TooOldTower( + last_voted_slot, + slot_history.oldest(), + )); + } + self.adjust_lockouts_with_slot_history(slot_history)?; + self.initialize_root(replayed_root); } - self.do_adjust_lockouts_after_replay(tower_root, replayed_root, slot_history) + Ok(self) } - fn do_adjust_lockouts_after_replay( - mut self, - tower_root: Slot, - replayed_root: Slot, - slot_history: &SlotHistory, - ) -> Result { + fn adjust_lockouts_with_slot_history(&mut self, slot_history: &SlotHistory) -> Result<()> { + let tower_root = self.root(); // retained slots will be consisted only from divergent slots let mut retain_flags_for_each_vote_in_reverse: Vec<_> = Vec::with_capacity(self.lockouts.votes.len()); @@ -931,7 +928,12 @@ impl Tower { if !voting_from_genesis { // Unless we're voting since genesis, slots_in_tower must always be older than last checked_slot // including all vote slot and the root slot. - assert!(*slot_in_tower < checked_slot) + assert!( + *slot_in_tower < checked_slot, + "slot_in_tower({}) < checked_slot({})", + *slot_in_tower, + checked_slot + ); } } @@ -959,15 +961,10 @@ impl Tower { retain_flags_for_each_vote_in_reverse.into_iter().rev(); let original_votes_len = self.lockouts.votes.len(); - self.do_initialize_lockouts(replayed_root, move |_| { - retain_flags_for_each_vote.next().unwrap() - }); + self.initialize_lockouts(move |_| retain_flags_for_each_vote.next().unwrap()); if self.lockouts.votes.is_empty() { - info!( - "All restored votes were behind replayed_root({}); resetting root_slot and last_vote in tower!", - replayed_root - ); + info!("All restored votes were behind; resetting root_slot and last_vote in tower!"); // we might not have banks for those votes so just reset. // That's because the votes may well past replayed_root self.last_vote = Vote::default(); @@ -986,7 +983,7 @@ impl Tower { self.stray_restored_slot = Some(self.last_vote.last_voted_slot().unwrap()); } - Ok(self) + Ok(()) } fn initialize_lockouts_from_bank( @@ -999,7 +996,8 @@ impl Tower { let vote_state = VoteState::deserialize(&vote_account.data) .expect("vote_account isn't a VoteState?"); self.lockouts = vote_state; - self.do_initialize_lockouts(root, |v| v.slot > root); + self.initialize_root(root); + self.initialize_lockouts(|v| v.slot > root); trace!( "Lockouts in tower for {} is initialized using bank {}", self.node_pubkey, @@ -1010,7 +1008,7 @@ impl Tower { "vote account's node_pubkey doesn't match", ); } else { - self.do_initialize_lockouts(root, |_| true); + self.initialize_root(root); info!( "vote account({}) not found in bank (slot={})", vote_account_pubkey, @@ -1019,13 +1017,16 @@ impl Tower { } } - fn do_initialize_lockouts bool>(&mut self, root: Slot, should_retain: F) { - // Updating root is needed to correctly restore from newly-saved tower for the next - // boot - self.lockouts.root_slot = Some(root); + fn initialize_lockouts bool>(&mut self, should_retain: F) { self.lockouts.votes.retain(should_retain); } + // Updating root is needed to correctly restore from newly-saved tower for the next + // boot + fn initialize_root(&mut self, root: Slot) { + self.lockouts.root_slot = Some(root); + } + pub fn get_filename(path: &Path, node_pubkey: &Pubkey) -> PathBuf { path.join(format!("tower-{}", node_pubkey)) .with_extension("bin") @@ -1176,7 +1177,7 @@ pub fn reconcile_blockstore_roots_with_tower( "at least 1 parent slot must be found" ); - blockstore.set_roots(&new_roots)? + blockstore.set_roots(&new_roots)?; } Ok(()) } @@ -2796,7 +2797,7 @@ pub mod test { } #[test] - fn test_adjust_lockouts_after_relay_all_rooted_with_too_old() { + fn test_adjust_lockouts_after_replay_all_rooted_with_too_old() { use solana_sdk::slot_history::MAX_ENTRIES; let mut tower = Tower::new_for_tests(10, 0.9); @@ -2999,4 +3000,54 @@ pub mod test { "The tower is fatally inconsistent with blockstore: not too old once after got too old?" ); } + + #[test] + #[should_panic(expected = "slot_in_tower(2) < checked_slot(1)")] + fn test_adjust_lockouts_after_replay_reversed_votes() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(2)); + tower.lockouts.votes.push_back(Lockout::new(1)); + let vote = Vote::new(vec![1], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(2); + + tower + .adjust_lockouts_after_replay(2, &slot_history) + .unwrap(); + } + + #[test] + #[should_panic(expected = "slot_in_tower(3) < checked_slot(3)")] + fn test_adjust_lockouts_after_replay_repeated_non_root_votes() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(2)); + tower.lockouts.votes.push_back(Lockout::new(3)); + tower.lockouts.votes.push_back(Lockout::new(3)); + let vote = Vote::new(vec![3], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(2); + + tower + .adjust_lockouts_after_replay(2, &slot_history) + .unwrap(); + } + + #[test] + fn test_adjust_lockouts_after_replay_vote_on_genesis() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(0)); + let vote = Vote::new(vec![0], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + + assert!(tower.adjust_lockouts_after_replay(0, &slot_history).is_ok()); + } } diff --git a/core/src/heaviest_subtree_fork_choice.rs b/core/src/heaviest_subtree_fork_choice.rs index fc77d762c7d..236980e973a 100644 --- a/core/src/heaviest_subtree_fork_choice.rs +++ b/core/src/heaviest_subtree_fork_choice.rs @@ -499,7 +499,7 @@ impl HeaviestSubtreeForkChoice { let heaviest_slot_on_same_voted_fork = self.best_slot(last_voted_slot); if heaviest_slot_on_same_voted_fork.is_none() { if !tower.is_stray_last_vote() { - // Unless last vote is stray, self.bast_slot(last_voted_slot) must return + // Unless last vote is stray and stale, self.bast_slot(last_voted_slot) must return // Some(_), justifying to panic! here. // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be @@ -508,12 +508,12 @@ impl HeaviestSubtreeForkChoice { // validator has been running, so we must be able to fetch best_slots for all of // them. panic!( - "a bank at last_voted_slot({}) is a frozen bank so must have been\ + "a bank at last_voted_slot({}) is a frozen bank so must have been \ added to heaviest_subtree_fork_choice at time of freezing", last_voted_slot, ) } else { - // fork_infos doesn't have corresponding data for the stray restored last vote, + // fork_infos doesn't have corresponding data for the stale stray last vote, // meaning some inconsistency between saved tower and ledger. // (newer snapshot, or only a saved tower is moved over to new setup?) return None; diff --git a/core/src/validator.rs b/core/src/validator.rs index ca3161d46bb..114ce72228d 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -741,7 +741,7 @@ fn post_process_restored_tower( error!("Requested mandatory tower restore failed: {}", err); error!( "And there is an existing vote_account containing actual votes. \ - Aborting due to possible conflicting duplicate votes" + Aborting due to possible conflicting duplicate votes", ); process::exit(1); } From 987a8a56ebe7c0b1d9252a50aeb1b3b2dd0be73e Mon Sep 17 00:00:00 2001 From: carllin Date: Wed, 21 Oct 2020 15:56:50 -0700 Subject: [PATCH 6/9] Fix test_optimistic_confirmation_violation_without_tower() (#13043) Co-authored-by: Carl Lin --- local-cluster/tests/local_cluster.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 8adabdc5b62..6bda1011a1f 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -16,6 +16,7 @@ use solana_core::{ }; use solana_download_utils::download_snapshot; use solana_ledger::{ + ancestor_iterator::AncestorIterator, blockstore::{Blockstore, PurgeType}, blockstore_db::AccessType, leader_schedule::FixedSchedule, @@ -1697,7 +1698,11 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b let node_stakes = vec![31, 36, 33, 0]; // Each pubkeys are prefixed with A, B, C and D. - // D is needed to avoid NoPropagatedConfirmation erorrs + // D is needed to: + // 1) Propagate A's votes for S2 to validator C after A shuts down so that + // C can avoid NoPropagatedConfirmation errors and continue to generate blocks + // 2) Provide gossip discovery for `A` when it restarts because `A` will restart + // at a different gossip port than the entrypoint saved in C's gossip table let validator_keys = vec![ "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4", "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8", @@ -1824,17 +1829,28 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b // monitor for actual votes from validator A let mut bad_vote_detected = false; + let mut a_votes = vec![]; for _ in 0..100 { sleep(Duration::from_millis(100)); if let Some(last_vote) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) { - if votes_on_c_fork.contains(&last_vote) { + a_votes.push(last_vote); + let blockstore = Blockstore::open_with_access_type( + &val_a_ledger_path, + AccessType::TryPrimaryThenSecondary, + None, + ) + .unwrap(); + let mut ancestors = AncestorIterator::new(last_vote, &blockstore); + if ancestors.any(|a| votes_on_c_fork.contains(&a)) { bad_vote_detected = true; break; } } } + info!("Observed A's votes on: {:?}", a_votes); + // an elaborate way of assert!(with_tower && !bad_vote_detected || ...) let expects_optimistic_confirmation_violation = !with_tower; if bad_vote_detected != expects_optimistic_confirmation_violation { From df1e551b39bc74521010bb5f9d0b77ec1d361ff4 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Mon, 26 Oct 2020 11:08:20 +0900 Subject: [PATCH 7/9] Allow existence of vote on root in saved tower (#13135) --- core/src/consensus.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index ebd91cd8168..b69c348066a 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -921,11 +921,12 @@ impl Tower { } if let Some(checked_slot) = checked_slot { - // This is really special, only if tower is initialized (root = slot 0) for genesis and contains - // a vote (= slot 0) for the genesis, the slot 0 can repeat only once - let voting_from_genesis = *slot_in_tower == checked_slot && *slot_in_tower == 0; + // This is really special, only if tower is initialized and contains + // a vote for the root, the root slot can repeat only once + let voting_for_root = + *slot_in_tower == checked_slot && *slot_in_tower == tower_root; - if !voting_from_genesis { + if !voting_for_root { // Unless we're voting since genesis, slots_in_tower must always be older than last checked_slot // including all vote slot and the root slot. assert!( @@ -3038,6 +3039,23 @@ pub mod test { .unwrap(); } + #[test] + fn test_adjust_lockouts_after_replay_vote_on_root() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.root_slot = Some(42); + tower.lockouts.votes.push_back(Lockout::new(42)); + tower.lockouts.votes.push_back(Lockout::new(43)); + tower.lockouts.votes.push_back(Lockout::new(44)); + let vote = Vote::new(vec![44], Hash::default()); + tower.last_vote = vote; + + let mut slot_history = SlotHistory::default(); + slot_history.add(42); + + let tower = tower.adjust_lockouts_after_replay(42, &slot_history); + assert_eq!(tower.unwrap().voted_slots(), [43, 44]); + } + #[test] fn test_adjust_lockouts_after_replay_vote_on_genesis() { let mut tower = Tower::new_for_tests(10, 0.9); From 079a4d02b2d6e9da09f4bea8987f9c988a9d8a23 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Fri, 30 Oct 2020 19:31:23 +0900 Subject: [PATCH 8/9] Fix tower/blockstore unsync due to external causes (#12671) * Fix tower/blockstore unsync due to external causes * Add and clean up long comments * Clean up test * Comment about warped_slot_history * Run test_future_tower with master-only/master-slave * Update comments about false leader condition --- core/src/consensus.rs | 225 +++++++++++++++++++-------- local-cluster/tests/local_cluster.rs | 129 ++++++++++++++- sdk/src/slot_history.rs | 2 +- 3 files changed, 287 insertions(+), 69 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index b69c348066a..e55d341b242 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -515,6 +515,59 @@ impl Tower { .map(|last_voted_slot| { let root = self.root(); let empty_ancestors = HashSet::default(); + let empty_ancestors_due_to_minor_unsynced_ledger = || { + // This condition (stale stray last vote) shouldn't occur under normal validator + // operation, indicating something unusual happened. + // This condition could be introduced by manual ledger mishandling, + // validator SEGV, OS/HW crash, or plain No Free Space FS error. + + // However, returning empty ancestors as a fallback here shouldn't result in + // slashing by itself (Note that we couldn't fully preclude any kind of slashing if + // the failure was OS or HW level). + + // Firstly, lockout is ensured elsewhere. + + // Also, there is no risk of optimistic conf. violation. Although empty ancestors + // could result in incorrect (= more than actual) locked_out_stake and + // false-positive SwitchProof later in this function, there should be no such a + // heavier fork candidate, first of all, if the last vote (or any of its + // unavailable ancestors) were already optimistically confirmed. + // The only exception is that other validator is already violating it... + if self.is_first_switch_check() && switch_slot < last_voted_slot { + // `switch < last` is needed not to warn! this message just because of using + // newer snapshots on validator restart + let message = format!( + "bank_forks doesn't have corresponding data for the stray restored \ + last vote({}), meaning some inconsistency between saved tower and ledger.", + last_voted_slot + ); + warn!("{}", message); + datapoint_warn!("tower_warn", ("warn", message, String)); + } + &empty_ancestors + }; + + let suspended_decision_due_to_major_unsynced_ledger = || { + // This peculiar corner handling is needed mainly for a tower which is newer than + // blockstore. (Yeah, we tolerate it for ease of maintaining validator by operators) + // This condition could be introduced by manual ledger mishandling, + // validator SEGV, OS/HW crash, or plain No Free Space FS error. + + // When we're in this clause, it basically means validator is badly running + // with a future tower while replaying past slots, especially problematic is + // last_voted_slot. + // So, don't re-vote on it by returning pseudo FailedSwitchThreshold, otherwise + // there would be slashing because of double vote on one of last_vote_ancestors. + // (Well, needless to say, re-creating the duplicate block must be handled properly + // at the banking stage: https://github.com/solana-labs/solana/issues/8232) + // + // To be specific, the replay stage is tricked into a false perception where + // last_vote_ancestors is AVAILABLE for descendant-of-`switch_slot`, stale, and + // stray slots (which should always be empty_ancestors). + // + // This is covered by test_future_tower_* in local_cluster + SwitchForkDecision::FailedSwitchThreshold(0, total_stake) + }; let last_vote_ancestors = ancestors.get(&last_voted_slot).unwrap_or_else(|| { @@ -529,35 +582,7 @@ impl Tower { // all of them. panic!("no ancestors found with slot: {}", last_voted_slot); } else { - // This condition (stale stray last vote) shouldn't occur under normal validator - // operation, indicating something unusual happened. - // Possible causes include: OS/HW crash, validator process crash, only saved tower - // is moved over to a new setup, etc... - - // However, returning empty ancestors as a fallback here shouldn't result in - // slashing by itself (Note that we couldn't fully preclude any kind of slashing if - // the failure was OS or HW level). - - // Firstly, lockout is ensured elsewhere. - - // Also, there is no risk of optimistic conf. violation. Although empty ancestors - // could result in incorrect (= more than actual) locked_out_stake and - // false-positive SwitchProof later in this function, there should be no such a - // heavier fork candidate, first of all, if the last vote (or any of its - // unavailable ancestors) were already optimistically confirmed. - // The only exception is that other validator is already violating it... - if self.is_first_switch_check() && switch_slot < last_voted_slot { - // `switch < last` is needed not to warn! this message just because of using - // newer snapshots on validator restart - let message = format!( - "bank_forks doesn't have corresponding data for the stray restored \ - last vote({}), meaning some inconsistency between saved tower and ledger.", - last_voted_slot - ); - warn!("{}", message); - datapoint_warn!("tower_warn", ("warn", message, String)); - } - &empty_ancestors + empty_ancestors_due_to_minor_unsynced_ledger() } }); @@ -569,13 +594,18 @@ impl Tower { return SwitchForkDecision::SameFork; } - assert!( - !last_vote_ancestors.contains(&switch_slot), - "Should never consider switching to slot ({}), which is ancestors({:?}) of last vote: {}", - switch_slot, - last_vote_ancestors, - last_voted_slot - ); + if last_vote_ancestors.contains(&switch_slot) { + if !self.is_stray_last_vote() { + panic!( + "Should never consider switching to slot ({}), which is ancestors({:?}) of last vote: {}", + switch_slot, + last_vote_ancestors, + last_voted_slot + ); + } else { + return suspended_decision_due_to_major_unsynced_ledger(); + } + } // By this point, we know the `switch_slot` is on a different fork // (is neither an ancestor nor descendant of `last_vote`), so a @@ -846,14 +876,6 @@ impl Tower { ); assert_eq!(slot_history.check(replayed_root), Check::Found); - // reconcile_blockstore_roots_with_tower() should already have aligned these. - assert!( - tower_root <= replayed_root, - format!( - "tower root: {:?} >= replayed root slot: {}", - tower_root, replayed_root - ) - ); assert!( self.last_vote == Vote::default() && self.lockouts.votes.is_empty() || self.last_vote != Vote::default() && !self.lockouts.votes.is_empty(), @@ -864,16 +886,59 @@ impl Tower { ); if let Some(last_voted_slot) = self.last_voted_slot() { - if slot_history.check(last_voted_slot) == Check::TooOld { - // We could try hard to anchor with other older votes, but opt to simplify the - // following logic - return Err(TowerError::TooOldTower( + if tower_root <= replayed_root { + // Normally, we goes into this clause with possible help of + // reconcile_blockstore_roots_with_tower() + if slot_history.check(last_voted_slot) == Check::TooOld { + // We could try hard to anchor with other older votes, but opt to simplify the + // following logic + return Err(TowerError::TooOldTower( + last_voted_slot, + slot_history.oldest(), + )); + } + + self.adjust_lockouts_with_slot_history(slot_history)?; + self.initialize_root(replayed_root); + } else { + // This should never occur under normal operation. + // While this validator's voting is suspended this way, + // suspended_decision_due_to_major_unsynced_ledger() will be also touched. + let message = format!( + "For some reason, we're REPROCESSING slots which has already been \ + voted and ROOTED by us; \ + VOTING will be SUSPENDED UNTIL {}!", last_voted_slot, - slot_history.oldest(), - )); + ); + error!("{}", message); + datapoint_error!("tower_error", ("error", message, String)); + + // Let's pass-through adjust_lockouts_with_slot_history just for sanitization, + // using a synthesized SlotHistory. + + let mut warped_slot_history = (*slot_history).clone(); + // Blockstore doesn't have the tower_root slot because of + // (replayed_root < tower_root) in this else clause, meaning the tower is from + // the future from the view of blockstore. + // Pretend the blockstore has the future tower_root to anchor exactly with that + // slot by adding tower_root to a slot history. The added slot will be newer + // than all slots in the slot history (remember tower_root > replayed_root), + // satisfying the slot history invariant. + // Thus, the whole process will be safe as well because tower_root exists + // within both tower and slot history, guaranteeing the success of adjustment + // and retaining all of future votes correctly while sanitizing. + warped_slot_history.add(tower_root); + + self.adjust_lockouts_with_slot_history(&warped_slot_history)?; + // don't update root; future tower's root should be kept across validator + // restarts to continue to show the scary messages at restarts until the next + // voting. } - self.adjust_lockouts_with_slot_history(slot_history)?; - self.initialize_root(replayed_root); + } else { + // This else clause is for newly created tower. + // initialize_lockouts_from_bank() should ensure the following invariant, + // otherwise we're screwing something up. + assert_eq!(tower_root, replayed_root); } Ok(self) @@ -1152,8 +1217,11 @@ impl SavedTower { } } -// Given an untimely crash, tower may have roots that are not reflected in blockstore because -// `ReplayState::handle_votable_bank()` saves tower before setting blockstore roots +// Given an untimely crash, tower may have roots that are not reflected in blockstore, +// or the reverse of this. +// That's because we don't impose any ordering guarantee or any kind of write barriers +// between tower (plain old POSIX fs calls) and blockstore (through RocksDB), when +// `ReplayState::handle_votable_bank()` saves tower before setting blockstore roots. pub fn reconcile_blockstore_roots_with_tower( tower: &Tower, blockstore: &Blockstore, @@ -1173,12 +1241,23 @@ pub fn reconcile_blockstore_roots_with_tower( ), }) .collect(); - assert!( - !new_roots.is_empty(), - "at least 1 parent slot must be found" - ); - - blockstore.set_roots(&new_roots)?; + if !new_roots.is_empty() { + info!( + "Reconciling slots as root based on tower root: {:?} ({}..{}) ", + new_roots, tower_root, last_blockstore_root + ); + blockstore.set_roots(&new_roots)?; + } else { + // This indicates we're in bad state; but still don't panic here. + // That's because we might have a chance of recovering properly with + // newer snapshot. + warn!( + "Couldn't find any ancestor slots from tower root ({}) \ + towards blockstore root ({}); blockstore pruned or only \ + tower moved into new ledger?", + tower_root, last_blockstore_root, + ); + } } Ok(()) } @@ -2700,8 +2779,7 @@ pub mod test { } #[test] - #[should_panic(expected = "at least 1 parent slot must be found")] - fn test_reconcile_blockstore_roots_with_tower_panic_no_parent() { + fn test_reconcile_blockstore_roots_with_tower_nop_no_parent() { solana_logger::setup(); let blockstore_path = get_tmp_ledger_path!(); { @@ -2717,7 +2795,9 @@ pub mod test { let mut tower = Tower::new_with_key(&Pubkey::default()); tower.lockouts.root_slot = Some(4); + assert_eq!(blockstore.last_root(), 0); reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap(); + assert_eq!(blockstore.last_root(), 0); } Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction"); } @@ -3068,4 +3148,25 @@ pub mod test { assert!(tower.adjust_lockouts_after_replay(0, &slot_history).is_ok()); } + + #[test] + fn test_adjust_lockouts_after_replay_future_tower() { + let mut tower = Tower::new_for_tests(10, 0.9); + tower.lockouts.votes.push_back(Lockout::new(13)); + tower.lockouts.votes.push_back(Lockout::new(14)); + let vote = Vote::new(vec![14], Hash::default()); + tower.last_vote = vote; + tower.initialize_root(12); + + let mut slot_history = SlotHistory::default(); + slot_history.add(0); + slot_history.add(2); + + let tower = tower + .adjust_lockouts_after_replay(2, &slot_history) + .unwrap(); + assert_eq!(tower.root(), 12); + assert_eq!(tower.voted_slots(), vec![13, 14]); + assert_eq!(tower.stray_restored_slot, Some(14)); + } } diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 6bda1011a1f..6e838a01f58 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -43,6 +43,7 @@ use solana_sdk::{ signature::{Keypair, Signer}, system_transaction, }; +use solana_vote_program::vote_state::MAX_LOCKOUT_HISTORY; use std::sync::atomic::{AtomicBool, Ordering}; use std::{ collections::{HashMap, HashSet}, @@ -1631,7 +1632,7 @@ fn purge_slots(blockstore: &Blockstore, start_slot: Slot, slot_count: Slot) { blockstore.purge_slots(start_slot, start_slot + slot_count, PurgeType::Exact); } -fn last_vote_in_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option { +fn restore_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option { let tower = Tower::restore(&ledger_path, &node_pubkey); if let Err(tower_err) = tower { if tower_err.is_file_missing() { @@ -1641,11 +1642,15 @@ fn last_vote_in_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option } } // actually saved tower must have at least one vote. - let last_vote = Tower::restore(&ledger_path, &node_pubkey) - .unwrap() - .last_voted_slot() - .unwrap(); - Some(last_vote) + Tower::restore(&ledger_path, &node_pubkey).ok() +} + +fn last_vote_in_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option { + restore_tower(ledger_path, node_pubkey).map(|tower| tower.last_voted_slot().unwrap()) +} + +fn root_in_tower(ledger_path: &Path, node_pubkey: &Pubkey) -> Option { + restore_tower(ledger_path, node_pubkey).map(|tower| tower.root()) } fn remove_tower(ledger_path: &Path, node_pubkey: &Pubkey) { @@ -1866,6 +1871,118 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b } } +enum ClusterMode { + MasterOnly, + MasterSlave, +} + +fn do_test_future_tower(cluster_mode: ClusterMode) { + solana_logger::setup(); + + // First set up the cluster with 4 nodes + let slots_per_epoch = 2048; + let node_stakes = match cluster_mode { + ClusterMode::MasterOnly => vec![100], + ClusterMode::MasterSlave => vec![100, 0], + }; + + let validator_keys = vec![ + "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4", + "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8", + ] + .iter() + .map(|s| (Arc::new(Keypair::from_base58_string(s)), true)) + .take(node_stakes.len()) + .collect::>(); + let validators = validator_keys + .iter() + .map(|(kp, _)| kp.pubkey()) + .collect::>(); + let validator_a_pubkey = match cluster_mode { + ClusterMode::MasterOnly => validators[0], + ClusterMode::MasterSlave => validators[1], + }; + + let config = ClusterConfig { + cluster_lamports: 100_000, + node_stakes: node_stakes.clone(), + validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], + validator_keys: Some(validator_keys), + slots_per_epoch, + stakers_slot_offset: slots_per_epoch, + skip_warmup_slots: true, + ..ClusterConfig::default() + }; + let mut cluster = LocalCluster::new(&config); + + let val_a_ledger_path = cluster.ledger_path(&validator_a_pubkey); + + loop { + sleep(Duration::from_millis(100)); + + if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) { + if root >= 15 { + break; + } + } + } + let purged_slot_before_restart = 10; + let validator_a_info = cluster.exit_node(&validator_a_pubkey); + { + // create a warped future tower without mangling the tower itself + info!( + "Revert blockstore before slot {} and effectively create a future tower", + purged_slot_before_restart, + ); + let blockstore = open_blockstore(&val_a_ledger_path); + purge_slots(&blockstore, purged_slot_before_restart, 100); + } + + cluster.restart_node(&validator_a_pubkey, validator_a_info); + + let mut newly_rooted = false; + let some_root_after_restart = purged_slot_before_restart + 25; // 25 is arbitrary; just wait a bit + for _ in 0..600 { + sleep(Duration::from_millis(100)); + + if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) { + if root >= some_root_after_restart { + newly_rooted = true; + break; + } + } + } + let _validator_a_info = cluster.exit_node(&validator_a_pubkey); + if newly_rooted { + // there should be no forks; i.e. monotonically increasing ancestor chain + let last_vote = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap(); + let blockstore = open_blockstore(&val_a_ledger_path); + let actual_block_ancestors = AncestorIterator::new_inclusive(last_vote, &blockstore) + .take_while(|a| *a >= some_root_after_restart) + .collect::>(); + let expected_countinuous_no_fork_votes = (some_root_after_restart..=last_vote) + .rev() + .collect::>(); + assert_eq!(actual_block_ancestors, expected_countinuous_no_fork_votes); + assert!(actual_block_ancestors.len() > MAX_LOCKOUT_HISTORY); + info!("validator managed to handle future tower!"); + } else { + panic!("no root detected"); + } +} + +#[test] +#[serial] +fn test_future_tower_master_only() { + do_test_future_tower(ClusterMode::MasterOnly); +} + +#[test] +#[serial] +fn test_future_tower_master_slave() { + do_test_future_tower(ClusterMode::MasterSlave); +} + #[test] #[serial] fn test_no_optimistic_confirmation_violation_with_tower() { diff --git a/sdk/src/slot_history.rs b/sdk/src/slot_history.rs index 293ae38e1b1..3d6208cdc9e 100644 --- a/sdk/src/slot_history.rs +++ b/sdk/src/slot_history.rs @@ -6,7 +6,7 @@ use bv::BitVec; use bv::BitsMut; #[repr(C)] -#[derive(Serialize, Deserialize, PartialEq)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct SlotHistory { pub bits: BitVec, pub next_slot: Slot, From 4f39ddc8a2b4c0c873758547670a84419e2724cb Mon Sep 17 00:00:00 2001 From: carllin Date: Thu, 12 Nov 2020 06:29:04 -0800 Subject: [PATCH 9/9] Discard pre hard fork persisted tower if hard-forking (#13536) * Discard pre hard fork persisted tower if hard-forking * Relax config.require_tower * Add cluster test * nits * Remove unnecessary check Co-authored-by: Ryo Onodera Co-authored-by: Carl Lin --- core/src/consensus.rs | 3 + core/src/validator.rs | 31 ++++++- local-cluster/src/cluster.rs | 10 +++ local-cluster/src/local_cluster.rs | 31 +++++-- local-cluster/tests/local_cluster.rs | 116 +++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 7 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index e55d341b242..384759c00dc 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -1182,6 +1182,9 @@ pub enum TowerError { #[error("The tower is fatally inconsistent with blockstore: {0}")] FatallyInconsistent(&'static str), + + #[error("The tower is useless because of new hard fork: {0}")] + HardFork(Slot), } impl TowerError { diff --git a/core/src/validator.rs b/core/src/validator.rs index 114ce72228d..66228714329 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -718,11 +718,38 @@ fn post_process_restored_tower( ledger_path: &Path, bank_forks: &BankForks, ) -> Tower { + let mut should_require_tower = config.require_tower; + restored_tower .and_then(|tower| { let root_bank = bank_forks.root_bank(); let slot_history = root_bank.get_slot_history(); - tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history) + let tower = tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history); + + if let Some(wait_slot_for_supermajority) = config.wait_for_supermajority { + if root_bank.slot() == wait_slot_for_supermajority { + // intentionally fail to restore tower; we're supposedly in a new hard fork; past + // out-of-chain vote state doesn't make sense at all + // what if --wait-for-supermajority again if the validator restarted? + let message = format!("Hardfork is detected; discarding tower restoration result: {:?}", tower); + datapoint_error!( + "tower_error", + ( + "error", + message, + String + ), + ); + error!("{}", message); + + // unconditionally relax tower requirement so that we can always restore tower + // from root bank. + should_require_tower = false; + return Err(crate::consensus::TowerError::HardFork(wait_slot_for_supermajority)); + } + } + + tower }) .unwrap_or_else(|err| { let voting_has_been_active = @@ -737,7 +764,7 @@ fn post_process_restored_tower( ), ); } - if config.require_tower && voting_has_been_active { + if should_require_tower && voting_has_been_active { error!("Requested mandatory tower restore failed: {}", err); error!( "And there is an existing vote_account containing actual votes. \ diff --git a/local-cluster/src/cluster.rs b/local-cluster/src/cluster.rs index 6b507cf1f82..fe369212d79 100644 --- a/local-cluster/src/cluster.rs +++ b/local-cluster/src/cluster.rs @@ -40,5 +40,15 @@ pub trait Cluster { fn get_contact_info(&self, pubkey: &Pubkey) -> Option<&ContactInfo>; fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo; fn restart_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo); + fn create_restart_context( + &mut self, + pubkey: &Pubkey, + cluster_validator_info: &mut ClusterValidatorInfo, + ) -> (solana_core::cluster_info::Node, Option); + fn restart_node_with_context( + cluster_validator_info: ClusterValidatorInfo, + restart_context: (solana_core::cluster_info::Node, Option), + ) -> ClusterValidatorInfo; + fn add_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo); fn exit_restart_node(&mut self, pubkey: &Pubkey, config: ValidatorConfig); } diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs index ecc9847f47a..cf5d014bc14 100644 --- a/local-cluster/src/local_cluster.rs +++ b/local-cluster/src/local_cluster.rs @@ -615,7 +615,11 @@ impl Cluster for LocalCluster { node } - fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) { + fn create_restart_context( + &mut self, + pubkey: &Pubkey, + cluster_validator_info: &mut ClusterValidatorInfo, + ) -> (solana_core::cluster_info::Node, Option) { // Update the stored ContactInfo for this node let node = Node::new_localhost_with_pubkey(&pubkey); cluster_validator_info.info.contact_info = node.info.clone(); @@ -627,10 +631,28 @@ impl Cluster for LocalCluster { self.entry_point_info = node.info.clone(); None } else { - Some(&self.entry_point_info) + Some(self.entry_point_info.clone()) } }; + (node, entry_point_info) + } + + fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) { + let restart_context = self.create_restart_context(pubkey, &mut cluster_validator_info); + let cluster_validator_info = + Self::restart_node_with_context(cluster_validator_info, restart_context); + self.add_node(pubkey, cluster_validator_info); + } + + fn add_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo) { + self.validators.insert(*pubkey, cluster_validator_info); + } + + fn restart_node_with_context( + mut cluster_validator_info: ClusterValidatorInfo, + (node, entry_point_info): (Node, Option), + ) -> ClusterValidatorInfo { // Restart the node let validator_info = &cluster_validator_info.info; cluster_validator_info.config.account_paths = @@ -641,12 +663,11 @@ impl Cluster for LocalCluster { &validator_info.ledger_path, &validator_info.voting_keypair.pubkey(), vec![validator_info.voting_keypair.clone()], - entry_point_info, + entry_point_info.as_ref(), &cluster_validator_info.config, ); - cluster_validator_info.validator = Some(restarted_node); - self.validators.insert(*pubkey, cluster_validator_info); + cluster_validator_info } fn exit_restart_node(&mut self, pubkey: &Pubkey, validator_config: ValidatorConfig) { diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 6e838a01f58..58dbab623dc 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -1983,6 +1983,122 @@ fn test_future_tower_master_slave() { do_test_future_tower(ClusterMode::MasterSlave); } +#[test] +fn test_hard_fork_invalidates_tower() { + solana_logger::setup(); + + // First set up the cluster with 2 nodes + let slots_per_epoch = 2048; + let node_stakes = vec![60, 40]; + + let validator_keys = vec![ + "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4", + "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8", + ] + .iter() + .map(|s| (Arc::new(Keypair::from_base58_string(s)), true)) + .take(node_stakes.len()) + .collect::>(); + let validators = validator_keys + .iter() + .map(|(kp, _)| kp.pubkey()) + .collect::>(); + + let validator_a_pubkey = validators[0]; + let validator_b_pubkey = validators[1]; + + let config = ClusterConfig { + cluster_lamports: 100_000, + node_stakes: node_stakes.clone(), + validator_configs: vec![ValidatorConfig::default(); node_stakes.len()], + validator_keys: Some(validator_keys), + slots_per_epoch, + stakers_slot_offset: slots_per_epoch, + skip_warmup_slots: true, + ..ClusterConfig::default() + }; + let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(&config))); + + let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey); + + let min_root = 15; + loop { + sleep(Duration::from_millis(100)); + + if let Some(root) = root_in_tower(&val_a_ledger_path, &validator_a_pubkey) { + if root >= min_root { + break; + } + } + } + + let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey); + let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey); + + // setup hard fork at slot < a previously rooted slot! + let hard_fork_slot = min_root - 5; + let hard_fork_slots = Some(vec![hard_fork_slot]); + let mut hard_forks = solana_sdk::hard_forks::HardForks::default(); + hard_forks.register(hard_fork_slot); + + let expected_shred_version = solana_sdk::shred_version::compute_shred_version( + &cluster.lock().unwrap().genesis_config.hash(), + Some(&hard_forks), + ); + + validator_a_info.config.new_hard_forks = hard_fork_slots.clone(); + validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot); + validator_a_info.config.expected_shred_version = Some(expected_shred_version); + + validator_b_info.config.new_hard_forks = hard_fork_slots; + validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot); + validator_b_info.config.expected_shred_version = Some(expected_shred_version); + + // restart validator A first + let cluster_for_a = cluster.clone(); + // Spawn a thread because wait_for_supermajority blocks in Validator::new()! + let thread = std::thread::spawn(move || { + let restart_context = cluster_for_a + .lock() + .unwrap() + .create_restart_context(&validator_a_pubkey, &mut validator_a_info); + let restarted_validator_info = + LocalCluster::restart_node_with_context(validator_a_info, restart_context); + cluster_for_a + .lock() + .unwrap() + .add_node(&validator_a_pubkey, restarted_validator_info); + }); + + // test validator A actually to wait for supermajority + let mut last_vote = None; + for _ in 0..10 { + sleep(Duration::from_millis(1000)); + + let new_last_vote = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap(); + if let Some(last_vote) = last_vote { + assert_eq!(last_vote, new_last_vote); + } else { + last_vote = Some(new_last_vote); + } + } + + // restart validator B normally + cluster + .lock() + .unwrap() + .restart_node(&validator_b_pubkey, validator_b_info); + + // validator A should now start so join its thread here + thread.join().unwrap(); + + // new slots should be rooted after hard-fork cluster relaunch + cluster + .lock() + .unwrap() + .check_for_new_roots(16, &"hard fork"); +} + #[test] #[serial] fn test_no_optimistic_confirmation_violation_with_tower() {