From f85bc9dac1c4411db434aa2c473834911cfa8a34 Mon Sep 17 00:00:00 2001 From: Carl Lin Date: Fri, 14 May 2021 03:16:26 -0700 Subject: [PATCH 1/3] Propagate dead slots up to replay --- ledger/src/blockstore.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ledger/src/blockstore.rs b/ledger/src/blockstore.rs index 2c207249feecb9..bdf13880dcdf04 100644 --- a/ledger/src/blockstore.rs +++ b/ledger/src/blockstore.rs @@ -2757,12 +2757,7 @@ impl Blockstore { .zip(slot_metas) .filter_map(|(height, meta)| { meta.map(|meta| { - let valid_next_slots: Vec = meta - .next_slots - .iter() - .cloned() - .filter(|s| !self.is_dead(*s)) - .collect(); + let valid_next_slots: Vec = meta.next_slots.to_vec(); (*height, valid_next_slots) }) }) From cb5bdfdd8dfa04aef2d6a471f58c44b72844e80a Mon Sep 17 00:00:00 2001 From: Carl Lin Date: Fri, 14 May 2021 02:05:38 -0700 Subject: [PATCH 2/3] Fix dead slot handling in blockstore processor --- ledger/src/blockstore_processor.rs | 38 +++++++++++++++++----------- local-cluster/tests/local_cluster.rs | 11 +++++--- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index 1a3de33f349265..33165bca268c99 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -549,7 +549,6 @@ fn do_process_blockstore_from_root( "" }, ); - assert!(bank_forks.active_banks().is_empty()); // We might be promptly restarted after bad capitalization was detected while creating newer snapshot. // In that case, we're most likely restored from the last good snapshot and replayed up to this root. @@ -698,7 +697,6 @@ pub fn confirm_slot( allow_dead_slots: bool, ) -> result::Result<(), BlockstoreProcessorError> { let slot = bank.slot(); - let (entries, num_shreds, slot_full) = { let mut load_elapsed = Measure::start("load_elapsed"); let load_result = blockstore @@ -869,11 +867,6 @@ fn process_next_slots( .unwrap(), *next_slot, )); - trace!( - "New bank for slot {}, parent slot is {}", - next_slot, - bank.slot(), - ); pending_slots.push((next_meta, next_bank, bank.last_blockhash())); } } @@ -942,8 +935,7 @@ fn load_frozen_forks( } let mut progress = ConfirmationProgress::new(last_entry_hash); - - if process_single_slot( + let process_result = process_single_slot( blockstore, &bank, opts, @@ -953,9 +945,17 @@ fn load_frozen_forks( cache_block_time_sender, None, timing, - ) - .is_err() - { + ); + // Insert even dead banks into the BankForks so that cleanup + // of account state created on Bank creation via `Bank::new_from_parent()` + // will occur when the bank is dropped. Otherwise, this state is not + // cleaned + all_banks.insert(bank.slot(), bank.clone()); + + if let Err(e) = process_result { + warn!("processing single slot {} error {:?}", slot, e); + initial_forks.insert(bank.slot(), bank.clone()); + all_banks.insert(bank.slot(), bank.clone()); continue; } txs += progress.num_txs; @@ -963,7 +963,6 @@ fn load_frozen_forks( // Block must be frozen by this point, otherwise `process_single_slot` would // have errored above assert!(bank.is_frozen()); - all_banks.insert(bank.slot(), bank.clone()); // If we've reached the last known root in blockstore, start looking // for newer cluster confirmed roots @@ -1798,8 +1797,18 @@ pub mod tests { // Should see the parent of the dead child assert_eq!(frozen_bank_slots(&bank_forks), vec![0, 1, 2, 3]); - assert_eq!(bank_forks.working_bank().slot(), 3); + // Dead slot should be included in the processing result + assert_eq!(bank_forks.working_bank().slot(), 4); + + assert_eq!( + &bank_forks[4] + .parents() + .iter() + .map(|bank| bank.slot()) + .collect::>(), + &[2, 1, 0] + ); assert_eq!( &bank_forks[3] .parents() @@ -1816,7 +1825,6 @@ pub mod tests { .collect::>(), &[1, 0] ); - assert_eq!(bank_forks.working_bank().slot(), 3); verify_fork_infos(&bank_forks); } diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 7f384767555631..4fcece1aef863c 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -2122,9 +2122,14 @@ fn test_optimistic_confirmation_violation_detection() { "Setting slot: {} on main fork as dead, should cause fork", prev_voted_slot ); - // marking this voted slot as dead makes the saved tower garbage - // effectively. That's because its stray last vote becomes stale (= no - // ancestor in bank forks). + // Necessary otherwise tower will inform this validator that it's latest + // vote is on slot `prev_voted_slot`. This will then prevent this validator + // from resetting to the parent of `prev_voted_slot` to create an alternative fork because + // 1) Validator can't vote on earlier ancestor of last vote due to switch threshold (can't vote + // on ancestors of last vote) + // 2) Won't reset to this earlier ancestor becasue reset can only happen on same voted fork if + // it's for the last vote slot or later + remove_tower(&exited_validator_info.info.ledger_path, &entry_point_id); blockstore.set_dead_slot(prev_voted_slot).unwrap(); } From 2d1be87211eb6f672a6c5a746bea6b5c6073b0b6 Mon Sep 17 00:00:00 2001 From: Carl Lin Date: Thu, 20 May 2021 23:59:45 -0700 Subject: [PATCH 3/3] PR comments --- ledger/src/blockstore.rs | 7 +----- ledger/src/blockstore_processor.rs | 38 ++++++++++++------------------ 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/ledger/src/blockstore.rs b/ledger/src/blockstore.rs index bdf13880dcdf04..36f00e2fe99416 100644 --- a/ledger/src/blockstore.rs +++ b/ledger/src/blockstore.rs @@ -2755,12 +2755,7 @@ impl Blockstore { let result: HashMap> = slots .iter() .zip(slot_metas) - .filter_map(|(height, meta)| { - meta.map(|meta| { - let valid_next_slots: Vec = meta.next_slots.to_vec(); - (*height, valid_next_slots) - }) - }) + .filter_map(|(height, meta)| meta.map(|meta| (*height, meta.next_slots.to_vec()))) .collect(); Ok(result) diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index 33165bca268c99..1a3de33f349265 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -549,6 +549,7 @@ fn do_process_blockstore_from_root( "" }, ); + assert!(bank_forks.active_banks().is_empty()); // We might be promptly restarted after bad capitalization was detected while creating newer snapshot. // In that case, we're most likely restored from the last good snapshot and replayed up to this root. @@ -697,6 +698,7 @@ pub fn confirm_slot( allow_dead_slots: bool, ) -> result::Result<(), BlockstoreProcessorError> { let slot = bank.slot(); + let (entries, num_shreds, slot_full) = { let mut load_elapsed = Measure::start("load_elapsed"); let load_result = blockstore @@ -867,6 +869,11 @@ fn process_next_slots( .unwrap(), *next_slot, )); + trace!( + "New bank for slot {}, parent slot is {}", + next_slot, + bank.slot(), + ); pending_slots.push((next_meta, next_bank, bank.last_blockhash())); } } @@ -935,7 +942,8 @@ fn load_frozen_forks( } let mut progress = ConfirmationProgress::new(last_entry_hash); - let process_result = process_single_slot( + + if process_single_slot( blockstore, &bank, opts, @@ -945,17 +953,9 @@ fn load_frozen_forks( cache_block_time_sender, None, timing, - ); - // Insert even dead banks into the BankForks so that cleanup - // of account state created on Bank creation via `Bank::new_from_parent()` - // will occur when the bank is dropped. Otherwise, this state is not - // cleaned - all_banks.insert(bank.slot(), bank.clone()); - - if let Err(e) = process_result { - warn!("processing single slot {} error {:?}", slot, e); - initial_forks.insert(bank.slot(), bank.clone()); - all_banks.insert(bank.slot(), bank.clone()); + ) + .is_err() + { continue; } txs += progress.num_txs; @@ -963,6 +963,7 @@ fn load_frozen_forks( // Block must be frozen by this point, otherwise `process_single_slot` would // have errored above assert!(bank.is_frozen()); + all_banks.insert(bank.slot(), bank.clone()); // If we've reached the last known root in blockstore, start looking // for newer cluster confirmed roots @@ -1797,18 +1798,8 @@ pub mod tests { // Should see the parent of the dead child assert_eq!(frozen_bank_slots(&bank_forks), vec![0, 1, 2, 3]); + assert_eq!(bank_forks.working_bank().slot(), 3); - // Dead slot should be included in the processing result - assert_eq!(bank_forks.working_bank().slot(), 4); - - assert_eq!( - &bank_forks[4] - .parents() - .iter() - .map(|bank| bank.slot()) - .collect::>(), - &[2, 1, 0] - ); assert_eq!( &bank_forks[3] .parents() @@ -1825,6 +1816,7 @@ pub mod tests { .collect::>(), &[1, 0] ); + assert_eq!(bank_forks.working_bank().slot(), 3); verify_fork_infos(&bank_forks); }