Skip to content

Commit 15dbe7f

Browse files
authored
replay: do not start leader for a block we already have shreds for (#2416)
* replay: do not start leader for a block we already have shreds for * pr feedback: comment, move existing check to blockstore fn * move blockstore read after tick height check * pr feedback: resuse blockstore fn in next_leader_slot
1 parent f1de5c0 commit 15dbe7f

File tree

4 files changed

+157
-15
lines changed

4 files changed

+157
-15
lines changed

core/src/replay_stage.rs

+137-7
Original file line numberDiff line numberDiff line change
@@ -1992,6 +1992,14 @@ impl ReplayStage {
19921992
}
19931993
}
19941994

1995+
/// Checks if it is time for us to start producing a leader block.
1996+
/// Fails if:
1997+
/// - Current PoH has not satisfied criteria to start my leader block
1998+
/// - Startup verification is not complete,
1999+
/// - Bank forks already contains a bank for this leader slot
2000+
/// - We have not landed a vote yet and the `wait_for_vote_to_start_leader` flag is set
2001+
/// - We have failed the propagated check
2002+
/// Returns whether a new working bank was created and inserted into bank forks.
19952003
#[allow(clippy::too_many_arguments)]
19962004
fn maybe_start_leader(
19972005
my_pubkey: &Pubkey,
@@ -2005,7 +2013,7 @@ impl ReplayStage {
20052013
banking_tracer: &Arc<BankingTracer>,
20062014
has_new_vote_been_rooted: bool,
20072015
track_transaction_indexes: bool,
2008-
) {
2016+
) -> bool {
20092017
// all the individual calls to poh_recorder.read() are designed to
20102018
// increase granularity, decrease contention
20112019

@@ -2019,7 +2027,7 @@ impl ReplayStage {
20192027
} => (poh_slot, parent_slot),
20202028
PohLeaderStatus::NotReached => {
20212029
trace!("{} poh_recorder hasn't reached_leader_slot", my_pubkey);
2022-
return;
2030+
return false;
20232031
}
20242032
};
20252033

@@ -2035,12 +2043,12 @@ impl ReplayStage {
20352043

20362044
if !parent.is_startup_verification_complete() {
20372045
info!("startup verification incomplete, so skipping my leader slot");
2038-
return;
2046+
return false;
20392047
}
20402048

20412049
if bank_forks.read().unwrap().get(poh_slot).is_some() {
20422050
warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot);
2043-
return;
2051+
return false;
20442052
}
20452053
trace!(
20462054
"{} poh_slot {} parent_slot {}",
@@ -2052,7 +2060,7 @@ impl ReplayStage {
20522060
if let Some(next_leader) = leader_schedule_cache.slot_leader_at(poh_slot, Some(&parent)) {
20532061
if !has_new_vote_been_rooted {
20542062
info!("Haven't landed a vote, so skipping my leader slot");
2055-
return;
2063+
return false;
20562064
}
20572065

20582066
trace!(
@@ -2064,7 +2072,7 @@ impl ReplayStage {
20642072

20652073
// I guess I missed my slot
20662074
if next_leader != *my_pubkey {
2067-
return;
2075+
return false;
20682076
}
20692077

20702078
datapoint_info!(
@@ -2098,7 +2106,7 @@ impl ReplayStage {
20982106
latest_unconfirmed_leader_slot,
20992107
);
21002108
}
2101-
return;
2109+
return false;
21022110
}
21032111

21042112
let root_slot = bank_forks.read().unwrap().root();
@@ -2133,8 +2141,10 @@ impl ReplayStage {
21332141
.write()
21342142
.unwrap()
21352143
.set_bank(tpu_bank, track_transaction_indexes);
2144+
true
21362145
} else {
21372146
error!("{} No next leader found", my_pubkey);
2147+
false
21382148
}
21392149
}
21402150

@@ -9097,4 +9107,124 @@ pub(crate) mod tests {
90979107
.is_candidate(&(5, bank_forks.bank_hash(5).unwrap()))
90989108
.unwrap());
90999109
}
9110+
9111+
#[test]
9112+
fn test_skip_leader_slot_for_existing_slot() {
9113+
solana_logger::setup();
9114+
9115+
let ReplayBlockstoreComponents {
9116+
blockstore,
9117+
my_pubkey,
9118+
leader_schedule_cache,
9119+
poh_recorder,
9120+
vote_simulator,
9121+
rpc_subscriptions,
9122+
..
9123+
} = replay_blockstore_components(None, 1, None);
9124+
let VoteSimulator {
9125+
bank_forks,
9126+
mut progress,
9127+
..
9128+
} = vote_simulator;
9129+
9130+
let working_bank = bank_forks.read().unwrap().working_bank();
9131+
assert!(working_bank.is_complete());
9132+
assert!(working_bank.is_frozen());
9133+
// Mark startup verification as complete to avoid skipping leader slots
9134+
working_bank.set_startup_verification_complete();
9135+
9136+
// Insert a block two slots greater than current bank. This slot does
9137+
// not have a corresponding Bank in BankForks; this emulates a scenario
9138+
// where the block had previously been created and added to BankForks,
9139+
// but then got removed. This could be the case if the Bank was not on
9140+
// the major fork.
9141+
let dummy_slot = working_bank.slot() + 2;
9142+
let initial_slot = working_bank.slot();
9143+
let num_entries = 10;
9144+
let merkle_variant = true;
9145+
let (shreds, _) = make_slot_entries(dummy_slot, initial_slot, num_entries, merkle_variant);
9146+
blockstore.insert_shreds(shreds, None, false).unwrap();
9147+
9148+
// Reset PoH recorder to the completed bank to ensure consistent state
9149+
ReplayStage::reset_poh_recorder(
9150+
&my_pubkey,
9151+
&blockstore,
9152+
working_bank.clone(),
9153+
&poh_recorder,
9154+
&leader_schedule_cache,
9155+
);
9156+
9157+
// Register just over one slot worth of ticks directly with PoH recorder
9158+
let num_poh_ticks =
9159+
(working_bank.ticks_per_slot() * working_bank.hashes_per_tick().unwrap()) + 1;
9160+
poh_recorder
9161+
.write()
9162+
.map(|mut poh_recorder| {
9163+
for _ in 0..num_poh_ticks + 1 {
9164+
poh_recorder.tick();
9165+
}
9166+
})
9167+
.unwrap();
9168+
9169+
let poh_recorder = Arc::new(poh_recorder);
9170+
let (retransmit_slots_sender, _) = unbounded();
9171+
let (banking_tracer, _) = BankingTracer::new(None).unwrap();
9172+
// A vote has not technically been rooted, but it doesn't matter for
9173+
// this test to use true to avoid skipping the leader slot
9174+
let has_new_vote_been_rooted = true;
9175+
let track_transaction_indexes = false;
9176+
9177+
// We should not attempt to start leader for the dummy_slot
9178+
assert_matches!(
9179+
poh_recorder.read().unwrap().reached_leader_slot(&my_pubkey),
9180+
PohLeaderStatus::NotReached
9181+
);
9182+
assert!(!ReplayStage::maybe_start_leader(
9183+
&my_pubkey,
9184+
&bank_forks,
9185+
&poh_recorder,
9186+
&leader_schedule_cache,
9187+
&rpc_subscriptions,
9188+
&mut progress,
9189+
&retransmit_slots_sender,
9190+
&mut SkippedSlotsInfo::default(),
9191+
&banking_tracer,
9192+
has_new_vote_been_rooted,
9193+
track_transaction_indexes,
9194+
));
9195+
9196+
// Register another slots worth of ticks with PoH recorder
9197+
poh_recorder
9198+
.write()
9199+
.map(|mut poh_recorder| {
9200+
for _ in 0..num_poh_ticks + 1 {
9201+
poh_recorder.tick();
9202+
}
9203+
})
9204+
.unwrap();
9205+
9206+
// We should now start leader for dummy_slot + 1
9207+
let good_slot = dummy_slot + 1;
9208+
assert!(ReplayStage::maybe_start_leader(
9209+
&my_pubkey,
9210+
&bank_forks,
9211+
&poh_recorder,
9212+
&leader_schedule_cache,
9213+
&rpc_subscriptions,
9214+
&mut progress,
9215+
&retransmit_slots_sender,
9216+
&mut SkippedSlotsInfo::default(),
9217+
&banking_tracer,
9218+
has_new_vote_been_rooted,
9219+
track_transaction_indexes,
9220+
));
9221+
// Get the new working bank, which is also the new leader bank/slot
9222+
let working_bank = bank_forks.read().unwrap().working_bank();
9223+
// The new bank's slot must NOT be dummy_slot as the blockstore already
9224+
// had a shred inserted for dummy_slot prior to maybe_start_leader().
9225+
// maybe_start_leader() must not pick dummy_slot to avoid creating a
9226+
// duplicate block.
9227+
assert_eq!(working_bank.slot(), good_slot);
9228+
assert_eq!(working_bank.parent_slot(), initial_slot);
9229+
}
91009230
}

ledger/src/blockstore.rs

+7
Original file line numberDiff line numberDiff line change
@@ -4049,6 +4049,13 @@ impl Blockstore {
40494049
Ok(duplicate_slots_iterator.map(|(slot, _)| slot))
40504050
}
40514051

4052+
pub fn has_existing_shreds_for_slot(&self, slot: Slot) -> bool {
4053+
match self.meta(slot).unwrap() {
4054+
Some(meta) => meta.received > 0,
4055+
None => false,
4056+
}
4057+
}
4058+
40524059
/// Returns the max root or 0 if it does not exist
40534060
pub fn max_root(&self) -> Slot {
40544061
self.max_root.load(Ordering::Relaxed)

ledger/src/leader_schedule_cache.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,10 @@ impl LeaderScheduleCache {
139139
.map(move |i| i as Slot + first_slot)
140140
})
141141
.skip_while(|slot| {
142-
match blockstore {
143-
None => false,
144-
// Skip slots we have already sent a shred for.
145-
Some(blockstore) => match blockstore.meta(*slot).unwrap() {
146-
Some(meta) => meta.received > 0,
147-
None => false,
148-
},
149-
}
142+
// Skip slots we already have shreds for
143+
blockstore
144+
.map(|bs| bs.has_existing_shreds_for_slot(*slot))
145+
.unwrap_or(false)
150146
});
151147
let first_slot = schedule.next()?;
152148
let max_slot = first_slot.saturating_add(max_slot_range);

poh/src/poh_recorder.rs

+9
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,15 @@ impl PohRecorder {
576576
return PohLeaderStatus::NotReached;
577577
}
578578

579+
if self.blockstore.has_existing_shreds_for_slot(next_poh_slot) {
580+
// We already have existing shreds for this slot. This can happen when this block was previously
581+
// created and added to BankForks, however a recent PoH reset caused this bank to be removed
582+
// as it was not part of the rooted fork. If this slot is not the first slot for this leader,
583+
// and the first slot was previously ticked over, the check in `leader_schedule_cache::next_leader_slot`
584+
// will not suffice, as it only checks if there are shreds for the first slot.
585+
return PohLeaderStatus::NotReached;
586+
}
587+
579588
assert!(next_tick_height >= self.start_tick_height);
580589
let poh_slot = next_poh_slot;
581590
let parent_slot = self.start_slot();

0 commit comments

Comments
 (0)