diff --git a/Cargo.lock b/Cargo.lock index 7f0f523e70039e..a074972eadb7a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -524,6 +524,7 @@ name = "agave-votor" version = "4.0.0-alpha.0" dependencies = [ "agave-logger", + "agave-votor", "agave-votor-messages", "anyhow", "bincode", @@ -586,7 +587,11 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "agave-votor-messages", + "bitvec", + "bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", @@ -596,6 +601,7 @@ dependencies = [ "solana-frozen-abi-macro", "solana-hash 3.1.0", "solana-pubkey 4.0.0", + "tempfile", ] [[package]] diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 51980b6dcf2865..56d690cdab614b 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -4139,7 +4139,6 @@ impl ReplayStage { ) }, ) - .expect("rooting must succeed") } // To avoid code duplication and keep compatibility with alpenglow, we add this diff --git a/dev-bins/Cargo.lock b/dev-bins/Cargo.lock index 7c8b03fd89c2fb..45de234d7281ae 100644 --- a/dev-bins/Cargo.lock +++ b/dev-bins/Cargo.lock @@ -409,6 +409,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-signer-store", + "solana-streamer", "solana-time-utils", "solana-transaction", "solana-transaction-error", @@ -423,7 +424,10 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "bitvec", + "bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index 0665690fa78f58..90d6ab8fd52370 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -392,6 +392,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-signer-store", + "solana-streamer", "solana-time-utils", "solana-transaction", "solana-transaction-error", @@ -406,7 +407,10 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "bitvec", + 
"bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", diff --git a/votor-messages/Cargo.toml b/votor-messages/Cargo.toml index a3490f300caa74..1cf49b742fb080 100644 --- a/votor-messages/Cargo.toml +++ b/votor-messages/Cargo.toml @@ -22,7 +22,10 @@ frozen-abi = [ [dependencies] agave-feature-set = { workspace = true } agave-logger = { workspace = true } +bitvec = { workspace = true } +bytemuck = { workspace = true } log = { workspace = true } +num_enum = { workspace = true } serde = { workspace = true } solana-address = { workspace = true, features = ["curve25519"] } solana-bls-signatures = { workspace = true, features = [ @@ -39,5 +42,9 @@ solana-frozen-abi-macro = { workspace = true, optional = true, features = [ solana-hash = { workspace = true, features = ["serde"] } solana-pubkey = { workspace = true } +[dev-dependencies] +agave-votor-messages = { path = ".", features = ["dev-context-only-utils"] } +tempfile = { workspace = true } + [lints] workspace = true diff --git a/votor-messages/src/consensus_message.rs b/votor-messages/src/consensus_message.rs index 4ea229223ace82..6e71991dc80d73 100644 --- a/votor-messages/src/consensus_message.rs +++ b/votor-messages/src/consensus_message.rs @@ -12,14 +12,13 @@ pub const BLS_KEYPAIR_DERIVE_SEED: &[u8; 9] = b"alpenglow"; /// Block, a (slot, hash) tuple pub type Block = (Slot, Hash); - /// A consensus vote. #[cfg_attr( feature = "frozen-abi", derive(AbiExample), frozen_abi(digest = "5eorzdc18a1sNEUDLAKPgrHCqHmA8ssuTwKSGsZLwBqR") )] -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] pub struct VoteMessage { /// The type of the vote. 
pub vote: Vote, @@ -55,23 +54,96 @@ impl CertificateType { /// Get the slot of the certificate pub fn slot(&self) -> Slot { match self { - Self::Finalize(slot) - | Self::FinalizeFast(slot, _) - | Self::Notarize(slot, _) - | Self::NotarizeFallback(slot, _) - | Self::Skip(slot) - | Self::Genesis(slot, _) => *slot, + CertificateType::Finalize(slot) + | CertificateType::FinalizeFast(slot, _) + | CertificateType::Notarize(slot, _) + | CertificateType::NotarizeFallback(slot, _) + | CertificateType::Genesis(slot, _) + | CertificateType::Skip(slot) => *slot, } } + /// Is this a fast finalize certificate? + pub fn is_fast_finalization(&self) -> bool { + matches!(self, Self::FinalizeFast(_, _)) + } + + /// Is this a finalize / fast finalize certificate? + pub fn is_finalization(&self) -> bool { + matches!(self, Self::Finalize(_) | Self::FinalizeFast(_, _)) + } + + /// Is this a notarize fallback certificate? + pub fn is_notarize_fallback(&self) -> bool { + matches!(self, Self::NotarizeFallback(_, _)) + } + + /// Is this a skip certificate? + pub fn is_skip(&self) -> bool { + matches!(self, Self::Skip(_)) + } + + /// Is this a genesis certificate? + pub fn is_genesis(&self) -> bool { + matches!(self, Self::Genesis(_, _)) + } + /// Gets the block associated with this certificate, if present pub fn to_block(self) -> Option { match self { - Self::Finalize(_) | Self::Skip(_) => None, + CertificateType::Finalize(_) | CertificateType::Skip(_) => None, + CertificateType::Notarize(slot, block_id) + | CertificateType::NotarizeFallback(slot, block_id) + | CertificateType::Genesis(slot, block_id) + | CertificateType::FinalizeFast(slot, block_id) => Some((slot, block_id)), + } + } + + /// Reconstructs the single source `Vote` payload for this certificate. + /// + /// This method is used primarily by the signature verifier. 
For + /// certificates formed by aggregating a single type of vote + /// (e.g., a `Notarize` certificate from `Notarize` votes), this function + /// reconstructs the canonical message payload that was signed by validators. + /// + /// For `NotarizeFallback` and `Skip` certificates, this function returns the + /// appropriate payload *only* if the certificate was formed from a single + /// vote type (e.g., exclusively from `Notarize` or `Skip` votes). For + /// certificates formed from a mix of two vote types, use the `to_source_votes` + /// function. + pub fn to_source_vote(self) -> Vote { + match self { Self::Notarize(slot, block_id) - | Self::NotarizeFallback(slot, block_id) | Self::FinalizeFast(slot, block_id) - | Self::Genesis(slot, block_id) => Some((slot, block_id)), + | Self::NotarizeFallback(slot, block_id) => Vote::new_notarization_vote(slot, block_id), + Self::Finalize(slot) => Vote::new_finalization_vote(slot), + Self::Skip(slot) => Vote::new_skip_vote(slot), + Self::Genesis(slot, block_id) => Vote::new_genesis_vote(slot, block_id), + } + } + + /// Reconstructs the two distinct source `Vote` payloads for this certificate. + /// + /// This method is primarily used by the signature verifier for certificates that + /// can be formed by aggregating two different types of votes. For example, a + /// `NotarizeFallback` certificate accepts both `Notarize` and `NotarizeFallback`. + /// + /// It reconstructs both potential message payloads that were signed by validators, which + /// the verifier uses to check the single aggregate signature. 
+ pub fn to_source_votes(self) -> Option<(Vote, Vote)> { + match self { + Self::NotarizeFallback(slot, block_id) => { + let vote1 = Vote::new_notarization_vote(slot, block_id); + let vote2 = Vote::new_notarization_fallback_vote(slot, block_id); + Some((vote1, vote2)) + } + Self::Skip(slot) => { + let vote1 = Vote::new_skip_vote(slot); + let vote2 = Vote::new_skip_fallback_vote(slot); + Some((vote1, vote2)) + } + // Other certificate types do not use Base3 encoding. + _ => None, } } } @@ -118,4 +190,17 @@ impl ConsensusMessage { rank, }) } + + /// Create a new certificate. + pub fn new_certificate( + cert_type: CertificateType, + bitmap: Vec, + signature: BLSSignature, + ) -> Self { + Self::Certificate(Certificate { + cert_type, + signature, + bitmap, + }) + } } diff --git a/votor-messages/src/vote.rs b/votor-messages/src/vote.rs index 205bb1ae8d620d..72c5710ced8dfa 100644 --- a/votor-messages/src/vote.rs +++ b/votor-messages/src/vote.rs @@ -12,7 +12,7 @@ use { derive(AbiExample, AbiEnumVisitor), frozen_abi(digest = "AgKoR2cpjUSVCW7Cpihob5nDiPcFt1PXmoPKWJg3zuSB") )] -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Vote { /// A notarization vote Notarize(NotarizationVote), @@ -28,6 +28,23 @@ pub enum Vote { Genesis(GenesisVote), } +/// Enum of different types of [`Vote`]s. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum VoteType { + /// Finalize vote. + Finalize, + /// Notarize vote. + Notarize, + /// Notarize fallback vote. + NotarizeFallback, + /// Skip vote + Skip, + /// Skip fallback vote. + SkipFallback, + /// Genesis vote. 
+ Genesis, +} + impl Vote { /// Create a new notarization vote pub fn new_notarization_vote(slot: Slot, block_id: Hash) -> Self { @@ -54,7 +71,7 @@ impl Vote { Self::from(SkipFallbackVote { slot }) } - /// Create a new skip fallback vote + /// Create a new genesis vote pub fn new_genesis_vote(slot: Slot, block_id: Hash) -> Self { Self::from(GenesisVote { slot, block_id }) } @@ -106,14 +123,26 @@ impl Vote { matches!(self, Self::SkipFallback(_)) } + /// Whether the vote is a genesis vote + pub fn is_genesis_vote(&self) -> bool { + matches!(self, Self::Genesis(_)) + } + /// Whether the vote is a notarization or finalization pub fn is_notarization_or_finalization(&self) -> bool { matches!(self, Self::Notarize(_) | Self::Finalize(_)) } - /// Whether the vote is a genesis vote - pub fn is_genesis_vote(&self) -> bool { - matches!(self, Self::Genesis(_)) + /// Returns the [`VoteType`] for the vote. + pub fn get_type(&self) -> VoteType { + match self { + Vote::Notarize(_) => VoteType::Notarize, + Vote::NotarizeFallback(_) => VoteType::NotarizeFallback, + Vote::Skip(_) => VoteType::Skip, + Vote::SkipFallback(_) => VoteType::SkipFallback, + Vote::Finalize(_) => VoteType::Finalize, + Vote::Genesis(_) => VoteType::Genesis, + } } } @@ -159,7 +188,7 @@ impl From for Vote { derive(AbiExample), frozen_abi(digest = "5AdwChAjsj5QUXLdpDnGGK2L2nA8y8EajVXi6jsmTv1m") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct NotarizationVote { /// The slot this vote is cast for. pub slot: Slot, @@ -173,7 +202,7 @@ pub struct NotarizationVote { derive(AbiExample), frozen_abi(digest = "2XQ5N6YLJjF28w7cMFFUQ9SDgKuf9JpJNtAiXSPA8vR2") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct FinalizationVote { /// The slot this vote is cast for. 
pub slot: Slot, @@ -187,7 +216,7 @@ pub struct FinalizationVote { derive(AbiExample), frozen_abi(digest = "G8Nrx3sMYdnLpHsCNark3BGA58BmW2sqNnqjkYhQHtN") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct SkipVote { /// The slot this vote is cast for. pub slot: Slot, @@ -199,7 +228,7 @@ pub struct SkipVote { derive(AbiExample), frozen_abi(digest = "7j5ZPwwyz1FaG3fpyQv5PVnQXicdSmqSk8NvqzkG1Eqz") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct NotarizationFallbackVote { /// The slot this vote is cast for. pub slot: Slot, @@ -213,7 +242,7 @@ pub struct NotarizationFallbackVote { derive(AbiExample), frozen_abi(digest = "WsUNum8V62gjRU1yAnPuBMAQui4YvMwD1RwrzHeYkeF") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct SkipFallbackVote { /// The slot this vote is cast for. pub slot: Slot, @@ -227,8 +256,8 @@ pub struct SkipFallbackVote { )] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct GenesisVote { - /// The slot this genesis vote is for + /// The slot this vote is cast for. pub slot: Slot, - /// The block hash being voted on + /// The block id this vote is for. 
pub block_id: Hash, } diff --git a/votor/Cargo.toml b/votor/Cargo.toml index a1d46b32d67f10..da9cea1b9096ed 100644 --- a/votor/Cargo.toml +++ b/votor/Cargo.toml @@ -11,7 +11,10 @@ edition = { workspace = true } [features] agave-unstable-api = [] -dev-context-only-utils = ["solana-runtime/dev-context-only-utils"] +dev-context-only-utils = [ + "solana-runtime/dev-context-only-utils", + "agave-votor-messages/dev-context-only-utils", +] frozen-abi = [ "dep:solana-frozen-abi", "dep:solana-frozen-abi-macro", @@ -20,6 +23,8 @@ frozen-abi = [ "solana-ledger/frozen-abi", "solana-runtime/frozen-abi", "solana-signature/frozen-abi", + "solana-vote/frozen-abi", + "solana-vote-program/frozen-abi", "agave-votor-messages/frozen-abi", ] @@ -44,7 +49,7 @@ serde_bytes = { workspace = true } solana-account = { workspace = true } solana-accounts-db = { workspace = true } solana-bloom = { workspace = true } -solana-bls-signatures = { workspace = true } +solana-bls-signatures = { workspace = true, features = ["solana-signer-derive"] } solana-client = { workspace = true } solana-clock = { workspace = true } solana-connection-cache = { workspace = true } @@ -69,6 +74,7 @@ solana-runtime = { workspace = true } solana-signature = { workspace = true } solana-signer = { workspace = true } solana-signer-store = { workspace = true } +solana-streamer = { workspace = true } solana-time-utils = { workspace = true } solana-transaction = { workspace = true } solana-transaction-error = { workspace = true } @@ -77,6 +83,7 @@ solana-vote-program = { workspace = true } thiserror = { workspace = true } [dev-dependencies] +agave-votor = { path = ".", features = ["dev-context-only-utils"] } rand = { workspace = true } solana-net-utils = { workspace = true } solana-perf = { workspace = true, features = ["dev-context-only-utils"] } diff --git a/votor/src/common.rs b/votor/src/common.rs index fcb3a1001d8ffc..bdf58aaad1c895 100644 --- a/votor/src/common.rs +++ b/votor/src/common.rs @@ -1,6 +1,8 @@ use { 
agave_votor_messages::{ - consensus_message::CertificateType, migration::GENESIS_VOTE_THRESHOLD, vote::Vote, + consensus_message::CertificateType, + migration::GENESIS_VOTE_THRESHOLD, + vote::{Vote, VoteType}, }, std::time::Duration, }; @@ -8,70 +10,50 @@ use { // Core consensus types and constants pub type Stake = u64; -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum VoteType { - Finalize, - Notarize, - NotarizeFallback, - Skip, - SkipFallback, - Genesis, -} - -impl VoteType { - pub fn get_type(vote: &Vote) -> VoteType { - match vote { - Vote::Notarize(_) => VoteType::Notarize, - Vote::NotarizeFallback(_) => VoteType::NotarizeFallback, - Vote::Skip(_) => VoteType::Skip, - Vote::SkipFallback(_) => VoteType::SkipFallback, - Vote::Finalize(_) => VoteType::Finalize, - Vote::Genesis(_) => VoteType::Genesis, - } - } - - #[allow(dead_code)] - pub fn is_notarize_type(&self) -> bool { - matches!(self, Self::Notarize | Self::NotarizeFallback) +pub const fn conflicting_types(vote_type: VoteType) -> &'static [VoteType] { + match vote_type { + VoteType::Finalize => &[VoteType::NotarizeFallback, VoteType::Skip], + VoteType::Notarize => &[VoteType::Skip, VoteType::NotarizeFallback], + VoteType::NotarizeFallback => &[VoteType::Finalize, VoteType::Notarize], + VoteType::Skip => &[ + VoteType::Finalize, + VoteType::Notarize, + VoteType::SkipFallback, + ], + VoteType::SkipFallback => &[VoteType::Skip], + VoteType::Genesis => &[ + VoteType::Finalize, + VoteType::Notarize, + VoteType::NotarizeFallback, + VoteType::Skip, + VoteType::SkipFallback, + ], } } -/// For a given [`CertificateType`], returns the fractional stake, the [`Vote`], and the optional fallback [`Vote`] required to construct it. +/// Lookup from `CertificateId` to the `VoteType`s that contribute, +/// as well as the stake fraction required for certificate completion. /// -/// Must be in sync with [`vote_to_certificate_ids`]. 
-pub(crate) fn certificate_limits_and_votes( +/// Must be in sync with `vote_to_cert_types` +pub(crate) const fn certificate_limits_and_vote_types( cert_type: &CertificateType, -) -> (f64, Vote, Option) { +) -> (f64, &'static [VoteType]) { match cert_type { - CertificateType::Notarize(slot, block_id) => { - (0.6, Vote::new_notarization_vote(*slot, *block_id), None) - } - CertificateType::NotarizeFallback(slot, block_id) => ( - 0.6, - Vote::new_notarization_vote(*slot, *block_id), - Some(Vote::new_notarization_fallback_vote(*slot, *block_id)), - ), - CertificateType::FinalizeFast(slot, block_id) => { - (0.8, Vote::new_notarization_vote(*slot, *block_id), None) + CertificateType::Notarize(_, _) => (0.6, &[VoteType::Notarize]), + CertificateType::NotarizeFallback(_, _) => { + (0.6, &[VoteType::Notarize, VoteType::NotarizeFallback]) } - CertificateType::Finalize(slot) => (0.6, Vote::new_finalization_vote(*slot), None), - CertificateType::Skip(slot) => ( - 0.6, - Vote::new_skip_vote(*slot), - Some(Vote::new_skip_fallback_vote(*slot)), - ), - CertificateType::Genesis(slot, block_id) => ( - GENESIS_VOTE_THRESHOLD, - Vote::new_genesis_vote(*slot, *block_id), - None, - ), + CertificateType::FinalizeFast(_, _) => (0.8, &[VoteType::Notarize]), + CertificateType::Finalize(_) => (0.6, &[VoteType::Finalize]), + CertificateType::Skip(_) => (0.6, &[VoteType::Skip, VoteType::SkipFallback]), + CertificateType::Genesis(_, _) => (GENESIS_VOTE_THRESHOLD, &[VoteType::Genesis]), } } /// Lookup from `Vote` to the `CertificateId`s the vote accounts for /// /// Must be in sync with `certificate_limits_and_vote_types` and `VoteType::get_type` -pub fn vote_to_certificate_ids(vote: &Vote) -> Vec { +pub fn vote_to_cert_types(vote: &Vote) -> Vec { match vote { Vote::Notarize(vote) => vec![ CertificateType::Notarize(vote.slot, vote.block_id), diff --git a/votor/src/consensus_metrics.rs b/votor/src/consensus_metrics.rs index cb20e8b5f6ca47..dab12f0ff03d15 100644 --- 
a/votor/src/consensus_metrics.rs +++ b/votor/src/consensus_metrics.rs @@ -18,7 +18,7 @@ use { }, }; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum ConsensusMetricsEvent { /// A vote was received from the node with `id`. Vote { id: Pubkey, vote: Vote }, @@ -86,7 +86,7 @@ impl NodeVoteMetrics { Vote::Skip(_) => self.skip.increment(elapsed), Vote::SkipFallback(_) => self.skip_fallback.increment(elapsed), Vote::Finalize(_) => self.final_.increment(elapsed), - Vote::Genesis(_) => Ok(()), + Vote::Genesis(_) => Ok(()), // Only for migration, tracked elsewhere }; match res { Ok(()) => (), @@ -117,10 +117,13 @@ pub enum RecordBlockHashError { pub struct ConsensusMetrics { /// Used to track this node's view of how the other nodes on the network are voting. node_metrics: BTreeMap, + /// Used to track when this node received blocks from different leaders in the network. leader_metrics: BTreeMap, + /// Counts number of times metrics recording failed. metrics_recording_failed: usize, + /// Tracks when individual slots began. /// /// Relies on [`TimerManager`] to notify of start of slots. 
@@ -152,10 +155,8 @@ impl ConsensusMetrics { Builder::new() .name("solConsMetrics".into()) .spawn(move || { - info!("ConsensusMetricsService has started"); let mut metrics = Self::new(epoch, receiver); metrics.run(exit); - info!("ConsensusMetricsService has stopped"); }) .expect("Failed to start consensus metrics thread") } @@ -163,20 +164,20 @@ impl ConsensusMetrics { fn run(&mut self, exit: Arc) { while !exit.load(Ordering::Relaxed) { match self.receiver.recv_timeout(Duration::from_secs(1)) { - Ok((recorded, events)) => { + Ok((received, events)) => { for event in events { match event { ConsensusMetricsEvent::Vote { id, vote } => { - self.record_vote(id, &vote, recorded); + self.record_vote(id, &vote, received); } ConsensusMetricsEvent::BlockHashSeen { leader, slot } => { - self.record_block_hash_seen(leader, slot, recorded); + self.record_block_hash_seen(leader, slot, received); } ConsensusMetricsEvent::MaybeNewEpoch { epoch } => { self.maybe_new_epoch(epoch); } ConsensusMetricsEvent::StartOfSlot { slot } => { - self.record_start_of_slot(slot, recorded); + self.record_start_of_slot(slot, received); } } } @@ -193,23 +194,23 @@ impl ConsensusMetrics { } /// Records a `vote` from the node with `id`. - fn record_vote(&mut self, id: Pubkey, vote: &Vote, recorded: Instant) { + fn record_vote(&mut self, id: Pubkey, vote: &Vote, received: Instant) { let Some(start) = self.start_of_slot.get(&vote.slot()) else { self.metrics_recording_failed = self.metrics_recording_failed.saturating_add(1); return; }; let node = self.node_metrics.entry(id).or_default(); - let elapsed = recorded.duration_since(*start); + let elapsed = received.duration_since(*start); node.record_vote(vote, elapsed); } /// Records when a block for `slot` was seen and the `leader` is responsible for producing it. 
- fn record_block_hash_seen(&mut self, leader: Pubkey, slot: Slot, recorded: Instant) { + fn record_block_hash_seen(&mut self, leader: Pubkey, slot: Slot, received: Instant) { let Some(start) = self.start_of_slot.get(&slot) else { self.metrics_recording_failed = self.metrics_recording_failed.saturating_add(1); return; }; - let elapsed = recorded.duration_since(*start).as_micros(); + let elapsed = received.duration_since(*start).as_micros(); let elapsed = match elapsed.try_into() { Ok(e) => e, Err(err) => { @@ -236,57 +237,65 @@ impl ConsensusMetrics { } /// Records when a given slot started. - fn record_start_of_slot(&mut self, slot: Slot, recorded: Instant) { - self.start_of_slot.entry(slot).or_insert(recorded); + fn record_start_of_slot(&mut self, slot: Slot, received: Instant) { + self.start_of_slot.entry(slot).or_insert(received); } /// Performs end of epoch reporting and reset all the statistics for the subsequent epoch. - fn end_of_epoch_reporting(&mut self) { + fn end_of_epoch_reporting(&mut self, epoch: Epoch) { for (addr, metrics) in &self.node_metrics { let addr = addr.to_string(); - datapoint_info!("votor_consensus_metrics", + datapoint_info!("consensus_vote_metrics", "address" => addr, ("notar_vote_count", metrics.notar.entries(), i64), - ("notar_vote_mean", metrics.notar.mean().ok(), Option), - ("notar_vote_stddev", metrics.notar.stddev(), Option), - ("notar_vote_maximum", metrics.notar.maximum().ok(), Option), + ("notar_vote_us_mean", metrics.notar.mean().ok(), Option), + ("notar_vote_us_stddev", metrics.notar.stddev(), Option), + ("notar_vote_us_maximum", metrics.notar.maximum().ok(), Option), ("notar_fallback_vote_count", metrics.notar_fallback.entries(), i64), - ("notar_fallback_vote_mean", metrics.notar_fallback.mean().ok(), Option), - ("notar_fallback_vote_stddev", metrics.notar_fallback.stddev(), Option), - ("notar_fallback_vote_maximum", metrics.notar_fallback.maximum().ok(), Option), + ("notar_fallback_vote_us_mean", 
metrics.notar_fallback.mean().ok(), Option), + ("notar_fallback_vote_us_stddev", metrics.notar_fallback.stddev(), Option), + ("notar_fallback_vote_us_maximum", metrics.notar_fallback.maximum().ok(), Option), ("skip_vote_count", metrics.skip.entries(), i64), - ("skip_vote_mean", metrics.skip.mean().ok(), Option), - ("skip_vote_stddev", metrics.skip.stddev(), Option), - ("skip_vote_maximum", metrics.skip.maximum().ok(), Option), + ("skip_vote_us_mean", metrics.skip.mean().ok(), Option), + ("skip_vote_us_stddev", metrics.skip.stddev(), Option), + ("skip_vote_us_maximum", metrics.skip.maximum().ok(), Option), ("skip_fallback_vote_count", metrics.skip_fallback.entries(), i64), - ("skip_fallback_vote_mean", metrics.skip_fallback.mean().ok(), Option), - ("skip_fallback_vote_stddev", metrics.skip_fallback.stddev(), Option), - ("skip_fallback_vote_maximum", metrics.skip_fallback.maximum().ok(), Option), + ("skip_fallback_vote_us_mean", metrics.skip_fallback.mean().ok(), Option), + ("skip_fallback_vote_us_stddev", metrics.skip_fallback.stddev(), Option), + ("skip_fallback_vote_us_maximum", metrics.skip_fallback.maximum().ok(), Option), ("finalize_vote_count", metrics.final_.entries(), i64), - ("finalize_vote_mean", metrics.final_.mean().ok(), Option), - ("finalize_vote_stddev", metrics.final_.stddev(), Option), - ("finalize_vote_maximum", metrics.final_.maximum().ok(), Option), + ("finalize_vote_us_mean", metrics.final_.mean().ok(), Option), + ("finalize_vote_us_stddev", metrics.final_.stddev(), Option), + ("finalize_vote_us_maximum", metrics.final_.maximum().ok(), Option), ); } for (addr, histogram) in &self.leader_metrics { let addr = addr.to_string(); - datapoint_info!("votor_consensus_metrics", + datapoint_info!("consensus_block_hash_seen_metrics", "address" => addr, - ("blocks_seen_vote_count", histogram.entries(), i64), - ("blocks_seen_vote_mean", histogram.mean().ok(), Option), - ("blocks_seen_vote_stddev", histogram.stddev(), Option), - ("blocks_seen_vote_maximum", 
histogram.maximum().ok(), Option), + ("block_hash_seen_count", histogram.entries(), i64), + ("block_hash_seen_us_mean", histogram.mean().ok(), Option), + ("block_hash_seen_us_stddev", histogram.stddev(), Option), + ("block_hash_seen_us_maximum", histogram.maximum().ok(), Option), ); } - self.node_metrics.clear(); - self.leader_metrics.clear(); - self.start_of_slot.clear(); + datapoint_info!( + "consensus_metrics_internals", + ("start_of_slot_count", self.start_of_slot.len(), i64), + ( + "metrics_recording_failed", + self.metrics_recording_failed, + i64 + ), + ); + + *self = Self::new(epoch, self.receiver.clone()); } /// This function can be called if there is a new [`Epoch`] and it will carry out end of epoch reporting. @@ -294,7 +303,7 @@ impl ConsensusMetrics { assert!(epoch >= self.current_epoch); if epoch != self.current_epoch { self.current_epoch = epoch; - self.end_of_epoch_reporting(); + self.end_of_epoch_reporting(epoch); } } } diff --git a/votor/src/consensus_pool.rs b/votor/src/consensus_pool.rs index f4345d2b1f8618..63f75c586f8f27 100644 --- a/votor/src/consensus_pool.rs +++ b/votor/src/consensus_pool.rs @@ -1,25 +1,29 @@ //! Defines ConsensusPool to store received and generated votes and certificates. 
- use { crate::{ - common::{certificate_limits_and_votes, vote_to_certificate_ids, Stake}, + commitment::CommitmentError, + common::{ + certificate_limits_and_vote_types, conflicting_types, vote_to_cert_types, Stake, + MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE, MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES, + }, consensus_pool::{ - certificate_builder::{BuildError as CertificateBuilderError, CertificateBuilder}, parent_ready_tracker::ParentReadyTracker, slot_stake_counters::SlotStakeCounters, stats::ConsensusPoolStats, - vote_pool::{AddVoteError as VotePoolError, VotePool}, + vote_pool::{DuplicateBlockVotePool, SimpleVotePool, VotePool}, }, event::VotorEvent, }, agave_votor_messages::{ consensus_message::{Block, Certificate, CertificateType, ConsensusMessage, VoteMessage}, - vote::Vote, + migration::MigrationStatus, + vote::{Vote, VoteType}, }, - log::trace, + certificate_builder::{BuildError as CertificateBuildError, CertificateBuilder}, + log::{error, trace}, solana_clock::{Epoch, Slot}, solana_epoch_schedule::EpochSchedule, - solana_gossip::cluster_info::ClusterInfo, + solana_hash::Hash, solana_pubkey::Pubkey, solana_runtime::{bank::Bank, epoch_stakes::VersionedEpochStakes}, std::{ @@ -36,34 +40,37 @@ mod slot_stake_counters; mod stats; mod vote_pool; +pub type PoolId = (Slot, VoteType); + /// Different failure cases from calling `add_vote()`. 
-#[derive(Debug, Error)] -enum AddVoteError { +#[derive(Debug, Error, PartialEq)] +pub(crate) enum AddVoteError { + #[error("Conflicting vote type: {0:?} vs existing {1:?} for slot: {2} pubkey: {3}")] + ConflictingVoteType(VoteType, VoteType, Slot, Pubkey), + #[error("Epoch stakes missing for epoch: {0}")] EpochStakesNotFound(Epoch), + #[error("Unrooted slot")] UnrootedSlot, - #[error("Certificate builder error: {0}")] - CertificateBuilder(#[from] CertificateBuilderError), - #[error("Invalid rank: {0}")] - InvalidRank(u16), - #[error("vote pool returned error: {0}")] - VotePool(#[from] VotePoolError), -} + #[error("Certificate error: {0}")] + Certificate(#[from] CertificateBuildError), -/// Different failure cases from calling `add_certificate()`. -#[derive(Debug, Error)] -enum AddCertError { - #[error("Unrooted slot")] - UnrootedSlot, + #[error("{0} channel disconnected")] + ChannelDisconnected(String), + + #[error("Voting Service queue full")] + VotingServiceQueueFull, + + #[error("Invalid rank: {0}")] + InvalidRank(u16), } -/// Different failure cases from calling `add_message()`. -#[derive(Debug, PartialEq, Eq, Error)] -pub(crate) enum AddMessageError { - #[error("internal failure {0}")] - Internal(String), +impl From for AddVoteError { + fn from(_: CommitmentError) -> Self { + AddVoteError::ChannelDisconnected("CommitmentSender".to_string()) + } } fn get_key_and_stakes( @@ -89,15 +96,13 @@ fn get_key_and_stakes( } Ok((*vote_key, stake, epoch_stakes.total_stake())) } - /// Container to store received votes and certificates. /// /// Based on received votes and certificates, generates new `VotorEvent`s and generates new certificates. pub(crate) struct ConsensusPool { - cluster_info: Arc, - // Storage for per slot votes. - // Adding new votes in the vote uses the prior votes to check for invalid and duplicate votes. - vote_pools: BTreeMap, + my_pubkey: Pubkey, + // Vote pools to do bean counting for votes. 
+ vote_pools: BTreeMap, /// Completed certificates completed_certificates: BTreeMap>, /// Tracks slots which have reached the parent ready condition: @@ -113,74 +118,180 @@ pub(crate) struct ConsensusPool { stats: ConsensusPoolStats, /// Slot stake counters, used to calculate safe_to_notar and safe_to_skip slot_stake_counters_map: BTreeMap, + /// Stores details about the genesis vote during the migration + migration_status: Option>, } impl ConsensusPool { - pub(crate) fn new_from_root_bank(cluster_info: Arc, bank: &Bank) -> Self { + pub(crate) fn new_from_root_bank_pre_migration( + my_pubkey: Pubkey, + bank: &Bank, + migration_status: Arc, + ) -> Self { + let mut pool = Self::new_from_root_bank(my_pubkey, bank); + pool.migration_status = Some(migration_status); + pool + } + + pub fn new_from_root_bank(my_pubkey: Pubkey, bank: &Bank) -> Self { // To account for genesis and snapshots we allow default block id until // block id can be serialized as part of the snapshot let root_block = (bank.slot(), bank.block_id().unwrap_or_default()); - let parent_ready_tracker = ParentReadyTracker::new(cluster_info.clone(), root_block); + let parent_ready_tracker = ParentReadyTracker::new(my_pubkey, root_block); Self { - cluster_info, + my_pubkey, vote_pools: BTreeMap::new(), completed_certificates: BTreeMap::new(), highest_finalized_slot: None, highest_finalized_with_notarize: None, parent_ready_tracker, - stats: ConsensusPoolStats::default(), + stats: ConsensusPoolStats::new(), slot_stake_counters_map: BTreeMap::new(), + migration_status: None, + } + } + + fn new_vote_pool(vote_type: VoteType) -> VotePool { + match vote_type { + VoteType::NotarizeFallback => VotePool::DuplicateBlockVotePool( + DuplicateBlockVotePool::new(MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE), + ), + VoteType::Notarize => VotePool::DuplicateBlockVotePool(DuplicateBlockVotePool::new( + MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES, + )), + _ => VotePool::SimpleVotePool(SimpleVotePool::default()), + } + } + + fn 
update_vote_pool( + &mut self, + vote: VoteMessage, + validator_vote_key: Pubkey, + validator_stake: Stake, + ) -> Option { + let vote_type = vote.vote.get_type(); + let pool = self + .vote_pools + .entry((vote.vote.slot(), vote_type)) + .or_insert_with(|| Self::new_vote_pool(vote_type)); + match pool { + VotePool::SimpleVotePool(pool) => { + pool.add_vote(validator_vote_key, validator_stake, vote) + } + VotePool::DuplicateBlockVotePool(pool) => { + pool.add_vote(validator_vote_key, validator_stake, vote) + } } } - /// Builds new [`Certificate`]s that depend on votes of type [`Vote`] if enough stake has voted for them. - fn build_certs( + /// For a new vote `slot` , `vote_type` checks if any + /// of the related certificates are newly complete. + /// For each newly constructed certificate + /// - Insert it into `self.certificates` + /// - Potentially update `self.highest_finalized_slot`, + /// - If we have a new highest finalized slot, return it + /// - update any newly created events + fn update_certificates( &mut self, vote: &Vote, + block_id: Option, + events: &mut Vec, total_stake: Stake, ) -> Result>, AddVoteError> { - let Some(vote_pool) = self.vote_pools.get(&vote.slot()) else { - return Ok(vec![]); - }; + let slot = vote.slot(); let mut new_certificates_to_send = Vec::new(); - for cert_type in vote_to_certificate_ids(vote) { + for cert_type in vote_to_cert_types(vote) { // If the certificate is already complete, skip it if self.completed_certificates.contains_key(&cert_type) { continue; } // Otherwise check whether the certificate is complete - let (limit, vote, fallback_vote) = certificate_limits_and_votes(&cert_type); - let accumulated_stake = vote_pool - .get_stake(&vote) - .saturating_add(fallback_vote.map_or(0, |v| vote_pool.get_stake(&v))); - + let (limit, vote_types) = certificate_limits_and_vote_types(&cert_type); + let accumulated_stake = vote_types + .iter() + .filter_map(|vote_type| { + Some(match self.vote_pools.get(&(slot, *vote_type))? 
{ + VotePool::SimpleVotePool(pool) => pool.total_stake(), + VotePool::DuplicateBlockVotePool(pool) => { + pool.total_stake_by_block_id(block_id.as_ref().expect( + "Duplicate block pool for {vote_type:?} expects a block id for \ + certificate {cert_type:?}", + )) + } + }) + }) + .sum::(); if accumulated_stake as f64 / (total_stake as f64) < limit { continue; } let mut cert_builder = CertificateBuilder::new(cert_type); - cert_builder.aggregate(&vote_pool.get_votes(&vote)).unwrap(); - if let Some(v) = fallback_vote { - cert_builder.aggregate(&vote_pool.get_votes(&v)).unwrap(); - } + vote_types.iter().for_each(|vote_type| { + if let Some(vote_pool) = self.vote_pools.get(&(slot, *vote_type)) { + match vote_pool { + VotePool::SimpleVotePool(pool) => { + cert_builder.aggregate(pool.votes()).unwrap(); + } + VotePool::DuplicateBlockVotePool(pool) => { + if let Some(votes) = pool.votes(block_id.as_ref().unwrap()) { + cert_builder.aggregate(votes).unwrap(); + } + } + }; + } + }); let new_cert = Arc::new(cert_builder.build()?); + self.insert_certificate(cert_type, new_cert.clone(), events); self.stats.incr_cert_type(&new_cert.cert_type, true); new_certificates_to_send.push(new_cert); } Ok(new_certificates_to_send) } - /// Inserts a new [`Certificate`]. - /// - /// Based on the type of certificate being inserted, updates [`self.parent_ready_tracker`] and other metadata on self. 
- fn insert_certificate(&mut self, cert: Arc, events: &mut Vec) { - let cert_type = cert.cert_type; - trace!( - "{}: Inserting certificate {:?}", - self.cluster_info.id(), - cert_type - ); - self.completed_certificates.insert(cert_type, cert); + fn has_conflicting_vote( + &self, + slot: Slot, + vote_type: VoteType, + validator_vote_key: &Pubkey, + block_id: &Option, + ) -> Option { + for conflicting_type in conflicting_types(vote_type) { + if let Some(pool) = self.vote_pools.get(&(slot, *conflicting_type)) { + let is_conflicting = match pool { + // In a simple vote pool, just check if the validator previously voted at all. If so, that's a conflict + VotePool::SimpleVotePool(pool) => { + pool.has_prev_validator_vote(validator_vote_key) + } + // In a duplicate block vote pool, because some conflicts between things like Notarize and NotarizeFallback + // for different blocks are allowed, we need a more specific check. + // TODO: This can be made much cleaner/safer if VoteType carried the bank hash, block id so we + // could check which exact VoteType(blockid, bankhash) was the source of the conflict. + VotePool::DuplicateBlockVotePool(pool) => { + if let Some(block_id) = &block_id { + // Reject votes for the same block with a conflicting type, i.e. + // a NotarizeFallback vote for the same block as a Notarize vote. 
+ pool.has_prev_validator_vote_for_block(validator_vote_key, block_id) + } else { + pool.has_prev_validator_vote(validator_vote_key) + } + } + }; + if is_conflicting { + return Some(*conflicting_type); + } + } + } + None + } + + fn insert_certificate( + &mut self, + cert_type: CertificateType, + cert: Arc, + events: &mut Vec, + ) { + trace!("{}: Inserting certificate {:?}", self.my_pubkey, cert_type); + self.completed_certificates.insert(cert_type, cert.clone()); match cert_type { CertificateType::NotarizeFallback(slot, block_id) => { self.parent_ready_tracker @@ -231,7 +342,14 @@ impl ConsensusPool { self.highest_finalized_with_notarize = Some((slot, true)); } } - CertificateType::Genesis(_slot, _block_id) => {} + CertificateType::Genesis(slot, block_id) => { + if let Some(ref migration_status) = self.migration_status { + migration_status.set_genesis_certificate(cert); + } + // The genesis block is automatically certified + self.parent_ready_tracker + .add_new_notar_fallback_or_stronger((slot, block_id), events); + } } } @@ -251,22 +369,18 @@ impl ConsensusPool { my_vote_pubkey: &Pubkey, message: ConsensusMessage, events: &mut Vec, - ) -> Result<(Option, Vec>), AddMessageError> { + ) -> Result<(Option, Vec>), AddVoteError> { let current_highest_finalized_slot = self.highest_finalized_slot; let new_certficates_to_send = match message { - ConsensusMessage::Vote(vote_message) => self - .add_vote( - epoch_schedule, - epoch_stakes_map, - root_slot, - my_vote_pubkey, - vote_message, - events, - ) - .map_err(|e| AddMessageError::Internal(e.to_string()))?, - ConsensusMessage::Certificate(cert) => self - .add_certificate(root_slot, cert, events) - .map_err(|e| AddMessageError::Internal(e.to_string()))?, + ConsensusMessage::Vote(vote_message) => self.add_vote( + epoch_schedule, + epoch_stakes_map, + root_slot, + my_vote_pubkey, + vote_message, + events, + )?, + ConsensusMessage::Certificate(cert) => self.add_certificate(root_slot, cert, events)?, }; // If we have a new 
highest finalized slot, return it let new_finalized_slot = if self.highest_finalized_slot > current_highest_finalized_slot { @@ -303,43 +417,50 @@ impl ConsensusPool { self.stats.out_of_range_votes = self.stats.out_of_range_votes.saturating_add(1); return Err(AddVoteError::UnrootedSlot); } - let vote = vote_message.vote; - match self - .vote_pools - .entry(vote_slot) - .or_insert(VotePool::new(vote_slot)) - .add_vote(validator_vote_key, validator_stake, vote_message) + let block_id = vote.block_id().map(|block_id| { + if !matches!( + vote, + Vote::Notarize(_) | Vote::NotarizeFallback(_) | Vote::Genesis(_) + ) { + panic!("expected Notarize/ NotarizeFallback/ Genesis vote"); + } + *block_id + }); + let vote_type = vote.get_type(); + if let Some(conflicting_type) = + self.has_conflicting_vote(vote_slot, vote_type, &validator_vote_key, &block_id) { - Ok(stake) => { + self.stats.conflicting_votes = self.stats.conflicting_votes.saturating_add(1); + return Err(AddVoteError::ConflictingVoteType( + vote_type, + conflicting_type, + vote_slot, + validator_vote_key, + )); + } + match self.update_vote_pool(vote_message, validator_vote_key, validator_stake) { + None => { + // No new vote pool entry was created, just return empty vec + self.stats.exist_votes = self.stats.exist_votes.saturating_add(1); + return Ok(vec![]); + } + Some(entry_stake) => { let fallback_vote_counters = self .slot_stake_counters_map .entry(vote_slot) .or_insert_with(|| SlotStakeCounters::new(total_stake)); fallback_vote_counters.add_vote( - &vote, - stake, + vote, + entry_stake, my_vote_pubkey == &validator_vote_key, events, &mut self.stats, ); } - Err(e) => match e { - vote_pool::AddVoteError::Duplicate => { - self.stats.exist_votes = self.stats.exist_votes.saturating_add(1); - return Ok(vec![]); - } - vote_pool::AddVoteError::Invalid => { - self.stats.invalid_votes = self.stats.invalid_votes.saturating_add(1); - return Err(e.into()); - } - }, } - self.stats.incr_ingested_vote(&vote); - 
self.build_certs(&vote, total_stake).inspect(|certs| { - for cert in certs { - self.insert_certificate(cert.clone(), events) - } - }) + self.stats.incr_ingested_vote_type(vote_type); + + self.update_certificates(vote, block_id, events, total_stake) } fn add_certificate( @@ -347,20 +468,22 @@ impl ConsensusPool { root_slot: Slot, cert: Certificate, events: &mut Vec, - ) -> Result>, AddCertError> { - let cert_type = &cert.cert_type; + ) -> Result>, AddVoteError> { + let cert_type = cert.cert_type; self.stats.incoming_certs = self.stats.incoming_certs.saturating_add(1); if cert_type.slot() < root_slot { self.stats.out_of_range_certs = self.stats.out_of_range_certs.saturating_add(1); - return Err(AddCertError::UnrootedSlot); + return Err(AddVoteError::UnrootedSlot); } - if self.completed_certificates.contains_key(cert_type) { + if self.completed_certificates.contains_key(&cert_type) { self.stats.exist_certs = self.stats.exist_certs.saturating_add(1); return Ok(vec![]); } - self.stats.incr_cert_type(cert_type, false); let cert = Arc::new(cert); - self.insert_certificate(cert.clone(), events); + self.insert_certificate(cert_type, cert.clone(), events); + + self.stats.incr_cert_type(&cert_type, false); + Ok(vec![cert]) } @@ -438,6 +561,11 @@ impl ConsensusPool { .contains_key(&CertificateType::Skip(slot)) } + #[cfg(test)] + pub(crate) fn my_pubkey(&self) -> Pubkey { + self.my_pubkey + } + #[cfg(test)] fn make_start_leader_decision( &self, @@ -487,14 +615,22 @@ impl ConsensusPool { | CertificateType::FinalizeFast(s, _) | CertificateType::Notarize(s, _) | CertificateType::NotarizeFallback(s, _) - | CertificateType::Skip(s) - | CertificateType::Genesis(s, _) => s >= &root_slot, + | CertificateType::Genesis(s, _) + | CertificateType::Skip(s) => s >= &root_slot, }); - self.vote_pools = self.vote_pools.split_off(&root_slot); + self.vote_pools = self.vote_pools.split_off(&(root_slot, VoteType::Finalize)); self.slot_stake_counters_map = 
self.slot_stake_counters_map.split_off(&root_slot); self.parent_ready_tracker.set_root(root_slot); } + /// Updates the pubkey used for logging purposes only. + /// This avoids the need to recreate the entire certificate pool since it's + /// not distinguished by the pubkey. + pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + self.parent_ready_tracker.update_pubkey(new_pubkey); + } + pub(crate) fn maybe_report(&mut self) { self.stats.maybe_report(); } @@ -525,11 +661,7 @@ impl ConsensusPool { _ => None, }; if cert_to_send.is_some() { - trace!( - "{}: Refreshing certificate {:?}", - self.cluster_info.id(), - cert_type - ); + trace!("{}: Refreshing certificate {:?}", self.my_pubkey, cert_type); } cert_to_send }) @@ -547,10 +679,7 @@ mod tests { VerifiableSignature, }, solana_clock::Slot, - solana_gossip::contact_info::ContactInfo, solana_hash::Hash, - solana_keypair::Keypair, - solana_net_utils::SocketAddrSpace, solana_runtime::{ bank::{Bank, NewBankOptions}, bank_forks::BankForks, @@ -563,16 +692,6 @@ mod tests { test_case::test_case, }; - fn new_cluster_info() -> Arc { - let keypair = Keypair::new(); - let contact_info = ContactInfo::new_localhost(&keypair.pubkey(), 0); - Arc::new(ClusterInfo::new( - contact_info, - Arc::new(keypair), - SocketAddrSpace::Unspecified, - )) - } - fn dummy_vote_message( keypairs: &[ValidatorVoteKeypairs], vote: &Vote, @@ -614,7 +733,7 @@ mod tests { let root_bank = bank_forks.read().unwrap().root_bank(); ( validator_keypairs, - ConsensusPool::new_from_root_bank(new_cluster_info(), &root_bank), + ConsensusPool::new_from_root_bank(Pubkey::new_unique(), &root_bank), bank_forks, ) } @@ -626,32 +745,34 @@ mod tests { vote: Vote, ) { for rank in 0..6 { - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); + } + assert!(pool + 
.add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), &Pubkey::new_unique(), - dummy_vote_message(validator_keypairs, &vote, rank), - &mut vec![], + dummy_vote_message(validator_keypairs, &vote, 6), + &mut vec![] ) - .unwrap(); - } - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + .is_ok()); match vote { Vote::Notarize(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot), Vote::NotarizeFallback(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot), Vote::Skip(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot), Vote::SkipFallback(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot), Vote::Finalize(vote) => assert_eq!(pool.highest_finalized_slot(), vote.slot), - Vote::Genesis(_) => {} + Vote::Genesis(_genesis_vote) => (), } } @@ -665,15 +786,16 @@ mod tests { ) { for slot in start..=end { let vote = Vote::new_skip_vote(slot); - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(keypairs, &vote, rank), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); } } @@ -927,40 +1049,12 @@ mod tests { assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); } - #[test] - fn test_add_vote_and_create_new_certificate_with_types() { - let slot = 5; - let vote = Vote::new_finalization_vote(slot); - let cert_types = vec![CertificateType::Finalize(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 6; - let block_id = Hash::new_unique(); - let vote = Vote::new_notarization_vote(slot, block_id); - let cert_types = vec![ - 
CertificateType::Notarize(slot, block_id), - CertificateType::NotarizeFallback(slot, block_id), - ]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 7; - let block_id = Hash::new_unique(); - let vote = Vote::new_notarization_fallback_vote(slot, block_id); - let cert_types = vec![CertificateType::NotarizeFallback(slot, block_id)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 8; - let vote = Vote::new_skip_vote(slot); - let cert_types = vec![CertificateType::Skip(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 9; - let vote = Vote::new_skip_fallback_vote(slot); - let cert_types = vec![CertificateType::Skip(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - } - - fn do_test_add_vote_and_create_new_certificate_with_types( + #[test_case(Vote::new_finalization_vote(5), vec![CertificateType::Finalize(5)])] + #[test_case(Vote::new_notarization_vote(6, Hash::default()), vec![CertificateType::Notarize(6, Hash::default()), CertificateType::NotarizeFallback(6, Hash::default())])] + #[test_case(Vote::new_notarization_fallback_vote(7, Hash::default()), vec![CertificateType::NotarizeFallback(7, Hash::default())])] + #[test_case(Vote::new_skip_vote(8), vec![CertificateType::Skip(8)])] + #[test_case(Vote::new_skip_fallback_vote(9), vec![CertificateType::Skip(9)])] + fn test_add_vote_and_create_new_certificate_with_types( vote: Vote, expected_cert_types: Vec, ) { @@ -972,41 +1066,44 @@ mod tests { Vote::NotarizeFallback(_) => |pool: &ConsensusPool| pool.highest_notarized_slot(), Vote::Skip(_) => |pool: &ConsensusPool| pool.highest_skip_slot(), Vote::SkipFallback(_) => |pool: &ConsensusPool| pool.highest_skip_slot(), - Vote::Genesis(_) => |_pool: &ConsensusPool| 0, + Vote::Genesis(_genesis_vote) => |_pool: &ConsensusPool| 0, }; let bank = bank_forks.read().unwrap().root_bank(); - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] + ) + .is_ok()); let slot = vote.slot(); assert!(highest_slot_fn(&pool) < slot); // Same key voting again shouldn't make a certificate - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), - &mut vec![], - ) - .unwrap(); - assert!(highest_slot_fn(&pool) < slot); - for rank in 0..4 { - pool.add_message( + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut vec![], + dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] ) - .unwrap(); + .is_ok()); + assert!(highest_slot_fn(&pool) < slot); + for rank in 0..4 { + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); } assert!(highest_slot_fn(&pool) < slot); let new_validator_ix = 6; @@ -1027,9 +1124,9 @@ mod tests { } // Assert certs_to_send contains the expected certificate types for expected_cert_type in expected_cert_types { - assert!(certs_to_send - .iter() - .any(|cert| { cert.cert_type == expected_cert_type })); + assert!(certs_to_send.iter().any(|cert| { + cert.cert_type == expected_cert_type && cert.cert_type.slot() == slot + })); } assert_eq!(highest_slot_fn(&pool), slot); // Now add the same certificate again, this should silently exit. 
@@ -1131,8 +1228,8 @@ mod tests { fn test_add_vote_zero_stake() { let (_, mut pool, bank_forks) = create_initial_state(); let bank = bank_forks.read().unwrap().root_bank(); - let err = pool - .add_message( + assert_eq!( + pool.add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), @@ -1142,12 +1239,9 @@ mod tests { rank: 100, signature: BLSSignature::default(), }), - &mut vec![], - ) - .unwrap_err(); - assert_eq!( - err, - AddMessageError::Internal("Invalid rank: 100".to_string()) + &mut vec![] + ), + Err(AddVoteError::InvalidRank(100)) ); } @@ -1178,15 +1272,16 @@ mod tests { let slot = (i as u64).saturating_add(16); let vote = Vote::new_skip_vote(slot); // These should not extend the skip range - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, i), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, i), + &mut vec![] + ) + .is_ok()); } assert_single_certificate_range(&pool, 15, 15); @@ -1299,44 +1394,47 @@ mod tests { let bank = bank_forks.read().unwrap().root_bank(); // 10% vote for skip 2 let vote = Vote::new_skip_vote(2); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 2); assert_single_certificate_range(&pool, 2, 2); // 10% vote for skip 4 let vote = Vote::new_skip_vote(4); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - 
dummy_vote_message(&validator_keypairs, &vote, 7), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 7), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 4); assert_single_certificate_range(&pool, 2, 2); assert_single_certificate_range(&pool, 4, 4); // 10% vote for skip 3 let vote = Vote::new_skip_vote(3); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 8), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 8), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 4); assert_single_certificate_range(&pool, 2, 4); assert!(pool.skip_certified(3)); @@ -1371,15 +1469,16 @@ mod tests { let bank = bank_forks.read().unwrap().root_bank(); // Range expansion on a singleton vote should be ok let vote = Vote::new_skip_vote(1); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 1); add_skip_vote_range( &mut pool, @@ -1408,15 +1507,16 @@ mod tests { // AlreadyExists, silently fail let vote = Vote::new_skip_vote(20); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + 
bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); } #[test] @@ -1496,39 +1596,38 @@ mod tests { // Add a skip from myself. let vote = Vote::new_skip_vote(2); let mut new_events = vec![]; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); assert!(new_events.is_empty()); // 40% notarized, should succeed for rank in 1..5 { let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => { - panic!("Expected SafeToNotar event"); - } + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } new_events.clear(); @@ -1539,60 +1638,59 @@ mod tests { // Add 20% notarize, but no vote from myself, should fail for rank in 1..3 { let vote = Vote::new_notarization_vote(3, block_id); - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + 
bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert!(new_events.is_empty()); + + // Add a notarize from myself for some other block, but still not enough notar or skip, should fail. + let vote = Vote::new_notarization_vote(3, Hash::new_unique()); + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events ) - .unwrap(); - } - assert!(new_events.is_empty()); - - // Add a notarize from myself for some other block, but still not enough notar or skip, should fail. - let vote = Vote::new_notarization_vote(3, Hash::new_unique()); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + .is_ok()); assert!(new_events.is_empty()); // Now add 40% skip, should succeed // Funny thing is in this case we will also get SafeToSkip(3) for rank in 3..7 { let vote = Vote::new_skip_vote(3); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 2); - match new_events[0] { - VotorEvent::SafeToSkip(event_slot) => { - assert_eq!(slot, event_slot); - } - _ => { - panic!("Expected SafeToSkip event"); - } + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip 
event"); } - match new_events[1] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => panic!("Expected SafeToNotar event"), + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[1] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } new_events.clear(); @@ -1601,24 +1699,24 @@ mod tests { let duplicate_block_id = Hash::new_unique(); for rank in 7..9 { let vote = Vote::new_notarization_vote(3, duplicate_block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(duplicate_block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => panic!("Expected SafeToNotar event"), + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(duplicate_block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } } @@ -1634,50 +1732,125 @@ mod tests { // Add a notarize from myself. 
let block_id = Hash::new_unique(); let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); // Should still fail because there are no other votes. assert!(new_events.is_empty()); // Add 50% skip, should succeed for rank in 1..6 { let vote = Vote::new_skip_vote(2); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToSkip(event_slot) => assert_eq!(slot, event_slot), - _ => panic!("Expected SafeToSkip event"), + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip event"); } new_events.clear(); // Add 10% more notarize, will not send new SafeToSkip because the event was already sent let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut new_events + ) + .is_ok()); assert!(new_events.is_empty()); 
} + fn create_new_vote(vote_type: VoteType, slot: Slot) -> Vote { + match vote_type { + VoteType::Notarize => Vote::new_notarization_vote(slot, Hash::default()), + VoteType::NotarizeFallback => { + Vote::new_notarization_fallback_vote(slot, Hash::default()) + } + VoteType::Skip => Vote::new_skip_vote(slot), + VoteType::SkipFallback => Vote::new_skip_fallback_vote(slot), + VoteType::Finalize => Vote::new_finalization_vote(slot), + VoteType::Genesis => Vote::new_genesis_vote(slot, Hash::default()), + } + } + + fn test_reject_conflicting_vote( + pool: &mut ConsensusPool, + bank: &Bank, + validator_keypairs: &[ValidatorVoteKeypairs], + vote_type_1: VoteType, + vote_type_2: VoteType, + slot: Slot, + ) { + let vote_1 = create_new_vote(vote_type_1, slot); + let vote_2 = create_new_vote(vote_type_2, slot); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote_1, 0), + &mut vec![] + ) + .is_ok()); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote_2, 0), + &mut vec![] + ) + .is_err()); + } + + #[test] + fn test_reject_conflicting_votes_with_type() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let mut slot = 2; + for vote_type_1 in [ + VoteType::Finalize, + VoteType::Notarize, + VoteType::NotarizeFallback, + VoteType::Skip, + VoteType::SkipFallback, + ] { + let conflicting_vote_types = conflicting_types(vote_type_1); + for vote_type_2 in conflicting_vote_types { + test_reject_conflicting_vote( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + vote_type_1, + *vote_type_2, + slot, + ); + } + slot = slot.saturating_add(4); + } + } + #[test] fn test_handle_new_root() { let validator_keypairs = (0..10) @@ -1685,7 +1858,7 @@ mod tests { .collect::>(); let bank_forks = 
create_bank_forks(&validator_keypairs); let mut pool = ConsensusPool::new_from_root_bank( - new_cluster_info(), + Pubkey::new_unique(), &bank_forks.read().unwrap().root_bank(), ); @@ -1696,29 +1869,31 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_1.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_1.clone()), + &mut vec![] + ) + .is_ok()); let cert_2 = Certificate { cert_type: CertificateType::FinalizeFast(2, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_2.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_2.clone()), + &mut vec![] + ) + .is_ok()); assert!(pool.skip_certified(1)); assert!(pool.is_finalized(2)); @@ -1778,29 +1953,31 @@ mod tests { bitmap: Vec::new(), }; let bank = bank_forks.read().unwrap().root_bank(); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_3.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_3.clone()), + &mut vec![] + ) + .is_ok()); let cert_4 = Certificate { cert_type: CertificateType::Finalize(4), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_4.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_4.clone()), + &mut vec![] + ) + .is_ok()); // Should return both certificates let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 2); @@ -1815,15 +1992,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); // Add Finalize cert on 5 let cert_5_finalize = Certificate { @@ -1831,15 +2009,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5_finalize.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5_finalize.clone()), + &mut vec![] + ) + .is_ok()); // Add FinalizeFast cert on 5 let cert_5 = Certificate { @@ -1847,15 +2026,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + 
&Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); // Should return only FinalizeFast cert on 5 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 1); @@ -1870,15 +2050,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6.clone()), + &mut vec![] + ) + .is_ok()); // Should return certs on 5 and 6 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 2); @@ -1893,30 +2074,32 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6_finalize.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6_finalize.clone()), + &mut vec![] + ) + .is_ok()); // Add a NotarizeFallback cert on 6 let cert_6_notarize_fallback = Certificate { cert_type: CertificateType::NotarizeFallback(6, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6_notarize_fallback.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6_notarize_fallback.clone()), + &mut vec![] + ) + .is_ok()); // This should not be returned 
because 6 is the current highest finalized slot // only Notarize/Finalze/FinalizeFast should be returned let certs = pool.get_certs_for_standstill(); @@ -1932,15 +2115,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_7.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_7.clone()), + &mut vec![] + ) + .is_ok()); // Should return certs on 6 and 7 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 3); @@ -1959,29 +2143,31 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_8_finalize), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_8_finalize), + &mut vec![] + ) + .is_ok()); let cert_8_notarize = Certificate { cert_type: CertificateType::Notarize(8, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_8_notarize), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_8_notarize), + &mut vec![] + ) + .is_ok()); // Should only return certs on 8 now let certs = pool.get_certs_for_standstill(); @@ -2006,15 +2192,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); } // events should now contain ParentReady for slot 4 error!("Events: {events:?}"); @@ -2033,15 +2220,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); } // events should now contain ParentReady for slot 8 error!("Events: {events:?}"); @@ -2060,7 +2248,24 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + } + let cert = Certificate { + cert_type: CertificateType::FinalizeFast(11, hash), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), @@ -2068,22 +2273,7 @@ mod tests { ConsensusMessage::Certificate(cert), &mut events, ) - .unwrap(); - } - let cert = Certificate { - cert_type: CertificateType::FinalizeFast(11, hash), - signature: BLSSignature::default(), - bitmap: Vec::new(), - }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + .is_ok()); // 
events should now contain ParentReady for slot 12 error!("Events: {events:?}"); assert!(events @@ -2113,9 +2303,24 @@ mod tests { let signed_message = bincode::serialize(&vote).unwrap(); - vote_message - .signature - .verify(&bls_pubkey, &signed_message) - .expect("BLS signature verification failed for VoteMessage"); + assert!( + vote_message + .signature + .verify(&bls_pubkey, &signed_message) + .is_ok(), + "BLS signature verification failed for VoteMessage" + ); + } + + #[test] + fn test_update_pubkey() { + let new_pubkey = Pubkey::new_unique(); + let (_, mut pool, _) = create_initial_state(); + let old_pubkey = pool.my_pubkey(); + assert_eq!(pool.parent_ready_tracker.my_pubkey(), old_pubkey); + assert_ne!(old_pubkey, new_pubkey); + pool.update_pubkey(new_pubkey); + assert_eq!(pool.my_pubkey(), new_pubkey); + assert_eq!(pool.parent_ready_tracker.my_pubkey(), new_pubkey); } } diff --git a/votor/src/consensus_pool/certificate_builder.rs b/votor/src/consensus_pool/certificate_builder.rs index c3ee633f15382e..c67719065c21f9 100644 --- a/votor/src/consensus_pool/certificate_builder.rs +++ b/votor/src/consensus_pool/certificate_builder.rs @@ -1,5 +1,5 @@ use { - crate::common::certificate_limits_and_votes, + crate::common::certificate_limits_and_vote_types, agave_votor_messages::consensus_message::{Certificate, CertificateType, VoteMessage}, bitvec::prelude::*, solana_bls_signatures::{BlsError, SignatureProjective}, @@ -7,7 +7,7 @@ use { thiserror::Error, }; -/// Maximum number of validators in a certificate +/// Maximum number of validators in a certificate. /// /// There are around 1500 validators currently. For a clean power-of-two /// implementation, we should choose either 2048 or 4096. Choose a more @@ -17,7 +17,7 @@ use { const MAXIMUM_VALIDATORS: usize = 4096; /// Different types of errors that can be returned from the [`CertificateBuilder::aggregate()`] function. 
-#[derive(Debug, PartialEq, Eq, Error)] +#[derive(Debug, Error, PartialEq, Eq)] pub(super) enum AggregateError { #[error("BLS error: {0}")] Bls(#[from] BlsError), @@ -25,19 +25,24 @@ pub(super) enum AggregateError { InvalidRank(u16), #[error("Validator already included")] ValidatorAlreadyIncluded, - #[error("assumption for vote_types array broken")] - InvalidVoteTypes, } /// Different types of errors that can be returned from the [`CertificateBuilder::build()`] function. -#[derive(Debug, PartialEq, Eq, Error)] +#[derive(Debug, Error, PartialEq)] pub(crate) enum BuildError { + #[error("Encoding failed: {0:?}")] + Encode(EncodeError), #[error("BLS error: {0}")] Bls(#[from] BlsError), +} + +/// Different types of errors that can be returned from the [`CertificateBuilder::build_for_rewards()`] function. +#[derive(Debug, Error, PartialEq)] +pub enum BuildForRewardsError { #[error("Encoding failed: {0:?}")] Encode(EncodeError), - #[error("Invalid rank: {0}")] - InvalidRank(usize), + #[error("rewards certs of these types are not needed")] + InvalidCertType, } fn default_bitvec() -> BitVec { @@ -49,15 +54,10 @@ fn build_cert_from_bitmap( cert_type: CertificateType, signature: SignatureProjective, mut bitmap: BitVec, -) -> Result { +) -> Result { let new_len = bitmap.last_one().map_or(0, |i| i.saturating_add(1)); - // checks in `aggregate()` guarantee that this assertion is valid - debug_assert!(new_len <= MAXIMUM_VALIDATORS); - if new_len > MAXIMUM_VALIDATORS { - return Err(BuildError::InvalidRank(new_len)); - } bitmap.resize(new_len, false); - let bitmap = encode_base2(&bitmap).map_err(BuildError::Encode)?; + let bitmap = encode_base2(&bitmap)?; Ok(Certificate { cert_type, signature: signature.into(), @@ -71,18 +71,13 @@ fn build_cert_from_bitmaps( signature: SignatureProjective, mut bitmap0: BitVec, mut bitmap1: BitVec, -) -> Result { +) -> Result { let last_one_0 = bitmap0.last_one().map_or(0, |i| i.saturating_add(1)); let last_one_1 = bitmap1.last_one().map_or(0, |i| 
i.saturating_add(1)); - let new_len = last_one_0.max(last_one_1); - // checks in `aggregate()` guarantee that this assertion is valid - debug_assert!(new_len <= MAXIMUM_VALIDATORS); - if new_len > MAXIMUM_VALIDATORS { - return Err(BuildError::InvalidRank(new_len)); - } - bitmap0.resize(new_len, false); - bitmap1.resize(new_len, false); - let bitmap = encode_base3(&bitmap0, &bitmap1).map_err(BuildError::Encode)?; + let new_length = last_one_0.max(last_one_1); + bitmap0.resize(new_length, false); + bitmap1.resize(new_length, false); + let bitmap = encode_base3(&bitmap0, &bitmap1)?; Ok(Certificate { cert_type, signature: signature.into(), @@ -110,10 +105,20 @@ enum BuilderType { signature: SignatureProjective, bitmap: BitVec, }, - /// A [`Certificate`] of type NotarFallback or Skip will be produced. + /// A [`Certificate`] of type Skip will be produced. + /// + /// It can require two types of [`VoteMessage`]s. + /// In order to be able to produce certificates for reward purposes, signature aggregates for the two types are tracked separately. + Skip { + signature0: SignatureProjective, + bitmap0: BitVec, + sig_and_bitmap1: Option<(SignatureProjective, BitVec)>, + }, + /// A [`Certificate`] of type NotarFallback will be produced. /// /// It can require two types of [`VoteMessage`]s. - DoubleVote { + /// This certificate is not used for rewards so its signature can be aggregated in a single container. + NotarFallback { signature: SignatureProjective, bitmap0: BitVec, bitmap1: Option>, @@ -124,17 +129,17 @@ impl BuilderType { /// Creates a new instance of [`BuilderType`]. 
fn new(cert_type: &CertificateType) -> Self { match cert_type { - CertificateType::NotarizeFallback(_, _) | CertificateType::Skip(_) => { - Self::DoubleVote { - signature: SignatureProjective::identity(), - bitmap0: default_bitvec(), - bitmap1: None, - } - } - CertificateType::Finalize(_) - | CertificateType::FinalizeFast(_, _) - | CertificateType::Notarize(_, _) - | CertificateType::Genesis(_, _) => Self::SingleVote { + CertificateType::Skip(_) => Self::Skip { + signature0: SignatureProjective::identity(), + bitmap0: default_bitvec(), + sig_and_bitmap1: None, + }, + CertificateType::NotarizeFallback(_, _) => Self::NotarFallback { + signature: SignatureProjective::identity(), + bitmap0: default_bitvec(), + bitmap1: None, + }, + _ => Self::SingleVote { signature: SignatureProjective::identity(), bitmap: default_bitvec(), }, @@ -147,48 +152,62 @@ impl BuilderType { cert_type: &CertificateType, msgs: &[VoteMessage], ) -> Result<(), AggregateError> { - let (_, vote, fallback_vote) = certificate_limits_and_votes(cert_type); + let vote_types = certificate_limits_and_vote_types(cert_type).1; match self { - Self::DoubleVote { + Self::Skip { + signature0, + bitmap0, + sig_and_bitmap1, + } => { + assert_eq!(vote_types.len(), 2); + for msg in msgs { + let vote_type = msg.vote.get_type(); + if vote_type == vote_types[0] { + try_set_bitmap(bitmap0, msg.rank)?; + } else { + assert_eq!(vote_type, vote_types[1]); + let (_, bitmap) = sig_and_bitmap1 + .get_or_insert((SignatureProjective::identity(), default_bitvec())); + try_set_bitmap(bitmap, msg.rank)?; + } + } + signature0.aggregate_with(msgs.iter().filter_map(|msg| { + (msg.vote.get_type() == vote_types[0]).then_some(&msg.signature) + }))?; + sig_and_bitmap1 + .as_mut() + .map(|(signature, _)| { + signature.aggregate_with(msgs.iter().filter_map(|msg| { + (msg.vote.get_type() == vote_types[1]).then_some(&msg.signature) + })) + }) + .unwrap_or(Ok(()))?; + Ok(()) + } + + Self::NotarFallback { signature, bitmap0, bitmap1, } => { - 
debug_assert!(fallback_vote.is_some()); - let Some(fallback_vote) = fallback_vote else { - return Err(AggregateError::InvalidVoteTypes); - }; + assert_eq!(vote_types.len(), 2); for msg in msgs { - if msg.vote == vote { + let vote_type = msg.vote.get_type(); + if vote_type == vote_types[0] { try_set_bitmap(bitmap0, msg.rank)?; } else { - debug_assert_eq!(msg.vote, fallback_vote); - if msg.vote != fallback_vote { - return Err(AggregateError::InvalidVoteTypes); - } - match bitmap1 { - Some(bitmap) => try_set_bitmap(bitmap, msg.rank)?, - None => { - let mut bitmap = default_bitvec(); - try_set_bitmap(&mut bitmap, msg.rank)?; - *bitmap1 = Some(bitmap); - } - } + assert_eq!(vote_type, vote_types[1]); + let bitmap = bitmap1.get_or_insert(default_bitvec()); + try_set_bitmap(bitmap, msg.rank)?; } } Ok(signature.aggregate_with(msgs.iter().map(|m| &m.signature))?) } Self::SingleVote { signature, bitmap } => { - debug_assert!(fallback_vote.is_none()); - if fallback_vote.is_some() { - return Err(AggregateError::InvalidVoteTypes); - } + assert_eq!(vote_types.len(), 1); for msg in msgs { - debug_assert_eq!(msg.vote, vote); - if msg.vote != vote { - return Err(AggregateError::InvalidVoteTypes); - } + assert_eq!(msg.vote.get_type(), vote_types[0]); try_set_bitmap(bitmap, msg.rank)?; } Ok(signature.aggregate_with(msgs.iter().map(|m| &m.signature))?) 
@@ -200,18 +219,56 @@ impl BuilderType { fn build(self, cert_type: CertificateType) -> Result { match self { Self::SingleVote { signature, bitmap } => { - build_cert_from_bitmap(cert_type, signature, bitmap) + build_cert_from_bitmap(cert_type, signature, bitmap).map_err(BuildError::Encode) } - Self::DoubleVote { + Self::Skip { + mut signature0, + bitmap0, + sig_and_bitmap1, + } => match sig_and_bitmap1 { + None => build_cert_from_bitmap(cert_type, signature0, bitmap0) + .map_err(BuildError::Encode), + Some((signature1, bitmap1)) => { + signature0.aggregate_with([signature1].iter())?; + build_cert_from_bitmaps(cert_type, signature0, bitmap0, bitmap1) + .map_err(BuildError::Encode) + } + }, + Self::NotarFallback { signature, bitmap0, bitmap1, } => match bitmap1 { - None => build_cert_from_bitmap(cert_type, signature, bitmap0), - Some(bitmap1) => build_cert_from_bitmaps(cert_type, signature, bitmap0, bitmap1), + None => build_cert_from_bitmap(cert_type, signature, bitmap0) + .map_err(BuildError::Encode), + Some(bitmap1) => build_cert_from_bitmaps(cert_type, signature, bitmap0, bitmap1) + .map_err(BuildError::Encode), }, } } + + /// Builds a [`Certificate`] for rewards purposes from the builder. + fn build_for_rewards( + self, + cert_type: CertificateType, + ) -> Result { + match self { + Self::Skip { + signature0, + bitmap0, + sig_and_bitmap1: _, + } => build_cert_from_bitmap(cert_type, signature0, bitmap0) + .map_err(BuildForRewardsError::Encode), + Self::SingleVote { signature, bitmap } => match cert_type { + CertificateType::Notarize(_, _) => { + build_cert_from_bitmap(cert_type, signature, bitmap) + .map_err(BuildForRewardsError::Encode) + } + _ => Err(BuildForRewardsError::InvalidCertType), + }, + Self::NotarFallback { .. } => Err(BuildForRewardsError::InvalidCertType), + } + } } /// Builder for creating [`Certificate`] by using BLS signature aggregation. 
@@ -239,6 +296,12 @@ impl CertificateBuilder { pub(super) fn build(self) -> Result { self.builder_type.build(self.cert_type) } + + /// Builds a [`Certificate`] for rewards purposes from the builder. + #[allow(dead_code)] + pub(super) fn build_for_rewards(self) -> Result { + self.builder_type.build_for_rewards(self.cert_type) + } } #[cfg(test)] @@ -300,9 +363,11 @@ mod tests { .aggregate(&messages_2) .expect("Failed to aggregate notarization fallback votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode bitmap") { + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base3(bitmap1, bitmap2) => { assert_eq!(bitmap1.len(), 8); assert_eq!(bitmap2.len(), 8); @@ -323,9 +388,11 @@ mod tests { builder .aggregate(&messages_1) .expect("Failed to aggregate notarization votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode bitmap") { + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base2(bitmap1) => { assert_eq!(bitmap1.len(), 7); for i in rank_1 { @@ -342,9 +409,11 @@ mod tests { builder .aggregate(&messages_2) .expect("Failed to aggregate notarization fallback votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode bitmap") { + let certificate_message = 
builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base3(bitmap1, bitmap2) => { assert_eq!(bitmap1.count_ones(), 0); assert_eq!(bitmap2.len(), 8); diff --git a/votor/src/consensus_pool/parent_ready_tracker.rs b/votor/src/consensus_pool/parent_ready_tracker.rs index 73ecb1e4021d58..5a4f654e5bd387 100644 --- a/votor/src/consensus_pool/parent_ready_tracker.rs +++ b/votor/src/consensus_pool/parent_ready_tracker.rs @@ -16,8 +16,8 @@ use { crate::{common::MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE, event::VotorEvent}, agave_votor_messages::consensus_message::Block, solana_clock::{Slot, NUM_CONSECUTIVE_LEADER_SLOTS}, - solana_gossip::cluster_info::ClusterInfo, - std::{collections::HashMap, sync::Arc}, + solana_pubkey::Pubkey, + std::collections::HashMap, }; #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -27,8 +27,10 @@ pub(crate) enum BlockProductionParent { Parent(Block), } +#[derive(Clone, Debug, Default)] pub(crate) struct ParentReadyTracker { - cluster_info: Arc, + /// Our pubkey for logging + my_pubkey: Pubkey, /// Parent ready status for each slot slot_statuses: HashMap, @@ -56,7 +58,7 @@ struct ParentReadyStatus { impl ParentReadyTracker { /// Creates a new tracker with the root bank as implicitely notarized fallback - pub(super) fn new(cluster_info: Arc, root_block @ (root_slot, _): Block) -> Self { + pub(super) fn new(my_pubkey: Pubkey, root_block @ (root_slot, _): Block) -> Self { let mut slot_statuses = HashMap::new(); slot_statuses.insert( root_slot, @@ -75,7 +77,7 @@ impl ParentReadyTracker { }, ); Self { - cluster_info, + my_pubkey, slot_statuses, root: root_slot, highest_with_parent_ready: root_slot.saturating_add(1), @@ -98,7 +100,7 @@ impl ParentReadyTracker { } trace!( "{}: Adding new notar fallback for {block:?}", - self.cluster_info.id() + self.my_pubkey ); 
status.notar_fallbacks.push(block); assert!(status.notar_fallbacks.len() <= MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE); @@ -107,7 +109,7 @@ impl ParentReadyTracker { for s in slot.saturating_add(1).. { trace!( "{}: Adding new parent ready for {s} parent {block:?}", - self.cluster_info.id() + self.my_pubkey ); let status = self.slot_statuses.entry(s).or_default(); if !status.parents_ready.contains(&block) { @@ -136,7 +138,7 @@ impl ParentReadyTracker { return; } - trace!("{}: Adding new skip for {slot:?}", self.cluster_info.id()); + trace!("{}: Adding new skip for {slot:?}", self.my_pubkey); let status = self.slot_statuses.entry(slot).or_default(); status.skip = true; @@ -175,7 +177,7 @@ impl ParentReadyTracker { for s in future_slots { trace!( "{}: Adding new parent ready for {s} parents {potential_parents:?}", - self.cluster_info.id(), + self.my_pubkey, ); let status = self.slot_statuses.entry(s).or_default(); for &block in &potential_parents { @@ -231,30 +233,29 @@ impl ParentReadyTracker { self.root = root; self.slot_statuses.retain(|&s, _| s >= root); } + + /// Updates the pubkey. Note that the pubkey is used for logging purposes only. 
+ pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + } + + #[cfg(test)] + pub fn my_pubkey(&self) -> Pubkey { + self.my_pubkey + } } #[cfg(test)] mod tests { use { super::*, itertools::Itertools, solana_clock::NUM_CONSECUTIVE_LEADER_SLOTS, - solana_gossip::contact_info::ContactInfo, solana_hash::Hash, solana_keypair::Keypair, - solana_net_utils::SocketAddrSpace, solana_signer::Signer, + solana_hash::Hash, solana_pubkey::Pubkey, }; - fn new_cluster_info() -> Arc { - let keypair = Keypair::new(); - let contact_info = ContactInfo::new_localhost(&keypair.pubkey(), 0); - Arc::new(ClusterInfo::new( - contact_info, - Arc::new(keypair), - SocketAddrSpace::Unspecified, - )) - } - #[test] fn basic() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; for i in 1..2 * NUM_CONSECUTIVE_LEADER_SLOTS { @@ -268,7 +269,7 @@ mod tests { #[test] fn skips() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; let block = (1, Hash::new_unique()); @@ -285,7 +286,7 @@ mod tests { #[test] fn out_of_order() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; let block = (1, Hash::new_unique()); @@ -305,7 +306,7 @@ mod tests { fn snapshot_wfsm() { let root_slot = 2147; let root_block = (root_slot, Hash::new_unique()); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), root_block); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), root_block); let mut events = vec![]; assert!(tracker.parent_ready(root_slot + 1, root_block)); @@ -332,7 +333,7 @@ mod tests { #[test] fn 
highest_parent_ready_out_of_order() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; assert_eq!(tracker.highest_parent_ready(), 1); @@ -354,7 +355,7 @@ mod tests { #[test] fn missed_window() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; assert_eq!(tracker.highest_parent_ready(), 1); assert_eq!( @@ -384,7 +385,7 @@ mod tests { #[test] fn pick_more_skips() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; for i in 1..=10 { diff --git a/votor/src/consensus_pool/slot_stake_counters.rs b/votor/src/consensus_pool/slot_stake_counters.rs index 60e6e4ed2e9ce8..8c0a343753f722 100644 --- a/votor/src/consensus_pool/slot_stake_counters.rs +++ b/votor/src/consensus_pool/slot_stake_counters.rs @@ -1,6 +1,3 @@ -#![allow(dead_code)] -// TODO(wen): remove allow(dead_code) when consensus_pool is fully integrated - use { crate::{ common::{ diff --git a/votor/src/consensus_pool/stats.rs b/votor/src/consensus_pool/stats.rs index 4011f5a7bf434b..b89666e1d1e97c 100644 --- a/votor/src/consensus_pool/stats.rs +++ b/votor/src/consensus_pool/stats.rs @@ -1,12 +1,11 @@ use { - agave_votor_messages::{consensus_message::CertificateType, vote::Vote}, + agave_votor_messages::{consensus_message::CertificateType, vote::VoteType}, solana_metrics::datapoint_info, std::time::{Duration, Instant}, }; const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); -/// Struct to hold stats for different certificate types. 
#[derive(Default)] struct CertificateStats { finalize: u64, @@ -19,8 +18,8 @@ struct CertificateStats { impl CertificateStats { /// Increments the stats associated with the certificate type by one. - fn increment(&mut self, cert_type: &CertificateType) { - match cert_type { + fn increment(&mut self, certificate: &CertificateType) { + match certificate { CertificateType::Finalize(_) => self.finalize = self.finalize.saturating_add(1), CertificateType::FinalizeFast(_, _) => { self.finalize_fast = self.finalize_fast.saturating_add(1) @@ -34,78 +33,20 @@ impl CertificateStats { } } - /// Reports the certificate related statistics. - fn report(&self, header: &'static str) { - let Self { - finalize, - finalize_fast, - notarize, - notarize_fallback, - skip, - genesis, - } = *self; + fn record(&self, header: &'static str) { datapoint_info!( header, - ("finalize", finalize, i64), - ("finalize_fast", finalize_fast, i64), - ("notarize", notarize, i64), - ("notarize_fallback", notarize_fallback, i64), - ("skip", skip, i64), - ("genesis", genesis, i64), - ) - } -} - -/// Struct to hold stats for different vote types. -#[derive(Default)] -struct VoteStats { - notarize: u64, - finalize: u64, - skip: u64, - notarize_fallback: u64, - skip_fallback: u64, - genesis: u64, -} - -impl VoteStats { - /// Increments the stats associated with the votes by one. - fn increment(&mut self, vote: &Vote) { - match vote { - Vote::Notarize(_) => self.notarize = self.notarize.saturating_add(1), - Vote::NotarizeFallback(_) => { - self.notarize_fallback = self.notarize_fallback.saturating_add(1) - } - Vote::Skip(_) => self.skip = self.skip.saturating_add(1), - Vote::SkipFallback(_) => self.skip_fallback = self.skip_fallback.saturating_add(1), - Vote::Finalize(_) => self.finalize = self.finalize.saturating_add(1), - Vote::Genesis(_) => self.genesis = self.genesis.saturating_add(1), - } - } - - /// Reports the vote related statistics. 
- fn report(&self) { - let Self { - finalize, - notarize, - notarize_fallback, - skip, - skip_fallback, - genesis, - } = *self; - datapoint_info!( - "consensus_ingested_votes", - ("finalize", finalize, i64), - ("notarize", notarize, i64), - ("notarize_fallback", notarize_fallback, i64), - ("skip", skip, i64), - ("skip_fallback", skip_fallback, i64), - ("genesis", genesis, i64), + ("finalize", self.finalize, i64), + ("finalize_fast", self.finalize_fast, i64), + ("notarize", self.notarize, i64), + ("notarize_fallback", self.notarize_fallback, i64), + ("skip", self.skip, i64), ) } } pub(crate) struct ConsensusPoolStats { - pub(crate) invalid_votes: u32, + pub(crate) conflicting_votes: u32, pub(crate) event_safe_to_notarize: u32, pub(crate) event_safe_to_skip: u32, pub(crate) exist_certs: u32, @@ -114,16 +55,25 @@ pub(crate) struct ConsensusPoolStats { pub(crate) incoming_votes: u32, pub(crate) out_of_range_certs: u32, pub(crate) out_of_range_votes: u32, + new_certs_generated: CertificateStats, new_certs_ingested: CertificateStats, - ingested_votes: VoteStats, + pub(crate) ingested_votes: Vec, + pub(crate) last_request_time: Instant, } impl Default for ConsensusPoolStats { fn default() -> Self { + Self::new() + } +} + +impl ConsensusPoolStats { + pub fn new() -> Self { + let num_vote_types = (VoteType::Genesis as usize).saturating_add(1); Self { - invalid_votes: 0, + conflicting_votes: 0, event_safe_to_notarize: 0, event_safe_to_skip: 0, exist_certs: 0, @@ -132,69 +82,99 @@ impl Default for ConsensusPoolStats { incoming_votes: 0, out_of_range_certs: 0, out_of_range_votes: 0, + new_certs_ingested: CertificateStats::default(), new_certs_generated: CertificateStats::default(), - ingested_votes: VoteStats::default(), + ingested_votes: vec![0; num_vote_types], + last_request_time: Instant::now(), } } -} -impl ConsensusPoolStats { - pub fn incr_ingested_vote(&mut self, vote: &Vote) { - self.ingested_votes.increment(vote); + pub fn incr_ingested_vote_type(&mut self, 
vote_type: VoteType) { + let index = vote_type as usize; + + self.ingested_votes[index] = self.ingested_votes[index].saturating_add(1); } - pub fn incr_cert_type(&mut self, cert_type: &CertificateType, is_generated: bool) { + pub fn incr_cert_type(&mut self, certificate: &CertificateType, is_generated: bool) { if is_generated { - self.new_certs_generated.increment(cert_type); + self.new_certs_generated.increment(certificate); } else { - self.new_certs_ingested.increment(cert_type); - }; + self.new_certs_ingested.increment(certificate); + } } + /// Reports the certificate related statistics. fn report(&self) { - let Self { - invalid_votes, - event_safe_to_skip, - event_safe_to_notarize, - exist_votes, - exist_certs, - incoming_votes, - incoming_certs, - out_of_range_votes, - out_of_range_certs, - ingested_votes, - new_certs_generated, - new_certs_ingested, - last_request_time: _, - } = self; datapoint_info!( "consensus_pool_stats", - ("vote_pool_invalid_votes", *invalid_votes as i64, i64), - ("event_safe_to_skip", *event_safe_to_skip as i64, i64), + ("conflicting_votes", self.conflicting_votes as i64, i64), + ("event_safe_to_skip", self.event_safe_to_skip as i64, i64), ( "event_safe_to_notarize", - *event_safe_to_notarize as i64, + self.event_safe_to_notarize as i64, + i64 + ), + ("exist_votes", self.exist_votes as i64, i64), + ("exist_certs", self.exist_certs as i64, i64), + ("incoming_votes", self.incoming_votes as i64, i64), + ("incoming_certs", self.incoming_certs as i64, i64), + ("out_of_range_votes", self.out_of_range_votes as i64, i64), + ("out_of_range_certs", self.out_of_range_certs as i64, i64), + ); + + datapoint_info!( + "consensus_ingested_votes", + ( + "finalize", + *self + .ingested_votes + .get(VoteType::Finalize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize", + *self + .ingested_votes + .get(VoteType::Notarize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize_fallback", + *self + .ingested_votes + .get(VoteType::NotarizeFallback as 
usize) + .unwrap() as i64, + i64 + ), + ( + "skip", + *self.ingested_votes.get(VoteType::Skip as usize).unwrap() as i64, + i64 + ), + ( + "skip_fallback", + *self + .ingested_votes + .get(VoteType::SkipFallback as usize) + .unwrap() as i64, i64 ), - ("exist_votes", *exist_votes as i64, i64), - ("exist_certs", *exist_certs as i64, i64), - ("incoming_votes", *incoming_votes as i64, i64), - ("incoming_certs", *incoming_certs as i64, i64), - ("out_of_range_votes", *out_of_range_votes as i64, i64), - ("out_of_range_certs", *out_of_range_certs as i64, i64), ); - ingested_votes.report(); - new_certs_generated.report("consensus_pool_generated_certs"); - new_certs_ingested.report("consensus_pool_ingested_certs"); + self.new_certs_ingested + .record("consensus_pool_ingested_certs"); + self.new_certs_generated + .record("consensus_pool_generated_certs"); } pub fn maybe_report(&mut self) { if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { self.report(); - *self = Self::default(); + *self = Self::new(); } } } diff --git a/votor/src/consensus_pool/vote_pool.rs b/votor/src/consensus_pool/vote_pool.rs index 3aa7e2ee493711..1b0829bb1fb090 100644 --- a/votor/src/consensus_pool/vote_pool.rs +++ b/votor/src/consensus_pool/vote_pool.rs @@ -1,334 +1,129 @@ -//! Container to store received votes and associated stakes. -//! -//! Implements various checks for invalid votes as defined by the Alpenglow paper e.g. lemma 20 and 22. -//! Further detects duplicate votes which are defined as identical vote from the same sender received multiple times. 
- use { crate::common::Stake, - agave_votor_messages::{consensus_message::VoteMessage, vote::Vote}, - solana_clock::Slot, + agave_votor_messages::consensus_message::VoteMessage, solana_hash::Hash, solana_pubkey::Pubkey, - std::collections::{btree_map::Entry, BTreeMap}, - thiserror::Error, + std::collections::{BTreeMap, BTreeSet}, }; -/// As per the Alpenglow paper, a validator is allowed to vote notar fallback on at most 3 different block id for a given slot. -const MAX_NOTAR_FALLBACK_PER_VALIDATOR: usize = 3; +/// There are two types of vote pools: +/// - SimpleVotePool: Tracks all votes of a specific vote type made by validators for some slot N, but only one vote per block. +/// - DuplicateBlockVotePool: Tracks all votes of a specific vote type made by validators for some slot N, +/// but allows votes for different blocks by the same validator. Only relevant for VotePool's that are of type +/// Notarization or NotarizationFallback +pub(super) enum VotePool { + SimpleVotePool(SimpleVotePool), + DuplicateBlockVotePool(DuplicateBlockVotePool), +} -#[derive(Debug, PartialEq, Eq, Error)] -pub(crate) enum AddVoteError { - #[error("duplicate vote")] - Duplicate, - /// These are invalid votes as defined in the Alpenglow paper e.g. lemma 20 and 22. - #[error("invalid votes")] - Invalid, +#[derive(Default)] +pub(super) struct SimpleVotePool { + votes: Vec, + total_stake: Stake, + prev_voted_validators: BTreeSet, } -/// Helper function to reduce some code duplication.
-fn insert_vote( - map: &mut BTreeMap, - voter: Pubkey, - vote: VoteMessage, -) -> Result<(), AddVoteError> { - match map.entry(voter) { - Entry::Occupied(_) => Err(AddVoteError::Duplicate), - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) +impl SimpleVotePool { + pub(super) fn add_vote( + &mut self, + validator_vote_key: Pubkey, + validator_stake: Stake, + vote: VoteMessage, + ) -> Option { + if !self.prev_voted_validators.insert(validator_vote_key) { + return None; } + self.votes.push(vote); + self.total_stake = self.total_stake.saturating_add(validator_stake); + Some(self.total_stake) } -} - -/// Container to store per slot votes. -struct InternalVotePool { - /// The slot this instance of Votes is responsible for. - slot: Slot, - /// Skip votes are stored in map indexed by validator. - skip: BTreeMap, - /// Skip fallback votes are stored in map indexed by validator. - skip_fallback: BTreeMap, - /// Finalize votes are stored in map indexed by validator. - finalize: BTreeMap, - /// Notar votes are stored in map indexed by validator. - notar: BTreeMap, - /// A validator can vote notar fallback on upto 3 blocks. - /// - /// Per validator, we store a map of which block ids the validator has voted notar fallback on. - notar_fallback: BTreeMap>, - /// Genesis votes are stored in map indexed by validator. - genesis: BTreeMap, -} -impl InternalVotePool { - fn new(slot: Slot) -> Self { - Self { - slot, - skip: BTreeMap::default(), - skip_fallback: BTreeMap::default(), - finalize: BTreeMap::default(), - notar: BTreeMap::default(), - notar_fallback: BTreeMap::default(), - genesis: BTreeMap::default(), - } + pub(super) fn votes(&self) -> &[VoteMessage] { + &self.votes } - /// Adds votes. - /// - /// Checks for different types of invalid and duplicate votes returning appropriate errors. 
- fn add_vote(&mut self, voter: Pubkey, vote: VoteMessage) -> Result<(), AddVoteError> { - debug_assert_eq!(self.slot, vote.vote.slot()); - match vote.vote { - Vote::Notarize(notar) => { - if self.skip.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - match self.notar.entry(voter) { - Entry::Occupied(e) => { - // unwrap should be safe as we should only store notar type votes here - if e.get().vote.block_id().unwrap() == ¬ar.block_id { - Err(AddVoteError::Duplicate) - } else { - Err(AddVoteError::Invalid) - } - } - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) - } - } - } - Vote::NotarizeFallback(notar_fallback) => { - if self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - match self.notar_fallback.entry(voter) { - Entry::Vacant(e) => { - e.insert(BTreeMap::from([(notar_fallback.block_id, vote)])); - Ok(()) - } - Entry::Occupied(mut e) => { - let map = e.get_mut(); - let map_len = map.len(); - match map.entry(notar_fallback.block_id) { - Entry::Vacant(map_e) => { - if map_len == MAX_NOTAR_FALLBACK_PER_VALIDATOR { - Err(AddVoteError::Invalid) - } else { - map_e.insert(vote); - Ok(()) - } - } - Entry::Occupied(_) => Err(AddVoteError::Duplicate), - } - } - } - } - Vote::Skip(_) => { - if self.notar.contains_key(&voter) || self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.skip, voter, vote) - } - Vote::SkipFallback(_) => { - if self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.skip_fallback, voter, vote) - } - Vote::Finalize(_) => { - if self.skip.contains_key(&voter) || self.skip_fallback.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - if let Some(map) = self.notar_fallback.get(&voter) { - debug_assert!(!map.is_empty()); - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.finalize, voter, vote) - } - Vote::Genesis(genesis) => { - match self.genesis.entry(voter) { - Entry::Occupied(e) => { - 
// unwrap should be safe as we should only store genesis type votes here - if e.get().vote.block_id().unwrap() == &genesis.block_id { - Err(AddVoteError::Duplicate) - } else { - Err(AddVoteError::Invalid) - } - } - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) - } - } - } - } + pub(super) fn total_stake(&self) -> Stake { + self.total_stake } - /// Get [`VoteMessage`]s for the corresponding [`Vote`]. - /// - // TODO: figure out how to return an iterator here instead which would require `CertificateBuilder::aggregate()` to accept an iterator. - fn get_votes(&self, vote: &Vote) -> Vec { - match vote { - Vote::Finalize(_) => self.finalize.values().cloned().collect(), - Vote::Notarize(notar) => self - .notar - .values() - .filter(|vote| { - // unwrap should be safe as we should only store notar votes here - vote.vote.block_id().unwrap() == ¬ar.block_id - }) - .cloned() - .collect(), - Vote::NotarizeFallback(nf) => self - .notar_fallback - .values() - .filter_map(|map| map.get(&nf.block_id)) - .cloned() - .collect(), - Vote::Skip(_) => self.skip.values().cloned().collect(), - Vote::SkipFallback(_) => self.skip_fallback.values().cloned().collect(), - Vote::Genesis(genesis) => self - .genesis - .values() - .filter(|vote| { - // unwrap should be safe as we should only store genesis votes here - vote.vote.block_id().unwrap() == &genesis.block_id - }) - .cloned() - .collect(), - } + pub(super) fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_validators.contains(validator_vote_key) } } -/// Container to store the total stakes for different types of votes. -struct Stakes { - slot: Slot, - /// Total stake that has voted skip. - skip: Stake, - /// Total stake that has voted skil fallback. - skip_fallback: Stake, - /// Total stake that has voted finalize. - finalize: Stake, - /// Stake that has voted notar. - /// - /// Different validators may vote notar for different blocks, so this tracks stake per block id. 
- notar: BTreeMap, - /// Stake that has voted notar fallback. - /// - /// A single validator may vote for upto 3 blocks and different validators can vote for different blocks. - /// Hence, this tracks stake per block id. - notar_fallback: BTreeMap, - /// Stake that has voted genesis. - genesis: BTreeMap, +#[derive(Default)] +struct VoteEntry { + votes: Vec, + total_stake_by_key: Stake, } -impl Stakes { - fn new(slot: Slot) -> Self { +pub(super) struct DuplicateBlockVotePool { + max_entries_per_pubkey: usize, + vote_entries: BTreeMap, + prev_voted_block_ids: BTreeMap>, +} + +impl DuplicateBlockVotePool { + pub(super) fn new(max_entries_per_pubkey: usize) -> Self { Self { - slot, - skip: 0, - skip_fallback: 0, - finalize: 0, - notar: BTreeMap::default(), - notar_fallback: BTreeMap::default(), - genesis: BTreeMap::default(), + max_entries_per_pubkey, + vote_entries: BTreeMap::new(), + prev_voted_block_ids: BTreeMap::new(), } } - /// Updates the corresponding stake after a vote has been successfully added to the pool. - /// - /// Returns the total stake of the corresponding type (and block id in case of notar or notar-fallback) after the update. 
- fn add_stake(&mut self, voter_stake: Stake, vote: &Vote) -> Stake { - debug_assert_eq!(self.slot, vote.slot()); - match vote { - Vote::Notarize(notar) => { - let stake = self.notar.entry(notar.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } - Vote::NotarizeFallback(nf) => { - let stake = self.notar_fallback.entry(nf.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } - Vote::Skip(_) => { - self.skip = self.skip.saturating_add(voter_stake); - self.skip - } - Vote::SkipFallback(_) => { - self.skip_fallback = self.skip_fallback.saturating_add(voter_stake); - self.skip_fallback - } - Vote::Finalize(_) => { - self.finalize = self.finalize.saturating_add(voter_stake); - self.finalize - } - Vote::Genesis(genesis) => { - let stake = self.genesis.entry(genesis.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } + pub(super) fn add_vote( + &mut self, + validator_vote_key: Pubkey, + validator_stake: Stake, + vote: VoteMessage, + ) -> Option { + let block_id = *vote.vote.block_id().unwrap(); + // Check whether the validator_vote_key already used the same voted_block_id or exceeded max_entries_per_pubkey + // If so, return false, otherwise add the voted_block_id to the prev_votes + let prev_voted_block_ids = self + .prev_voted_block_ids + .entry(validator_vote_key) + .or_default(); + if prev_voted_block_ids.contains(&block_id) + || prev_voted_block_ids.len() >= self.max_entries_per_pubkey + { + return None; } - } + prev_voted_block_ids.insert(block_id); - /// Get the stake corresponding to the [`Vote`]. 
- fn get_stake(&self, vote: &Vote) -> Stake { - match vote { - Vote::Notarize(notar) => *self.notar.get(¬ar.block_id).unwrap_or(&0), - Vote::NotarizeFallback(nf) => *self.notar_fallback.get(&nf.block_id).unwrap_or(&0), - Vote::Skip(_) => self.skip, - Vote::SkipFallback(_) => self.skip_fallback, - Vote::Finalize(_) => self.finalize, - Vote::Genesis(genesis) => *self.genesis.get(&genesis.block_id).unwrap_or(&0), - } + let vote_entry = self.vote_entries.entry(block_id).or_default(); + vote_entry.votes.push(vote); + vote_entry.total_stake_by_key = vote_entry + .total_stake_by_key + .saturating_add(validator_stake); + Some(vote_entry.total_stake_by_key) } -} - -/// Container to store per slot votes and associated stake. -/// -/// When adding new votes, various checks for invalid and duplicate votes is performed. -pub(super) struct VotePool { - /// The slot this instance of the pool is responsible for. - slot: Slot, - /// Stores seen votes. - votes: InternalVotePool, - /// Stores total stake that voted. - stakes: Stakes, -} -impl VotePool { - pub(super) fn new(slot: Slot) -> Self { - Self { - slot, - votes: InternalVotePool::new(slot), - stakes: Stakes::new(slot), - } + pub(super) fn total_stake_by_block_id(&self, block_id: &Hash) -> Stake { + self.vote_entries + .get(block_id) + .map_or(0, |vote_entries| vote_entries.total_stake_by_key) } - /// Adds a vote to the pool. - /// - /// On success, returns the total stake of the corresponding vote type. - pub(super) fn add_vote( - &mut self, - voter: Pubkey, - voter_stake: Stake, - msg: VoteMessage, - ) -> Result { - debug_assert_eq!(self.slot, msg.vote.slot()); - let vote = msg.vote; - self.votes.add_vote(voter, msg)?; - Ok(self.stakes.add_stake(voter_stake, &vote)) + pub(super) fn votes(&self, block_id: &Hash) -> Option<&[VoteMessage]> { + self.vote_entries + .get(block_id) + .map(|entry| entry.votes.as_slice()) } - /// Returns the [`Stake`] corresponding to the specific [`Vote`]. 
- pub(super) fn get_stake(&self, vote: &Vote) -> Stake { - self.stakes.get_stake(vote) + pub(super) fn has_prev_validator_vote_for_block( + &self, + validator_vote_key: &Pubkey, + block_id: &Hash, + ) -> bool { + self.prev_voted_block_ids + .get(validator_vote_key) + .is_some_and(|vs| vs.contains(block_id)) } - /// Returns a list of votes corresponding to the specific [`Vote`]. - pub(super) fn get_votes(&self, vote: &Vote) -> Vec { - self.votes.get_votes(vote) + pub(super) fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_block_ids.contains_key(validator_vote_key) } } @@ -341,328 +136,105 @@ mod test { }; #[test] - fn test_notar_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip).unwrap(); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar).unwrap(); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Duplicate) - )); - } - - #[test] - fn test_notar_fallback_failures() { - let voter = Pubkey::new_unique(); - let 
signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::default()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - for _ in 0..3 { - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, nf).unwrap(); - } - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, + fn test_skip_vote_pool() { + let mut vote_pool = SimpleVotePool::default(); + let vote = Vote::new_skip_vote(5); + let vote_message = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, }; - votes.add_vote(voter, nf.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Duplicate) - )); - } - - #[test] - fn test_skip_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; + let my_pubkey = Pubkey::new_unique(); - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar).unwrap(); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Invalid) - )); + 
assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote_message), Some(10)); + assert_eq!(vote_pool.total_stake(), 10); - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Invalid) - )); + // Adding the same key again should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote_message), None); + assert_eq!(vote_pool.total_stake(), 10); - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Duplicate) - )); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote_message), Some(70)); + assert_eq!(vote_pool.total_stake(), 70); } #[test] - fn test_skip_fallback_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, sf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, + fn test_notarization_pool() { + let mut vote_pool = DuplicateBlockVotePool::new(1); + let my_pubkey = Pubkey::new_unique(); + let block_id = Hash::new_unique(); + let vote = Vote::new_notarization_vote(3, 
block_id); + let vote = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, }; - votes.add_vote(voter, sf.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, sf), - Err(AddVoteError::Duplicate) - )); - } + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), Some(10)); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 10); - #[test] - fn test_finalize_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip).unwrap(); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Invalid) - )); + // Adding the same key again should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), None); - let mut votes = InternalVotePool::new(slot); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, sf).unwrap(); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Invalid) - )); + // Adding a different bankhash should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), None); - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Duplicate) - )); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote), Some(70)); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 70); } #[test] - fn test_stakes() { 
- let slot = 123; - let stake = 54321; - let mut stakes = Stakes::new(slot); - let vote = Vote::new_skip_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let vote = Vote::new_skip_fallback_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let vote = Vote::new_finalization_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let stake0 = 10; - let stake1 = 20; - let hash0 = Hash::new_unique(); - let hash1 = Hash::new_unique(); - let vote0 = Vote::new_notarization_vote(slot, hash0); - let vote1 = Vote::new_notarization_vote(slot, hash1); - assert_eq!(stakes.add_stake(stake0, &vote0), stake0); - assert_eq!(stakes.add_stake(stake1, &vote1), stake1); - assert_eq!(stakes.get_stake(&vote0), stake0); - assert_eq!(stakes.get_stake(&vote1), stake1); - - let mut stakes = Stakes::new(slot); - let stake0 = 10; - let stake1 = 20; - let hash0 = Hash::new_unique(); - let hash1 = Hash::new_unique(); - let vote0 = Vote::new_notarization_fallback_vote(slot, hash0); - let vote1 = Vote::new_notarization_fallback_vote(slot, hash1); - assert_eq!(stakes.add_stake(stake0, &vote0), stake0); - assert_eq!(stakes.add_stake(stake1, &vote1), stake1); - assert_eq!(stakes.get_stake(&vote0), stake0); - assert_eq!(stakes.get_stake(&vote1), stake1); - } + fn test_notarization_fallback_pool() { + agave_logger::setup(); + let mut vote_pool = DuplicateBlockVotePool::new(3); + let my_pubkey = Pubkey::new_unique(); + + let votes = (0..4) + .map(|_| { + let vote = Vote::new_notarization_fallback_vote(7, Hash::new_unique()); + VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, + } + }) + .collect::>(); + + // Adding the first 3 votes should succeed, but total_stake should remain at 10 + 
for vote in votes.iter().take(3).cloned() { + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), Some(10)); + assert_eq!( + vote_pool.total_stake_by_block_id(vote.vote.block_id().unwrap()), + 10 + ); + } + // Adding the 4th vote should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, votes[3]), None); + assert_eq!( + vote_pool.total_stake_by_block_id(votes[3].vote.block_id().unwrap()), + 0 + ); - #[test] - fn test_vote_pool() { - let slot = 1; - let mut vote_pool = VotePool::new(slot); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + for vote in votes.iter().skip(1).take(2).cloned() { + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote), Some(70)); + assert_eq!( + vote_pool.total_stake_by_block_id(vote.vote.block_id().unwrap()), + 70 + ); + } - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let vote = Vote::new_finalization_vote(slot); - let vote_message = VoteMessage { - vote, - signature, - rank, - }; - let stake = 12345; + // The new key only added 2 votes, so adding block_ids[3] should succeed + assert_eq!(vote_pool.add_vote(new_pubkey, 60, votes[3]), Some(60)); assert_eq!( - vote_pool - .add_vote(voter, stake, vote_message.clone()) - .unwrap(), - stake + vote_pool.total_stake_by_block_id(votes[3].vote.block_id().unwrap()), + 60 ); - assert_eq!(vote_pool.get_stake(&vote), stake); - let returned_votes = vote_pool.get_votes(&vote); - assert_eq!(returned_votes.len(), 1); - assert_eq!(returned_votes[0], vote_message); + + // Now if adding the same key again, it should fail + assert_eq!(vote_pool.add_vote(new_pubkey, 60, votes[0]), None); } } diff --git a/votor/src/consensus_pool_service.rs b/votor/src/consensus_pool_service.rs index 3193214626cb3b..5ed6a570ce8872 100644 --- a/votor/src/consensus_pool_service.rs +++ b/votor/src/consensus_pool_service.rs @@ -1,19 +1,23 @@ //! Service in charge of ingesting new messages into the certificate pool //! 
and notifying votor of new events that occur + +mod stats; + use { crate::{ - commitment::{ - update_commitment_cache, CommitmentAggregationData, CommitmentError, CommitmentType, - }, + commitment::{update_commitment_cache, CommitmentAggregationData, CommitmentType}, + common::DELTA_STANDSTILL, consensus_pool::{ - parent_ready_tracker::BlockProductionParent, AddMessageError, ConsensusPool, + parent_ready_tracker::BlockProductionParent, AddVoteError, ConsensusPool, }, event::{LeaderWindowInfo, VotorEvent, VotorEventSender}, voting_service::BLSOp, - votor::Votor, }, - agave_votor_messages::consensus_message::{Certificate, ConsensusMessage}, - crossbeam_channel::{Receiver, RecvTimeoutError, Sender, TrySendError}, + agave_votor_messages::{ + consensus_message::{Certificate, ConsensusMessage}, + migration::MigrationStatus, + }, + crossbeam_channel::{select, Receiver, Sender, TrySendError}, solana_clock::Slot, solana_gossip::cluster_info::ClusterInfo, solana_ledger::{ @@ -22,24 +26,21 @@ use { }, solana_pubkey::Pubkey, solana_runtime::{bank::Bank, bank_forks::SharableBanks}, - stats::Stats, + stats::ConsensusPoolServiceStats, std::{ sync::{ atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, + Arc, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, }, - thiserror::Error, }; -mod stats; - /// Inputs for the certificate pool thread pub(crate) struct ConsensusPoolContext { pub(crate) exit: Arc, - pub(crate) start: Arc<(Mutex, Condvar)>, + pub(crate) migration_status: Arc, pub(crate) cluster_info: Arc, pub(crate) my_vote_pubkey: Pubkey, @@ -50,43 +51,26 @@ pub(crate) struct ConsensusPoolContext { // TODO: for now we ingest our own votes into the certificate pool // just like regular votes. However do we need to convert // Vote -> BLSMessage -> Vote? 
- // consider adding a separate pathway in consensus_pool.add_transaction for ingesting own votes + // consider adding a separate pathway in consensus_pool.add_message() for ingesting own votes pub(crate) consensus_message_receiver: Receiver, pub(crate) bls_sender: Sender, pub(crate) event_sender: VotorEventSender, pub(crate) commitment_sender: Sender, - - pub(crate) delta_standstill: Duration, } pub(crate) struct ConsensusPoolService { t_ingest: JoinHandle<()>, } -#[derive(Debug, Error)] -enum ServiceError { - #[error("Failed to add message into the consensus pool: {0}")] - AddMessage(#[from] AddMessageError), - #[error("Channel {0} disconnected")] - ChannelDisconnected(String), - #[error("Channel is full")] - ChannelFull, - #[error("Failed to add block event: {0}")] - FailedToAddBlockEvent(String), -} - impl ConsensusPoolService { - pub(crate) fn new(mut ctx: ConsensusPoolContext) -> Self { + pub(crate) fn new(ctx: ConsensusPoolContext) -> Self { let t_ingest = Builder::new() - .name("solCnsPoolIngst".to_string()) + .name("solCertPoolIngest".to_string()) .spawn(move || { - info!("ConsensusPoolService has started"); - if let Err(e) = Self::consensus_pool_ingest_loop(&mut ctx) { - ctx.exit.store(true, Ordering::Relaxed); - error!("ConsensusPoolService exited with error: {e}"); + if let Err(e) = Self::consensus_pool_ingest_loop(ctx) { + info!("Certificate pool service exited: {e:?}. 
Shutting down"); } - info!("ConsensusPoolService has stopped"); }) .unwrap(); @@ -100,16 +84,16 @@ impl ConsensusPoolService { new_finalized_slot: Option, new_certificates_to_send: Vec>, standstill_timer: &mut Instant, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { // If we have a new finalized slot, update the root and send new certificates if new_finalized_slot.is_some() { // Reset standstill timer *standstill_timer = Instant::now(); stats.new_finalized_slot += 1; } - let root_bank = sharable_banks.root(); - consensus_pool.prune_old_state(root_bank.slot()); + let bank = sharable_banks.root(); + consensus_pool.prune_old_state(bank.slot()); stats.prune_old_state_called += 1; // Send new certificates to peers Self::send_certificates(bls_sender, new_certificates_to_send, stats) @@ -117,25 +101,26 @@ impl ConsensusPoolService { fn send_certificates( bls_sender: &Sender, - certs: Vec>, - stats: &mut Stats, - ) -> Result<(), ServiceError> { - let certs_len = certs.len(); - for (i, certificate) in certs.into_iter().enumerate() { - // The BLS cert channel is expected to be large enough, so we don't - // handle certificate re-send here. - match bls_sender.try_send(BLSOp::PushCertificate { certificate }) { - Ok(()) => { + certificates_to_send: Vec>, + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { + for (i, certificate) in certificates_to_send.iter().enumerate() { + // The buffer should normally be large enough, so we don't handle + // certificate re-send here. 
+ match bls_sender.try_send(BLSOp::PushCertificate { + certificate: certificate.clone(), + }) { + Ok(_) => { stats.certificates_sent += 1; } Err(TrySendError::Disconnected(_)) => { - return Err(ServiceError::ChannelDisconnected( + return Err(AddVoteError::ChannelDisconnected( "VotingService".to_string(), )); } Err(TrySendError::Full(_)) => { - stats.certificates_dropped += certs_len.saturating_sub(i); - return Err(ServiceError::ChannelFull); + stats.certificates_dropped += certificates_to_send.len().saturating_sub(i); + return Err(AddVoteError::VotingServiceQueueFull); } } } @@ -149,8 +134,8 @@ impl ConsensusPoolService { consensus_pool: &mut ConsensusPool, events: &mut Vec, standstill_timer: &mut Instant, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { match message { ConsensusMessage::Certificate(_) => { stats.received_certificates += 1; @@ -181,55 +166,97 @@ impl ConsensusPoolService { ) } - // Main loop for the consensus pool service. Only exits when signalled or if - // any channel is disconnected. - fn consensus_pool_ingest_loop(ctx: &mut ConsensusPoolContext) -> Result<(), ServiceError> { + fn handle_channel_disconnected( + ctx: &mut ConsensusPoolContext, + channel_name: &str, + ) -> Result<(), ()> { + info!( + "{}: {} disconnected. 
Exiting", + ctx.cluster_info.id(), + channel_name + ); + ctx.exit.store(true, Ordering::Relaxed); + Err(()) + } + + // Main loop for the certificate pool service, it only exits when any channel is disconnected + fn consensus_pool_ingest_loop(mut ctx: ConsensusPoolContext) -> Result<(), ()> { let mut events = vec![]; let mut my_pubkey = ctx.cluster_info.id(); let root_bank = ctx.sharable_banks.root(); - let mut consensus_pool = - ConsensusPool::new_from_root_bank(ctx.cluster_info.clone(), &root_bank); - // Wait until migration has completed - info!("{my_pubkey}: Consensus pool loop initialized, waiting for Alpenglow migration"); - Votor::wait_for_migration_or_exit(&ctx.exit, &ctx.start); - info!("{my_pubkey}: Consensus pool loop starting"); + // Unlike the other votor threads, consensus pool starts even before alpenglow is enabled + // As it is required to track the Genesis Vote. + let mut consensus_pool = if ctx.migration_status.is_alpenglow_enabled() { + ConsensusPool::new_from_root_bank(my_pubkey, &root_bank) + } else { + ConsensusPool::new_from_root_bank_pre_migration( + my_pubkey, + &root_bank, + ctx.migration_status.clone(), + ) + }; - let mut stats = Stats::default(); + info!("{}: Certificate pool loop starting", &my_pubkey); + let mut stats = ConsensusPoolServiceStats::new(); + let mut highest_parent_ready = root_bank.slot(); // Standstill tracking let mut standstill_timer = Instant::now(); // Kick off parent ready - let root_block = (root_bank.slot(), root_bank.block_id().unwrap_or_default()); - let mut highest_parent_ready = root_bank.slot(); - events.push(VotorEvent::ParentReady { - slot: root_bank.slot().checked_add(1).unwrap(), - parent_block: root_block, - }); + let mut kick_off_parent_ready = false; - // Ingest votes into consensus pool and notify voting loop of new events + // Ingest votes into certificate pool and notify voting loop of new events while !ctx.exit.load(Ordering::Relaxed) { // Update the current pubkey if it has changed let new_pubkey 
= ctx.cluster_info.id(); if my_pubkey != new_pubkey { my_pubkey = new_pubkey; - info!("Consensus pool pubkey updated to {my_pubkey}"); + consensus_pool.update_pubkey(my_pubkey); + warn!("Certificate pool pubkey updated to {my_pubkey}"); + } + + // Kick off parent ready event, this either happens: + // - When we first migrate to alpenglow from TowerBFT - kick off with genesis block + // - If we startup post alpenglow migration - kick off with root block + if !kick_off_parent_ready && ctx.migration_status.is_alpenglow_enabled() { + let genesis_block = ctx + .migration_status + .genesis_block() + .expect("Alpenglow is enabled"); + let root_bank = ctx.sharable_banks.root(); + // can expect once we have block id in snapshots (SIMD-0333) + let root_block = (root_bank.slot(), root_bank.block_id().unwrap_or_default()); + let kick_off_block @ (kick_off_slot, _) = genesis_block.max(root_block); + let start_slot = kick_off_slot.checked_add(1).unwrap(); + + events.push(VotorEvent::ParentReady { + slot: start_slot, + parent_block: kick_off_block, + }); + highest_parent_ready = start_slot; + kick_off_parent_ready = true; } Self::add_produce_block_event( &mut highest_parent_ready, &consensus_pool, &my_pubkey, - ctx, + &mut ctx, &mut events, &mut stats, - )?; + ); - if standstill_timer.elapsed() > ctx.delta_standstill { - events.push(VotorEvent::Standstill( - consensus_pool.highest_finalized_slot(), - )); + if standstill_timer.elapsed() > DELTA_STANDSTILL { + // No reason to pollute channel with Standstill before the + // migration is complete. We still need standstill to refresh the + // Genesis cert though. 
+ if kick_off_parent_ready { + events.push(VotorEvent::Standstill( + consensus_pool.highest_finalized_slot(), + )); + } stats.standstill = true; standstill_timer = Instant::now(); match Self::send_certificates( @@ -238,8 +265,8 @@ impl ConsensusPoolService { &mut stats, ) { Ok(()) => (), - Err(ServiceError::ChannelDisconnected(channel_name)) => { - return Err(ServiceError::ChannelDisconnected(channel_name)); + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()); } Err(e) => { trace!("{my_pubkey}: unable to push standstill certificates into pool {e}"); @@ -247,29 +274,27 @@ impl ConsensusPoolService { } } - events + if events .drain(..) .try_for_each(|event| ctx.event_sender.send(event)) - .map_err(|_| { - ServiceError::ChannelDisconnected("Votor event receiver".to_string()) - })?; - - let consensus_message_receiver = ctx.consensus_message_receiver.clone(); - let messages = match consensus_message_receiver.recv_timeout(Duration::from_secs(1)) { - Ok(first_message) => { - std::iter::once(first_message).chain(consensus_message_receiver.try_iter()) - } - Err(RecvTimeoutError::Timeout) => continue, - Err(RecvTimeoutError::Disconnected) => { - return Err(ServiceError::ChannelDisconnected( - "BLS receiver".to_string(), - )); - } + .is_err() + { + return Self::handle_channel_disconnected(&mut ctx, "Votor event receiver"); + } + + let messages: Vec = select! 
{ + recv(ctx.consensus_message_receiver) -> msg => { + let Ok(first) = msg else { + return Self::handle_channel_disconnected(&mut ctx, "BLS receiver"); + }; + std::iter::once(first).chain(ctx.consensus_message_receiver.try_iter()).collect() + }, + default(Duration::from_secs(1)) => continue }; for message in messages { match Self::process_consensus_message( - ctx, + &mut ctx, &my_pubkey, message, &mut consensus_pool, @@ -278,11 +303,12 @@ impl ConsensusPoolService { &mut stats, ) { Ok(()) => {} - Err(ServiceError::ChannelDisconnected(n)) => { - return Err(ServiceError::ChannelDisconnected(n)); + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()) } Err(e) => { - warn!("{my_pubkey}: process_consensus_message() failed with {e}"); + // This is a non critical error, a duplicate vote for example + trace!("{}: unable to push vote into pool {}", &my_pubkey, e); stats.add_message_failed += 1; } } @@ -293,10 +319,9 @@ impl ConsensusPoolService { Ok(()) } - /// Adds a message to the consensus pool and updates the commitment cache if necessary + /// Adds a vote to the certificate pool and updates the commitment cache if necessary /// - /// Returns any newly finalized slot as well as any new certificates to broadcast out. - /// Returns error if consensus message could not be added to the pool. 
+ /// If a new finalization slot was recognized, returns the slot fn add_message_and_maybe_update_commitment( root_bank: &Bank, my_pubkey: &Pubkey, @@ -305,7 +330,7 @@ impl ConsensusPoolService { consensus_pool: &mut ConsensusPool, votor_events: &mut Vec, commitment_sender: &Sender, - ) -> Result<(Option, Vec>), ServiceError> { + ) -> Result<(Option, Vec>), AddVoteError> { let (new_finalized_slot, new_certificates_to_send) = consensus_pool.add_message( root_bank.epoch_schedule(), root_bank.epoch_stakes_map(), @@ -322,12 +347,7 @@ impl ConsensusPoolService { CommitmentType::Finalized, new_finalized_slot, commitment_sender, - ) - .map_err(|e| match e { - CommitmentError::ChannelDisconnected => { - ServiceError::ChannelDisconnected("CommitmentSender".to_string()) - } - })?; + )?; Ok((Some(new_finalized_slot), new_certificates_to_send)) } @@ -337,8 +357,8 @@ impl ConsensusPoolService { my_pubkey: &Pubkey, ctx: &mut ConsensusPoolContext, events: &mut Vec, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) { let Some(new_highest_parent_ready) = events .iter() .filter_map(|event| match event { @@ -348,11 +368,11 @@ impl ConsensusPoolService { .max() .copied() else { - return Ok(()); + return; }; if new_highest_parent_ready <= *highest_parent_ready { - return Ok(()); + return; } *highest_parent_ready = new_highest_parent_ready; @@ -361,14 +381,16 @@ impl ConsensusPoolService { .leader_schedule_cache .slot_leader_at(*highest_parent_ready, Some(&root_bank)) else { - return Err(ServiceError::FailedToAddBlockEvent(format!( + error!( "Unable to compute the leader at slot {highest_parent_ready}. 
Something is wrong, \ exiting" - ))); + ); + ctx.exit.store(true, Ordering::Relaxed); + return; }; if &leader_pubkey != my_pubkey { - return Ok(()); + return; } let start_slot = *highest_parent_ready; @@ -379,7 +401,7 @@ impl ConsensusPoolService { "{my_pubkey}: We have already produced shreds in the window \ {start_slot}-{end_slot}, skipping production of our leader window" ); - return Ok(()); + return; } match consensus_pool @@ -395,22 +417,23 @@ impl ConsensusPoolService { } BlockProductionParent::ParentNotReady => { // This can't happen, place holder depending on how we hook up optimistic - return Err(ServiceError::FailedToAddBlockEvent( - "Must have a block production parent".to_string(), - )); + ctx.exit.store(true, Ordering::Relaxed); + panic!( + "Must have a block production parent: {:#?}", + consensus_pool.parent_ready_tracker + ); } BlockProductionParent::Parent(parent_block) => { events.push(VotorEvent::ProduceWindow(LeaderWindowInfo { start_slot, end_slot, parent_block, + // TODO: we can just remove this skip_timer: Instant::now(), })); stats.parent_ready_produce_window += 1; } } - - Ok(()) } pub(crate) fn join(self) -> thread::Result<()> { @@ -422,7 +445,6 @@ impl ConsensusPoolService { mod tests { use { super::*, - crate::common::DELTA_STANDSTILL, agave_votor_messages::{ consensus_message::{CertificateType, VoteMessage, BLS_KEYPAIR_DERIVE_SEED}, vote::Vote, @@ -442,7 +464,7 @@ mod tests { }, }, solana_signer::Signer, - std::sync::{Arc, Mutex}, + std::sync::Arc, test_case::test_case, }; @@ -458,7 +480,7 @@ mod tests { sharable_banks: SharableBanks, } - fn setup(delta_standstill: Option) -> ConsensusPoolServiceTestComponents { + fn setup() -> ConsensusPoolServiceTestComponents { let (consensus_message_sender, consensus_message_receiver) = crossbeam_channel::unbounded(); let (bls_sender, bls_receiver) = crossbeam_channel::unbounded(); let (event_sender, event_receiver) = crossbeam_channel::unbounded(); @@ -495,7 +517,7 @@ mod tests { 
Arc::new(LeaderScheduleCache::new_from_bank(&sharable_banks.root())); let ctx = ConsensusPoolContext { exit: exit.clone(), - start: Arc::new((Mutex::new(true), Condvar::new())), + migration_status: Arc::new(MigrationStatus::post_migration_status()), cluster_info: Arc::new(cluster_info), my_vote_pubkey: Pubkey::new_unique(), blockstore: Arc::new(blockstore), @@ -505,7 +527,6 @@ mod tests { bls_sender, event_sender, commitment_sender, - delta_standstill: delta_standstill.unwrap_or(DELTA_STANDSTILL), }; ConsensusPoolServiceTestComponents { consensus_pool_service: ConsensusPoolService::new(ctx), @@ -524,8 +545,8 @@ mod tests { let start = Instant::now(); let mut event_received = false; while start.elapsed() < Duration::from_secs(5) { - let res = receiver.recv_timeout(Duration::from_millis(500)); - if let Ok(event) = res { + let recv = receiver.recv_timeout(Duration::from_millis(500)); + if let Ok(event) = recv { if condition(&event) { event_received = true; break; @@ -550,7 +571,7 @@ mod tests { #[test] fn test_receive_and_send_consensus_message() { agave_logger::setup(); - let setup_result = setup(None); + let setup_result = setup(); // validator 0 to 7 send Notarize on slot 2 let block_id = Hash::new_unique(); @@ -576,13 +597,14 @@ mod tests { |event| { if let BLSOp::PushCertificate { certificate } = event { assert_eq!(certificate.cert_type.slot(), target_slot); - let certificate_type = certificate.cert_type; - assert!(matches!( - certificate_type, - CertificateType::Notarize(_, _) - | CertificateType::FinalizeFast(_, _) - | CertificateType::NotarizeFallback(_, _) - )); + assert!( + matches!(certificate.cert_type, CertificateType::Notarize(_, _)) + || matches!(certificate.cert_type, CertificateType::FinalizeFast(_, _)) + || matches!( + certificate.cert_type, + CertificateType::NotarizeFallback(_, _) + ) + ); true } else { false @@ -639,7 +661,7 @@ mod tests { #[test] fn test_send_produce_block_event() { - let setup_result = setup(None); + let setup_result = setup(); 
// Find when is the next leader slot for me (validator 0) let my_pubkey = setup_result.validator_keypairs[0].node_keypair.pubkey(); let next_leader_slot = setup_result @@ -682,9 +704,9 @@ mod tests { #[test] fn test_send_standstill() { - let delta_standstill_for_test = Duration::from_millis(100); - let setup_result = setup(Some(delta_standstill_for_test)); - thread::sleep(delta_standstill_for_test); + let setup_result = setup(); + // Do nothing for a little more than DELTA_STANDSTILL + thread::sleep(DELTA_STANDSTILL + Duration::from_millis(100)); // Verify that we received a standstill event wait_for_event( &setup_result.event_receiver, @@ -699,7 +721,7 @@ mod tests { #[test_case("votor_event_receiver")] #[test_case("commitment_receiver")] fn test_channel_disconnection(channel_name: &str) { - let setup_result = setup(None); + let setup_result = setup(); // A lot of the receiver needs a finalize certificate to trigger an exit if channel_name != "consensus_message_receiver" { let finalize_certificate = Certificate { diff --git a/votor/src/consensus_pool_service/stats.rs b/votor/src/consensus_pool_service/stats.rs index cc46229767a0ec..0f9948330c131d 100644 --- a/votor/src/consensus_pool_service/stats.rs +++ b/votor/src/consensus_pool_service/stats.rs @@ -6,9 +6,10 @@ use { }, }; -const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(1); +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); -pub(super) struct Stats { +#[derive(Debug)] +pub(super) struct ConsensusPoolServiceStats { pub(super) add_message_failed: Saturating, pub(super) certificates_sent: Saturating, pub(super) certificates_dropped: Saturating, @@ -22,8 +23,8 @@ pub(super) struct Stats { last_request_time: Instant, } -impl Default for Stats { - fn default() -> Self { +impl ConsensusPoolServiceStats { + pub fn new() -> Self { Self { add_message_failed: Saturating(0), certificates_sent: Saturating(0), @@ -38,9 +39,7 @@ impl Default for Stats { last_request_time: Instant::now(), } } -} 
-impl Stats { fn report(&self) { let &Self { add_message_failed: Saturating(add_message_failed), @@ -53,7 +52,7 @@ impl Stats { received_certificates: Saturating(received_certificates), standstill, prune_old_state_called: Saturating(prune_old_state_called), - last_request_time: _, + .. } = self; datapoint_info!( "consensus_pool_service", @@ -73,7 +72,7 @@ impl Stats { ), ("received_votes", received_votes, i64), ("received_certificates", received_certificates, i64), - ("entered_standstill_bool", standstill, bool), + ("in_standstill_bool", standstill, bool), ("prune_old_state_called", prune_old_state_called, i64), ); } @@ -81,7 +80,7 @@ impl Stats { pub(super) fn maybe_report(&mut self) { if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { self.report(); - *self = Self::default(); + *self = Self::new(); } } } diff --git a/votor/src/event_handler.rs b/votor/src/event_handler.rs index 8166fcfea30e5e..7f28ce11eecaac 100644 --- a/votor/src/event_handler.rs +++ b/votor/src/event_handler.rs @@ -1,20 +1,21 @@ //! Handles incoming VotorEvents to take action or //! 
notify block creation loop + use { crate::{ commitment::{update_commitment_cache, CommitmentType}, - consensus_metrics::{ConsensusMetricsEvent, ConsensusMetricsEventSender}, + consensus_metrics::ConsensusMetricsEvent, event::{CompletedBlock, VotorEvent, VotorEventReceiver}, event_handler::stats::EventHandlerStats, - root_utils::{self, RootContext, SetRootError}, + root_utils::{self, RootContext}, timer_manager::TimerManager, vote_history::{VoteHistory, VoteHistoryError}, voting_service::BLSOp, voting_utils::{generate_vote_message, VoteError, VotingContext}, - votor::{SharedContext, Votor}, + votor::SharedContext, }, - agave_votor_messages::{consensus_message::Block, vote::Vote}, - crossbeam_channel::{RecvTimeoutError, TrySendError}, + agave_votor_messages::{consensus_message::Block, migration::MigrationStatus, vote::Vote}, + crossbeam_channel::{select, RecvError, SendError}, parking_lot::RwLock, solana_clock::Slot, solana_hash::Hash, @@ -29,7 +30,7 @@ use { collections::{BTreeMap, BTreeSet}, sync::{ atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, + Arc, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, @@ -46,7 +47,7 @@ pub(crate) type PendingBlocks = BTreeMap>; /// Inputs for the event handler thread pub(crate) struct EventHandlerContext { pub(crate) exit: Arc, - pub(crate) start: Arc<(Mutex, Condvar)>, + pub(crate) migration_status: Arc, pub(crate) event_receiver: VotorEventReceiver, pub(crate) timer_manager: Arc>, @@ -60,19 +61,16 @@ pub(crate) struct EventHandlerContext { #[derive(Debug, Error)] enum EventLoopError { #[error("Receiver is disconnected")] - ReceiverDisconnected(#[from] RecvTimeoutError), + ReceiverDisconnected(#[from] RecvError), #[error("Sender is disconnected")] - SenderDisconnected, + SenderDisconnected(#[from] SendError<()>), #[error("Error generating and inserting vote")] VotingError(#[from] VoteError), #[error("Set identity error")] SetIdentityError(#[from] VoteHistoryError), - - #[error("Set root error: {0}")] - 
SetRoot(#[from] SetRootError), } pub(crate) struct EventHandler { @@ -93,12 +91,10 @@ impl EventHandler { let t_event_handler = Builder::new() .name("solVotorEvLoop".to_string()) .spawn(move || { - info!("EventHandler has started"); if let Err(e) = Self::event_loop(ctx) { + info!("Event loop exited: {e:?}. Shutting down"); exit.store(true, Ordering::Relaxed); - error!("EventHandler exited with error: {e}"); } - info!("EventHandler has stopped"); }) .unwrap(); @@ -108,7 +104,7 @@ impl EventHandler { fn event_loop(context: EventHandlerContext) -> Result<(), EventLoopError> { let EventHandlerContext { exit, - start, + migration_status, event_receiver, timer_manager, shared_context: ctx, @@ -125,21 +121,41 @@ impl EventHandler { // Wait until migration has completed info!("{}: Event loop initialized", local_context.my_pubkey); - Votor::wait_for_migration_or_exit(&exit, &start); - info!("{}: Event loop starting", local_context.my_pubkey); + let Some(genesis_block) = migration_status.wait_for_migration_or_exit(&exit) else { + // Exited during migration + return Ok(()); + }; + let root_slot = vctx.sharable_banks.root().slot(); + info!( + "{}: Event loop starting genesis {genesis_block:?} root {root_slot}", + local_context.my_pubkey + ); + + // Check for set identity + if let Err(e) = Self::handle_set_identity(&mut local_context.my_pubkey, &ctx, &mut vctx) { + error!( + "Unable to load new vote history when attempting to change identity at startup \ + from {} to {} on voting loop startup, Exiting: {}", + vctx.vote_history.node_pubkey, + ctx.cluster_info.id(), + e + ); + return Err(EventLoopError::SetIdentityError(e)); + } while !exit.load(Ordering::Relaxed) { let mut receive_event_time = Measure::start("receive_event"); - let event = match event_receiver.recv_timeout(Duration::from_secs(1)) { - Ok(event) => event, - Err(RecvTimeoutError::Timeout) => continue, - Err(e) => return Err(EventLoopError::ReceiverDisconnected(e)), + let event = select! 
{ + recv(event_receiver) -> msg => { + msg? + }, + default(Duration::from_secs(1)) => continue }; receive_event_time.stop(); local_context.stats.receive_event_time_us = local_context .stats .receive_event_time_us - .saturating_add(receive_event_time.as_us()); + .saturating_add(receive_event_time.as_us() as u32); let root_bank = vctx.sharable_banks.root(); if event.should_ignore(root_bank.slot()) { @@ -165,15 +181,13 @@ impl EventHandler { let mut send_votes_batch_time = Measure::start("send_votes_batch"); for vote in votes { local_context.stats.incr_vote(&vote); - vctx.bls_sender - .send(vote) - .map_err(|_| EventLoopError::SenderDisconnected)?; + vctx.bls_sender.send(vote).map_err(|_| SendError(()))?; } send_votes_batch_time.stop(); local_context.stats.send_votes_batch_time_us = local_context .stats .send_votes_batch_time_us - .saturating_add(send_votes_batch_time.as_us()); + .saturating_add(send_votes_batch_time.as_us() as u32); local_context.stats.maybe_report(); } @@ -197,12 +211,8 @@ impl EventHandler { timer_manager.write().set_timeouts(slot); local_context.stats.timeout_set = local_context.stats.timeout_set.saturating_add(1); } - let mut highest_parent_ready = ctx - .leader_window_notifier - .highest_parent_ready - .write() - .unwrap(); + let mut highest_parent_ready = ctx.highest_parent_ready.write().unwrap(); let (current_slot, _) = *highest_parent_ready; if slot > current_slot { @@ -211,24 +221,6 @@ impl EventHandler { Ok(()) } - fn send_to_metrics( - consensus_metrics_sender: &ConsensusMetricsEventSender, - consensus_metrics_events: Vec, - stats: &mut EventHandlerStats, - ) -> Result<(), EventLoopError> { - // Do not kill or block event handler threads just because metrics - // send failed (maybe because the queue is full). 
- match consensus_metrics_sender.try_send((Instant::now(), consensus_metrics_events)) { - Ok(()) => Ok(()), - Err(TrySendError::Disconnected(_)) => Err(EventLoopError::SenderDisconnected), - Err(TrySendError::Full(_)) => { - warn!("send_to_metrics failed: queue is full"); - stats.metrics_queue_became_full = true; - Ok(()) - } - } - } - fn handle_event( event: VotorEvent, timer_manager: &RwLock, @@ -238,22 +230,22 @@ impl EventHandler { local_context: &mut LocalContext, ) -> Result, EventLoopError> { let mut votes = vec![]; - let LocalContext { - my_pubkey, - pending_blocks, - finalized_blocks, - received_shred, - stats, + let &mut LocalContext { + ref mut my_pubkey, + ref mut pending_blocks, + ref mut finalized_blocks, + ref mut received_shred, + ref mut stats, } = local_context; match event { // Block has completed replay VotorEvent::Block(CompletedBlock { slot, bank }) => { debug_assert!(bank.is_frozen()); + let now = Instant::now(); let mut consensus_metrics_events = vec![ConsensusMetricsEvent::StartOfSlot { slot }]; if slot == first_of_consecutive_leader_slots(slot) { - // all slots except the first in the window would typically start when - // the block is seen so the recording would essentially record 0. + // all slots except the first in the window would typically start when the block is seen so the recording would essentially record 0. // hence we skip it. 
consensus_metrics_events.push(ConsensusMetricsEvent::BlockHashSeen { leader: *bank.leader_id(), @@ -263,11 +255,9 @@ impl EventHandler { consensus_metrics_events.push(ConsensusMetricsEvent::MaybeNewEpoch { epoch: bank.epoch(), }); - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - consensus_metrics_events, - stats, - )?; + vctx.consensus_metrics_sender + .send((now, consensus_metrics_events)) + .map_err(|_| SendError(()))?; let (block, parent_block) = Self::get_block_parent_block(&bank); info!("{my_pubkey}: Block {block:?} parent {parent_block:?}"); if Self::try_notar( @@ -294,12 +284,12 @@ impl EventHandler { finalized_blocks, received_shred, stats, - )?; - if let Some((ready_slot, parent_block)) = + ); + if let Some(parent_block) = Self::add_missing_parent_ready(block, ctx, vctx, local_context) { Self::handle_parent_ready_event( - ready_slot, + slot, parent_block, vctx, ctx, @@ -324,11 +314,12 @@ impl EventHandler { // Received a parent ready notification for `slot` VotorEvent::ParentReady { slot, parent_block } => { - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - vec![ConsensusMetricsEvent::StartOfSlot { slot }], - stats, - )?; + vctx.consensus_metrics_sender + .send(( + Instant::now(), + vec![ConsensusMetricsEvent::StartOfSlot { slot }], + )) + .map_err(|_| SendError(()))?; Self::handle_parent_ready_event( slot, parent_block, @@ -352,13 +343,14 @@ impl EventHandler { VotorEvent::Timeout(slot) => { info!("{my_pubkey}: Timeout {slot}"); if slot != last_of_consecutive_leader_slots(slot) { - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - vec![ConsensusMetricsEvent::StartOfSlot { - slot: slot.saturating_add(1), - }], - stats, - )?; + vctx.consensus_metrics_sender + .send(( + Instant::now(), + vec![ConsensusMetricsEvent::StartOfSlot { + slot: slot.saturating_add(1), + }], + )) + .map_err(|_| SendError(()))?; } if vctx.vote_history.voted(slot) { return Ok(votes); @@ -404,21 +396,7 @@ impl EventHandler { // It is time to produce our 
leader window VotorEvent::ProduceWindow(window_info) => { info!("{my_pubkey}: ProduceWindow {window_info:?}"); - let mut l_window_info = ctx.leader_window_notifier.window_info.lock().unwrap(); - if let Some(old_window_info) = l_window_info.as_ref() { - stats.leader_window_replaced = stats.leader_window_replaced.saturating_add(1); - error!( - "{my_pubkey}: Attempting to start leader window for {}-{}, however there \ - is already a pending window to produce {}-{}. Our production is lagging, \ - discarding in favor of the newer window", - window_info.start_slot, - window_info.end_slot, - old_window_info.start_slot, - old_window_info.end_slot, - ); - } - *l_window_info = Some(window_info); - ctx.leader_window_notifier.window_notification.notify_one(); + ctx.leader_window_info_sender.send(window_info).unwrap(); } // We have finalized this block consider it for rooting @@ -434,13 +412,13 @@ impl EventHandler { finalized_blocks, received_shred, stats, - )?; - if let Some((slot, block)) = + ); + if let Some(parent_block) = Self::add_missing_parent_ready(block, ctx, vctx, local_context) { Self::handle_parent_ready_event( - slot, - block, + block.0, + parent_block, vctx, ctx, local_context, @@ -495,12 +473,14 @@ impl EventHandler { /// all later slots. So B and C together can keep finalizing the blocks and unstuck the /// cluster. If we get a finalization cert for later slots of the window and we have the /// block replayed, trace back to the first slot of the window and emit parent ready. + /// + /// Returns [`Some(Block)`] of the parent if the parent ready for the `finalized_block` should be added. 
fn add_missing_parent_ready( finalized_block: Block, ctx: &SharedContext, vctx: &mut VotingContext, local_context: &mut LocalContext, - ) -> Option<(Slot, Block)> { + ) -> Option { let (slot, block_id) = finalized_block; let first_slot_of_window = first_of_consecutive_leader_slots(slot); if first_slot_of_window == slot || first_slot_of_window == 0 { @@ -537,7 +517,7 @@ impl EventHandler { {parent_block_id}", local_context.my_pubkey ); - Some((slot, (parent_slot, parent_block_id))) + Some((parent_slot, parent_block_id)) } fn handle_set_identity( @@ -639,7 +619,7 @@ impl EventHandler { } /// Checks the pending blocks that have completed replay to see if they - /// are eligible to be voted on now + /// are eligble to be voted on now fn check_pending_blocks( my_pubkey: &Pubkey, pending_blocks: &mut PendingBlocks, @@ -750,16 +730,15 @@ impl EventHandler { Ok(()) } - /// Checks if we can set root on a new block. The block must: - /// - Be present in bank forks + /// Checks if we can set root on a new block + /// The block must be: + /// - Present in bank forks /// - Newer than the current root - /// - Already been voted on (bank.slot()) - /// - Have its Bank frozen - /// - Finished shredding - /// - Have a finalization certificate (determined by presence in - /// `finalized_blocks`) + /// - We must have already voted on bank.slot() + /// - Bank is frozen and finished shredding + /// - Block has a finalization certificate /// - /// If so, set root on the highest block that fits these conditions. 
+ /// If so set root on the highest block that fits these conditions fn check_rootable_blocks( my_pubkey: &Pubkey, ctx: &SharedContext, @@ -769,7 +748,7 @@ impl EventHandler { finalized_blocks: &mut BTreeSet, received_shred: &mut BTreeSet, stats: &mut EventHandlerStats, - ) -> Result<(), EventLoopError> { + ) { let bank_forks_r = ctx.bank_forks.read().unwrap(); let old_root = bank_forks_r.root(); let Some(new_root) = finalized_blocks @@ -785,10 +764,10 @@ impl EventHandler { .max() else { // No rootable banks - return Ok(()); + return; }; drop(bank_forks_r); - let set_root_result = root_utils::set_root( + root_utils::set_root( my_pubkey, new_root, ctx, @@ -797,14 +776,8 @@ impl EventHandler { pending_blocks, finalized_blocks, received_shred, - ) - .map_err(EventLoopError::SetRoot); - - if set_root_result.is_ok() { - stats.set_root(new_root) - } - - set_root_result + ); + stats.set_root(new_root); } pub(crate) fn join(self) -> thread::Result<()> { @@ -825,13 +798,12 @@ mod tests { VoteHistoryStorage, }, voting_service::BLSOp, - votor::LeaderWindowNotifier, }, agave_votor_messages::{ consensus_message::{ConsensusMessage, VoteMessage, BLS_KEYPAIR_DERIVE_SEED}, vote::Vote, }, - crossbeam_channel::{bounded, Receiver, TryRecvError}, + crossbeam_channel::{unbounded, Receiver, TryRecvError}, parking_lot::RwLock as PlRwLock, solana_bls_signatures::{ keypair::Keypair as BLSKeypair, signature::Signature as BLSSignature, @@ -861,14 +833,14 @@ mod tests { }; struct EventHandlerTestContext { - exit: Arc, bls_receiver: Receiver, commitment_receiver: Receiver, own_vote_receiver: Receiver, bank_forks: Arc>, my_bls_keypair: BLSKeypair, timer_manager: Arc>, - leader_window_notifier: Arc, + leader_window_info_receiver: Receiver, + highest_parent_ready: Arc>, drop_bank_receiver: Receiver>, cluster_info: Arc, consensus_metrics_receiver: ConsensusMetricsEventReceiver, @@ -879,118 +851,120 @@ mod tests { bls_ops: Vec, } - impl EventHandlerTestContext { - fn setup() -> 
EventHandlerTestContext { - // For tests, we just make each queue bounded at 100, should be enough. - let (bls_sender, bls_receiver) = bounded(100); - let (commitment_sender, commitment_receiver) = bounded(100); - let (own_vote_sender, own_vote_receiver) = bounded(100); - let (drop_bank_sender, drop_bank_receiver) = bounded(100); - let exit = Arc::new(AtomicBool::new(false)); - let (event_sender, _event_receiver) = bounded(100); - let (consensus_metrics_sender, consensus_metrics_receiver) = bounded(100); - let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( - event_sender.clone(), - exit.clone(), - ))); - - // Create 10 node validatorvotekeypairs vec - let validator_keypairs = (0..10) - .map(|_| ValidatorVoteKeypairs::new(Keypair::new(), Keypair::new(), Keypair::new())) - .collect::>(); - let stakes = (0..validator_keypairs.len()) - .rev() - .map(|i| 100_u64.saturating_add(i as u64)) - .collect::>(); - let genesis = create_genesis_config_with_alpenglow_vote_accounts( - 1_000_000_000, - &validator_keypairs, - stakes, - ); - let my_index = 0; - let my_node_keypair = validator_keypairs[my_index].node_keypair.insecure_clone(); - let my_vote_keypair = validator_keypairs[my_index].vote_keypair.insecure_clone(); - let my_bls_keypair = - BLSKeypair::derive_from_signer(&my_vote_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); - let bank0 = Bank::new_for_tests(&genesis.genesis_config); - let bank_forks = BankForks::new_rw_arc(bank0); - let contact_info = ContactInfo::new_localhost(&my_node_keypair.pubkey(), 0); - let cluster_info = Arc::new(ClusterInfo::new( - contact_info, - Arc::new(my_node_keypair.insecure_clone()), - SocketAddrSpace::Unspecified, - )); - let blockstore = Arc::new( - Blockstore::open_with_options( - &get_tmp_ledger_path!(), - BlockstoreOptions::default_for_tests(), - ) - .unwrap(), - ); - - let leader_window_notifier = Arc::new(LeaderWindowNotifier::default()); - let shared_context = SharedContext { - cluster_info: cluster_info.clone(), - bank_forks: 
bank_forks.clone(), - vote_history_storage: Arc::new(FileVoteHistoryStorage::default()), - leader_window_notifier: leader_window_notifier.clone(), - blockstore, - rpc_subscriptions: None, - }; + fn setup() -> EventHandlerTestContext { + let (bls_sender, bls_receiver) = unbounded(); + let (commitment_sender, commitment_receiver) = unbounded(); + let (own_vote_sender, own_vote_receiver) = unbounded(); + let (drop_bank_sender, drop_bank_receiver) = unbounded(); + let exit = Arc::new(AtomicBool::new(false)); + let (event_sender, _event_receiver) = unbounded(); + let (consensus_metrics_sender, consensus_metrics_receiver) = unbounded(); + let (leader_window_info_sender, leader_window_info_receiver) = unbounded(); + let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( + event_sender.clone(), + exit.clone(), + Arc::new(MigrationStatus::default()), + ))); + + // Create 10 node validatorvotekeypairs vec + let validator_keypairs = (0..10) + .map(|_| ValidatorVoteKeypairs::new(Keypair::new(), Keypair::new(), Keypair::new())) + .collect::>(); + let stakes = (0..validator_keypairs.len()) + .rev() + .map(|i| 100_u64.saturating_add(i as u64)) + .collect::>(); + let genesis = create_genesis_config_with_alpenglow_vote_accounts( + 1_000_000_000, + &validator_keypairs, + stakes, + ); + let my_index = 0; + let my_node_keypair = validator_keypairs[my_index].node_keypair.insecure_clone(); + let my_vote_keypair = validator_keypairs[my_index].vote_keypair.insecure_clone(); + let my_bls_keypair = + BLSKeypair::derive_from_signer(&my_vote_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); + let bank0 = Bank::new_for_tests(&genesis.genesis_config); + let bank_forks = BankForks::new_rw_arc(bank0); + let contact_info = ContactInfo::new_localhost(&my_node_keypair.pubkey(), 0); + let cluster_info = Arc::new(ClusterInfo::new( + contact_info, + Arc::new(my_node_keypair.insecure_clone()), + SocketAddrSpace::Unspecified, + )); + let blockstore = Arc::new( + Blockstore::open_with_options( + 
&get_tmp_ledger_path!(), + BlockstoreOptions::default_for_tests(), + ) + .unwrap(), + ); + let highest_parent_ready = Arc::new(RwLock::default()); + + let shared_context = SharedContext { + cluster_info: cluster_info.clone(), + bank_forks: bank_forks.clone(), + vote_history_storage: Arc::new(FileVoteHistoryStorage::default()), + leader_window_info_sender, + blockstore, + rpc_subscriptions: None, + highest_parent_ready: highest_parent_ready.clone(), + }; - let vote_history = VoteHistory::new(my_node_keypair.pubkey(), 0); - let voting_context = VotingContext { - identity_keypair: Arc::new(my_node_keypair.insecure_clone()), - sharable_banks: bank_forks.read().unwrap().sharable_banks(), - vote_history, - bls_sender, - commitment_sender, - vote_account_pubkey: my_vote_keypair.pubkey(), - wait_to_vote_slot: None, - authorized_voter_keypairs: Arc::new(RwLock::new(vec![Arc::new(my_vote_keypair)])), - derived_bls_keypairs: HashMap::new(), - has_new_vote_been_rooted: false, - own_vote_sender, - consensus_metrics_sender, - }; + let vote_history = VoteHistory::new(my_node_keypair.pubkey(), 0); + let voting_context = VotingContext { + identity_keypair: Arc::new(my_node_keypair.insecure_clone()), + sharable_banks: bank_forks.read().unwrap().sharable_banks(), + vote_history, + bls_sender, + commitment_sender, + vote_account_pubkey: my_vote_keypair.pubkey(), + wait_to_vote_slot: None, + authorized_voter_keypairs: Arc::new(RwLock::new(vec![Arc::new(my_vote_keypair)])), + derived_bls_keypairs: HashMap::new(), + has_new_vote_been_rooted: false, + own_vote_sender, + consensus_metrics_sender, + }; - let root_context = RootContext { - leader_schedule_cache: Arc::new(LeaderScheduleCache::new_from_bank( - &bank_forks.read().unwrap().root_bank(), - )), - snapshot_controller: None, - bank_notification_sender: None, - drop_bank_sender, - }; + let root_context = RootContext { + leader_schedule_cache: Arc::new(LeaderScheduleCache::new_from_bank( + &bank_forks.read().unwrap().root_bank(), + )), 
+ snapshot_controller: None, + bank_notification_sender: None, + drop_bank_sender, + }; - let local_context = LocalContext { - my_pubkey: my_node_keypair.pubkey(), - pending_blocks: BTreeMap::new(), - finalized_blocks: BTreeSet::new(), - received_shred: BTreeSet::new(), - stats: EventHandlerStats::default(), - }; + let local_context = LocalContext { + my_pubkey: my_node_keypair.pubkey(), + pending_blocks: BTreeMap::new(), + finalized_blocks: BTreeSet::new(), + received_shred: BTreeSet::new(), + stats: EventHandlerStats::default(), + }; - EventHandlerTestContext { - exit, - bls_receiver, - commitment_receiver, - own_vote_receiver, - bank_forks, - my_bls_keypair, - timer_manager, - leader_window_notifier, - drop_bank_receiver, - cluster_info, - consensus_metrics_receiver, - shared_context, - voting_context, - root_context, - local_context, - bls_ops: vec![], - } + EventHandlerTestContext { + bls_receiver, + commitment_receiver, + own_vote_receiver, + bank_forks, + my_bls_keypair, + timer_manager, + leader_window_info_receiver, + drop_bank_receiver, + cluster_info, + consensus_metrics_receiver, + highest_parent_ready, + shared_context, + voting_context, + root_context, + local_context, + bls_ops: vec![], } + } + impl EventHandlerTestContext { fn send_parent_ready_event(&mut self, slot: Slot, parent_block: Block) { let mut new_ops = EventHandler::handle_event( VotorEvent::ParentReady { slot, parent_block }, @@ -1225,7 +1199,7 @@ mod tests { assert_eq!(commitment.slot, expected_slot); } - fn check_no_vote_or_commitment(&self) { + fn check_no_vote_or_commitment(&mut self) { assert_eq!( self.bls_receiver.try_recv().err(), Some(TryRecvError::Empty) @@ -1236,24 +1210,17 @@ mod tests { ); } - fn check_parent_ready_slot(&mut self, expected: (Slot, Block)) { - assert_eq!( - *self - .leader_window_notifier - .highest_parent_ready - .read() - .unwrap(), - expected - ); + fn check_parent_ready_slot(&self, expected: (Slot, Block)) { + 
assert_eq!(*self.highest_parent_ready.read().unwrap(), expected); let slot = expected.0; self.check_timeout_set(slot); } - fn check_timeout_set(&mut self, expected_slot: Slot) { + fn check_timeout_set(&self, expected_slot: Slot) { assert!(self.timer_manager.read().is_timeout_set(expected_slot)); } - fn check_for_metrics_event(&mut self, expected: ConsensusMetricsEvent) { + fn check_for_metrics_event(&self, expected: ConsensusMetricsEvent) { let event = self .consensus_metrics_receiver .try_recv() @@ -1261,7 +1228,7 @@ mod tests { assert!(event.1.contains(&expected)); } - fn create_vote_history_storage_and_switch_identity( + fn crate_vote_history_storage_and_switch_identity( &mut self, new_identity: &Keypair, ) -> PathBuf { @@ -1274,6 +1241,7 @@ mod tests { .is_ok()); self.cluster_info .set_keypair(Arc::new(new_identity.insecure_clone())); + self.send_set_identity_event(); file_vote_history_storage.filename(&new_identity.pubkey()) } @@ -1283,7 +1251,7 @@ mod tests { fn test_received_block_event_and_parent_ready_event() { // Test different orders of received block event and parent ready event // some will send Notarize immediately, some will wait for parent ready - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Received block event which says block has completed replay // If there is a parent ready for block 1 Notarization is sent out. 
@@ -1305,7 +1273,6 @@ mod tests { // We should receive Notarize Vote for block 1 test_context.check_for_vote(&Vote::new_notarization_vote(slot, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, slot); - // Add block event for 1 again will not trigger another Notarize or commitment test_context.send_block_event(1, bank1.clone()); test_context.check_no_vote_or_commitment(); @@ -1317,7 +1284,6 @@ mod tests { // Because 2 is middle of window, we should see Notarize vote for block 2 even without parentready test_context.check_for_vote(&Vote::new_notarization_vote(slot, block_id_2)); test_context.check_for_commitment(CommitmentType::Notarize, slot); - // Slot 3 somehow links to block 1, should not trigger Notarize vote because it has a wrong parent (not 2) let _ = test_context.create_block_and_send_block_event(3, bank1.clone()); test_context.check_no_vote_or_commitment(); @@ -1326,7 +1292,6 @@ mod tests { let slot = 4; let bank4 = test_context.create_block_and_send_block_event(slot, bank2.clone()); let block_id_4 = bank4.block_id().unwrap(); - test_context.check_no_vote_or_commitment(); // Send parent ready for slot 4 should trigger Notarize vote for slot 4 test_context.send_parent_ready_event(slot, (2, block_id_2)); @@ -1339,7 +1304,7 @@ mod tests { fn test_received_block_notarized_and_timeout() { // Test block notarized event will trigger Finalize vote when all conditions are met // But it will not trigger Finalize if any of the conditions are not met - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let root_bank = test_context .bank_forks @@ -1355,7 +1320,6 @@ mod tests { test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Send block notarized event should trigger Finalize vote test_context.send_block_notarized_event((1, block_id_1)); 
test_context.check_for_vote(&Vote::new_finalization_vote(1)); @@ -1409,13 +1373,11 @@ mod tests { let bank5 = test_context.create_block_only(slot, bank4.clone()); test_context.send_block_event(slot, bank5.clone()); test_context.check_no_vote_or_commitment(); - - test_context.exit.store(true, Ordering::Relaxed); } #[test] fn test_received_timeout_crashed_leader_and_first_shred() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Simulate a crashed leader for slot 4 test_context.send_timeout_crashed_leader_event(4); @@ -1435,7 +1397,7 @@ mod tests { #[test] fn test_received_safe_to_notar() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // We can theoretically not vote skip here and test will pass, but in real world // safe_to_notar event only fires after we voted skip for the whole window @@ -1448,6 +1410,7 @@ mod tests { let bank_1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1_old = bank_1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1_old)); test_context.check_for_commitment(CommitmentType::Notarize, 1); @@ -1478,7 +1441,7 @@ mod tests { #[test] fn test_received_safe_to_skip() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // The safe_to_skip event only fires after we voted notarize for the slot let root_bank = test_context @@ -1490,10 +1453,10 @@ mod tests { let bank_1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1 = bank_1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); 
test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Now we got safe_to_skip event for slot 1 test_context.send_safe_to_skip_event(1); // We should see rest of the window skipped @@ -1509,46 +1472,35 @@ mod tests { #[test] fn test_received_produce_window() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Produce a full window of blocks // Assume the leader for 1-3 is us, send produce window event test_context.send_produce_window_event(1, 3, (0, Hash::default())); - // Check that leader_window_notifier is updated - let mut guard = test_context - .leader_window_notifier - .window_info - .lock() - .unwrap(); - let received_leader_window_info = guard.take().unwrap(); + // Check that leader_window_info is sent via channel + let received_leader_window_info = + test_context.leader_window_info_receiver.try_recv().unwrap(); assert_eq!(received_leader_window_info.start_slot, 1); assert_eq!(received_leader_window_info.end_slot, 3); assert_eq!( received_leader_window_info.parent_block, (0, Hash::default()) ); - drop(guard); // Suddenly I found out I produced block 1 already, send new produce window event let block_id_1 = Hash::new_unique(); test_context.send_produce_window_event(2, 3, (1, block_id_1)); - let mut guard = test_context - .leader_window_notifier - .window_info - .lock() - .unwrap(); - let received_leader_window_info = guard.take().unwrap(); + let received_leader_window_info = + test_context.leader_window_info_receiver.try_recv().unwrap(); assert_eq!(received_leader_window_info.start_slot, 2); assert_eq!(received_leader_window_info.end_slot, 3); assert_eq!(received_leader_window_info.parent_block, (1, block_id_1)); - drop(guard); } #[test] fn test_received_finalized() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let root_bank = test_context .bank_forks @@ -1560,10 +1512,10 @@ mod tests { let block_id_1 = 
bank1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Now we got finalized event for slot 1 test_context.send_finalized_event((1, block_id_1), true); // Listen on drop bank receiver, it should get bank 0 @@ -1576,8 +1528,7 @@ mod tests { #[test] fn test_parent_ready_in_middle_of_window() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // We just woke up and received finalize for slot 5 let root_bank = test_context @@ -1593,6 +1544,7 @@ mod tests { let block_id_5 = bank5.block_id().unwrap(); test_context.send_finalized_event((5, block_id_5), true); + // We should now have parent ready for slot 5 test_context.check_parent_ready_slot((5, (4, block_id_4))); @@ -1601,6 +1553,7 @@ mod tests { let bank9 = test_context.create_block_only(9, bank5.clone()); let block_id_9 = bank9.block_id().unwrap(); test_context.send_finalized_event((9, block_id_9), true); + test_context.send_block_event(9, bank9.clone()); // We should now have parent ready for slot 9 @@ -1609,8 +1562,7 @@ mod tests { #[test] fn test_received_standstill() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Send notarize vote for slot 1 then skip rest of the window let root_bank = test_context @@ -1622,13 +1574,16 @@ mod tests { let bank1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1 = bank1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); + test_context.send_timeout_event(2); test_context.check_for_vote(&Vote::new_skip_vote(2)); test_context.check_for_vote(&Vote::new_skip_vote(3)); // 
Send a standstill event with highest parent ready at 0, we should refresh all the votes test_context.send_standstill_event(0); + test_context.check_for_votes(&[ Vote::new_notarization_vote(1, block_id_1), Vote::new_skip_vote(2), @@ -1636,22 +1591,21 @@ mod tests { ]); // Send another standstill event with highest parent ready at 1, we should refresh votes for 2 and 3 only - test_context.bls_ops.clear(); test_context.send_standstill_event(1); + test_context.check_for_votes(&[Vote::new_skip_vote(2), Vote::new_skip_vote(3)]); } #[test] fn test_received_set_identity() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let old_identity = test_context.cluster_info.keypair().insecure_clone(); let new_identity = Keypair::new(); let mut files_to_remove = vec![]; // Before set identity we need to manually create the vote history storage file for new identity files_to_remove - .push(test_context.create_vote_history_storage_and_switch_identity(&new_identity)); + .push(test_context.crate_vote_history_storage_and_switch_identity(&new_identity)); // Should not send any votes because we set to a different identity let root_bank = test_context @@ -1662,16 +1616,17 @@ mod tests { .root(); let _ = test_context.create_block_and_send_block_event(1, root_bank.clone()); test_context.send_parent_ready_event(1, (0, Hash::default())); + // There should be no votes but we should see commitments for hot spares assert_eq!( test_context.bls_receiver.try_recv().err(), - Some(crossbeam_channel::TryRecvError::Empty) + Some(TryRecvError::Empty) ); test_context.check_for_commitment(CommitmentType::Notarize, 1); // Now set back to original identity files_to_remove - .push(test_context.create_vote_history_storage_and_switch_identity(&old_identity)); + .push(test_context.crate_vote_history_storage_and_switch_identity(&old_identity)); // We should now be able to vote again let slot = 4; diff --git a/votor/src/event_handler/stats.rs 
b/votor/src/event_handler/stats.rs index 76dfd7b3bd3f1e..739f35e12fbe1c 100644 --- a/votor/src/event_handler/stats.rs +++ b/votor/src/event_handler/stats.rs @@ -1,36 +1,31 @@ use { - crate::{common::VoteType, event::VotorEvent, voting_service::BLSOp}, - agave_votor_messages::consensus_message::ConsensusMessage, + crate::{event::VotorEvent, voting_service::BLSOp}, + agave_votor_messages::{consensus_message::ConsensusMessage, vote::VoteType}, solana_clock::Slot, solana_metrics::datapoint_info, std::{ - collections::BTreeMap, + collections::{BTreeMap, HashMap}, time::{Duration, Instant}, }, }; -const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(1); +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); #[derive(Debug, Clone)] struct SlotTracking { /// The time when the slot tracking started start: Instant, - /// Duration in microseconds from start to when the first shred for this - /// slot was received - first_shred: Option, - /// Duration in microseconds from start to when the parent block for this - /// slot was ready - parent_ready: Option, - /// Duration in microseconds from start to when the notarization vote for - /// this slot was sent - vote_notarize: Option, - /// Duration in microseconds from start to when the skip vote for this slot - /// was sent - vote_skip: Option, - /// If the slot was finalized, this is the duration in microseconds from - /// start to when it was finalized, and the bool indicates if it was fast - /// finalized - finalized: Option<(i64, bool)>, + /// The time when the first shred for this slot was received + first_shred: Option, + /// The time when the parent block for this slot was ready + parent_ready: Option, + /// The time when the notarization vote for this slot was sent + vote_notarize: Option, + /// The time when the skip vote for this slot was sent + vote_skip: Option, + /// If the slot was finalized, this is the time when it was finalized, + /// the bool indicates if it was fast finalized + finalized: 
Option<(Instant, bool)>, } impl Default for SlotTracking { @@ -46,289 +41,45 @@ impl Default for SlotTracking { } } -impl SlotTracking { - fn finalized_elapsed_micros(&self) -> Option { - self.finalized.map(|(t, _fast_finalize)| t) - } - - fn is_fast_finalized(&self) -> Option { - self.finalized.map(|(_t, fast_finalize)| fast_finalize) - } - - fn report(&self, slot: Slot) { - datapoint_info!( - "event_handler_slot_tracking", - ("slot", slot as i64, i64), - ( - "first_shred", - self.first_shred, - Option - ), - ( - "parent_ready", - self.parent_ready, - Option - ), - ( - "vote_notarize", - self.vote_notarize, - Option - ), - ( - "vote_skip", - self.vote_skip, - Option - ), - ( - "finalized", - self.finalized_elapsed_micros(), - Option - ), - ("is_fast_finalization", self.is_fast_finalized(), Option) - ); - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum StatsEvent { - Block, - BlockNotarized, - FirstShred, - ParentReady, - TimeoutCrashedLeader, - Timeout, - SafeToNotar, - SafeToSkip, - ProduceWindow, - Finalized, - Standstill, - SetIdentity, -} - #[derive(Debug, Default)] struct EventCountAndTime { - count: usize, - time_us: u64, -} - -#[derive(Debug, Default)] -struct ReceivedEventsStats { - block: EventCountAndTime, - block_notarized: EventCountAndTime, - first_shred: EventCountAndTime, - parent_ready: EventCountAndTime, - timeout_crashed_leader: EventCountAndTime, - timeout: EventCountAndTime, - safe_to_notar: EventCountAndTime, - safe_to_skip: EventCountAndTime, - produce_window: EventCountAndTime, - finalized: EventCountAndTime, - standstill: EventCountAndTime, - set_identity: EventCountAndTime, -} - -impl ReceivedEventsStats { - fn incr_event_with_timing(&mut self, stats_event: StatsEvent, time_us: u64) { - match stats_event { - StatsEvent::Block => { - self.block.count = self.block.count.saturating_add(1); - self.block.time_us = self.block.time_us.saturating_add(time_us); - } - StatsEvent::BlockNotarized => { - self.block_notarized.count 
= self.block_notarized.count.saturating_add(1); - self.block_notarized.time_us = self.block_notarized.time_us.saturating_add(time_us); - } - StatsEvent::FirstShred => { - self.first_shred.count = self.first_shred.count.saturating_add(1); - self.first_shred.time_us = self.first_shred.time_us.saturating_add(time_us); - } - StatsEvent::ParentReady => { - self.parent_ready.count = self.parent_ready.count.saturating_add(1); - self.parent_ready.time_us = self.parent_ready.time_us.saturating_add(time_us); - } - StatsEvent::TimeoutCrashedLeader => { - self.timeout_crashed_leader.count = - self.timeout_crashed_leader.count.saturating_add(1); - self.timeout_crashed_leader.time_us = - self.timeout_crashed_leader.time_us.saturating_add(time_us); - } - StatsEvent::Timeout => { - self.timeout.count = self.timeout.count.saturating_add(1); - self.timeout.time_us = self.timeout.time_us.saturating_add(time_us); - } - StatsEvent::SafeToNotar => { - self.safe_to_notar.count = self.safe_to_notar.count.saturating_add(1); - self.safe_to_notar.time_us = self.safe_to_notar.time_us.saturating_add(time_us); - } - StatsEvent::SafeToSkip => { - self.safe_to_skip.count = self.safe_to_skip.count.saturating_add(1); - self.safe_to_skip.time_us = self.safe_to_skip.time_us.saturating_add(time_us); - } - StatsEvent::ProduceWindow => { - self.produce_window.count = self.produce_window.count.saturating_add(1); - self.produce_window.time_us = self.produce_window.time_us.saturating_add(time_us); - } - StatsEvent::Finalized => { - self.finalized.count = self.finalized.count.saturating_add(1); - self.finalized.time_us = self.finalized.time_us.saturating_add(time_us); - } - StatsEvent::Standstill => { - self.standstill.count = self.standstill.count.saturating_add(1); - self.standstill.time_us = self.standstill.time_us.saturating_add(time_us); - } - StatsEvent::SetIdentity => { - self.set_identity.count = self.set_identity.count.saturating_add(1); - self.set_identity.time_us = 
self.set_identity.time_us.saturating_add(time_us); - } - } - } - - fn report(&self) { - datapoint_info!( - "event_handler_received_event_count_and_timing", - ("block_count", self.block.count as i64, i64), - ("block_elapsed_us", self.block.time_us as i64, i64), - ( - "block_notarized_count", - self.block_notarized.count as i64, - i64 - ), - ( - "block_notarized_elapsed_us", - self.block_notarized.time_us as i64, - i64 - ), - ("first_shred_count", self.first_shred.count as i64, i64), - ( - "first_shred_elapsed_us", - self.first_shred.time_us as i64, - i64 - ), - ("parent_ready_count", self.parent_ready.count as i64, i64), - ( - "parent_ready_elapsed_us", - self.parent_ready.time_us as i64, - i64 - ), - ( - "timeout_crashed_leader_count", - self.timeout_crashed_leader.count as i64, - i64 - ), - ( - "timeout_crashed_leader_elapsed_us", - self.timeout_crashed_leader.time_us as i64, - i64 - ), - ("timeout_count", self.timeout.count as i64, i64), - ("timeout_elapsed_us", self.timeout.time_us as i64, i64), - ("safe_to_notar_count", self.safe_to_notar.count as i64, i64), - ( - "safe_to_notar_elapsed_us", - self.safe_to_notar.time_us as i64, - i64 - ), - ("safe_to_skip_count", self.safe_to_skip.count as i64, i64), - ( - "safe_to_skip_elapsed_us", - self.safe_to_skip.time_us as i64, - i64 - ), - ( - "produce_window_count", - self.produce_window.count as i64, - i64 - ), - ( - "produce_window_elapsed_us", - self.produce_window.time_us as i64, - i64 - ), - ("finalized_count", self.finalized.count as i64, i64), - ("finalized_elapsed_us", self.finalized.time_us as i64, i64), - ("standstill_count", self.standstill.count as i64, i64), - ("standstill_elapsed_us", self.standstill.time_us as i64, i64), - ("set_identity_count", self.set_identity.count as i64, i64), - ( - "set_identity_elapsed_us", - self.set_identity.time_us as i64, - i64 - ), - ); - } -} - -#[derive(Debug, Default)] -struct SentVoteStats { - finalize: usize, - notarize: usize, - notarize_fallback: usize, - skip: usize, 
- skip_fallback: usize, -} - -impl SentVoteStats { - fn incr_vote(&mut self, vote_type: VoteType) { - match vote_type { - VoteType::Finalize => self.finalize = self.finalize.saturating_add(1), - VoteType::Notarize => self.notarize = self.notarize.saturating_add(1), - VoteType::NotarizeFallback => { - self.notarize_fallback = self.notarize_fallback.saturating_add(1) - } - VoteType::Skip => self.skip = self.skip.saturating_add(1), - VoteType::SkipFallback => self.skip_fallback = self.skip_fallback.saturating_add(1), - VoteType::Genesis => (), - } - } - - fn report(&self) { - datapoint_info!( - "event_handler_sent_vote_count", - ("finalize", self.finalize as i64, i64), - ("notarize", self.notarize as i64, i64), - ("notarize_fallback", self.notarize_fallback as i64, i64), - ("skip", self.skip as i64, i64), - ("skip_fallback", self.skip_fallback as i64, i64), - ); - } + count: u16, + time_us: u32, } #[derive(Debug)] pub(crate) struct EventHandlerStats { - /// Number of events that were ignored. This includes events that were - /// received but not processed due to various reasons (e.g., outdated, - /// irrelevant). - pub(crate) ignored: usize, + // Number of events that were ignored. This includes events that were + // received but not processed due to various reasons (e.g., outdated, + // irrelevant). + pub(crate) ignored: u16, - /// Number of times where we are attempting to start a leader window but - /// there is already a pending window to produce. The older window is - /// discarded in favor of the newer one. - pub(crate) leader_window_replaced: usize, + // Number of times where we are attempting to start a leader window but + // there is already a pending window to produce. The older window is + // discarded in favor of the newer one. + pub(crate) leader_window_replaced: u16, - /// Number of times we updated the root. - pub(crate) set_root_count: usize, + // Number of times we updated the root. 
+ pub(crate) set_root_count: u16, - /// Number of times we setup timeouts for a new leader window. - pub(crate) timeout_set: usize, + // Number of times we setup timeouts for a new leader window. + pub(crate) timeout_set: u16, - /// Amount of time spent receiving events. Includes waiting for events. - pub(crate) receive_event_time_us: u64, + // Amount of time spent receiving events. Includes waiting for events. + pub(crate) receive_event_time_us: u32, - /// Amount of time spent sending votes. - pub(crate) send_votes_batch_time_us: u64, + // Amount of time spent sending votes. + pub(crate) send_votes_batch_time_us: u32, - /// Number of times we saw each event and time spent processing the event. - received_events_stats: ReceivedEventsStats, + // Number of times we saw each event and time spent processing the event. + received_events_count_and_timing: HashMap, - /// Number of votes sent for each vote type. - sent_votes: SentVoteStats, + // Number of votes sent for each vote type. + sent_votes: HashMap, - /// Timing information for major events for each slot. + // Timing information for major events for each slot. slot_tracking_map: BTreeMap, - /// Whether the send metrics queue has been full. 
- pub(super) metrics_queue_became_full: bool, - root_slot: Slot, last_report_time: Instant, } @@ -339,6 +90,22 @@ impl Default for EventHandlerStats { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum StatsEvent { + Block, + BlockNotarized, + FirstShred, + ParentReady, + TimeoutCrashedLeader, + Timeout, + SafeToNotar, + SafeToSkip, + ProduceWindow, + Finalized, + Standstill, + SetIdentity, +} + impl StatsEvent { pub fn new(event: &VotorEvent) -> Self { match event { @@ -367,48 +134,28 @@ impl EventHandlerStats { timeout_set: 0, receive_event_time_us: 0, send_votes_batch_time_us: 0, - received_events_stats: ReceivedEventsStats::default(), - sent_votes: SentVoteStats::default(), + received_events_count_and_timing: HashMap::new(), + sent_votes: HashMap::new(), slot_tracking_map: BTreeMap::new(), root_slot: 0, last_report_time: Instant::now(), - metrics_queue_became_full: false, } } - fn reset(&mut self, root_slot: Slot, slot_tracking_map: BTreeMap) { - *self = EventHandlerStats::new(); - self.root_slot = root_slot; - self.slot_tracking_map = slot_tracking_map; - } - pub fn handle_event_arrival(&mut self, event: &VotorEvent) -> StatsEvent { match event { VotorEvent::FirstShred(slot) => { let entry = self.slot_tracking_map.entry(*slot).or_default(); - entry.first_shred = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); + entry.first_shred = Some(Instant::now()); } VotorEvent::ParentReady { slot, .. 
} => { let entry = self.slot_tracking_map.entry(*slot).or_default(); - entry.parent_ready = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); + entry.parent_ready = Some(Instant::now()); } VotorEvent::Finalized((slot, _), is_fast_finalization) => { let entry = self.slot_tracking_map.entry(*slot).or_default(); if entry.finalized.is_none() { - entry.finalized = Some(( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - *is_fast_finalization, - )); + entry.finalized = Some((Instant::now(), *is_fast_finalization)); } else if *is_fast_finalization { // We can accept Notarize and FastFinalization, never set the flag from true to false if let Some((instant, false)) = entry.finalized { @@ -427,39 +174,32 @@ impl EventHandlerStats { } pub fn incr_event_with_timing(&mut self, stats_event: StatsEvent, time_us: u64) { - self.received_events_stats - .incr_event_with_timing(stats_event, time_us); + let entry = self + .received_events_count_and_timing + .entry(stats_event) + .or_default(); + entry.count = entry.count.saturating_add(1); + entry.time_us = entry.time_us.saturating_add(time_us as u32); } pub fn incr_vote(&mut self, bls_op: &BLSOp) { - let BLSOp::PushVote { message, .. } = bls_op else { + if let BLSOp::PushVote { message, .. 
} = bls_op { + let ConsensusMessage::Vote(vote) = **message else { + warn!("Unexpected BLS message type: {message:?}"); + return; + }; + let vote_type = vote.vote.get_type(); + let entry = self.sent_votes.entry(vote_type).or_insert(0); + *entry = entry.saturating_add(1); + if vote_type == VoteType::Notarize { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_notarize = Some(Instant::now()); + } else if vote_type == VoteType::Skip { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_skip = Some(Instant::now()); + } + } else { warn!("Unexpected BLS operation: {bls_op:?}"); - return; - }; - let ConsensusMessage::Vote(ref vote) = **message else { - warn!("Unexpected BLS message type: {message:?}"); - return; - }; - - // Increment vote type counters - let vote_type = VoteType::get_type(&vote.vote); - self.sent_votes.incr_vote(vote_type); - - // Increment slot based counters - if vote_type == VoteType::Notarize { - let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); - entry.vote_notarize = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); - } else if vote_type == VoteType::Skip { - let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); - entry.vote_skip = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); } } @@ -478,12 +218,16 @@ impl EventHandlerStats { ), ("set_root_count", self.set_root_count as i64, i64), ("timeout_set", self.timeout_set as i64, i64), - ( - "metrics_queue_became_full", - self.metrics_queue_became_full, - bool - ) ); + for (event, EventCountAndTime { count, time_us }) in &self.received_events_count_and_timing + { + datapoint_info!( + "event_handler_received_event_count_and_timing", + ("event", format!("{:?}", event), String), + ("count", *count as i64, i64), + ("elapsed_us", *time_us as i64, i64) + ); + } datapoint_info!( "event_handler_timing", ( @@ 
-497,16 +241,72 @@ impl EventHandlerStats { i64 ), ); - - self.received_events_stats.report(); - self.sent_votes.report(); - - // Only report slots lower than the `root_slot` + for (vote_type, count) in &self.sent_votes { + datapoint_info!( + "event_handler_sent_vote_count", + ("vote", format!("{:?}", vote_type), String), + ("count", *count as i64, i64) + ); + } + // Only report if the slot is lower than root_slot let split_off_map = self.slot_tracking_map.split_off(&self.root_slot); for (slot, tracking) in &self.slot_tracking_map { - tracking.report(*slot); + let start = tracking.start; + datapoint_info!( + "event_handler_slot_tracking", + ("slot", *slot as i64, i64), + ( + "first_shred", + tracking.first_shred.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "parent_ready", + tracking.parent_ready.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_notarize", + tracking.vote_notarize.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_skip", + tracking.vote_skip.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "finalized", + tracking.finalized.map(|t| { + t.0.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ("is_fast_finalization", tracking.finalized.map(|t| t.1), Option) + ); } - - self.reset(self.root_slot, split_off_map); + self.last_report_time = now; + let root_slot = self.root_slot; + *self = EventHandlerStats::new(); + self.root_slot = root_slot; + self.slot_tracking_map = split_off_map; } } diff --git a/votor/src/root_utils.rs b/votor/src/root_utils.rs index 316fa24dbb56f9..1adb45d28cda5d 100644 --- a/votor/src/root_utils.rs +++ b/votor/src/root_utils.rs @@ -1,13 +1,10 @@ use { crate::{event_handler::PendingBlocks, 
voting_utils::VotingContext, votor::SharedContext}, agave_votor_messages::consensus_message::Block, - crossbeam_channel::{SendError, Sender}, - log::{info, warn}, + crossbeam_channel::Sender, solana_clock::Slot, - solana_ledger::{ - blockstore::{Blockstore, BlockstoreError}, - leader_schedule_cache::LeaderScheduleCache, - }, + solana_hash::Hash, + solana_ledger::{blockstore::Blockstore, leader_schedule_cache::LeaderScheduleCache}, solana_pubkey::Pubkey, solana_rpc::{ optimistically_confirmed_bank_tracker::{BankNotification, BankNotificationSenderConfig}, @@ -22,10 +19,8 @@ use { collections::BTreeSet, sync::{Arc, RwLock}, }, - thiserror::Error, }; -#[allow(dead_code)] /// Structures that are not used in the event loop, but need to be updated /// or notified when setting root pub(crate) struct RootContext { @@ -35,15 +30,6 @@ pub(crate) struct RootContext { pub(crate) drop_bank_sender: Sender>, } -#[derive(Debug, Error)] -pub enum SetRootError { - #[error("Failed to record slot in blockstore: {0}")] - Blockstore(#[from] BlockstoreError), - - #[error("Error sending bank nofification: {0}")] - SendNotification(#[from] SendError<()>), -} - /// Sets the root for the votor event handling loop. 
Handles rooting all things /// except the certificate pool pub(crate) fn set_root( @@ -55,12 +41,12 @@ pub(crate) fn set_root( pending_blocks: &mut PendingBlocks, finalized_blocks: &mut BTreeSet, received_shred: &mut BTreeSet, -) -> Result<(), SetRootError> { +) { info!("{my_pubkey}: setting root {new_root}"); vctx.vote_history.set_root(new_root); - pending_blocks.retain(|pending_block, _| *pending_block >= new_root); - finalized_blocks.retain(|(slot, _)| *slot >= new_root); - received_shred.retain(|slot| *slot >= new_root); + *pending_blocks = pending_blocks.split_off(&new_root); + *finalized_blocks = finalized_blocks.split_off(&(new_root, Hash::default())); + *received_shred = received_shred.split_off(&new_root); check_and_handle_new_root( new_root, @@ -75,12 +61,19 @@ pub(crate) fn set_root( ctx.rpc_subscriptions.as_deref(), my_pubkey, |_| {}, - )?; + ); // Distinguish between duplicate versions of same slot let hash = ctx.bank_forks.read().unwrap().bank_hash(new_root).unwrap(); - ctx.blockstore - .insert_optimistic_slot(new_root, &hash, timestamp().try_into().unwrap())?; + if let Err(e) = + ctx.blockstore + .insert_optimistic_slot(new_root, &hash, timestamp().try_into().unwrap()) + { + error!( + "failed to record optimistic slot in blockstore: slot={}: {:?}", + new_root, &e + ); + } // It is critical to send the OC notification in order to keep compatibility with // the RPC API. 
Additionally the PrioritizationFeeCache relies on this notification @@ -91,20 +84,16 @@ pub(crate) fn set_root( .dependency_tracker .as_ref() .map(|s| s.get_current_declared_work()); - config - .sender - .send(( - BankNotification::OptimisticallyConfirmed(new_root), - dependency_work, - )) - .map_err(|_| SendError(()))?; + // TODO: propagate error + let _ = config.sender.send(( + BankNotification::OptimisticallyConfirmed(new_root), + dependency_work, + )); } - - Ok(()) } /// Sets the new root, additionally performs the callback after setting the bank forks root -/// During this transition period where both replay stage and votor can root depending on the feature flag we +/// During this transition period where both replay stage and voting loop can root depending on the feature flag we /// have a callback that cleans up progress map and other tower bft structures. Then the callgraph is /// /// ReplayStage::check_and_handle_new_root -> root_utils::check_and_handle_new_root(callback) @@ -127,8 +116,7 @@ pub fn check_and_handle_new_root( rpc_subscriptions: Option<&RpcSubscriptions>, my_pubkey: &Pubkey, callback: CB, -) -> Result<(), SetRootError> -where +) where CB: FnOnce(&BankForks), { // get the root bank before squash @@ -156,8 +144,9 @@ where // get shreds for repair on gossip before we update leader schedule, otherwise they may // get dropped. 
leader_schedule_cache.set_root(rooted_banks.last().unwrap()); - blockstore.set_roots(rooted_slots.iter())?; - + blockstore + .set_roots(rooted_slots.iter()) + .expect("Ledger set roots failed"); set_bank_forks_root( new_root, bank_forks, @@ -192,8 +181,6 @@ where } } info!("{my_pubkey}: new root {new_root}"); - - Ok(()) } /// Sets the bank forks root: diff --git a/votor/src/staked_validators_cache.rs b/votor/src/staked_validators_cache.rs index 1540e2d81bc992..6633acd4140f53 100644 --- a/votor/src/staked_validators_cache.rs +++ b/votor/src/staked_validators_cache.rs @@ -4,7 +4,6 @@ use { crate::voting_service::AlpenglowPortOverride, lru::LruCache, solana_clock::{Epoch, Slot}, - solana_epoch_schedule::EpochSchedule, solana_gossip::cluster_info::ClusterInfo, solana_pubkey::Pubkey, solana_runtime::bank_forks::BankForks, @@ -41,9 +40,6 @@ pub struct StakedValidatorsCache { /// Bank forks bank_forks: Arc>, - // Cache Epoch schedule since it never changes - epoch_schedule: EpochSchedule, - /// Whether to include the running validator's socket address in cache entries include_self: bool, @@ -62,17 +58,10 @@ impl StakedValidatorsCache { include_self: bool, alpenglow_port_override: Option, ) -> Self { - let epoch_schedule = bank_forks - .read() - .unwrap() - .working_bank() - .epoch_schedule() - .clone(); Self { cache: LruCache::new(max_cache_size), ttl, bank_forks, - epoch_schedule, include_self, alpenglow_port_override, alpenglow_port_override_last_modified: Instant::now(), @@ -81,7 +70,12 @@ impl StakedValidatorsCache { #[inline] fn cur_epoch(&self, slot: Slot) -> Epoch { - self.epoch_schedule.get_epoch(slot) + self.bank_forks + .read() + .unwrap() + .working_bank() + .epoch_schedule() + .get_epoch(slot) } fn refresh_cache_entry( @@ -167,7 +161,6 @@ impl StakedValidatorsCache { cluster_info: &ClusterInfo, access_time: Instant, ) -> (&[SocketAddr], bool) { - let epoch = self.cur_epoch(slot); // Check if self.alpenglow_port_override has a different last_modified. 
// Immediately refresh the cache if it does. if let Some(alpenglow_port_override) = &self.alpenglow_port_override { @@ -176,14 +169,15 @@ impl StakedValidatorsCache { self.alpenglow_port_override_last_modified = alpenglow_port_override.last_modified(); trace!( - "refreshing cache entry for epoch {epoch} due to alpenglow port override \ - last_modified change" + "refreshing cache entry for epoch {} due to alpenglow port override \ + last_modified change", + self.cur_epoch(slot) ); - self.refresh_cache_entry(epoch, cluster_info, access_time); + self.refresh_cache_entry(self.cur_epoch(slot), cluster_info, access_time); } } - self.get_staked_validators_by_epoch(epoch, cluster_info, access_time) + self.get_staked_validators_by_epoch(self.cur_epoch(slot), cluster_info, access_time) } fn get_staked_validators_by_epoch( @@ -273,12 +267,12 @@ mod tests { .map(|(node_ix, pubkey)| { let mut contact_info = ContactInfo::new(*pubkey, 0_u64, 0_u16); - contact_info + assert!(contact_info .set_alpenglow(( Ipv4Addr::LOCALHOST, - 8080_u16.saturating_add(node_ix as u16), + 8080_u16.saturating_add(node_ix as u16) )) - .unwrap(); + .is_ok()); contact_info }); diff --git a/votor/src/timer_manager.rs b/votor/src/timer_manager.rs index 4fcd30d8c09b06..387b03c53e4de2 100644 --- a/votor/src/timer_manager.rs +++ b/votor/src/timer_manager.rs @@ -1,13 +1,15 @@ //! Controls the queueing and firing of skip timer events for use //! in the event loop. -// TODO: Make this mockable in event_handler for tests + mod stats; mod timers; + use { crate::{ common::{DELTA_BLOCK, DELTA_TIMEOUT}, event::VotorEvent, }, + agave_votor_messages::migration::MigrationStatus, crossbeam_channel::Sender, parking_lot::RwLock as PlRwLock, solana_clock::Slot, @@ -21,6 +23,7 @@ use { }, timers::Timers, }; + /// A manager of timer states. Uses a background thread to trigger next ready /// timers and send events. 
pub(crate) struct TimerManager { @@ -29,7 +32,11 @@ pub(crate) struct TimerManager { } impl TimerManager { - pub(crate) fn new(event_sender: Sender, exit: Arc) -> Self { + pub(crate) fn new( + event_sender: Sender, + exit: Arc, + migration_status: Arc, + ) -> Self { let timers = Arc::new(PlRwLock::new(Timers::new( DELTA_TIMEOUT, DELTA_BLOCK, @@ -38,6 +45,7 @@ impl TimerManager { let handle = { let timers = Arc::clone(&timers); thread::spawn(move || { + let _ = migration_status.wait_for_migration_or_exit(exit.as_ref()); while !exit.load(Ordering::Relaxed) { let duration = match timers.write().progress(Instant::now()) { None => { @@ -52,11 +60,14 @@ impl TimerManager { } }) }; + Self { timers, handle } } + pub(crate) fn set_timeouts(&self, slot: Slot) { self.timers.write().set_timeouts(slot, Instant::now()); } + pub(crate) fn join(self) { self.handle.join().unwrap(); } @@ -70,11 +81,16 @@ impl TimerManager { #[cfg(test)] mod tests { use {super::*, crate::event::VotorEvent, crossbeam_channel::unbounded, std::time::Duration}; + #[test] fn test_timer_manager() { let (event_sender, event_receiver) = unbounded(); let exit = Arc::new(AtomicBool::new(false)); - let timer_manager = TimerManager::new(event_sender, exit.clone()); + let timer_manager = TimerManager::new( + event_sender, + exit.clone(), + Arc::new(MigrationStatus::post_migration_status()), + ); let slot = 52; let start = Instant::now(); timer_manager.set_timeouts(slot); @@ -82,8 +98,8 @@ mod tests { let mut timeouts_received = 0; while timeouts_received < 2 && Instant::now().duration_since(start) < Duration::from_secs(2) { - let res = event_receiver.recv_timeout(Duration::from_millis(200)); - if let Ok(event) = res { + let recv = event_receiver.recv_timeout(Duration::from_millis(200)); + if let Ok(event) = recv { match event { VotorEvent::Timeout(s) => { assert_eq!(s, slot); diff --git a/votor/src/timer_manager/timers.rs b/votor/src/timer_manager/timers.rs index 1ad08b90a0b617..f62d504c0104e0 100644 --- 
a/votor/src/timer_manager/timers.rs +++ b/votor/src/timer_manager/timers.rs @@ -9,6 +9,7 @@ use { time::{Duration, Instant}, }, }; + /// Encodes a basic state machine of the different stages involved in handling /// timeouts for a window of slots. enum TimerState { @@ -29,6 +30,7 @@ enum TimerState { /// The state machine is done. Done, } + impl TimerState { /// Creates a new instance of the state machine. /// @@ -39,6 +41,7 @@ impl TimerState { let timeout = now.checked_add(delta_timeout).unwrap(); (Self::WaitDeltaTimeout { window, timeout }, timeout) } + /// Call to make progress on the state machine. /// /// Returns a potentially empty list of events that should be sent. @@ -75,6 +78,7 @@ impl TimerState { Self::Done => None, } } + /// When would this state machine next be able to make progress. fn next_fire(&self) -> Option { match self { @@ -84,6 +88,7 @@ impl TimerState { } } } + /// Maintains all active timer states for windows of slots. pub(super) struct Timers { delta_timeout: Duration, @@ -97,6 +102,7 @@ pub(super) struct Timers { /// Stats for the timer manager. stats: TimerManagerStats, } + impl Timers { pub(super) fn new( delta_timeout: Duration, @@ -112,6 +118,7 @@ impl Timers { stats: TimerManagerStats::new(), } } + /// Call to set timeouts for a new window of slots. pub(super) fn set_timeouts(&mut self, slot: Slot, now: Instant) { assert_eq!(self.heap.len(), self.timers.len()); @@ -127,6 +134,7 @@ impl Timers { self.stats .incr_timeout_count_with_heap_size(self.heap.len(), new_timer_inserted); } + /// Call to make progress on the timer states. If there are still active /// timer states, returns when the earliest one might become ready. 
pub(super) fn progress(&mut self, now: Instant) -> Option { @@ -159,6 +167,7 @@ impl Timers { } ret_timeout } + #[cfg(test)] pub(super) fn stats(&self) -> TimerManagerStats { self.stats.clone() @@ -230,6 +239,7 @@ mod tests { now = now.checked_add(one_micro).unwrap(); } let mut events = receiver.try_iter().collect::>(); + assert!(matches!( events.remove(0), VotorEvent::TimeoutCrashedLeader(0) diff --git a/votor/src/vote_history.rs b/votor/src/vote_history.rs index c9d5a4ea617eb7..2b32e980cfb28a 100644 --- a/votor/src/vote_history.rs +++ b/votor/src/vote_history.rs @@ -12,6 +12,8 @@ use { thiserror::Error, }; +pub const VOTE_THRESHOLD_SIZE: f64 = 2f64 / 3f64; + #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub enum VoteHistoryVersions { Current(VoteHistory), @@ -128,7 +130,7 @@ impl VoteHistory { pub fn votes_cast_since(&self, slot: Slot) -> Vec { self.votes_cast .iter() - .filter(|(s, _)| s > &&slot) + .filter(|&(&s, _)| s > slot) .flat_map(|(_, votes)| votes.iter()) .cloned() .collect() @@ -193,7 +195,11 @@ impl VoteHistory { self.skipped.insert(vote.slot); self.voted_skip_fallback.insert(vote.slot); } - Vote::Genesis(_vote) => {} + Vote::Genesis(_vote) => { + // Genesis votes are only used during migration. + // Since these votes are tracked and sent outside of + // votor, we do not need to insert anything here. 
+ } } self.votes_cast.entry(vote.slot()).or_default().push(vote); } diff --git a/votor/src/voting_service.rs b/votor/src/voting_service.rs index 72b18ca242ce49..395060762a85b5 100644 --- a/votor/src/voting_service.rs +++ b/votor/src/voting_service.rs @@ -23,20 +23,11 @@ use { thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, }, - thiserror::Error, }; const STAKED_VALIDATORS_CACHE_TTL_S: u64 = 5; const STAKED_VALIDATORS_CACHE_NUM_EPOCH_CAP: usize = 5; -#[derive(Debug, Error)] -enum SendVoteError { - #[error(transparent)] - BincodeError(#[from] bincode::Error), - #[error(transparent)] - TransportError(#[from] TransportError), -} - #[derive(Debug)] pub enum BLSOp { PushVote { @@ -269,7 +260,7 @@ mod tests { solana_bls_signatures::Signature as BLSSignature, solana_gossip::{cluster_info::ClusterInfo, contact_info::ContactInfo}, solana_keypair::Keypair, - solana_net_utils::{sockets::bind_to_localhost_unique, SocketAddrSpace}, + solana_net_utils::SocketAddrSpace, solana_runtime::{ bank::Bank, bank_forks::BankForks, @@ -283,7 +274,10 @@ mod tests { quic::{spawn_stake_wighted_qos_server, QuicStreamerConfig, SpawnServerResult}, streamer::StakedNodes, }, - std::{net::SocketAddr, sync::Arc}, + std::{ + net::SocketAddr, + sync::{Arc, RwLock}, + }, test_case::test_case, tokio_util::sync::CancellationToken, }; @@ -360,14 +354,14 @@ mod tests { // Create listener thread on a random port we allocated and return SocketAddr to create VotingService // Bind to a random UDP port - let socket = bind_to_localhost_unique().unwrap(); + let socket = solana_net_utils::bind_to_localhost().unwrap(); let listener_addr = socket.local_addr().unwrap(); // Create VotingService with the listener address let (_, validator_keypairs) = create_voting_service(bls_receiver, listener_addr); // Send a BLS message via the VotingService - bls_sender.send(bls_op).unwrap(); + assert!(bls_sender.send(bls_op).is_ok()); // Start a quick streamer to handle quick control packets let (sender, receiver) = 
crossbeam_channel::unbounded(); @@ -379,22 +373,24 @@ mod tests { Arc::new(stakes), HashMap::::default(), // overrides ))); - let cancel_token = CancellationToken::new(); + let cancel = CancellationToken::new(); let SpawnServerResult { + endpoints: _, thread: quic_server_thread, - .. + key_updater: _, } = spawn_stake_wighted_qos_server( "AlpenglowLocalClusterTest", - "quic_streamer_test", + "voting_service_test", [socket], &Keypair::new(), sender, staked_nodes, QuicStreamerConfig::default_for_tests(), SwQosConfig::default(), - cancel_token.clone(), + cancel.clone(), ) .unwrap(); + let packets = receiver.recv().unwrap(); let packet = packets.first().expect("No packets received"); let received_message = packet @@ -407,7 +403,7 @@ mod tests { ) }); assert_eq!(received_message, expected_message); - cancel_token.cancel(); + cancel.cancel(); quic_server_thread.join().unwrap(); } } diff --git a/votor/src/voting_utils.rs b/votor/src/voting_utils.rs index 867ba73d3834b5..447cd39afc08cb 100644 --- a/votor/src/voting_utils.rs +++ b/votor/src/voting_utils.rs @@ -128,12 +128,12 @@ pub struct VotingContext { pub consensus_metrics_sender: ConsensusMetricsEventSender, } -fn get_bls_keypair( - context: &mut VotingContext, +fn get_or_insert_bls_keypair( + derived_bls_keypairs: &mut HashMap>, authorized_voter_keypair: &Arc, ) -> Result, BlsError> { let pubkey = authorized_voter_keypair.pubkey(); - if let Some(existing) = context.derived_bls_keypairs.get(&pubkey) { + if let Some(existing) = derived_bls_keypairs.get(&pubkey) { return Ok(existing.clone()); } @@ -142,23 +142,28 @@ fn get_bls_keypair( BLS_KEYPAIR_DERIVE_SEED, )?); - context - .derived_bls_keypairs - .insert(pubkey, bls_keypair.clone()); + derived_bls_keypairs.insert(pubkey, bls_keypair.clone()); Ok(bls_keypair) } -fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> GenerateVoteTxResult { - let vote_account_pubkey = context.vote_account_pubkey; +fn generate_vote_tx( + vote: &Vote, + bank: &Bank, + 
vote_account_pubkey: Pubkey, + identity_keypair: &Arc, + authorized_voter_keypairs: &Arc>>>, + wait_to_vote_slot: Option, + derived_bls_keypairs: &mut HashMap>, +) -> GenerateVoteTxResult { let authorized_voter_keypair; let bls_pubkey_in_vote_account; { - let authorized_voter_keypairs = context.authorized_voter_keypairs.read().unwrap(); + let authorized_voter_keypairs = authorized_voter_keypairs.read().unwrap(); if authorized_voter_keypairs.is_empty() { return GenerateVoteTxResult::NonVoting; } - if let Some(slot) = context.wait_to_vote_slot { + if let Some(slot) = wait_to_vote_slot { if vote.slot() < slot { return GenerateVoteTxResult::WaitToVoteSlot(slot); } @@ -167,18 +172,18 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge return GenerateVoteTxResult::VoteAccountNotFound(vote_account_pubkey); }; let vote_state_view = vote_account.vote_state_view(); - if vote_state_view.node_pubkey() != &context.identity_keypair.pubkey() { + if vote_state_view.node_pubkey() != &identity_keypair.pubkey() { info!( "Vote account node_pubkey mismatch: {} (expected: {}). 
Unable to vote", vote_state_view.node_pubkey(), - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); return GenerateVoteTxResult::HotSpare; } let Some(bls_pubkey_serialized) = vote_state_view.bls_pubkey_compressed() else { panic!( "No BLS pubkey in vote account {}", - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); }; bls_pubkey_in_vote_account = @@ -187,7 +192,7 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge .unwrap_or_else(|_| { panic!( "Failed to decompress BLS pubkey in vote account {}", - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); }); let Some(authorized_voter_pubkey) = vote_state_view.get_authorized_voter(bank.epoch()) @@ -209,7 +214,7 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge authorized_voter_keypair = keypair.clone(); } - let bls_keypair = get_bls_keypair(context, &authorized_voter_keypair) + let bls_keypair = get_or_insert_bls_keypair(derived_bls_keypairs, &authorized_voter_keypair) .unwrap_or_else(|e| panic!("Failed to derive my own BLS keypair: {e:?}")); let my_bls_pubkey: BLSPubkey = bls_keypair.public; if my_bls_pubkey != bls_pubkey_in_vote_account { @@ -263,7 +268,15 @@ fn insert_vote_and_create_bls_message( } let bank = context.sharable_banks.root(); - let message = match generate_vote_tx(&vote, &bank, context) { + let message = match generate_vote_tx( + &vote, + &bank, + context.vote_account_pubkey, + &context.identity_keypair, + &context.authorized_voter_keypairs, + context.wait_to_vote_slot, + &mut context.derived_bls_keypairs, + ) { GenerateVoteTxResult::ConsensusMessage(m) => m, e => { if e.is_transient_error() { @@ -360,9 +373,9 @@ mod tests { let my_keys = &validator_keypairs[my_index]; let sharable_banks = bank_forks.read().unwrap().sharable_banks(); - let (bls_sender, _bls_receiver) = unbounded(); - let (commitment_sender, _commitment_receiver) = unbounded(); - let (consensus_metrics_sender, 
_consensus_metrics_receiver) = unbounded(); + let bls_sender = unbounded().0; + let commitment_sender = unbounded().0; + let consensus_metrics_sender = unbounded().0; VotingContext { vote_history: VoteHistory::new(my_keys.node_keypair.pubkey(), 0), vote_account_pubkey: my_keys.vote_keypair.pubkey(), diff --git a/votor/src/votor.rs b/votor/src/votor.rs index 0bdb2680ff2649..382dc451f655eb 100644 --- a/votor/src/votor.rs +++ b/votor/src/votor.rs @@ -1,6 +1,6 @@ -//! ```text //! The entrypoint into votor the module responsible for voting, rooting, and notifying //! the core to create a new block. +//! ```text //! //! Votor //! ┌────────────────────────────────────────────────────────────────────────────┐ @@ -20,7 +20,7 @@ //! │ │ │ │ │ │ //! │ ┌────┼─────────┼───────────────┐ │ │ │ //! │ │ │ │ │ Block │ ┌────────────────────┐ -//! │ │ Consensus Pool Service │ │ │ ┌─────────────────────│─┼ Replay / Broadcast │ +//! │ │ Consensus Pool Service │ │ │ ┌─────────────────────│─┼ Replay / Broadcast │ //! │ │ │ │ │ │ │ └────────────────────┘ //! │ │ ┌──────────────────────────┐ │ │ │ │ │ //! 
│ │ │ │ │ │ │ │ │ @@ -44,7 +44,6 @@ use { crate::{ commitment::CommitmentAggregationData, - common::DELTA_STANDSTILL, consensus_metrics::{ ConsensusMetrics, ConsensusMetricsEventReceiver, ConsensusMetricsEventSender, }, @@ -58,7 +57,7 @@ use { voting_service::BLSOp, voting_utils::VotingContext, }, - agave_votor_messages::consensus_message::ConsensusMessage, + agave_votor_messages::{consensus_message::ConsensusMessage, migration::MigrationStatus}, crossbeam_channel::{Receiver, Sender}, parking_lot::RwLock as PlRwLock, solana_clock::Slot, @@ -77,23 +76,12 @@ use { }, std::{ collections::HashMap, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, RwLock, - }, + sync::{atomic::AtomicBool, Arc, RwLock}, thread::{self, JoinHandle}, time::Duration, }, }; -/// Communication with the block creation loop to notify leader window -#[derive(Default)] -pub struct LeaderWindowNotifier { - pub window_info: Mutex>, - pub window_notification: Condvar, - pub highest_parent_ready: RwLock<(Slot, (Slot, Hash))>, -} - /// Inputs to Votor pub struct VotorConfig { pub exit: Arc, @@ -111,6 +99,8 @@ pub struct VotorConfig { pub cluster_info: Arc, pub leader_schedule_cache: Arc, pub rpc_subscriptions: Option>, + pub consensus_metrics_sender: ConsensusMetricsEventSender, + pub migration_status: Arc, // Senders / Notifiers pub snapshot_controller: Option>, @@ -118,10 +108,10 @@ pub struct VotorConfig { pub commitment_sender: Sender, pub drop_bank_sender: Sender>, pub bank_notification_sender: Option, - pub leader_window_notifier: Arc, + pub leader_window_info_sender: Sender, + pub highest_parent_ready: Arc>, pub event_sender: VotorEventSender, pub own_vote_sender: Sender, - pub consensus_metrics_sender: ConsensusMetricsEventSender, // Receivers pub event_receiver: VotorEventReceiver, @@ -135,19 +125,16 @@ pub(crate) struct SharedContext { pub(crate) bank_forks: Arc>, pub(crate) cluster_info: Arc, pub(crate) rpc_subscriptions: Option>, - pub(crate) leader_window_notifier: Arc, + 
pub(crate) leader_window_info_sender: Sender, + pub(crate) highest_parent_ready: Arc>, pub(crate) vote_history_storage: Arc, } pub struct Votor { - // TODO: Just a placeholder for how migration could look like, - // will fix once we finish the strategy - start: Arc<(Mutex, Condvar)>, - event_handler: EventHandler, consensus_pool_service: ConsensusPoolService, timer_manager: Arc>, - consensus_metrics_handle: JoinHandle<()>, + metrics: JoinHandle<()>, } impl Votor { @@ -165,22 +152,22 @@ impl Votor { cluster_info, leader_schedule_cache, rpc_subscriptions, + migration_status, snapshot_controller, bls_sender, commitment_sender, drop_bank_sender, bank_notification_sender, - leader_window_notifier, + leader_window_info_sender, + highest_parent_ready, event_sender, - event_receiver, own_vote_sender, - consensus_message_receiver, + event_receiver, + consensus_message_receiver: bls_receiver, consensus_metrics_sender, consensus_metrics_receiver, } = config; - let start = Arc::new((Mutex::new(false), Condvar::new())); - let identity_keypair = cluster_info.keypair().clone(); let has_new_vote_been_rooted = !wait_for_vote_to_start_leader; @@ -192,7 +179,8 @@ impl Votor { bank_forks: bank_forks.clone(), cluster_info: cluster_info.clone(), rpc_subscriptions, - leader_window_notifier, + highest_parent_ready, + leader_window_info_sender, vote_history_storage, }; @@ -221,11 +209,12 @@ impl Votor { let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( event_sender.clone(), exit.clone(), + migration_status.clone(), ))); let event_handler_context = EventHandlerContext { exit: exit.clone(), - start: start.clone(), + migration_status: migration_status.clone(), event_receiver, timer_manager: Arc::clone(&timer_manager), shared_context, @@ -237,20 +226,19 @@ impl Votor { let consensus_pool_context = ConsensusPoolContext { exit: exit.clone(), - start: start.clone(), + migration_status, cluster_info: cluster_info.clone(), my_vote_pubkey: vote_account, blockstore, sharable_banks, 
leader_schedule_cache, - consensus_message_receiver, + consensus_message_receiver: bls_receiver, bls_sender, event_sender, commitment_sender, - delta_standstill: DELTA_STANDSTILL, }; - let consensus_metrics_handle = ConsensusMetrics::start_metrics_loop( + let metrics = ConsensusMetrics::start_metrics_loop( root_epoch, consensus_metrics_receiver, exit.clone(), @@ -259,35 +247,10 @@ impl Votor { let consensus_pool_service = ConsensusPoolService::new(consensus_pool_context); Self { - start, event_handler, consensus_pool_service, timer_manager, - consensus_metrics_handle, - } - } - - pub fn start_migration(&self) { - // TODO: evaluate once we have actual migration logic - let (lock, cvar) = &*self.start; - let mut started = lock.lock().unwrap(); - *started = true; - cvar.notify_all(); - } - - pub(crate) fn wait_for_migration_or_exit( - exit: &AtomicBool, - (lock, cvar): &(Mutex, Condvar), - ) { - let mut started = lock.lock().unwrap(); - while !*started { - if exit.load(Ordering::Relaxed) { - return; - } - // Add timeout to check for exit flag. Check infrequent enough to - // not hit performance while frequent enough that validator exit - // isn't delayed a lot. - (started, _) = cvar.wait_timeout(started, Duration::from_secs(1)).unwrap(); + metrics, } } @@ -308,7 +271,7 @@ impl Votor { } } } - self.event_handler.join()?; - self.consensus_metrics_handle.join() + self.metrics.join()?; + self.event_handler.join() } }