diff --git a/Cargo.lock b/Cargo.lock index f1cbcdef7cf8cc..0e7c9e76d96c23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -546,6 +546,7 @@ version = "3.1.0" dependencies = [ "anyhow", "bincode", + "bitvec", "bs58", "crossbeam-channel", "dashmap", @@ -560,6 +561,7 @@ dependencies = [ "serde_derive", "solana-accounts-db", "solana-bloom", + "solana-bls-signatures", "solana-clock", "solana-entry", "solana-epoch-schedule", @@ -577,8 +579,12 @@ dependencies = [ "solana-runtime", "solana-signature", "solana-signer", + "solana-signer-store", "solana-time-utils", "solana-transaction", + "solana-vote", + "solana-vote-program", + "solana-votor-messages", "test-case", "thiserror 2.0.16", ] @@ -1365,6 +1371,7 @@ checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ "funty", "radium", + "serde", "tap", "wyz", ] @@ -1422,6 +1429,34 @@ dependencies = [ "byte-tools", ] +[[package]] +name = "blst" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080" +dependencies = [ + "cc", + "glob", + "threadpool", + "zeroize", +] + +[[package]] +name = "blstrs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a8a8ed6fefbeef4a8c7b460e4110e12c5e22a5b7cf32621aae6ad650c4dcf29" +dependencies = [ + "blst", + "byte-slice-cast", + "ff", + "group", + "pairing", + "rand_core 0.6.4", + "serde", + "subtle", +] + [[package]] name = "borsh" version = "1.5.7" @@ -1518,6 +1553,12 @@ dependencies = [ "serde", ] +[[package]] +name = "byte-slice-cast" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d" + [[package]] name = "byte-tools" version = "0.3.1" @@ -2869,6 +2910,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" dependencies = [ + "bitvec", "rand_core 0.6.4", "subtle", ] @@ -3308,7 +3350,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ "ff", + "rand 0.8.5", "rand_core 0.6.4", + "rand_xorshift 0.3.0", "subtle", ] @@ -5013,6 +5057,15 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "pairing" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fec4625e73cf41ef4bb6846cafa6d44736525f442ba45e407c4a000a13996f" +dependencies = [ + "group", +] + [[package]] name = "parity-tokio-ipc" version = "0.9.0" @@ -5468,7 +5521,7 @@ dependencies = [ "num-traits", "rand 0.9.0", "rand_chacha 0.9.0", - "rand_xorshift", + "rand_xorshift 0.4.0", "regex-syntax", "rusty-fork", "tempfile", @@ -5802,6 +5855,15 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rand_xorshift" version = "0.4.0" @@ -7394,6 +7456,32 @@ dependencies = [ "solana-time-utils", ] +[[package]] +name = "solana-bls-signatures" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "210bb2fc7efc40b34d4e506759c3b1f21360b53d02c2e5ca3601feca7a9fd269" +dependencies = [ + "base64 0.22.1", + "blst", + "blstrs", + "bytemuck", + "cfg_eval", + "ff", + "group", + "pairing", + "rand 0.8.5", + "serde", + "serde_json", + "serde_with", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-signature", + "solana-signer", + "subtle", + "thiserror 2.0.16", +] + [[package]] name = 
"solana-bn254" version = "3.0.0" @@ -9093,6 +9181,7 @@ dependencies = [ "solana-transaction-status", "solana-vote", "solana-vote-program", + "solana-votor-messages", "spl-generic-token", "spl-pod", "static_assertions", @@ -10243,6 +10332,7 @@ dependencies = [ "solana-account-info", "solana-accounts-db", "solana-address-lookup-table-interface", + "solana-bls-signatures", "solana-bpf-loader-program", "solana-bucket-map", "solana-builtins", @@ -10325,6 +10415,7 @@ dependencies = [ "solana-vote", "solana-vote-interface", "solana-vote-program", + "solana-votor-messages", "spl-generic-token", "static_assertions", "strum", @@ -10599,6 +10690,17 @@ dependencies = [ "solana-transaction-error", ] +[[package]] +name = "solana-signer-store" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36329bba208f0e41954389ae4ad5d973fe15952672cfd71a9b49deb7d2ecbc2f" +dependencies = [ + "bitvec", + "num-derive", + "num-traits", +] + [[package]] name = "solana-slot-hashes" version = "3.0.0" @@ -10709,6 +10811,7 @@ dependencies = [ "solana-sysvar", "solana-transaction-context", "solana-vote-interface", + "solana-vote-program", ] [[package]] @@ -11710,6 +11813,7 @@ dependencies = [ "serde_derive", "solana-account", "solana-bincode", + "solana-bls-signatures", "solana-clock", "solana-frozen-abi", "solana-frozen-abi-macro", @@ -11727,6 +11831,7 @@ dependencies = [ "solana-svm-transaction", "solana-transaction", "solana-vote-interface", + "solana-vote-program", "static_assertions", "thiserror 2.0.16", ] @@ -11775,6 +11880,7 @@ dependencies = [ "serde_derive", "solana-account", "solana-bincode", + "solana-bls-signatures", "solana-clock", "solana-epoch-schedule", "solana-fee-calculator", @@ -11800,6 +11906,27 @@ dependencies = [ "thiserror 2.0.16", ] +[[package]] +name = "solana-votor-messages" +version = "3.1.0" +dependencies = [ + "bitvec", + "bytemuck", + "bytemuck_derive", + "num_enum", + "serde", + "solana-account", + "solana-bls-signatures", + 
"solana-clock", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-hash", + "solana-logger", + "solana-program", + "solana-vote-interface", + "spl-pod", +] + [[package]] name = "solana-wen-restart" version = "3.1.0" @@ -12610,6 +12737,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + [[package]] name = "tikv-jemalloc-sys" version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" diff --git a/Cargo.toml b/Cargo.toml index 2f260e88ba091e..4d97e342cb5e15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -211,6 +211,7 @@ base64 = "0.22.1" bencher = "0.1.5" bincode = "1.3.3" bitflags = { version = "2.9.3" } +bitvec = { version = "1.0.1", features = ["serde"] } blake3 = "1.8.2" borsh = { version = "1.5.7", features = ["derive", "unstable__schema"] } bs58 = { version = "0.5.1", default-features = false } @@ -385,6 +386,7 @@ solana-big-mod-exp = "3.0.0" solana-bincode = "3.0.0" solana-blake3-hasher = "3.0.0" solana-bloom = { path = "bloom", version = "=3.1.0" } +solana-bls-signatures = { version = "0.2.0", features = ["serde"] } solana-bn254 = "3.0.0" solana-borsh = "3.0.0" solana-bpf-loader-program = { path = "programs/bpf_loader", version = "=3.1.0" } @@ -511,6 +513,7 @@ solana-short-vec = "3.0.0" solana-shred-version = "3.0.0" solana-signature = { version = "3.1.0", default-features = false } solana-signer = "3.0.0" +solana-signer-store = "0.1.0" solana-slot-hashes = "3.0.0" solana-slot-history = "3.0.0" solana-stable-layout = "3.0.0" @@ -554,6 +557,7 @@ solana-version = { path = "version", version = "=3.1.0" } solana-vote = { path = "vote", version = "=3.1.0" } solana-vote-interface = "3.0.0" solana-vote-program = { path = "programs/vote", version = "=3.1.0", default-features = false } +solana-votor-messages = { path = 
"votor-messages", version = "=3.1.0" } solana-wen-restart = { path = "wen-restart", version = "=3.1.0" } solana-zk-elgamal-proof-program = { path = "programs/zk-elgamal-proof", version = "=3.1.0" } solana-zk-sdk = "4.0.0" diff --git a/ci/test-miri.sh b/ci/test-miri.sh index 6447af502746bb..8676cabef1f88a 100755 --- a/ci/test-miri.sh +++ b/ci/test-miri.sh @@ -9,7 +9,7 @@ source ci/rust-version.sh nightly _ cargo "+${rust_nightly}" miri test -p solana-unified-scheduler-logic # test big endian branch -_ cargo "+${rust_nightly}" miri test --target s390x-unknown-linux-gnu -p solana-vote -- "vote_state_view" --skip "arbitrary" +_ cargo "+${rust_nightly}" miri test --target s390x-unknown-linux-gnu --no-default-features --features miri -p solana-vote -- "vote_state_view" --skip "arbitrary" # test little endian branch for UB _ cargo "+${rust_nightly}" miri test -p solana-vote -- "vote_state_view" --skip "arbitrary" diff --git a/ledger/Cargo.toml b/ledger/Cargo.toml index fcb6f807e77c4a..8bfd66da4340c7 100644 --- a/ledger/Cargo.toml +++ b/ledger/Cargo.toml @@ -114,6 +114,7 @@ solana-transaction-error = { workspace = true } solana-transaction-status = { workspace = true } solana-vote = { workspace = true } solana-vote-program = { workspace = true } +solana-votor-messages = { workspace = true } static_assertions = { workspace = true } strum = { workspace = true, features = ["derive"] } strum_macros = { workspace = true } diff --git a/ledger/src/leader_schedule_utils.rs b/ledger/src/leader_schedule_utils.rs index 0f9d85c5f0044d..86bbd76c83eb4b 100644 --- a/ledger/src/leader_schedule_utils.rs +++ b/ledger/src/leader_schedule_utils.rs @@ -70,6 +70,27 @@ pub fn first_of_consecutive_leader_slots(slot: Slot) -> Slot { (slot / NUM_CONSECUTIVE_LEADER_SLOTS) * NUM_CONSECUTIVE_LEADER_SLOTS } +/// Returns the last slot in the leader window that contains `slot` +#[inline] +pub fn last_of_consecutive_leader_slots(slot: Slot) -> Slot { + first_of_consecutive_leader_slots(slot) + 
NUM_CONSECUTIVE_LEADER_SLOTS - 1 +} + +/// Returns the index within the leader slot range that contains `slot` +#[inline] +pub fn leader_slot_index(slot: Slot) -> usize { + (slot % NUM_CONSECUTIVE_LEADER_SLOTS) as usize +} + +/// Returns the number of slots left after `slot` in the leader window +/// that contains `slot` +#[inline] +pub fn remaining_slots_in_window(slot: Slot) -> u64 { + NUM_CONSECUTIVE_LEADER_SLOTS + .checked_sub(leader_slot_index(slot) as u64) + .unwrap() +} + #[cfg(test)] mod tests { use { diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs index 41988fa4527ba8..1aee07c2d662a1 100644 --- a/local-cluster/src/local_cluster.rs +++ b/local-cluster/src/local_cluster.rs @@ -284,6 +284,7 @@ impl LocalCluster { node_keypair: node_keypair.insecure_clone(), vote_keypair: vote_keypair.insecure_clone(), stake_keypair: Keypair::new(), + bls_keypair: None, }, stake, )) @@ -314,6 +315,7 @@ impl LocalCluster { &keys_in_genesis, stakes_in_genesis, config.cluster_type, + false, ); genesis_config.accounts.extend( config diff --git a/program-test/src/lib.rs b/program-test/src/lib.rs index 2971fcf529063c..6b440bccb3fefa 100644 --- a/program-test/src/lib.rs +++ b/program-test/src/lib.rs @@ -809,6 +809,7 @@ impl ProgramTest { rent.clone(), ClusterType::Development, std::mem::take(&mut self.genesis_accounts), + None, ); // Remove features tagged to deactivate diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index a44a24e31d7ae3..b22ac97fdf9feb 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -289,6 +289,7 @@ version = "3.1.0" dependencies = [ "anyhow", "bincode", + "bitvec", "bs58", "crossbeam-channel", "dashmap", @@ -303,6 +304,7 @@ dependencies = [ "serde_derive", "solana-accounts-db", "solana-bloom", + "solana-bls-signatures", "solana-clock", "solana-entry", "solana-epoch-schedule", @@ -318,8 +320,12 @@ dependencies = [ "solana-runtime", "solana-signature", "solana-signer", + "solana-signer-store", 
"solana-time-utils", "solana-transaction", + "solana-vote", + "solana-vote-program", + "solana-votor-messages", "thiserror 2.0.16", ] @@ -973,6 +979,19 @@ dependencies = [ "typenum", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "serde", + "tap", + "wyz", +] + [[package]] name = "blake3" version = "1.8.2" @@ -1005,6 +1024,34 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blst" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080" +dependencies = [ + "cc", + "glob", + "threadpool", + "zeroize", +] + +[[package]] +name = "blstrs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a8a8ed6fefbeef4a8c7b460e4110e12c5e22a5b7cf32621aae6ad650c4dcf29" +dependencies = [ + "blst", + "byte-slice-cast", + "ff", + "group", + "pairing", + "rand_core 0.6.4", + "serde", + "subtle", +] + [[package]] name = "borsh" version = "1.5.7" @@ -1084,6 +1131,12 @@ dependencies = [ "serde", ] +[[package]] +name = "byte-slice-cast" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d" + [[package]] name = "bytemuck" version = "1.23.2" @@ -2136,6 +2189,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" dependencies = [ + "bitvec", "rand_core 0.6.4", "subtle", ] @@ -2255,6 +2309,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "funty" +version = "2.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.1.31" @@ -2489,7 +2549,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ "ff", + "rand 0.8.5", "rand_core 0.6.4", + "rand_xorshift", "subtle", ] @@ -4140,6 +4202,15 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "pairing" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fec4625e73cf41ef4bb6846cafa6d44736525f442ba45e407c4a000a13996f" +dependencies = [ + "group", +] + [[package]] name = "parity-tokio-ipc" version = "0.9.0" @@ -4664,6 +4735,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.7.3" @@ -4765,6 +4842,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" @@ -5956,6 +6042,30 @@ dependencies = [ "solana-time-utils", ] +[[package]] +name = "solana-bls-signatures" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "210bb2fc7efc40b34d4e506759c3b1f21360b53d02c2e5ca3601feca7a9fd269" +dependencies = [ + "base64 0.22.1", + "blst", + "blstrs", + "bytemuck", + "cfg_eval", + "ff", + "group", + "pairing", + "rand 0.8.5", + "serde", + "serde_json", + "serde_with", + "solana-signature", + "solana-signer", + "subtle", + "thiserror 2.0.16", +] + [[package]] name = 
"solana-bn254" version = "3.0.0" @@ -7125,6 +7235,7 @@ dependencies = [ "solana-transaction-status", "solana-vote", "solana-vote-program", + "solana-votor-messages", "static_assertions", "strum", "strum_macros", @@ -8017,6 +8128,7 @@ dependencies = [ "solana-account-info", "solana-accounts-db", "solana-address-lookup-table-interface", + "solana-bls-signatures", "solana-bpf-loader-program", "solana-bucket-map", "solana-builtins", @@ -8093,6 +8205,7 @@ dependencies = [ "solana-vote", "solana-vote-interface", "solana-vote-program", + "solana-votor-messages", "spl-generic-token", "static_assertions", "strum", @@ -9089,6 +9202,17 @@ dependencies = [ "solana-transaction-error", ] +[[package]] +name = "solana-signer-store" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36329bba208f0e41954389ae4ad5d973fe15952672cfd71a9b49deb7d2ecbc2f" +dependencies = [ + "bitvec", + "num-derive", + "num-traits", +] + [[package]] name = "solana-slot-hashes" version = "3.0.0" @@ -9163,6 +9287,7 @@ dependencies = [ "solana-sysvar", "solana-transaction-context", "solana-vote-interface", + "solana-vote-program", ] [[package]] @@ -9880,6 +10005,7 @@ dependencies = [ "serde_derive", "solana-account", "solana-bincode", + "solana-bls-signatures", "solana-clock", "solana-hash", "solana-instruction", @@ -9893,6 +10019,7 @@ dependencies = [ "solana-svm-transaction", "solana-transaction", "solana-vote-interface", + "solana-vote-program", "thiserror 2.0.16", ] @@ -9935,6 +10062,7 @@ dependencies = [ "serde_derive", "solana-account", "solana-bincode", + "solana-bls-signatures", "solana-clock", "solana-epoch-schedule", "solana-hash", @@ -9954,6 +10082,25 @@ dependencies = [ "thiserror 2.0.16", ] +[[package]] +name = "solana-votor-messages" +version = "3.1.0" +dependencies = [ + "bitvec", + "bytemuck", + "bytemuck_derive", + "num_enum", + "serde", + "solana-account", + "solana-bls-signatures", + "solana-clock", + "solana-hash", + "solana-logger", + 
"solana-program", + "solana-vote-interface", + "spl-pod", +] + [[package]] name = "solana-wen-restart" version = "3.1.0" @@ -10498,6 +10645,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tar" version = "0.4.44" @@ -10622,6 +10775,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + [[package]] name = "tikv-jemalloc-sys" version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" @@ -11868,6 +12030,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "x509-parser" version = "0.14.0" diff --git a/programs/sbf/tests/programs.rs b/programs/sbf/tests/programs.rs index 836b65ffe33c0b..415ca717a2db33 100644 --- a/programs/sbf/tests/programs.rs +++ b/programs/sbf/tests/programs.rs @@ -1593,6 +1593,7 @@ fn get_stable_genesis_config() -> GenesisConfigInfo { Rent::free(), // most tests don't expect rent ClusterType::Development, vec![], + None, ); genesis_config.creation_time = Duration::ZERO.as_secs() as UnixTimestamp; diff --git a/programs/stake/Cargo.toml b/programs/stake/Cargo.toml index 7f2573e16c1042..543d373af7903f 100644 --- a/programs/stake/Cargo.toml +++ b/programs/stake/Cargo.toml @@ -32,6 +32,7 @@ solana-stake-interface = { workspace = true, features = ["bincode"] } solana-sysvar = { workspace = 
true } solana-transaction-context = { workspace = true, features = ["bincode"] } solana-vote-interface = { workspace = true, features = ["bincode"] } +solana-vote-program = { workspace = true } [lints] workspace = true diff --git a/programs/stake/src/stake_state.rs b/programs/stake/src/stake_state.rs index 447afecf788fac..3716b6eb9e24db 100644 --- a/programs/stake/src/stake_state.rs +++ b/programs/stake/src/stake_state.rs @@ -12,7 +12,8 @@ use { solana_rent::Rent, solana_sdk_ids::stake::id, solana_stake_interface::stake_flags::StakeFlags, - solana_vote_interface::state::VoteStateV3, + solana_vote_interface::state::{VoteStateV3, VoteStateV4}, + solana_vote_program::vote_state::VoteStateVersionsMock, }; // utility function, used by Stakes, tests @@ -52,6 +53,23 @@ fn new_stake( } } +fn new_stake_v4( + stake: u64, + voter_pubkey: &Pubkey, + vote_state: &VoteStateV4, + activation_epoch: Epoch, +) -> Stake { + let credits_observed = if vote_state.epoch_credits.is_empty() { + 0 + } else { + vote_state.epoch_credits.last().unwrap().1 + }; + Stake { + delegation: Delegation::new(voter_pubkey, stake, activation_epoch), + credits_observed, + } +} + // genesis investor accounts pub fn create_lockup_stake_account( authorized: &Authorized, @@ -96,6 +114,23 @@ pub fn create_account( ) } +pub fn create_account_v4( + authorized: &Pubkey, + voter_pubkey: &Pubkey, + vote_account: &AccountSharedData, + rent: &Rent, + lamports: u64, +) -> AccountSharedData { + do_create_account_v4( + authorized, + voter_pubkey, + vote_account, + rent, + lamports, + Epoch::MAX, + ) +} + fn do_create_account( authorized: &Pubkey, voter_pubkey: &Pubkey, @@ -129,3 +164,43 @@ fn do_create_account( stake_account } + +fn do_create_account_v4( + authorized: &Pubkey, + voter_pubkey: &Pubkey, + vote_account: &AccountSharedData, + rent: &Rent, + lamports: u64, + activation_epoch: Epoch, +) -> AccountSharedData { + let mut stake_account = AccountSharedData::new(lamports, StakeStateV2::size_of(), &id()); + + // 
Custom deserialize here since VoteStateV4 does not provide it yet. + let vote_state_version: VoteStateVersionsMock = + bincode::deserialize_from(vote_account.data()).expect("vote_state"); + let vote_state: VoteStateV4 = match vote_state_version { + VoteStateVersionsMock::V4(vote_state) => *vote_state, + _ => panic!("Unexpected vote state version"), + }; + + let rent_exempt_reserve = rent.minimum_balance(stake_account.data().len()); + + stake_account + .set_state(&StakeStateV2::Stake( + Meta { + authorized: Authorized::auto(authorized), + rent_exempt_reserve, + ..Meta::default() + }, + new_stake_v4( + lamports - rent_exempt_reserve, // underflow is an error, is basically: assert!(lamports > rent_exempt_reserve); + voter_pubkey, + &vote_state, + activation_epoch, + ), + StakeFlags::empty(), + )) + .expect("set_state"); + + stake_account +} diff --git a/programs/vote/Cargo.toml b/programs/vote/Cargo.toml index cbacb867fc1087..fcf96d3bca9845 100644 --- a/programs/vote/Cargo.toml +++ b/programs/vote/Cargo.toml @@ -36,6 +36,7 @@ serde = { workspace = true } serde_derive = { workspace = true } solana-account = { workspace = true } solana-bincode = { workspace = true } +solana-bls-signatures = { workspace = true } solana-clock = { workspace = true } solana-epoch-schedule = { workspace = true } solana-frozen-abi = { workspace = true, optional = true, features = [ diff --git a/programs/vote/src/vote_state/mod.rs b/programs/vote/src/vote_state/mod.rs index 7bd7c26ca5b2db..f3646192845ad5 100644 --- a/programs/vote/src/vote_state/mod.rs +++ b/programs/vote/src/vote_state/mod.rs @@ -1,9 +1,13 @@ //! Vote state, vote program //! 
Receive and processes votes from validators -pub use solana_vote_interface::state::{vote_state_versions::*, *}; +pub use solana_vote_interface::{ + authorized_voters::AuthorizedVoters, + state::{vote_state_v4::VoteStateV4, vote_state_versions::*, *}, +}; use { log::*, solana_account::{AccountSharedData, ReadableAccount, WritableAccount}, + solana_bls_signatures::{Pubkey as BLSPubkey, PubkeyCompressed}, solana_clock::{Clock, Epoch, Slot}, solana_epoch_schedule::EpochSchedule, solana_hash::Hash, @@ -19,6 +23,17 @@ use { }, }; +// This is a hack because VoteStateVersions does not currently include +// VoteStateV4, so we fake/duplicate it here to line up with how we will try and +// deserialize into VoteStateFrame later. +#[derive(Debug, PartialEq, Eq, Clone, serde_derive::Deserialize, serde_derive::Serialize)] +pub enum VoteStateVersionsMock { + V0_23_5(Box), + V1_14_11(Box), + V3(Box), + V4(Box), +} + // utility function, used by Stakes, tests pub fn from(account: &T) -> Option { VoteStateV3::deserialize(account.data()).ok() @@ -1049,6 +1064,39 @@ pub fn create_account_with_authorized( vote_account } +pub fn create_account_with_authorized_v4( + node_pubkey: &Pubkey, + authorized_voter: &Pubkey, + authorized_withdrawer: &Pubkey, + commission: u16, + lamports: u64, + bls_pubkey: &BLSPubkey, +) -> AccountSharedData { + // This size seemed in the ballpark??? <(^_^)> + let size = 4096; + let mut vote_account = AccountSharedData::new(lamports, size, &id()); + + let authorized_voters = AuthorizedVoters::new(0, *authorized_voter); + let bls_pubkey_compressed: PubkeyCompressed = bls_pubkey.try_into().unwrap(); + let vote_state = VoteStateV4 { + node_pubkey: *node_pubkey, + authorized_withdrawer: *authorized_withdrawer, + inflation_rewards_commission_bps: commission, + authorized_voters, + bls_pubkey_compressed: Some(bls_pubkey_compressed.0), + ..VoteStateV4::default() + }; + + // Custom serialize because VoteStateV4 does not provide it yet. 
+ assert!(bincode::serialize_into( + vote_account.data_as_mut_slice(), + &VoteStateVersionsMock::V4(Box::new(vote_state)) + ) + .is_ok()); + + vote_account +} + // create_account() should be removed, use create_account_with_authorized() instead pub fn create_account( vote_pubkey: &Pubkey, diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 03506b3df19b49..f43a6b8bba6626 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -28,6 +28,7 @@ frozen-abi = [ "dep:solana-frozen-abi-macro", "solana-account/frozen-abi", "solana-accounts-db/frozen-abi", + "solana-bls-signatures/frozen-abi", "solana-compute-budget/frozen-abi", "solana-cost-model/frozen-abi", "solana-epoch-schedule/frozen-abi", @@ -90,6 +91,7 @@ solana-account = { workspace = true } solana-account-info = { workspace = true } solana-accounts-db = { workspace = true } solana-address-lookup-table-interface = { workspace = true } +solana-bls-signatures = { workspace = true } solana-bpf-loader-program = { workspace = true } solana-bucket-map = { workspace = true } solana-builtins = { workspace = true } @@ -172,6 +174,7 @@ solana-version = { workspace = true } solana-vote = { workspace = true } solana-vote-interface = { workspace = true } solana-vote-program = { workspace = true } +solana-votor-messages = { workspace = true } spl-generic-token = { workspace = true } static_assertions = { workspace = true } strum = { workspace = true, features = ["derive"] } diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index ee8ab5c5527cb7..32c4b1841222a4 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -5548,6 +5548,10 @@ impl Bank { &self.fee_structure } + pub fn parent_block_id(&self) -> Option { + self.parent().and_then(|p| p.block_id()) + } + pub fn block_id(&self) -> Option { *self.block_id.read().unwrap() } diff --git a/runtime/src/bank/serde_snapshot.rs b/runtime/src/bank/serde_snapshot.rs index 97a6c0a66dcf2f..6d64631b53c23d 100644 --- a/runtime/src/bank/serde_snapshot.rs +++ 
b/runtime/src/bank/serde_snapshot.rs @@ -3,7 +3,7 @@ mod tests { use { crate::{ bank::{test_utils as bank_test_utils, Bank}, - epoch_stakes::{EpochAuthorizedVoters, NodeIdToVoteAccounts, VersionedEpochStakes}, + epoch_stakes::VersionedEpochStakes, genesis_utils::activate_all_features, runtime_config::RuntimeConfig, serde_snapshot::{self, ExtraFieldsToSerialize, SnapshotStreams}, @@ -13,7 +13,6 @@ mod tests { create_tmp_accounts_dir_for_tests, get_storages_to_serialize, StorageAndNextAccountsFileId, }, - stakes::{SerdeStakesToStakeFormat, Stakes}, }, solana_accounts_db::{ account_storage::AccountStorageMap, @@ -27,8 +26,10 @@ mod tests { solana_genesis_config::create_genesis_config, solana_nohash_hasher::BuildNoHashHasher, solana_pubkey::Pubkey, - solana_stake_interface::state::Stake, + solana_signer::Signer, + solana_vote::vote_account::VoteAccount, std::{ + collections::HashMap, io::{BufReader, BufWriter, Cursor}, mem, ops::RangeFull, @@ -182,11 +183,13 @@ mod tests { bank.flush_accounts_cache_slot_for_tests() } - #[test_case(StorageAccess::Mmap)] - #[test_case(StorageAccess::File)] - fn test_extra_fields_eof(storage_access: StorageAccess) { + #[test_case(StorageAccess::Mmap, false)] + #[test_case(StorageAccess::File, false)] + #[test_case(StorageAccess::Mmap, true)] + #[test_case(StorageAccess::File, true)] + fn test_extra_fields_eof(storage_access: StorageAccess, is_alpenglow: bool) { solana_logger::setup(); - let (genesis_config, _) = create_genesis_config(500); + let (genesis_config, keypair) = create_genesis_config(500); let bank0 = Arc::new(Bank::new_for_tests(&genesis_config)); bank0.squash(); @@ -194,20 +197,24 @@ mod tests { bank.freeze(); add_root_and_flush_write_cache(&bank0); + let epoch_with_real_stake = 42; + // Set extra fields bank.fee_rate_governor.lamports_per_signature = 7000; // Note that epoch_stakes already has two epoch stakes entries for epochs 0 and 1 // which will also be serialized to the versioned epoch stakes extra field. 
Those // entries are of type Stakes so add a new entry for Stakes. - bank.epoch_stakes.insert( - 42, - VersionedEpochStakes::Current { - stakes: SerdeStakesToStakeFormat::Stake(Stakes::::default()), - total_stake: 42, - node_id_to_vote_accounts: Arc::::default(), - epoch_authorized_voters: Arc::::default(), - }, - ); + // Give some real stake distribution and generate real VersionedEpochStakes, to + // make sure bls_pubkey_to_rank_map is populated correctly after deserialize. + let vote_account = if is_alpenglow { + VoteAccount::new_random_vote_state_v4() + } else { + VoteAccount::new_random() + }; + let vote_accounts_hash_map = HashMap::from([(keypair.pubkey(), (100, vote_account))]); + let stakes = + VersionedEpochStakes::new_for_tests(vote_accounts_hash_map, epoch_with_real_stake); + bank.epoch_stakes.insert(epoch_with_real_stake, stakes); assert_eq!(bank.epoch_stakes.len(), 3); // Serialize @@ -252,6 +259,24 @@ mod tests { ) .unwrap(); + // Specifically check that bls_pubkey_rank_map is equal, you do need to call this + // before checking epoch_stakes because this needs to be populated. 
+ for (epoch, epoch_stakes) in dbank.epoch_stakes.iter() { + let bls_pubkey_to_rank_map = epoch_stakes.bls_pubkey_to_rank_map(); + if is_alpenglow && *epoch == epoch_with_real_stake { + assert!(!bls_pubkey_to_rank_map.is_empty()); + } else { + assert!(bls_pubkey_to_rank_map.is_empty()); + } + assert_eq!( + bls_pubkey_to_rank_map, + bank.epoch_stakes + .get(epoch) + .expect("Expecting epoch stakes for {epoch}") + .bls_pubkey_to_rank_map() + ); + } + assert_eq!(bank.epoch_stakes, dbank.epoch_stakes); assert_eq!( bank.fee_rate_governor.lamports_per_signature, diff --git a/runtime/src/epoch_stakes.rs b/runtime/src/epoch_stakes.rs index c61fa0304f1756..d9f9b80e7466ba 100644 --- a/runtime/src/epoch_stakes.rs +++ b/runtime/src/epoch_stakes.rs @@ -1,15 +1,75 @@ use { crate::stakes::SerdeStakesToStakeFormat, serde::{Deserialize, Serialize}, + solana_bls_signatures::Pubkey as BLSPubkey, solana_clock::Epoch, solana_pubkey::Pubkey, solana_vote::vote_account::VoteAccountsHashMap, - std::{collections::HashMap, sync::Arc}, + std::{ + collections::HashMap, + sync::{Arc, OnceLock}, + }, }; pub type NodeIdToVoteAccounts = HashMap; pub type EpochAuthorizedVoters = HashMap; +#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "frozen-abi", derive(AbiExample))] +#[cfg_attr(feature = "dev-context-only-utils", derive(PartialEq))] +pub struct BLSPubkeyToRankMap { + rank_map: HashMap, + //TODO(wen): We can make SortedPubkeys a Vec after we remove ed25519 + // pubkey from certificate pool. 
+ sorted_pubkeys: Vec<(Pubkey, BLSPubkey)>, +} + +impl BLSPubkeyToRankMap { + pub fn new(epoch_vote_accounts_hash_map: &VoteAccountsHashMap) -> Self { + let mut pubkey_stake_pair_vec: Vec<(Pubkey, BLSPubkey, u64)> = epoch_vote_accounts_hash_map + .iter() + .filter_map(|(pubkey, (stake, account))| { + if *stake > 0 { + account + .bls_pubkey() + .map(|bls_pubkey| (*pubkey, bls_pubkey, *stake)) + } else { + None + } + }) + .collect(); + pubkey_stake_pair_vec.sort_by(|(_, a_pubkey, a_stake), (_, b_pubkey, b_stake)| { + b_stake.cmp(a_stake).then(a_pubkey.cmp(b_pubkey)) + }); + let mut sorted_pubkeys = Vec::new(); + let mut bls_pubkey_to_rank_map = HashMap::new(); + for (rank, (pubkey, bls_pubkey, _stake)) in pubkey_stake_pair_vec.into_iter().enumerate() { + sorted_pubkeys.push((pubkey, bls_pubkey)); + bls_pubkey_to_rank_map.insert(bls_pubkey, rank as u16); + } + Self { + rank_map: bls_pubkey_to_rank_map, + sorted_pubkeys, + } + } + + pub fn is_empty(&self) -> bool { + self.rank_map.is_empty() + } + + pub fn len(&self) -> usize { + self.rank_map.len() + } + + pub fn get_rank(&self, bls_pubkey: &BLSPubkey) -> Option<&u16> { + self.rank_map.get(bls_pubkey) + } + + pub fn get_pubkey(&self, index: usize) -> Option<&(Pubkey, BLSPubkey)> { + self.sorted_pubkeys.get(index) + } +} + #[cfg_attr(feature = "frozen-abi", derive(AbiExample))] #[derive(Clone, Serialize, Debug, Deserialize, Default, PartialEq, Eq)] pub struct NodeVoteAccounts { @@ -26,6 +86,8 @@ pub enum VersionedEpochStakes { total_stake: u64, node_id_to_vote_accounts: Arc, epoch_authorized_voters: Arc, + #[serde(skip)] + bls_pubkey_to_rank_map: OnceLock>, }, } @@ -39,6 +101,7 @@ impl VersionedEpochStakes { total_stake, node_id_to_vote_accounts: Arc::new(node_id_to_vote_accounts), epoch_authorized_voters: Arc::new(epoch_authorized_voters), + bls_pubkey_to_rank_map: OnceLock::new(), } } @@ -105,6 +168,19 @@ impl VersionedEpochStakes { } } + pub fn bls_pubkey_to_rank_map(&self) -> &Arc { + match self { + Self::Current { 
+ bls_pubkey_to_rank_map, + .. + } => bls_pubkey_to_rank_map.get_or_init(|| { + Arc::new(BLSPubkeyToRankMap::new( + self.stakes().vote_accounts().as_ref(), + )) + }), + } + } + pub fn vote_account_stake(&self, vote_account: &Pubkey) -> u64 { self.stakes() .vote_accounts() diff --git a/runtime/src/genesis_utils.rs b/runtime/src/genesis_utils.rs index 31c24b3906686a..837dabb4b6733b 100644 --- a/runtime/src/genesis_utils.rs +++ b/runtime/src/genesis_utils.rs @@ -2,6 +2,7 @@ use { agave_feature_set::{FeatureSet, FEATURE_NAMES}, log::*, solana_account::{Account, AccountSharedData}, + solana_bls_signatures::Keypair as BLSKeypair, solana_cluster_type::ClusterType, solana_feature_gate_interface::{self as feature, Feature}, solana_fee_calculator::FeeRateGovernor, @@ -16,6 +17,7 @@ use { solana_stake_program::stake_state, solana_system_interface::program as system_program, solana_vote_program::vote_state, + solana_votor_messages::consensus_message::BLS_KEYPAIR_DERIVE_SEED, std::borrow::Borrow, }; @@ -50,14 +52,18 @@ pub struct ValidatorVoteKeypairs { pub node_keypair: Keypair, pub vote_keypair: Keypair, pub stake_keypair: Keypair, + pub bls_keypair: Option, } impl ValidatorVoteKeypairs { pub fn new(node_keypair: Keypair, vote_keypair: Keypair, stake_keypair: Keypair) -> Self { + let bls_keypair = + BLSKeypair::derive_from_signer(&vote_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); Self { node_keypair, vote_keypair, stake_keypair, + bls_keypair: Some(bls_keypair), } } @@ -66,6 +72,7 @@ impl ValidatorVoteKeypairs { node_keypair: Keypair::new(), vote_keypair: Keypair::new(), stake_keypair: Keypair::new(), + bls_keypair: Some(BLSKeypair::new()), } } } @@ -99,6 +106,21 @@ pub fn create_genesis_config_with_vote_accounts( voting_keypairs, stakes, ClusterType::Development, + false, + ) +} + +pub fn create_genesis_config_with_vote_accounts_alpenglow( + mint_lamports: u64, + voting_keypairs: &[impl Borrow], + stakes: Vec, +) -> GenesisConfigInfo { + 
create_genesis_config_with_vote_accounts_and_cluster_type( + mint_lamports, + voting_keypairs, + stakes, + ClusterType::Development, + true, ) } @@ -107,6 +129,7 @@ pub fn create_genesis_config_with_vote_accounts_and_cluster_type( voting_keypairs: &[impl Borrow], stakes: Vec, cluster_type: ClusterType, + alpenglow: bool, ) -> GenesisConfigInfo { assert!(!voting_keypairs.is_empty()); assert_eq!(voting_keypairs.len(), stakes.len()); @@ -115,6 +138,11 @@ pub fn create_genesis_config_with_vote_accounts_and_cluster_type( let voting_keypair = voting_keypairs[0].borrow().vote_keypair.insecure_clone(); let validator_pubkey = voting_keypairs[0].borrow().node_keypair.pubkey(); + let bls_keypair = if alpenglow { + voting_keypairs[0].borrow().bls_keypair.clone() + } else { + None + }; let genesis_config = create_genesis_config_with_leader_ex( mint_lamports, &mint_keypair.pubkey(), @@ -127,6 +155,7 @@ pub fn create_genesis_config_with_vote_accounts_and_cluster_type( Rent::free(), // most tests don't expect rent cluster_type, vec![], + bls_keypair, ); let mut genesis_config_info = GenesisConfigInfo { @@ -143,14 +172,41 @@ pub fn create_genesis_config_with_vote_accounts_and_cluster_type( // Create accounts let node_account = Account::new(VALIDATOR_LAMPORTS, 0, &system_program::id()); - let vote_account = vote_state::create_account(&vote_pubkey, &node_pubkey, 0, *stake); - let stake_account = Account::from(stake_state::create_account( - &stake_pubkey, - &vote_pubkey, - &vote_account, - &genesis_config_info.genesis_config.rent, - *stake, - )); + let vote_account = if alpenglow { + let bls_pubkey = validator_voting_keypairs + .borrow() + .bls_keypair + .clone() + .unwrap() + .public; + vote_state::create_account_with_authorized_v4( + &node_pubkey, + &vote_pubkey, + &vote_pubkey, + 0, + *stake, + &bls_pubkey, + ) + } else { + vote_state::create_account(&vote_pubkey, &node_pubkey, 0, *stake) + }; + let stake_account = if alpenglow { + Account::from(stake_state::create_account_v4( + 
&stake_pubkey, + &vote_pubkey, + &vote_account, + &genesis_config_info.genesis_config.rent, + *stake, + )) + } else { + Account::from(stake_state::create_account( + &stake_pubkey, + &vote_pubkey, + &vote_account, + &genesis_config_info.genesis_config.rent, + *stake, + )) + }; let vote_account = Account::from(vote_account); @@ -210,6 +266,7 @@ pub fn create_genesis_config_with_leader_with_mint_keypair( Rent::free(), // most tests don't expect rent ClusterType::Development, vec![], + None, ); GenesisConfigInfo { @@ -279,21 +336,42 @@ pub fn create_genesis_config_with_leader_ex_no_features( rent: Rent, cluster_type: ClusterType, mut initial_accounts: Vec<(Pubkey, AccountSharedData)>, + bls_keypair: Option, ) -> GenesisConfig { - let validator_vote_account = vote_state::create_account( - validator_vote_account_pubkey, - validator_pubkey, - 0, - validator_stake_lamports, - ); + let validator_vote_account = match &bls_keypair { + Some(bls_keypair) => vote_state::create_account_with_authorized_v4( + validator_pubkey, + validator_vote_account_pubkey, + validator_vote_account_pubkey, + 0, + validator_stake_lamports, + &bls_keypair.public, + ), + None => vote_state::create_account( + validator_vote_account_pubkey, + validator_pubkey, + 0, + validator_stake_lamports, + ), + }; - let validator_stake_account = stake_state::create_account( - validator_stake_account_pubkey, - validator_vote_account_pubkey, - &validator_vote_account, - &rent, - validator_stake_lamports, - ); + let validator_stake_account = if bls_keypair.is_some() { + stake_state::create_account_v4( + validator_stake_account_pubkey, + validator_vote_account_pubkey, + &validator_vote_account, + &rent, + validator_stake_lamports, + ) + } else { + stake_state::create_account( + validator_stake_account_pubkey, + validator_vote_account_pubkey, + &validator_vote_account, + &rent, + validator_stake_lamports, + ) + }; initial_accounts.push(( *mint_pubkey, @@ -348,6 +426,7 @@ pub fn create_genesis_config_with_leader_ex( 
rent: Rent, cluster_type: ClusterType, initial_accounts: Vec<(Pubkey, AccountSharedData)>, + bls_keypair: Option, ) -> GenesisConfig { let mut genesis_config = create_genesis_config_with_leader_ex_no_features( mint_lamports, @@ -361,6 +440,7 @@ pub fn create_genesis_config_with_leader_ex( rent, cluster_type, initial_accounts, + bls_keypair, ); if genesis_config.cluster_type == ClusterType::Development { diff --git a/runtime/src/stakes.rs b/runtime/src/stakes.rs index 3253dbe6aecd85..28134dc0a04d58 100644 --- a/runtime/src/stakes.rs +++ b/runtime/src/stakes.rs @@ -91,7 +91,12 @@ impl StakesCache { } debug_assert_ne!(account.lamports(), 0u64); if solana_vote_program::check_id(owner) { - if VoteStateVersions::is_correct_size_and_initialized(account.data()) { + // is_correct_size_and_initialized doesn't understand VoteStateV4 + // yet, so we add the data len check as a proxy for "this must be a + // VoteStateV4 account" + if VoteStateVersions::is_correct_size_and_initialized(account.data()) + || account.data().len() == 4096 + { match VoteAccount::try_from(account.to_account_shared_data()) { Ok(vote_account) => { // drop the old account after releasing the lock diff --git a/test-validator/src/lib.rs b/test-validator/src/lib.rs index cb40bea40f9aa0..e33c51d11ecc95 100644 --- a/test-validator/src/lib.rs +++ b/test-validator/src/lib.rs @@ -945,6 +945,7 @@ impl TestValidator { config.rent.clone(), solana_cluster_type::ClusterType::Development, accounts.into_iter().collect(), + None, ); genesis_config.epoch_schedule = config .epoch_schedule diff --git a/vote/Cargo.toml b/vote/Cargo.toml index 53ad36bc0b764b..c9d5d0577382e2 100644 --- a/vote/Cargo.toml +++ b/vote/Cargo.toml @@ -17,8 +17,15 @@ crate-type = ["lib"] name = "solana_vote" [features] -dev-context-only-utils = ["dep:rand", "dep:bincode"] -frozen-abi = ["dep:solana-frozen-abi", "dep:solana-frozen-abi-macro"] +default = ["bls"] +dev-context-only-utils = ["dep:bincode", "dep:rand", "dep:solana-vote-program"] 
+frozen-abi = [ + "dep:solana-frozen-abi", + "dep:solana-frozen-abi-macro", + "solana-bls-signatures/frozen-abi", +] +bls = ["solana-bls-signatures"] +miri = [] [dependencies] bincode = { workspace = true, optional = true } @@ -29,6 +36,7 @@ serde = { workspace = true, features = ["rc"] } serde_derive = { workspace = true } solana-account = { workspace = true, features = ["bincode"] } solana-bincode = { workspace = true } +solana-bls-signatures = { workspace = true, optional = true } solana-clock = { workspace = true } solana-frozen-abi = { workspace = true, optional = true, features = [ "frozen-abi", @@ -48,6 +56,7 @@ solana-signer = { workspace = true } solana-svm-transaction = { workspace = true } solana-transaction = { workspace = true, features = ["bincode"] } solana-vote-interface = { workspace = true, features = ["bincode"] } +solana-vote-program = { workspace = true, optional = true } thiserror = { workspace = true } [dev-dependencies] diff --git a/vote/src/vote_account.rs b/vote/src/vote_account.rs index 9c150c581108fe..4dd62af2acdf44 100644 --- a/vote/src/vote_account.rs +++ b/vote/src/vote_account.rs @@ -1,3 +1,5 @@ +#[cfg(not(all(miri, target_endian = "big")))] +use solana_bls_signatures::{Pubkey as BLSPubkey, PubkeyCompressed as BLSPubkeyCompressed}; use { crate::vote_state_view::VoteStateView, itertools::Itertools, @@ -124,6 +126,50 @@ impl VoteAccount { VoteAccount::try_from(account).unwrap() } + + #[cfg(feature = "dev-context-only-utils")] + pub fn new_random_vote_state_v4() -> VoteAccount { + use { + rand::Rng as _, + solana_bls_signatures::{Keypair as BLSKeypair, PubkeyCompressed}, + solana_vote_interface::state::{LandedVote, Lockout, VoteStateV4}, + solana_vote_program::vote_state::{AuthorizedVoters, VoteStateVersionsMock}, + std::collections::VecDeque, + }; + + let mut rng = rand::thread_rng(); + let bls_pubkey = BLSKeypair::new(); + let bls_pubkey_compressed: PubkeyCompressed = bls_pubkey.public.try_into().unwrap(); + + let target_vote_state = 
VoteStateV4 { + authorized_voters: AuthorizedVoters::new(0, Pubkey::new_unique()), + epoch_credits: vec![(1, 2, 3)], + bls_pubkey_compressed: Some(bls_pubkey_compressed.0), + votes: VecDeque::from([LandedVote { + latency: 0, + lockout: Lockout::default(), + }]), + root_slot: Some(42), + ..VoteStateV4::default() + }; + let state = VoteStateVersionsMock::V4(Box::new(target_vote_state)); + let account = AccountSharedData::new_data( + rng.gen(), // lamports + &state, + &solana_sdk_ids::vote::id(), // owner + ) + .unwrap(); + + VoteAccount::try_from(account).unwrap() + } + + #[cfg(not(all(miri, target_endian = "big")))] + pub fn bls_pubkey(&self) -> Option { + let bls_pubkey_compressed = self.0.vote_state_view.bls_pubkey_compressed()?; + let bls_pubkey_compressed = BLSPubkeyCompressed(bls_pubkey_compressed); + let bls_pubkey = BLSPubkey::try_from(bls_pubkey_compressed).unwrap(); + Some(bls_pubkey) + } } impl VoteAccounts { diff --git a/votor-messages/Cargo.toml b/votor-messages/Cargo.toml new file mode 100644 index 00000000000000..ef8ce34f4b88ca --- /dev/null +++ b/votor-messages/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "solana-votor-messages" +description = "Blockchain, Rebuilt for Scale" +documentation = "https://docs.rs/solana-votor-messages" +readme = "../README.md" +version = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +license = { workspace = true } +edition = { workspace = true } + +[features] +frozen-abi = [ + "dep:solana-frozen-abi", + "dep:solana-frozen-abi-macro", + "solana-bls-signatures/frozen-abi", +] + +[dependencies] +bitvec = { workspace = true } +bytemuck = { workspace = true } +bytemuck_derive = { workspace = true } +num_enum = { workspace = true } +serde = { workspace = true } +solana-account = { workspace = true } +solana-bls-signatures = { workspace = true, features = [ + "bytemuck", "solana-signer-derive", +] } +solana-clock = { workspace = true } +solana-frozen-abi 
= { workspace = true, optional = true, features = [ + "frozen-abi", +] } +solana-frozen-abi-macro = { workspace = true, optional = true, features = [ + "frozen-abi", +] } +solana-hash = { workspace = true } +solana-logger = { workspace = true } +solana-program = { workspace = true } +solana-vote-interface = { workspace = true } +spl-pod = { workspace = true } + +[lints] +workspace = true diff --git a/votor-messages/src/consensus_message.rs b/votor-messages/src/consensus_message.rs new file mode 100644 index 00000000000000..118cad0a28b2dc --- /dev/null +++ b/votor-messages/src/consensus_message.rs @@ -0,0 +1,178 @@ +//! Put BLS message here so all clients can agree on the format +use { + crate::vote::Vote, + serde::{Deserialize, Serialize}, + solana_bls_signatures::Signature as BLSSignature, + solana_clock::Slot, + solana_hash::Hash, +}; + +/// The seed used to derive the BLS keypair +pub const BLS_KEYPAIR_DERIVE_SEED: &[u8; 9] = b"alpenglow"; + +/// Block, a (slot, hash) tuple +pub type Block = (Slot, Hash); + +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +/// BLS vote message, we need rank to look up pubkey +pub struct VoteMessage { + /// The vote + pub vote: Vote, + /// The signature + pub signature: BLSSignature, + /// The rank of the validator + pub rank: u16, +} + +/// Certificate details +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)] +pub enum Certificate { + /// Finalize certificate + Finalize(Slot), + /// Fast finalize certificate + FinalizeFast(Slot, Hash), + /// Notarize certificate + Notarize(Slot, Hash), + /// Notarize fallback certificate + NotarizeFallback(Slot, Hash), + /// Skip certificate + Skip(Slot), +} + +/// Certificate type +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)] +pub enum CertificateType { + /// Finalize certificate + Finalize, + /// Fast finalize certificate + FinalizeFast, + /// Notarize certificate + Notarize, + /// 
Notarize fallback certificate + NotarizeFallback, + /// Skip certificate + Skip, +} + +impl Certificate { + /// Create a new certificate ID from a CertificateType, Option, and Option + pub fn new(certificate_type: CertificateType, slot: Slot, hash: Option) -> Self { + match (certificate_type, hash) { + (CertificateType::Finalize, None) => Certificate::Finalize(slot), + (CertificateType::FinalizeFast, Some(hash)) => Certificate::FinalizeFast(slot, hash), + (CertificateType::Notarize, Some(hash)) => Certificate::Notarize(slot, hash), + (CertificateType::NotarizeFallback, Some(hash)) => { + Certificate::NotarizeFallback(slot, hash) + } + (CertificateType::Skip, None) => Certificate::Skip(slot), + _ => panic!("Invalid certificate type and hash combination"), + } + } + + /// Get the certificate type + pub fn certificate_type(&self) -> CertificateType { + match self { + Certificate::Finalize(_) => CertificateType::Finalize, + Certificate::FinalizeFast(_, _) => CertificateType::FinalizeFast, + Certificate::Notarize(_, _) => CertificateType::Notarize, + Certificate::NotarizeFallback(_, _) => CertificateType::NotarizeFallback, + Certificate::Skip(_) => CertificateType::Skip, + } + } + + /// Get the slot of the certificate + pub fn slot(&self) -> Slot { + match self { + Certificate::Finalize(slot) + | Certificate::FinalizeFast(slot, _) + | Certificate::Notarize(slot, _) + | Certificate::NotarizeFallback(slot, _) + | Certificate::Skip(slot) => *slot, + } + } + + /// Is this a fast finalize certificate? + pub fn is_fast_finalization(&self) -> bool { + matches!(self, Self::FinalizeFast(_, _)) + } + + /// Is this a finalize / fast finalize certificate? + pub fn is_finalization(&self) -> bool { + matches!(self, Self::Finalize(_) | Self::FinalizeFast(_, _)) + } + + /// Is this a notarize fallback certificate? + pub fn is_notarize_fallback(&self) -> bool { + matches!(self, Self::NotarizeFallback(_, _)) + } + + /// Is this a skip certificate? 
+ pub fn is_skip(&self) -> bool { + matches!(self, Self::Skip(_)) + } + + /// Gets the block associated with this certificate, if present + pub fn to_block(self) -> Option { + match self { + Certificate::Finalize(_) | Certificate::Skip(_) => None, + Certificate::Notarize(slot, block_id) + | Certificate::NotarizeFallback(slot, block_id) + | Certificate::FinalizeFast(slot, block_id) => Some((slot, block_id)), + } + } + + /// "Critical" certs are the certificates necessary to make progress + /// We do not consider the next slot for voting until we've seen either + /// a Skip certificate or a NotarizeFallback certificate for ParentReady + /// + /// Note: Notarization certificates necessarily generate a + /// NotarizeFallback certificate as well + pub fn is_critical(&self) -> bool { + matches!(self, Self::NotarizeFallback(_, _) | Self::Skip(_)) + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +/// BLS vote message, we need rank to look up pubkey +pub struct CertificateMessage { + /// The certificate + pub certificate: Certificate, + /// The signature + pub signature: BLSSignature, + /// The bitmap for validators, see solana-signer-store for encoding format + pub bitmap: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[allow(clippy::large_enum_variant)] +/// BLS message data in Alpenglow +pub enum ConsensusMessage { + /// Vote message, with the vote and the rank of the validator. 
+ Vote(VoteMessage), + /// Certificate message + Certificate(CertificateMessage), +} + +impl ConsensusMessage { + /// Create a new vote message + pub fn new_vote(vote: Vote, signature: BLSSignature, rank: u16) -> Self { + Self::Vote(VoteMessage { + vote, + signature, + rank, + }) + } + + /// Create a new certificate message + pub fn new_certificate( + certificate: Certificate, + bitmap: Vec, + signature: BLSSignature, + ) -> Self { + Self::Certificate(CertificateMessage { + certificate, + signature, + bitmap, + }) + } +} diff --git a/votor-messages/src/lib.rs b/votor-messages/src/lib.rs new file mode 100644 index 00000000000000..c78d22cf828c93 --- /dev/null +++ b/votor-messages/src/lib.rs @@ -0,0 +1,12 @@ +//! Alpenglow Vote program +#![cfg_attr(feature = "frozen-abi", feature(min_specialization))] +#![deny(missing_docs)] + +pub mod consensus_message; +pub mod vote; + +#[cfg_attr(feature = "frozen-abi", macro_use)] +#[cfg(feature = "frozen-abi")] +extern crate solana_frozen_abi_macro; + +solana_program::declare_id!("Vote222222222222222222222222222222222222222"); diff --git a/votor-messages/src/vote.rs b/votor-messages/src/vote.rs new file mode 100644 index 00000000000000..939d3ad5a562b3 --- /dev/null +++ b/votor-messages/src/vote.rs @@ -0,0 +1,263 @@ +//! 
Vote data types for use by clients +use { + serde::{Deserialize, Serialize}, + solana_hash::Hash, + solana_program::clock::Slot, +}; + +/// Enum that clients can use to parse and create the vote +/// structures expected by the program +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample, AbiEnumVisitor), + frozen_abi(digest = "FRn4f3PTtbvw3uv2r3qF8K49a5UF4QqDuVdyeshtipTW") +)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +pub enum Vote { + /// A notarization vote + Notarize(NotarizationVote), + /// A finalization vote + Finalize(FinalizationVote), + /// A skip vote + Skip(SkipVote), + /// A notarization fallback vote + NotarizeFallback(NotarizationFallbackVote), + /// A skip fallback vote + SkipFallback(SkipFallbackVote), +} + +impl Vote { + /// Create a new notarization vote + pub fn new_notarization_vote(slot: Slot, block_id: Hash) -> Self { + Self::from(NotarizationVote::new(slot, block_id)) + } + + /// Create a new finalization vote + pub fn new_finalization_vote(slot: Slot) -> Self { + Self::from(FinalizationVote::new(slot)) + } + + /// Create a new skip vote + pub fn new_skip_vote(slot: Slot) -> Self { + Self::from(SkipVote::new(slot)) + } + + /// Create a new notarization fallback vote + pub fn new_notarization_fallback_vote(slot: Slot, block_id: Hash) -> Self { + Self::from(NotarizationFallbackVote::new(slot, block_id)) + } + + /// Create a new skip fallback vote + pub fn new_skip_fallback_vote(slot: Slot) -> Self { + Self::from(SkipFallbackVote::new(slot)) + } + + /// The slot which was voted for + pub fn slot(&self) -> Slot { + match self { + Self::Notarize(vote) => vote.slot(), + Self::Finalize(vote) => vote.slot(), + Self::Skip(vote) => vote.slot(), + Self::NotarizeFallback(vote) => vote.slot(), + Self::SkipFallback(vote) => vote.slot(), + } + } + + /// The block id associated with the block which was voted for + pub fn block_id(&self) -> Option<&Hash> { + match self { + Self::Notarize(vote) => Some(vote.block_id()), + 
Self::NotarizeFallback(vote) => Some(vote.block_id()), + Self::Finalize(_) | Self::Skip(_) | Self::SkipFallback(_) => None, + } + } + + /// Whether the vote is a notarization vote + pub fn is_notarization(&self) -> bool { + matches!(self, Self::Notarize(_)) + } + + /// Whether the vote is a finalization vote + pub fn is_finalize(&self) -> bool { + matches!(self, Self::Finalize(_)) + } + + /// Whether the vote is a skip vote + pub fn is_skip(&self) -> bool { + matches!(self, Self::Skip(_)) + } + + /// Whether the vote is a notarization fallback vote + pub fn is_notarize_fallback(&self) -> bool { + matches!(self, Self::NotarizeFallback(_)) + } + + /// Whether the vote is a skip fallback vote + pub fn is_skip_fallback(&self) -> bool { + matches!(self, Self::SkipFallback(_)) + } + + /// Whether the vote is a notarization or finalization + pub fn is_notarization_or_finalization(&self) -> bool { + matches!(self, Self::Notarize(_) | Self::Finalize(_)) + } +} + +impl From for Vote { + fn from(vote: NotarizationVote) -> Self { + Self::Notarize(vote) + } +} + +impl From for Vote { + fn from(vote: FinalizationVote) -> Self { + Self::Finalize(vote) + } +} + +impl From for Vote { + fn from(vote: SkipVote) -> Self { + Self::Skip(vote) + } +} + +impl From for Vote { + fn from(vote: NotarizationFallbackVote) -> Self { + Self::NotarizeFallback(vote) + } +} + +impl From for Vote { + fn from(vote: SkipFallbackVote) -> Self { + Self::SkipFallback(vote) + } +} + +/// A notarization vote +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "5AdwChAjsj5QUXLdpDnGGK2L2nA8y8EajVXi6jsmTv1m") +)] +#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +pub struct NotarizationVote { + slot: Slot, + block_id: Hash, +} + +impl NotarizationVote { + /// Construct a notarization vote for `slot` + pub fn new(slot: Slot, block_id: Hash) -> Self { + Self { slot, block_id } + } + + /// The slot to notarize + pub fn slot(&self) -> Slot { + self.slot + } + 
+ /// The block_id of the notarization slot + pub fn block_id(&self) -> &Hash { + &self.block_id + } +} + +/// A finalization vote +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "2XQ5N6YLJjF28w7cMFFUQ9SDgKuf9JpJNtAiXSPA8vR2") +)] +#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +pub struct FinalizationVote { + slot: Slot, +} + +impl FinalizationVote { + /// Construct a finalization vote for `slot` + pub fn new(slot: Slot) -> Self { + Self { slot } + } + + /// The slot to finalize + pub fn slot(&self) -> Slot { + self.slot + } +} + +/// A skip vote +/// Represents a range of slots to skip +/// inclusive on both ends +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "G8Nrx3sMYdnLpHsCNark3BGA58BmW2sqNnqjkYhQHtN") +)] +#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +pub struct SkipVote { + pub(crate) slot: Slot, +} + +impl SkipVote { + /// Construct a skip vote for `slot` + pub fn new(slot: Slot) -> Self { + Self { slot } + } + + /// The slot to skip + pub fn slot(&self) -> Slot { + self.slot + } +} + +/// A notarization fallback vote +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "7j5ZPwwyz1FaG3fpyQv5PVnQXicdSmqSk8NvqzkG1Eqz") +)] +#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +pub struct NotarizationFallbackVote { + slot: Slot, + block_id: Hash, +} + +impl NotarizationFallbackVote { + /// Construct a notarization vote for `slot` + pub fn new(slot: Slot, block_id: Hash) -> Self { + Self { slot, block_id } + } + + /// The slot to notarize + pub fn slot(&self) -> Slot { + self.slot + } + + /// The block_id of the notarization slot + pub fn block_id(&self) -> &Hash { + &self.block_id + } +} + +/// A skip fallback vote +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "WsUNum8V62gjRU1yAnPuBMAQui4YvMwD1RwrzHeYkeF") +)] +#[derive(Clone, Copy, Debug, 
PartialEq, Default, Serialize, Deserialize)] +pub struct SkipFallbackVote { + pub(crate) slot: Slot, +} + +impl SkipFallbackVote { + /// Construct a skip fallback vote for `slot` + pub fn new(slot: Slot) -> Self { + Self { slot } + } + + /// The slot to skip + pub fn slot(&self) -> Slot { + self.slot + } +} diff --git a/votor/Cargo.toml b/votor/Cargo.toml index 8b09ab70247b71..a06711ab298c12 100644 --- a/votor/Cargo.toml +++ b/votor/Cargo.toml @@ -18,13 +18,17 @@ frozen-abi = [ "dep:solana-frozen-abi-macro", "solana-accounts-db/frozen-abi", "solana-bloom/frozen-abi", + "solana-bls-signatures/frozen-abi", "solana-ledger/frozen-abi", "solana-runtime/frozen-abi", + "solana-vote/frozen-abi", + "solana-vote-program/frozen-abi", ] [dependencies] anyhow = { workspace = true } bincode = { workspace = true } +bitvec = { workspace = true } bs58 = { workspace = true } crossbeam-channel = { workspace = true } dashmap = { workspace = true, features = ["rayon", "raw-api"] } @@ -39,6 +43,7 @@ serde_bytes = { workspace = true } serde_derive = { workspace = true } solana-accounts-db = { workspace = true } solana-bloom = { workspace = true } +solana-bls-signatures = { workspace = true, features = ["solana-signer-derive"] } solana-clock = { workspace = true } solana-entry = { workspace = true } solana-epoch-schedule = { workspace = true } @@ -60,8 +65,12 @@ solana-rpc = { workspace = true } solana-runtime = { workspace = true } solana-signature = { workspace = true } solana-signer = { workspace = true } +solana-signer-store = { workspace = true } solana-time-utils = { workspace = true } solana-transaction = { workspace = true } +solana-vote = { workspace = true } +solana-vote-program = { workspace = true } +solana-votor-messages = { workspace = true } thiserror = { workspace = true } [dev-dependencies] diff --git a/votor/src/certificate_pool.rs b/votor/src/certificate_pool.rs new file mode 100644 index 00000000000000..7411b87fdfad14 --- /dev/null +++ b/votor/src/certificate_pool.rs 
@@ -0,0 +1,2264 @@ +use { + crate::{ + certificate_limits_and_vote_types, + certificate_pool::{ + parent_ready_tracker::ParentReadyTracker, + slot_stake_counters::SlotStakeCounters, + stats::CertificatePoolStats, + vote_certificate_builder::{CertificateError, VoteCertificateBuilder}, + vote_pool::{DuplicateBlockVotePool, SimpleVotePool, VotePool, VotePoolType}, + }, + commitment::AlpenglowCommitmentError, + conflicting_types, + event::VotorEvent, + vote_to_certificate_ids, Certificate, Stake, VoteType, + MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE, MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES, + }, + crossbeam_channel::Sender, + log::{error, trace}, + solana_clock::{Epoch, Slot}, + solana_epoch_schedule::EpochSchedule, + solana_hash::Hash, + solana_pubkey::Pubkey, + solana_runtime::{bank::Bank, epoch_stakes::VersionedEpochStakes}, + solana_votor_messages::{ + consensus_message::{ + Block, CertificateMessage, CertificateType, ConsensusMessage, VoteMessage, + }, + vote::Vote, + }, + std::{ + cmp::Ordering, + collections::{BTreeMap, HashMap}, + sync::Arc, + }, + thiserror::Error, +}; + +pub mod parent_ready_tracker; +mod slot_stake_counters; +mod stats; +mod vote_certificate_builder; +mod vote_pool; + +impl VoteType { + pub fn get_type(vote: &Vote) -> VoteType { + match vote { + Vote::Notarize(_) => VoteType::Notarize, + Vote::NotarizeFallback(_) => VoteType::NotarizeFallback, + Vote::Skip(_) => VoteType::Skip, + Vote::SkipFallback(_) => VoteType::SkipFallback, + Vote::Finalize(_) => VoteType::Finalize, + } + } +} + +pub type PoolId = (Slot, VoteType); + +#[derive(Debug, Error, PartialEq)] +pub enum AddVoteError { + #[error("Conflicting vote type: {0:?} vs existing {1:?} for slot: {2} pubkey: {3}")] + ConflictingVoteType(VoteType, VoteType, Slot, Pubkey), + + #[error("Epoch stakes missing for epoch: {0}")] + EpochStakesNotFound(Epoch), + + #[error("Unrooted slot")] + UnrootedSlot, + + #[error("Slot in the future")] + SlotInFuture, + + #[error("Certificate error: {0}")] + 
Certificate(#[from] CertificateError), + + #[error("{0} channel disconnected")] + ChannelDisconnected(String), + + #[error("Voting Service queue full")] + VotingServiceQueueFull, + + #[error("Invalid rank: {0}")] + InvalidRank(u16), +} + +impl From for AddVoteError { + fn from(_: AlpenglowCommitmentError) -> Self { + AddVoteError::ChannelDisconnected("CommitmentSender".to_string()) + } +} + +fn get_key_and_stakes( + epoch_schedule: &EpochSchedule, + epoch_stakes_map: &HashMap, + slot: Slot, + rank: u16, +) -> Result<(Pubkey, Stake, Stake), AddVoteError> { + let epoch = epoch_schedule.get_epoch(slot); + let epoch_stakes = epoch_stakes_map + .get(&epoch) + .ok_or(AddVoteError::EpochStakesNotFound(epoch))?; + let Some((vote_key, _)) = epoch_stakes + .bls_pubkey_to_rank_map() + .get_pubkey(rank as usize) + else { + return Err(AddVoteError::InvalidRank(rank)); + }; + let stake = epoch_stakes.vote_account_stake(vote_key); + if stake == 0 { + // Since we have a valid rank, this should never happen, there is no rank for zero stake. + panic!("Validator stake is zero for pubkey: {vote_key}"); + } + Ok((*vote_key, stake, epoch_stakes.total_stake())) +} + +pub struct CertificatePool { + my_pubkey: Pubkey, + // Vote pools to do bean counting for votes. 
+ vote_pools: BTreeMap, + /// Completed certificates + completed_certificates: BTreeMap>, + /// Tracks slots which have reached the parent ready condition: + /// - They have a potential parent block with a NotarizeFallback certificate + /// - All slots from the parent have a Skip certificate + pub parent_ready_tracker: ParentReadyTracker, + /// Highest block that has a NotarizeFallback certificate, for use in producing our leader window + highest_notarized_fallback: Option<(Slot, Hash)>, + /// Highest slot that has a Finalized variant certificate + highest_finalized_slot: Option, + /// Highest slot that has Finalize+Notarize or FinalizeFast, for use in standstill + /// Also add a bool to indicate whether this slot has FinalizeFast certificate + highest_finalized_with_notarize: Option<(Slot, bool)>, + /// The certificate sender, if set, newly created certificates will be sent here + certificate_sender: Option>, + /// Stats for the certificate pool + stats: CertificatePoolStats, + /// Slot stake counters, used to calculate safe_to_notar and safe_to_skip + slot_stake_counters_map: BTreeMap, +} + +impl CertificatePool { + pub fn new_from_root_bank( + my_pubkey: Pubkey, + bank: &Bank, + certificate_sender: Option>, + ) -> Self { + // To account for genesis and snapshots we allow default block id until + // block id can be serialized as part of the snapshot + let root_block = (bank.slot(), bank.block_id().unwrap_or_default()); + let parent_ready_tracker = ParentReadyTracker::new(my_pubkey, root_block); + + Self { + my_pubkey, + vote_pools: BTreeMap::new(), + completed_certificates: BTreeMap::new(), + highest_notarized_fallback: None, + highest_finalized_slot: None, + highest_finalized_with_notarize: None, + certificate_sender, + parent_ready_tracker, + stats: CertificatePoolStats::new(), + slot_stake_counters_map: BTreeMap::new(), + } + } + + fn new_vote_pool(vote_type: VoteType) -> VotePoolType { + match vote_type { + VoteType::NotarizeFallback => 
VotePoolType::DuplicateBlockVotePool( + DuplicateBlockVotePool::new(MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE), + ), + VoteType::Notarize => VotePoolType::DuplicateBlockVotePool( + DuplicateBlockVotePool::new(MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES), + ), + _ => VotePoolType::SimpleVotePool(SimpleVotePool::new()), + } + } + + fn update_vote_pool( + &mut self, + slot: Slot, + vote_type: VoteType, + block_id: Option, + transaction: &VoteMessage, + validator_vote_key: &Pubkey, + validator_stake: Stake, + ) -> Option { + let pool = self + .vote_pools + .entry((slot, vote_type)) + .or_insert_with(|| Self::new_vote_pool(vote_type)); + match pool { + VotePoolType::SimpleVotePool(pool) => { + pool.add_vote(validator_vote_key, validator_stake, transaction) + } + VotePoolType::DuplicateBlockVotePool(pool) => pool.add_vote( + validator_vote_key, + block_id.expect("Duplicate block pool expects a block id"), + transaction, + validator_stake, + ), + } + } + + /// For a new vote `slot` , `vote_type` checks if any + /// of the related certificates are newly complete. 
+ /// For each newly constructed certificate + /// - Insert it into `self.certificates` + /// - Potentially update `self.highest_notarized_fallback`, + /// - If it is a `is_critical` certificate, send via the certificate sender + /// - Potentially update `self.highest_finalized_slot`, + /// - If we have a new highest finalized slot, return it + /// - update any newly created events + fn update_certificates( + &mut self, + vote: &Vote, + block_id: Option, + events: &mut Vec, + total_stake: Stake, + ) -> Result>, AddVoteError> { + let slot = vote.slot(); + let mut new_certificates_to_send = Vec::new(); + for cert_id in vote_to_certificate_ids(vote) { + // If the certificate is already complete, skip it + if self.completed_certificates.contains_key(&cert_id) { + continue; + } + // Otherwise check whether the certificate is complete + let (limit, vote_types) = certificate_limits_and_vote_types(cert_id); + let accumulated_stake = vote_types + .iter() + .filter_map(|vote_type| { + Some(match self.vote_pools + .get(&(slot, *vote_type))? 
{ + VotePoolType::SimpleVotePool(pool) => pool.total_stake(), + VotePoolType::DuplicateBlockVotePool(pool) => pool.total_stake_by_block_id(block_id.as_ref().expect("Duplicate block pool for {vote_type:?} expects a block id for certificate {cert_id:?}")), + }) + }) + .sum::(); + if accumulated_stake as f64 / (total_stake as f64) < limit { + continue; + } + let mut vote_certificate_builder = VoteCertificateBuilder::new(cert_id); + vote_types.iter().for_each(|vote_type| { + if let Some(vote_pool) = self.vote_pools.get(&(slot, *vote_type)) { + match vote_pool { + VotePoolType::SimpleVotePool(pool) => pool.add_to_certificate(&mut vote_certificate_builder), + VotePoolType::DuplicateBlockVotePool(pool) => pool.add_to_certificate(block_id.as_ref().expect("Duplicate block pool for {vote_type:?} expects a block id for certificate {cert_id:?}"), &mut vote_certificate_builder), + }; + } + }); + let new_cert = Arc::new(vote_certificate_builder.build()?); + self.send_and_insert_certificate(cert_id, new_cert.clone(), events)?; + self.stats + .incr_cert_type(new_cert.certificate.certificate_type(), true); + new_certificates_to_send.push(new_cert); + } + Ok(new_certificates_to_send) + } + + fn send_and_insert_certificate( + &mut self, + cert_id: Certificate, + vote_certificate: Arc, + events: &mut Vec, + ) -> Result<(), AddVoteError> { + if let Some(sender) = &self.certificate_sender { + if cert_id.is_critical() { + if let Err(e) = sender.try_send((cert_id, (*vote_certificate).clone())) { + error!("Unable to send certificate {cert_id:?}: {e:?}"); + return Err(AddVoteError::ChannelDisconnected( + "CertificateSender".to_string(), + )); + } + } + } + self.insert_certificate(cert_id, vote_certificate, events); + Ok(()) + } + + fn has_conflicting_vote( + &self, + slot: Slot, + vote_type: VoteType, + validator_vote_key: &Pubkey, + block_id: &Option, + ) -> Option { + for conflicting_type in conflicting_types(vote_type) { + if let Some(pool) = self.vote_pools.get(&(slot, 
*conflicting_type)) { + let is_conflicting = match pool { + // In a simple vote pool, just check if the validator previously voted at all. If so, that's a conflict + VotePoolType::SimpleVotePool(pool) => { + pool.has_prev_validator_vote(validator_vote_key) + } + // In a duplicate block vote pool, because some conflicts between things like Notarize and NotarizeFallback + // for different blocks are allowed, we need a more specific check. + // TODO: This can be made much cleaner/safer if VoteType carried the bank hash, block id so we + // could check which exact VoteType(blockid, bankhash) was the source of the conflict. + VotePoolType::DuplicateBlockVotePool(pool) => { + if let Some(block_id) = &block_id { + // Reject votes for the same block with a conflicting type, i.e. + // a NotarizeFallback vote for the same block as a Notarize vote. + pool.has_prev_validator_vote_for_block(validator_vote_key, block_id) + } else { + pool.has_prev_validator_vote(validator_vote_key) + } + } + }; + if is_conflicting { + return Some(*conflicting_type); + } + } + } + None + } + + fn insert_certificate( + &mut self, + cert_id: Certificate, + cert: Arc, + events: &mut Vec, + ) { + trace!("{}: Inserting certificate {:?}", self.my_pubkey, cert_id); + self.completed_certificates.insert(cert_id, cert); + match cert_id { + Certificate::NotarizeFallback(slot, block_id) => { + self.parent_ready_tracker + .add_new_notar_fallback_or_stronger((slot, block_id), events); + if self + .highest_notarized_fallback + .is_none_or(|(s, _)| s < slot) + { + self.highest_notarized_fallback = Some((slot, block_id)); + } + } + Certificate::Skip(slot) => self.parent_ready_tracker.add_new_skip(slot, events), + Certificate::Notarize(slot, block_id) => { + events.push(VotorEvent::BlockNotarized((slot, block_id))); + self.parent_ready_tracker + .add_new_notar_fallback_or_stronger((slot, block_id), events); + if self.is_finalized(slot) { + // It's fine to set FastFinalization to false here, because + // we will 
report correctly as long as we have FastFinalization cert. + events.push(VotorEvent::Finalized((slot, block_id), false)); + if self + .highest_finalized_with_notarize + .is_none_or(|(s, _)| s < slot) + { + self.highest_finalized_with_notarize = Some((slot, false)); + } + } + } + Certificate::Finalize(slot) => { + if let Some(block) = self.get_notarized_block(slot) { + events.push(VotorEvent::Finalized(block, false)); + if self + .highest_finalized_with_notarize + .is_none_or(|(s, _)| s < slot) + { + self.highest_finalized_with_notarize = Some((slot, false)); + } + } + if self.highest_finalized_slot.is_none_or(|s| s < slot) { + self.highest_finalized_slot = Some(slot); + } + } + Certificate::FinalizeFast(slot, block_id) => { + events.push(VotorEvent::Finalized((slot, block_id), true)); + self.parent_ready_tracker + .add_new_notar_fallback_or_stronger((slot, block_id), events); + if self.highest_finalized_slot.is_none_or(|s| s < slot) { + self.highest_finalized_slot = Some(slot); + } + if self + .highest_finalized_with_notarize + .is_none_or(|(s, _)| s <= slot) + { + self.highest_finalized_with_notarize = Some((slot, true)); + } + } + } + } + + /// Adds the new vote the the certificate pool. If a new certificate is created + /// as a result of this, send it via the `self.certificate_sender` + /// + /// Any new votor events that are a result of adding this new vote will be added + /// to `events`. 
+ /// + /// If this resulted in a new highest Finalize or FastFinalize certificate, + /// return the slot + pub fn add_message( + &mut self, + epoch_schedule: &EpochSchedule, + epoch_stakes_map: &HashMap, + root_slot: Slot, + my_vote_pubkey: &Pubkey, + message: &ConsensusMessage, + events: &mut Vec, + ) -> Result<(Option, Vec>), AddVoteError> { + let current_highest_finalized_slot = self.highest_finalized_slot; + let new_certficates_to_send = match message { + ConsensusMessage::Vote(vote_message) => self.add_vote( + epoch_schedule, + epoch_stakes_map, + root_slot, + my_vote_pubkey, + vote_message, + events, + )?, + ConsensusMessage::Certificate(certificate_message) => { + self.add_certificate(root_slot, certificate_message, events)? + } + }; + // If we have a new highest finalized slot, return it + let new_finalized_slot = if self.highest_finalized_slot > current_highest_finalized_slot { + self.highest_finalized_slot + } else { + None + }; + Ok((new_finalized_slot, new_certficates_to_send)) + } + + fn add_vote( + &mut self, + epoch_schedule: &EpochSchedule, + epoch_stakes_map: &HashMap, + root_slot: Slot, + my_vote_pubkey: &Pubkey, + vote_message: &VoteMessage, + events: &mut Vec, + ) -> Result>, AddVoteError> { + let vote = &vote_message.vote; + let rank = vote_message.rank; + let vote_slot = vote.slot(); + let (validator_vote_key, validator_stake, total_stake) = + get_key_and_stakes(epoch_schedule, epoch_stakes_map, vote_slot, rank)?; + + // Since we have a valid rank, this should never happen, there is no rank for zero stake. 
+ assert_ne!( + validator_stake, 0, + "Validator stake is zero for pubkey: {validator_vote_key}" + ); + + self.stats.incoming_votes = self.stats.incoming_votes.saturating_add(1); + if vote_slot < root_slot { + self.stats.out_of_range_votes = self.stats.out_of_range_votes.saturating_add(1); + return Err(AddVoteError::UnrootedSlot); + } + let block_id = vote.block_id().map(|block_id| { + if !matches!(vote, Vote::Notarize(_) | Vote::NotarizeFallback(_)) { + panic!("expected Notarize or NotarizeFallback vote"); + } + *block_id + }); + let vote_type = VoteType::get_type(vote); + if let Some(conflicting_type) = + self.has_conflicting_vote(vote_slot, vote_type, &validator_vote_key, &block_id) + { + self.stats.conflicting_votes = self.stats.conflicting_votes.saturating_add(1); + return Err(AddVoteError::ConflictingVoteType( + vote_type, + conflicting_type, + vote_slot, + validator_vote_key, + )); + } + match self.update_vote_pool( + vote_slot, + vote_type, + block_id, + vote_message, + &validator_vote_key, + validator_stake, + ) { + None => { + // No new vote pool entry was created, just return empty vec + self.stats.exist_votes = self.stats.exist_votes.saturating_add(1); + return Ok(vec![]); + } + Some(entry_stake) => { + let fallback_vote_counters = self + .slot_stake_counters_map + .entry(vote_slot) + .or_insert_with(|| SlotStakeCounters::new(total_stake)); + fallback_vote_counters.add_vote( + vote, + entry_stake, + my_vote_pubkey == &validator_vote_key, + events, + &mut self.stats, + ); + } + } + self.stats.incr_ingested_vote_type(vote_type); + + self.update_certificates(vote, block_id, events, total_stake) + } + + fn add_certificate( + &mut self, + root_slot: Slot, + certificate_message: &CertificateMessage, + events: &mut Vec, + ) -> Result>, AddVoteError> { + let certificate_id = certificate_message.certificate; + self.stats.incoming_certs = self.stats.incoming_certs.saturating_add(1); + if certificate_id.slot() < root_slot { + self.stats.out_of_range_certs = 
self.stats.out_of_range_certs.saturating_add(1); + return Err(AddVoteError::UnrootedSlot); + } + if self.completed_certificates.contains_key(&certificate_id) { + self.stats.exist_certs = self.stats.exist_certs.saturating_add(1); + return Ok(vec![]); + } + let new_certificate = Arc::new(certificate_message.clone()); + self.send_and_insert_certificate(certificate_id, new_certificate.clone(), events)?; + + self.stats + .incr_cert_type(certificate_id.certificate_type(), false); + + Ok(vec![new_certificate]) + } + + /// The highest notarized fallback slot, for use as the parent slot in leader window + pub fn highest_notarized_fallback(&self) -> Option<(Slot, Hash)> { + self.highest_notarized_fallback + } + + /// Get the notarized block in `slot` + pub fn get_notarized_block(&self, slot: Slot) -> Option { + self.completed_certificates + .iter() + .find_map(|(cert_id, _)| match cert_id { + Certificate::Notarize(s, block_id) if slot == *s => Some((*s, *block_id)), + _ => None, + }) + } + + #[cfg(test)] + fn highest_notarized_slot(&self) -> Slot { + // Return the max of CertificateType::Notarize and CertificateType::NotarizeFallback + self.completed_certificates + .iter() + .filter_map(|(cert_id, _)| match cert_id { + Certificate::Notarize(s, _) => Some(s), + Certificate::NotarizeFallback(s, _) => Some(s), + _ => None, + }) + .max() + .copied() + .unwrap_or(0) + } + + #[cfg(test)] + fn highest_skip_slot(&self) -> Slot { + self.completed_certificates + .iter() + .filter_map(|(cert_id, _)| match cert_id { + Certificate::Skip(s) => Some(s), + _ => None, + }) + .max() + .copied() + .unwrap_or(0) + } + + pub fn highest_finalized_slot(&self) -> Slot { + self.completed_certificates + .iter() + .filter_map(|(cert_id, _)| match cert_id { + Certificate::Finalize(s) => Some(s), + Certificate::FinalizeFast(s, _) => Some(s), + _ => None, + }) + .max() + .copied() + .unwrap_or(0) + } + + pub fn highest_fast_finalized_block(&self) -> Option { + self.completed_certificates + .iter() + 
.filter_map(|(cert_id, _)| match cert_id { + Certificate::FinalizeFast(s, bid) => Some((*s, *bid)), + _ => None, + }) + .max() + } + + /// Checks if any block in the slot `s` is finalized + pub fn is_finalized(&self, slot: Slot) -> bool { + self.completed_certificates.keys().any(|cert_id| { + matches!(cert_id, Certificate::Finalize(s) | Certificate::FinalizeFast(s, _) if *s == slot) + }) + } + + /// Check if the specific block `(block_id)` in slot `s` is notarized + pub fn is_notarized(&self, slot: Slot, block_id: Hash) -> bool { + self.completed_certificates + .contains_key(&Certificate::Notarize(slot, block_id)) + } + + /// Checks if the any block in slot `slot` has received a `NotarizeFallback` certificate, if so return + /// the size of the certificate + #[cfg(test)] + pub fn slot_has_notarized_fallback(&self, slot: Slot) -> bool { + self.completed_certificates + .iter() + .any(|(cert_id, _)| matches!(cert_id, Certificate::NotarizeFallback(s,_) if *s == slot)) + } + + /// Checks if `slot` has a `Skip` certificate + pub fn skip_certified(&self, slot: Slot) -> bool { + self.completed_certificates + .contains_key(&Certificate::Skip(slot)) + } + + #[cfg(test)] + fn make_start_leader_decision( + &self, + my_leader_slot: Slot, + parent_slot: Slot, + first_alpenglow_slot: Slot, + ) -> bool { + // TODO: for GCE tests we WFSM on 1 so slot 1 is exempt + let needs_notarization_certificate = parent_slot >= first_alpenglow_slot && parent_slot > 1; + + if needs_notarization_certificate + && !self.slot_has_notarized_fallback(parent_slot) + && !self.is_finalized(parent_slot) + { + error!("Missing notarization certificate {parent_slot}"); + return false; + } + + let needs_skip_certificate = + // handles cases where we are entering the alpenglow epoch, where the first + // slot in the epoch will pass my_leader_slot == parent_slot + my_leader_slot != first_alpenglow_slot && + my_leader_slot != parent_slot.saturating_add(1); + + if needs_skip_certificate { + let begin_skip_slot = 
first_alpenglow_slot.max(parent_slot.saturating_add(1)); + for slot in begin_skip_slot..my_leader_slot { + if !self.skip_certified(slot) { + error!( + "Missing skip certificate for {slot}, required for skip certificate \ + from {begin_skip_slot} to build {my_leader_slot}" + ); + return false; + } + } + } + + true + } + + /// Cleanup any old slots from the certificate pool + pub fn prune_old_state(&mut self, root_slot: Slot) { + // `completed_certificates`` now only contains entries >= `slot` + self.completed_certificates + .retain(|cert_id, _| match cert_id { + Certificate::Finalize(s) + | Certificate::FinalizeFast(s, _) + | Certificate::Notarize(s, _) + | Certificate::NotarizeFallback(s, _) + | Certificate::Skip(s) => s >= &root_slot, + }); + self.vote_pools = self.vote_pools.split_off(&(root_slot, VoteType::Finalize)); + self.slot_stake_counters_map = self.slot_stake_counters_map.split_off(&root_slot); + self.parent_ready_tracker.set_root(root_slot); + } + + /// Updates the pubkey used for logging purposes only. + /// This avoids the need to recreate the entire certificate pool since it's + /// not distinguished by the pubkey. 
+ pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + self.parent_ready_tracker.update_pubkey(new_pubkey); + } + + pub fn maybe_report(&mut self) { + self.stats.maybe_report(); + } + + pub fn get_certs_for_standstill(&self) -> Vec> { + let (highest_finalized_with_notarize_slot, has_fast_finalize) = + self.highest_finalized_with_notarize.unwrap_or((0, false)); + self.completed_certificates + .iter() + .filter_map(|(cert_id, cert)| { + let cert_to_send = match ( + cert_id.slot().cmp(&highest_finalized_with_notarize_slot), + cert_id.certificate_type(), + has_fast_finalize, + ) { + (Ordering::Greater, _, _) + | ( + Ordering::Equal, + CertificateType::Finalize | CertificateType::Notarize, + false, + ) + | (Ordering::Equal, CertificateType::FinalizeFast, true) => Some(cert.clone()), + (Ordering::Equal, CertificateType::FinalizeFast, false) => { + panic!("Should not happen while certificate pool is single threaded") + } + _ => None, + }; + if cert_to_send.is_some() { + trace!("{}: Refreshing certificate {:?}", self.my_pubkey, cert_id); + } + cert_to_send + }) + .collect() + } +} + +#[cfg(test)] +mod tests { + use { + super::*, + solana_bls_signatures::{keypair::Keypair as BLSKeypair, Signature as BLSSignature}, + solana_clock::Slot, + solana_hash::Hash, + solana_runtime::{ + bank::{Bank, NewBankOptions}, + bank_forks::BankForks, + genesis_utils::{ + create_genesis_config_with_vote_accounts_alpenglow, ValidatorVoteKeypairs, + }, + }, + solana_signer::Signer, + solana_votor_messages::consensus_message::{ + CertificateType, VoteMessage, BLS_KEYPAIR_DERIVE_SEED, + }, + std::sync::{Arc, RwLock}, + test_case::test_case, + }; + + fn dummy_transaction( + keypairs: &[ValidatorVoteKeypairs], + vote: &Vote, + rank: usize, + ) -> ConsensusMessage { + let bls_keypair = + BLSKeypair::derive_from_signer(&keypairs[rank].vote_keypair, BLS_KEYPAIR_DERIVE_SEED) + .unwrap(); + let signature: BLSSignature = bls_keypair + 
.sign(bincode::serialize(vote).unwrap().as_slice()) + .into(); + ConsensusMessage::new_vote(*vote, signature, rank as u16) + } + + fn create_bank(slot: Slot, parent: Arc, pubkey: &Pubkey) -> Bank { + Bank::new_from_parent_with_options(parent, pubkey, slot, NewBankOptions::default()) + } + + fn create_bank_forks(validator_keypairs: &[ValidatorVoteKeypairs]) -> Arc> { + let genesis = create_genesis_config_with_vote_accounts_alpenglow( + 1_000_000_000, + validator_keypairs, + vec![100; validator_keypairs.len()], + ); + let bank0 = Bank::new_for_tests(&genesis.genesis_config); + BankForks::new_rw_arc(bank0) + } + + fn create_initial_state() -> ( + Vec, + CertificatePool, + Arc>, + ) { + // Create 10 node validatorvotekeypairs vec + let validator_keypairs = (0..10) + .map(|_| ValidatorVoteKeypairs::new_rand()) + .collect::>(); + let bank_forks = create_bank_forks(&validator_keypairs); + let root_bank = bank_forks.read().unwrap().root_bank(); + ( + validator_keypairs, + CertificatePool::new_from_root_bank(Pubkey::new_unique(), &root_bank, None), + bank_forks, + ) + } + + fn add_certificate( + pool: &mut CertificatePool, + bank: &Bank, + validator_keypairs: &[ValidatorVoteKeypairs], + vote: Vote, + ) { + for rank in 0..6 { + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(validator_keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); + } + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); + match vote { + Vote::Notarize(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot()), + Vote::NotarizeFallback(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot()), + Vote::Skip(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot()), + Vote::SkipFallback(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot()), + 
Vote::Finalize(vote) => assert_eq!(pool.highest_finalized_slot(), vote.slot()), + } + } + + fn add_skip_vote_range( + pool: &mut CertificatePool, + root_bank: &Bank, + start: Slot, + end: Slot, + keypairs: &[ValidatorVoteKeypairs], + rank: usize, + ) { + for slot in start..=end { + let vote = Vote::new_skip_vote(slot); + let result = pool.add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(keypairs, &vote, rank), + &mut vec![], + ); + assert!( + result.is_ok(), + "Failed to add skip vote for slot {slot}: {result:?}" + ); + } + } + + #[test] + fn test_make_decision_leader_does_not_start_if_notarization_missing() { + let (_, pool, _) = create_initial_state(); + + // No notarization set, pool is default + let parent_slot = 2; + let my_leader_slot = 3; + let first_alpenglow_slot = 0; + let decision = + pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot); + assert!( + !decision, + "Leader should not be allowed to start without notarization" + ); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_1() { + let (_, pool, _) = create_initial_state(); + + // If parent_slot == 0, you don't need a notarization certificate + // Because leader_slot == parent_slot + 1, you don't need a skip certificate + let parent_slot = 0; + let my_leader_slot = 1; + let first_alpenglow_slot = 0; + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_2() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // If parent_slot < first_alpenglow_slot, and parent_slot > 0 + // no notarization certificate is required, but a skip + // certificate will be + let parent_slot = 1; + let my_leader_slot = 3; + let first_alpenglow_slot = 2; + + assert!(!pool.make_start_leader_decision( + my_leader_slot, + parent_slot, + 
first_alpenglow_slot, + )); + + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(first_alpenglow_slot), + ); + + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_3() { + let (_, pool, _) = create_initial_state(); + // If parent_slot == first_alpenglow_slot, and + // first_alpenglow_slot > 0, you need a notarization certificate + let parent_slot = 2; + let my_leader_slot = 3; + let first_alpenglow_slot = 2; + assert!(!pool.make_start_leader_decision( + my_leader_slot, + parent_slot, + first_alpenglow_slot, + )); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_4() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // If parent_slot < first_alpenglow_slot, and parent_slot == 0, + // no notarization certificate is required, but a skip certificate will + // be + let parent_slot = 0; + let my_leader_slot = 2; + let first_alpenglow_slot = 1; + + assert!(!pool.make_start_leader_decision( + my_leader_slot, + parent_slot, + first_alpenglow_slot, + )); + + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(first_alpenglow_slot), + ); + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_5() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // Valid skip certificate for 1-9 exists + for slot in 1..=9 { + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(slot), + ); + } + + // Parent slot is equal to 0, so no notarization certificate required + let my_leader_slot = 10; + let parent_slot = 0; + let first_alpenglow_slot = 0; + 
assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_first_alpenglow_slot_edge_case_6() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // Valid skip certificate for 1-9 exists + for slot in 1..=9 { + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(slot), + ); + } + // Parent slot is less than first_alpenglow_slot, so no notarization certificate required + let my_leader_slot = 10; + let parent_slot = 4; + let first_alpenglow_slot = 5; + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_leader_does_not_start_if_skip_certificate_missing() { + let (validator_keypairs, mut pool, _) = create_initial_state(); + + let bank_forks = create_bank_forks(&validator_keypairs); + let my_pubkey = validator_keypairs[0].vote_keypair.pubkey(); + + // Create bank 5 + let bank = create_bank(5, bank_forks.read().unwrap().get(0).unwrap(), &my_pubkey); + bank.freeze(); + bank_forks.write().unwrap().insert(bank); + + // Notarize slot 5 + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_notarization_vote(5, Hash::default()), + ); + assert_eq!(pool.highest_notarized_slot(), 5); + + // No skip certificate for 6-10 + let my_leader_slot = 10; + let parent_slot = 5; + let first_alpenglow_slot = 0; + let decision = + pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot); + assert!( + !decision, + "Leader should not be allowed to start if a skip certificate is missing" + ); + } + + #[test] + fn test_make_decision_leader_starts_when_no_skip_required() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // Notarize slot 5 + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + 
Vote::new_notarization_vote(5, Hash::default()), + ); + assert_eq!(pool.highest_notarized_slot(), 5); + + // Leader slot is just +1 from notarized slot (no skip needed) + let my_leader_slot = 6; + let parent_slot = 5; + let first_alpenglow_slot = 0; + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_leader_starts_if_notarized_and_skips_valid() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // Notarize slot 5 + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_notarization_vote(5, Hash::default()), + ); + assert_eq!(pool.highest_notarized_slot(), 5); + + // Valid skip certificate for 6-9 exists + for slot in 6..=9 { + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(slot), + ); + } + + let my_leader_slot = 10; + let parent_slot = 5; + let first_alpenglow_slot = 0; + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + } + + #[test] + fn test_make_decision_leader_starts_if_skip_range_superset() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // Notarize slot 5 + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_notarization_vote(5, Hash::default()), + ); + assert_eq!(pool.highest_notarized_slot(), 5); + + // Valid skip certificate for 4-9 exists + // Should start leader block even if the beginning of the range is from + // before your last notarized slot + for slot in 4..=9 { + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_fallback_vote(slot), + ); + } + + let my_leader_slot = 10; + let parent_slot = 5; + let first_alpenglow_slot = 0; + assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); + 
} + + #[test_case(Vote::new_finalization_vote(5), vec![CertificateType::Finalize])] + #[test_case(Vote::new_notarization_vote(6, Hash::new_unique()), vec![CertificateType::Notarize, CertificateType::NotarizeFallback])] + #[test_case(Vote::new_notarization_fallback_vote(7, Hash::new_unique()), vec![CertificateType::NotarizeFallback])] + #[test_case(Vote::new_skip_vote(8), vec![CertificateType::Skip])] + #[test_case(Vote::new_skip_fallback_vote(9), vec![CertificateType::Skip])] + fn test_add_vote_and_create_new_certificate_with_types( + vote: Vote, + expected_certificate_types: Vec, + ) { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let my_validator_ix = 5; + let highest_slot_fn = match &vote { + Vote::Finalize(_) => |pool: &CertificatePool| pool.highest_finalized_slot(), + Vote::Notarize(_) => |pool: &CertificatePool| pool.highest_notarized_slot(), + Vote::NotarizeFallback(_) => |pool: &CertificatePool| pool.highest_notarized_slot(), + Vote::Skip(_) => |pool: &CertificatePool| pool.highest_skip_slot(), + Vote::SkipFallback(_) => |pool: &CertificatePool| pool.highest_skip_slot(), + }; + let bank = bank_forks.read().unwrap().root_bank(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] + ) + .is_ok()); + let slot = vote.slot(); + assert!(highest_slot_fn(&pool) < slot); + // Same key voting again shouldn't make a certificate + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] + ) + .is_ok()); + assert!(highest_slot_fn(&pool) < slot); + for rank in 0..4 { + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut 
vec![] + ) + .is_ok()); + } + assert!(highest_slot_fn(&pool) < slot); + let new_validator_ix = 6; + let (new_finalized_slot, certs_to_send) = pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, new_validator_ix), + &mut vec![], + ) + .unwrap(); + if vote.is_finalize() { + assert_eq!(new_finalized_slot, Some(slot)); + } else { + assert!(new_finalized_slot.is_none()); + } + // Assert certs_to_send contains the expected certificate types + for cert_type in expected_certificate_types { + assert!(certs_to_send.iter().any(|cert| { + cert.certificate.certificate_type() == cert_type && cert.certificate.slot() == slot + })); + } + assert_eq!(highest_slot_fn(&pool), slot); + // Now add the same certificate again, this should silently exit. + for cert in certs_to_send { + let (new_finalized_slot, certs_to_send) = pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate((*cert).clone()), + &mut vec![], + ) + .unwrap(); + assert!(new_finalized_slot.is_none()); + assert_eq!(certs_to_send, []); + } + } + + #[test_case(CertificateType::Finalize, Vote::new_finalization_vote(5))] + #[test_case( + CertificateType::FinalizeFast, + Vote::new_notarization_vote(6, Hash::new_unique()) + )] + #[test_case( + CertificateType::Notarize, + Vote::new_notarization_vote(6, Hash::new_unique()) + )] + #[test_case( + CertificateType::NotarizeFallback, + Vote::new_notarization_fallback_vote(7, Hash::new_unique()) + )] + #[test_case(CertificateType::Skip, Vote::new_skip_vote(8))] + fn test_add_certificate_with_types(certificate_type: CertificateType, vote: Vote) { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + let certificate = Certificate::new(certificate_type, vote.slot(), vote.block_id().copied()); + + let certificate_message = CertificateMessage { + certificate, + 
signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + let bank = bank_forks.read().unwrap().root_bank(); + let message = ConsensusMessage::Certificate(certificate_message.clone()); + // Add the certificate to the pool + let (new_finalized_slot, certs_to_send) = pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &message, + &mut vec![], + ) + .unwrap(); + // Because this is the first certificate of this type, it should be sent out. + if certificate_type == CertificateType::Finalize + || certificate_type == CertificateType::FinalizeFast + { + assert_eq!(new_finalized_slot, Some(certificate.slot())); + } else { + assert!(new_finalized_slot.is_none()); + } + assert_eq!(certs_to_send.len(), 1); + assert_eq!(*certs_to_send[0], certificate_message); + + // Adding the cert again will not trigger another send + let (new_finalized_slot, certs_to_send) = pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &message, + &mut vec![], + ) + .unwrap(); + assert!(new_finalized_slot.is_none()); + assert_eq!(certs_to_send, []); + + // Now add the vote from everyone else, this will not trigger a certificate send + for rank in 0..validator_keypairs.len() { + let (_, certs_to_send) = pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut vec![], + ) + .unwrap(); + assert!(!certs_to_send + .iter() + .any(|cert| { cert.certificate.certificate_type() == certificate_type })); + } + } + + #[test] + fn test_add_vote_zero_stake() { + let (_, mut pool, bank_forks) = create_initial_state(); + let bank = bank_forks.read().unwrap().root_bank(); + assert_eq!( + pool.add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Vote(VoteMessage { + vote: Vote::new_skip_vote(5), + 
rank: 100, + signature: BLSSignature::default(), + }), + &mut vec![] + ), + Err(AddVoteError::InvalidRank(100)) + ); + } + + fn assert_single_certificate_range( + pool: &CertificatePool, + exp_range_start: Slot, + exp_range_end: Slot, + ) { + for i in exp_range_start..=exp_range_end { + assert!(pool.skip_certified(i)); + } + } + + #[test] + fn test_consecutive_slots() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + add_certificate( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + Vote::new_skip_vote(15), + ); + assert_eq!(pool.highest_skip_slot(), 15); + + let bank = bank_forks.read().unwrap().root_bank(); + for i in 0..validator_keypairs.len() { + let slot = (i as u64).saturating_add(16); + let vote = Vote::new_skip_vote(slot); + // These should not extend the skip range + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, i), + &mut vec![] + ) + .is_ok()); + } + + assert_single_certificate_range(&pool, 15, 15); + } + + #[test] + fn test_multi_skip_cert() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // We have 10 validators, 40% voted for (5, 15) + for rank in 0..4 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 15, + &validator_keypairs, + rank, + ); + } + // 30% voted for (5, 8) + for rank in 4..7 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 8, + &validator_keypairs, + rank, + ); + } + // The rest voted for (11, 15) + for rank in 7..10 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 11, + 15, + &validator_keypairs, + rank, + ); + } + // Test slots from 5 to 15, [5, 8] and [11, 15] should be certified, the others aren't + for slot in 5..9 { + assert!(pool.skip_certified(slot)); + } + for slot in 9..11 { + 
assert!(!pool.skip_certified(slot)); + } + for slot in 11..=15 { + assert!(pool.skip_certified(slot)); + } + } + + #[test] + fn test_add_multiple_votes() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + + // 10 validators, half vote for (5, 15), the other (20, 30) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 15, + &validator_keypairs, + rank, + ); + } + for rank in 5..10 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 20, + 30, + &validator_keypairs, + rank, + ); + } + assert_eq!(pool.highest_skip_slot(), 0); + + // Now the first half vote for (5, 30) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 30, + &validator_keypairs, + rank, + ); + } + assert_single_certificate_range(&pool, 20, 30); + } + + #[test] + fn test_add_multiple_disjoint_votes() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + // 50% of the validators vote for (1, 10) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 1, + 10, + &validator_keypairs, + rank, + ); + } + let bank = bank_forks.read().unwrap().root_bank(); + // 10% vote for skip 2 + let vote = Vote::new_skip_vote(2); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); + assert_eq!(pool.highest_skip_slot(), 2); + + assert_single_certificate_range(&pool, 2, 2); + // 10% vote for skip 4 + let vote = Vote::new_skip_vote(4); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 7), + &mut vec![] + ) + .is_ok()); + assert_eq!(pool.highest_skip_slot(), 4); + + assert_single_certificate_range(&pool, 2, 2); + 
assert_single_certificate_range(&pool, 4, 4); + // 10% vote for skip 3 + let vote = Vote::new_skip_vote(3); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 8), + &mut vec![] + ) + .is_ok()); + assert_eq!(pool.highest_skip_slot(), 4); + assert_single_certificate_range(&pool, 2, 4); + assert!(pool.skip_certified(3)); + // Let the last 10% vote for (3, 10) now + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 3, + 10, + &validator_keypairs, + 8, + ); + assert_eq!(pool.highest_skip_slot(), 10); + assert_single_certificate_range(&pool, 2, 10); + assert!(pool.skip_certified(7)); + } + + #[test] + fn test_update_existing_singleton_vote() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + // 50% voted on (1, 6) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 1, + 6, + &validator_keypairs, + rank, + ); + } + let bank = bank_forks.read().unwrap().root_bank(); + // Range expansion on a singleton vote should be ok + let vote = Vote::new_skip_vote(1); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); + assert_eq!(pool.highest_skip_slot(), 1); + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 1, + 6, + &validator_keypairs, + 6, + ); + assert_eq!(pool.highest_skip_slot(), 6); + assert_single_certificate_range(&pool, 1, 6); + } + + #[test] + fn test_update_existing_vote() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let bank = bank_forks.read().unwrap().root_bank(); + // 50% voted for (10, 25) + for rank in 0..5 { + add_skip_vote_range(&mut pool, &bank, 10, 25, &validator_keypairs, rank); + } + + add_skip_vote_range(&mut pool, 
&bank, 10, 20, &validator_keypairs, 6); + assert_eq!(pool.highest_skip_slot(), 20); + assert_single_certificate_range(&pool, 10, 20); + + // AlreadyExists, silently fail + let vote = Vote::new_skip_vote(20); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); + } + + #[test] + fn test_threshold_not_reached() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + // half voted (5, 15) and the other half voted (20, 30) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 15, + &validator_keypairs, + rank, + ); + } + for rank in 5..10 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 20, + 30, + &validator_keypairs, + rank, + ); + } + for slot in 5..31 { + assert!(!pool.skip_certified(slot)); + } + } + + #[test] + fn test_update_and_skip_range_certify() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + // half voted (5, 15) and the other half voted (10, 30) + for rank in 0..5 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 5, + 15, + &validator_keypairs, + rank, + ); + } + for rank in 5..10 { + add_skip_vote_range( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + 10, + 30, + &validator_keypairs, + rank, + ); + } + for slot in 5..10 { + assert!(!pool.skip_certified(slot)); + } + for slot in 16..31 { + assert!(!pool.skip_certified(slot)); + } + assert_single_certificate_range(&pool, 10, 15); + } + + #[test] + fn test_safe_to_notar() { + solana_logger::setup(); + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let bank = bank_forks.read().unwrap().root_bank(); + let (my_vote_key, _, _) = + get_key_and_stakes(bank.epoch_schedule(), bank.epoch_stakes_map(), 0, 0).unwrap(); + + // Create bank 2 + let slot = 2; + 
let block_id = Hash::new_unique(); + + // Add a skip from myself. + let vote = Vote::new_skip_vote(2); + let mut new_events = vec![]; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + &dummy_transaction(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); + assert!(new_events.is_empty()); + + // 40% notarized, should succeed + for rank in 1..5 { + let vote = Vote::new_notarization_vote(2, block_id); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert_eq!(new_events.len(), 1); + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); + } + new_events.clear(); + + // Create bank 3 + let slot = 3; + let block_id = Hash::new_unique(); + + // Add 20% notarize, but no vote from myself, should fail + for rank in 1..3 { + let vote = Vote::new_notarization_vote(3, block_id); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert!(new_events.is_empty()); + + // Add a notarize from myself for some other block, but still not enough notar or skip, should fail. 
+ let vote = Vote::new_notarization_vote(3, Hash::new_unique()); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + &dummy_transaction(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); + assert!(new_events.is_empty()); + + // Now add 40% skip, should succeed + // Funny thing is in this case we will also get SafeToSkip(3) + for rank in 3..7 { + let vote = Vote::new_skip_vote(3); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert_eq!(new_events.len(), 2); + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip event"); + } + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[1] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); + } + new_events.clear(); + + // Add 20% notarization for another block, we should notify on new block_id + // but not on the same block_id because we already sent the event + let duplicate_block_id = Hash::new_unique(); + for rank in 7..9 { + let vote = Vote::new_notarization_vote(3, duplicate_block_id); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + + assert_eq!(new_events.len(), 1); + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(duplicate_block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); + } + } + + #[test] + fn test_safe_to_skip() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let bank = 
bank_forks.read().unwrap().root_bank(); + let (my_vote_key, _, _) = + get_key_and_stakes(bank.epoch_schedule(), bank.epoch_stakes_map(), 0, 0).unwrap(); + let slot = 2; + let mut new_events = vec![]; + + // Add a notarize from myself. + let block_id = Hash::new_unique(); + let vote = Vote::new_notarization_vote(2, block_id); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + &dummy_transaction(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); + // Should still fail because there are no other votes. + assert!(new_events.is_empty()); + // Add 50% skip, should succeed + for rank in 1..6 { + let vote = Vote::new_skip_vote(2); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert_eq!(new_events.len(), 1); + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip event"); + } + new_events.clear(); + // Add 10% more notarize, will not send new SafeToSkip because the event was already sent + let vote = Vote::new_notarization_vote(2, block_id); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 6), + &mut new_events + ) + .is_ok()); + assert!(new_events.is_empty()); + } + + fn create_new_vote(vote_type: VoteType, slot: Slot) -> Vote { + match vote_type { + VoteType::Notarize => Vote::new_notarization_vote(slot, Hash::default()), + VoteType::NotarizeFallback => { + Vote::new_notarization_fallback_vote(slot, Hash::default()) + } + VoteType::Skip => Vote::new_skip_vote(slot), + VoteType::SkipFallback => Vote::new_skip_fallback_vote(slot), + VoteType::Finalize => Vote::new_finalization_vote(slot), + } + } + + fn 
test_reject_conflicting_vote( + pool: &mut CertificatePool, + bank: &Bank, + validator_keypairs: &[ValidatorVoteKeypairs], + vote_type_1: VoteType, + vote_type_2: VoteType, + slot: Slot, + ) { + let vote_1 = create_new_vote(vote_type_1, slot); + let vote_2 = create_new_vote(vote_type_2, slot); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(validator_keypairs, &vote_1, 0), + &mut vec![] + ) + .is_ok()); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(validator_keypairs, &vote_2, 0), + &mut vec![] + ) + .is_err()); + } + + #[test] + fn test_reject_conflicting_votes_with_type() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let mut slot = 2; + for vote_type_1 in [ + VoteType::Finalize, + VoteType::Notarize, + VoteType::NotarizeFallback, + VoteType::Skip, + VoteType::SkipFallback, + ] { + let conflicting_vote_types = conflicting_types(vote_type_1); + for vote_type_2 in conflicting_vote_types { + test_reject_conflicting_vote( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + vote_type_1, + *vote_type_2, + slot, + ); + } + slot = slot.saturating_add(4); + } + } + + #[test] + fn test_handle_new_root() { + let validator_keypairs = (0..10) + .map(|_| ValidatorVoteKeypairs::new_rand()) + .collect::>(); + let bank_forks = create_bank_forks(&validator_keypairs); + let mut pool = CertificatePool::new_from_root_bank( + Pubkey::new_unique(), + &bank_forks.read().unwrap().root_bank(), + None, + ); + + let root_bank = bank_forks.read().unwrap().root_bank(); + let new_bank = Arc::new(create_bank(2, root_bank, &Pubkey::new_unique())); + pool.prune_old_state(new_bank.slot()); + let new_bank = Arc::new(create_bank(3, new_bank, &Pubkey::new_unique())); + pool.prune_old_state(new_bank.slot()); + // Send a vote on slot 1, it should be 
rejected + let vote = Vote::new_skip_vote(1); + assert!(pool + .add_message( + new_bank.epoch_schedule(), + new_bank.epoch_stakes_map(), + new_bank.slot(), + &Pubkey::new_unique(), + &dummy_transaction(&validator_keypairs, &vote, 0), + &mut vec![] + ) + .is_err()); + + // Send a cert on slot 2, it should be rejected + let certificate = Certificate::new(CertificateType::Notarize, 2, Some(Hash::new_unique())); + + let cert = ConsensusMessage::Certificate(CertificateMessage { + certificate, + signature: BLSSignature::default(), + bitmap: Vec::new(), + }); + assert!(pool + .add_message( + new_bank.epoch_schedule(), + new_bank.epoch_stakes_map(), + new_bank.slot(), + &Pubkey::new_unique(), + &cert, + &mut vec![] + ) + .is_err()); + } + + #[test] + fn test_get_certs_for_standstill() { + let (_, mut pool, bank_forks) = create_initial_state(); + + // Should return empty vector if no certificates + assert!(pool.get_certs_for_standstill().is_empty()); + + // Add notar-fallback cert on 3 and finalize cert on 4 + let cert_3 = CertificateMessage { + certificate: Certificate::new( + CertificateType::NotarizeFallback, + 3, + Some(Hash::new_unique()), + ), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + let bank = bank_forks.read().unwrap().root_bank(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_3.clone()), + &mut vec![] + ) + .is_ok()); + let cert_4 = CertificateMessage { + certificate: Certificate::new(CertificateType::Finalize, 4, None), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_4.clone()), + &mut vec![] + ) + .is_ok()); + // Should return both certificates + let certs = pool.get_certs_for_standstill(); + assert_eq!(certs.len(), 2); + 
assert!(certs.iter().any(|cert| cert.certificate.slot() == 3 + && cert.certificate.certificate_type() == CertificateType::NotarizeFallback)); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 4 + && cert.certificate.certificate_type() == CertificateType::Finalize)); + + // Add Notarize cert on 5 + let cert_5 = CertificateMessage { + certificate: Certificate::new(CertificateType::Notarize, 5, Some(Hash::new_unique())), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); + + // Add Finalize cert on 5 + let cert_5_finalize = CertificateMessage { + certificate: Certificate::new(CertificateType::Finalize, 5, None), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_5_finalize.clone()), + &mut vec![] + ) + .is_ok()); + + // Add FinalizeFast cert on 5 + let cert_5 = CertificateMessage { + certificate: Certificate::new( + CertificateType::FinalizeFast, + 5, + Some(Hash::new_unique()), + ), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); + // Should return only FinalizeFast cert on 5 + let certs = pool.get_certs_for_standstill(); + assert_eq!(certs.len(), 1); + assert!( + certs[0].certificate.slot() == 5 + && certs[0].certificate.certificate_type() == CertificateType::FinalizeFast + ); + + // Now add Notarize cert on 6 + let cert_6 = CertificateMessage { + certificate: Certificate::new(CertificateType::Notarize, 6, Some(Hash::new_unique())), + signature: 
BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_6.clone()), + &mut vec![] + ) + .is_ok()); + // Should return certs on 5 and 6 + let certs = pool.get_certs_for_standstill(); + assert_eq!(certs.len(), 2); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 5 + && cert.certificate.certificate_type() == CertificateType::FinalizeFast)); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 6 + && cert.certificate.certificate_type() == CertificateType::Notarize)); + + // Add another Finalize cert on 6 + let cert_6_finalize = CertificateMessage { + certificate: Certificate::new(CertificateType::Finalize, 6, None), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_6_finalize.clone()), + &mut vec![] + ) + .is_ok()); + // Add a NotarizeFallback cert on 6 + let cert_6_notarize_fallback = CertificateMessage { + certificate: Certificate::new( + CertificateType::NotarizeFallback, + 6, + Some(Hash::new_unique()), + ), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_6_notarize_fallback.clone()), + &mut vec![] + ) + .is_ok()); + // This should not be returned because 6 is the current highest finalized slot + // only Notarize/Finalze/FinalizeFast should be returned + let certs = pool.get_certs_for_standstill(); + assert_eq!(certs.len(), 2); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 6 + && cert.certificate.certificate_type() == CertificateType::Finalize)); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 6 + && 
cert.certificate.certificate_type() == CertificateType::Notarize)); + + // Add another skip on 7 + let cert_7 = CertificateMessage { + certificate: Certificate::new(CertificateType::Skip, 7, None), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert_7.clone()), + &mut vec![] + ) + .is_ok()); + // Should return certs on 6 and 7 + let certs = pool.get_certs_for_standstill(); + assert_eq!(certs.len(), 3); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 6 + && cert.certificate.certificate_type() == CertificateType::Finalize)); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 6 + && cert.certificate.certificate_type() == CertificateType::Notarize)); + assert!(certs.iter().any(|cert| cert.certificate.slot() == 7 + && cert.certificate.certificate_type() == CertificateType::Skip)); + } + + #[test] + fn test_new_parent_ready_with_certificates() { + solana_logger::setup(); + let (_, mut pool, bank_forks) = create_initial_state(); + let bank = bank_forks.read().unwrap().root_bank(); + let mut events = vec![]; + + // Add a notarization cert on slot 1 to 3 + let hash = Hash::new_unique(); + for slot in 1..=3 { + let cert = CertificateMessage { + certificate: Certificate::new(CertificateType::Notarize, slot, Some(hash)), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + } + // events should now contain ParentReady for slot 4 + error!("Events: {:?}", events); + assert!(events + .iter() + .any(|event| matches!(event, VotorEvent::ParentReady { + slot: 4, + parent_block: (3, h) + } if h == &hash))); + events.clear(); + + // Also works if we add FinalizeFast for slot 4 to 7 + for 
slot in 4..=7 { + let cert = CertificateMessage { + certificate: Certificate::new(CertificateType::FinalizeFast, slot, Some(hash)), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + } + // events should now contain ParentReady for slot 8 + error!("Events: {:?}", events); + assert!(events + .iter() + .any(|event| matches!(event, VotorEvent::ParentReady { + slot: 8, + parent_block: (7, h) + } if h == &hash))); + events.clear(); + + // NotarizeFallback on slot 8 to 10 and FinalizeFast on slot 11 + for slot in 8..=10 { + let cert = CertificateMessage { + certificate: Certificate::new(CertificateType::NotarizeFallback, slot, Some(hash)), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + } + let cert = CertificateMessage { + certificate: Certificate::new(CertificateType::FinalizeFast, 11, Some(hash)), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + &ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + // events should now contain ParentReady for slot 12 + error!("Events: {:?}", events); + assert!(events + .iter() + .any(|event| matches!(event, VotorEvent::ParentReady { + slot: 12, + parent_block: (11, h) + } if h == &hash))); + } +} diff --git a/votor/src/certificate_pool/parent_ready_tracker.rs b/votor/src/certificate_pool/parent_ready_tracker.rs new file mode 100644 index 00000000000000..919cd8d1911792 --- /dev/null +++ b/votor/src/certificate_pool/parent_ready_tracker.rs @@ -0,0 +1,382 @@ +//! 
Tracks the parent-ready condition +//! +//! The parent-ready condition pertains to a slot `s` and a block hash `hash(b)`, +//! where `s` is the first slot of a leader window and `s > slot(b)`. +//! Specifically, it is defined as the following: +//! - Block `b` is notarized or notarized-fallback, and +//! - slots `slot(b) + 1` (inclusive) to `s` (non-inclusive) are skip-certified. +//! +//! Additional restriction on notarization votes ensure that the parent-ready +//! condition holds for a block `b` only if it also holds for all ancestors of `b`. +//! Together this ensures that the block `b` is a valid parent for block +//! production, i.e., under good network conditions an honest leader proposing +//! a block with parent `b` in slot `s` will have their block finalized. + +use { + crate::{event::VotorEvent, MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE}, + solana_clock::{Slot, NUM_CONSECUTIVE_LEADER_SLOTS}, + solana_pubkey::Pubkey, + solana_votor_messages::consensus_message::Block, + std::collections::HashMap, +}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum BlockProductionParent { + MissedWindow, + ParentNotReady, + Parent(Block), +} + +#[derive(Clone, Debug, Default)] +pub struct ParentReadyTracker { + /// Our pubkey for logging + my_pubkey: Pubkey, + + /// Parent ready status for each slot + slot_statuses: HashMap, + + /// Root + root: Slot, + + /// Highest slot with parent ready status + // TODO: While the voting loop is sequential we track every slot (not just the first in window) + // However once we handle all slots concurrently we will update this to only count first leader + // slot in window + highest_with_parent_ready: Slot, +} + +#[derive(Clone, Default, Debug)] +struct ParentReadyStatus { + /// Whether this slot has a skip certificate + skip: bool, + /// The blocks that have been notar fallbacked in this slot + notar_fallbacks: Vec, + /// The parent blocks that achieve parent ready in this slot, + /// Theses blocks are all potential parents 
choosable in this slot + parents_ready: Vec, +} + +impl ParentReadyTracker { + /// Creates a new tracker with the root bank as implicitely notarized fallback + pub fn new(my_pubkey: Pubkey, root_block @ (root_slot, _): Block) -> Self { + let mut slot_statuses = HashMap::new(); + slot_statuses.insert( + root_slot, + ParentReadyStatus { + skip: false, + notar_fallbacks: vec![root_block], + parents_ready: vec![], + }, + ); + slot_statuses.insert( + root_slot.saturating_add(1), + ParentReadyStatus { + skip: false, + notar_fallbacks: vec![], + parents_ready: vec![root_block], + }, + ); + Self { + my_pubkey, + slot_statuses, + root: root_slot, + highest_with_parent_ready: root_slot.saturating_add(1), + } + } + + /// Adds a new notarize fallback certificate, we can use Notarize/NotarizeFallback/FastFinalize + pub fn add_new_notar_fallback_or_stronger( + &mut self, + block @ (slot, _): Block, + events: &mut Vec, + ) { + if slot <= self.root { + return; + } + + let status = self.slot_statuses.entry(slot).or_default(); + if status.notar_fallbacks.contains(&block) { + return; + } + trace!( + "{}: Adding new notar fallback for {block:?}", + self.my_pubkey + ); + status.notar_fallbacks.push(block); + assert!(status.notar_fallbacks.len() <= MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE); + + // Add this block as valid parent to skip connected future blocks + for s in slot.saturating_add(1).. 
{ + trace!( + "{}: Adding new parent ready for {s} parent {block:?}", + self.my_pubkey + ); + let status = self.slot_statuses.entry(s).or_default(); + if !status.parents_ready.contains(&block) { + status.parents_ready.push(block); + + // Only notify for parent ready on first leader slots + if s % NUM_CONSECUTIVE_LEADER_SLOTS == 0 { + events.push(VotorEvent::ParentReady { + slot: s, + parent_block: block, + }); + } + + self.highest_with_parent_ready = s.max(self.highest_with_parent_ready); + } + + if !status.skip { + break; + } + } + } + + /// Adds a new skip certificate + pub fn add_new_skip(&mut self, slot: Slot, events: &mut Vec) { + if slot <= self.root { + return; + } + + trace!("{}: Adding new skip for {slot:?}", self.my_pubkey); + let status = self.slot_statuses.entry(slot).or_default(); + status.skip = true; + + // Get newly connected future slots + let mut future_slots = vec![]; + for s in slot.saturating_add(1).. { + future_slots.push(s); + if !self.slot_statuses.get(&s).is_some_and(|ss| ss.skip) { + break; + } + } + + // Find possible parents using the previous slot + let mut potential_parents = vec![]; + let Some(status) = self.slot_statuses.get(&(slot.saturating_sub(1))) else { + return; + }; + for nf in &status.notar_fallbacks { + // If there's a notarize fallback certificate we can use the previous slot + // as a parent + potential_parents.push(*nf); + } + if status.skip { + // If there's a skip certificate we can use the parents of the previous slot + // as a parent + for parent in &status.parents_ready { + potential_parents.push(*parent); + } + } + + if potential_parents.is_empty() { + return; + } + + // Add these as valid parents to the future slots + for s in future_slots { + trace!( + "{}: Adding new parent ready for {s} parents {potential_parents:?}", + self.my_pubkey, + ); + let status = self.slot_statuses.entry(s).or_default(); + for &block in &potential_parents { + if status.parents_ready.contains(&block) { + // We already have this parent 
ready + continue; + } + status.parents_ready.push(block); + // Only notify for parent ready on first leader slots + if s % NUM_CONSECUTIVE_LEADER_SLOTS == 0 { + events.push(VotorEvent::ParentReady { + slot: s, + parent_block: block, + }); + } + } + + self.highest_with_parent_ready = s.max(self.highest_with_parent_ready); + } + } + + pub fn parent_ready(&self, slot: Slot, parent: Block) -> bool { + self.slot_statuses + .get(&slot) + .is_some_and(|ss| ss.parents_ready.contains(&parent)) + } + + /// For our leader slot `slot`, which block should we use as the parent + pub fn block_production_parent(&self, slot: Slot) -> BlockProductionParent { + if self.highest_parent_ready() > slot { + // This indicates that our block has already received a certificate + // either because we were too slow, or because we are restarting + // and catching up. Either way we should not attempt to produce this slot + return BlockProductionParent::MissedWindow; + } + // TODO: for duplicate blocks we should adjust this to choose the + // parent with the least amount of duplicate blocks if possible. + // Notice that each scenario with multiple NotarFallbacks also will eventually + // have a skip for that slot, so prefer the skip if we've received it. + match self + .slot_statuses + .get(&slot) + .and_then(|ss| ss.parents_ready.iter().max().copied()) + { + Some(parent) => BlockProductionParent::Parent(parent), + // TODO: this will be plugged in for optimistic block production + None => BlockProductionParent::ParentNotReady, + } + } + + pub fn highest_parent_ready(&self) -> Slot { + self.highest_with_parent_ready + } + + pub fn set_root(&mut self, root: Slot) { + self.root = root; + self.slot_statuses.retain(|&s, _| s >= root); + } + + /// Updates the pubkey. Note that the pubkey is used for logging purposes only. 
+ pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + } +} + +#[cfg(test)] +mod tests { + use { + super::*, solana_clock::NUM_CONSECUTIVE_LEADER_SLOTS, solana_hash::Hash, + solana_pubkey::Pubkey, + }; + + #[test] + fn basic() { + let genesis = Block::default(); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); + let mut events = vec![]; + + for i in 1..2 * NUM_CONSECUTIVE_LEADER_SLOTS { + let block = (i, Hash::new_unique()); + tracker.add_new_notar_fallback_or_stronger(block, &mut events); + assert_eq!(tracker.highest_parent_ready(), i + 1); + assert!(tracker.parent_ready(i + 1, block)); + } + } + + #[test] + fn skips() { + let genesis = Block::default(); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); + let mut events = vec![]; + let block = (1, Hash::new_unique()); + + tracker.add_new_notar_fallback_or_stronger(block, &mut events); + tracker.add_new_skip(1, &mut events); + tracker.add_new_skip(2, &mut events); + tracker.add_new_skip(3, &mut events); + + assert!(tracker.parent_ready(4, block)); + assert!(tracker.parent_ready(4, genesis)); + assert_eq!(tracker.highest_parent_ready(), 4); + } + + #[test] + fn out_of_order() { + let genesis = Block::default(); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); + let mut events = vec![]; + let block = (1, Hash::new_unique()); + + tracker.add_new_skip(3, &mut events); + tracker.add_new_skip(2, &mut events); + + tracker.add_new_notar_fallback_or_stronger(block, &mut events); + assert!(tracker.parent_ready(4, block)); + assert!(!tracker.parent_ready(4, genesis)); + + tracker.add_new_skip(1, &mut events); + assert!(tracker.parent_ready(4, block)); + assert!(tracker.parent_ready(4, genesis)); + } + + #[test] + fn snapshot_wfsm() { + let root_slot = 2147; + let root_block = (root_slot, Hash::new_unique()); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), root_block); + let mut events = vec![]; + + 
assert!(tracker.parent_ready(root_slot + 1, root_block)); + assert_eq!(tracker.highest_parent_ready(), root_slot + 1); + + // Skipping root slot shouldn't do anything + tracker.add_new_skip(root_slot, &mut events); + assert!(tracker.parent_ready(root_slot + 1, root_block)); + assert_eq!(tracker.highest_parent_ready(), root_slot + 1); + + // Adding new certs should work as root slot is implicitely notarized fallback + tracker.add_new_skip(root_slot + 1, &mut events); + tracker.add_new_skip(root_slot + 2, &mut events); + assert!(tracker.parent_ready(root_slot + 3, root_block)); + assert_eq!(tracker.highest_parent_ready(), root_slot + 3); + + let block = (root_slot + 4, Hash::new_unique()); + tracker.add_new_notar_fallback_or_stronger(block, &mut events); + assert!(tracker.parent_ready(root_slot + 3, root_block)); + assert!(tracker.parent_ready(root_slot + 5, block)); + assert_eq!(tracker.highest_parent_ready(), root_slot + 5); + } + + #[test] + fn highest_parent_ready_out_of_order() { + let genesis = Block::default(); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); + let mut events = vec![]; + assert_eq!(tracker.highest_parent_ready(), 1); + + tracker.add_new_skip(2, &mut events); + assert_eq!(tracker.highest_parent_ready(), 1); + + tracker.add_new_skip(3, &mut events); + assert_eq!(tracker.highest_parent_ready(), 1); + + tracker.add_new_skip(1, &mut events); + assert!(tracker.parent_ready(4, genesis)); + assert_eq!(tracker.highest_parent_ready(), 4); + assert_eq!( + tracker.block_production_parent(4), + BlockProductionParent::Parent(genesis) + ); + } + + #[test] + fn missed_window() { + let genesis = Block::default(); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); + let mut events = vec![]; + assert_eq!(tracker.highest_parent_ready(), 1); + assert_eq!( + tracker.block_production_parent(4), + BlockProductionParent::ParentNotReady + ); + + tracker.add_new_notar_fallback_or_stronger((4, Hash::new_unique()), &mut 
events); + assert_eq!(tracker.highest_parent_ready(), 5); + assert_eq!( + tracker.block_production_parent(4), + BlockProductionParent::MissedWindow + ); + + assert_eq!( + tracker.block_production_parent(8), + BlockProductionParent::ParentNotReady + ); + tracker.add_new_notar_fallback_or_stronger((64, Hash::new_unique()), &mut events); + assert_eq!(tracker.highest_parent_ready(), 65); + assert_eq!( + tracker.block_production_parent(8), + BlockProductionParent::MissedWindow + ); + } +} diff --git a/votor/src/certificate_pool/slot_stake_counters.rs b/votor/src/certificate_pool/slot_stake_counters.rs new file mode 100644 index 00000000000000..042e3bd43bab06 --- /dev/null +++ b/votor/src/certificate_pool/slot_stake_counters.rs @@ -0,0 +1,309 @@ +use { + crate::{ + certificate_pool::stats::CertificatePoolStats, event::VotorEvent, Stake, + SAFE_TO_NOTAR_MIN_NOTARIZE_AND_SKIP, SAFE_TO_NOTAR_MIN_NOTARIZE_FOR_NOTARIZE_OR_SKIP, + SAFE_TO_NOTAR_MIN_NOTARIZE_ONLY, SAFE_TO_SKIP_THRESHOLD, + }, + solana_hash::Hash, + solana_votor_messages::vote::Vote, + std::collections::BTreeMap, +}; + +#[derive(Debug, Default)] +pub(crate) struct SlotStakeCounters { + my_first_vote: Option, + total_stake: Stake, + skip_total: Stake, + notarize_total: Stake, + notarize_entry_total: BTreeMap, + top_notarized_stake: Stake, + safe_to_notar_sent: Vec, + safe_to_skip_sent: bool, +} + +impl SlotStakeCounters { + pub fn new(total_stake: Stake) -> Self { + Self { + total_stake, + ..Default::default() + } + } + + pub fn add_vote( + &mut self, + vote: &Vote, + entry_stake: Stake, + is_my_own_vote: bool, + events: &mut Vec, + stats: &mut CertificatePoolStats, + ) { + match vote { + Vote::Skip(_) => self.skip_total = entry_stake, + Vote::Notarize(vote) => { + let old_entry_stake = self + .notarize_entry_total + .insert(*vote.block_id(), entry_stake) + .unwrap_or(0); + self.notarize_total = self + .notarize_total + .saturating_sub(old_entry_stake) + .saturating_add(entry_stake); + self.top_notarized_stake = 
self.top_notarized_stake.max(entry_stake); + } + _ => return, // Not interested in other vote types + } + if self.my_first_vote.is_none() && is_my_own_vote { + self.my_first_vote = Some(*vote); + } + if self.my_first_vote.is_none() { + // We have not voted yet, no need to check safe to notarize or skip + return; + } + let slot = vote.slot(); + // Check safe to notar + for (block_id, stake) in &self.notarize_entry_total { + if !self.safe_to_notar_sent.contains(block_id) && self.is_safe_to_notar(block_id, stake) + { + events.push(VotorEvent::SafeToNotar((slot, *block_id))); + stats.event_safe_to_notarize = stats.event_safe_to_notarize.saturating_add(1); + self.safe_to_notar_sent.push(*block_id); + } + } + // Check safe to skip + if !self.safe_to_skip_sent && self.is_safe_to_skip() { + events.push(VotorEvent::SafeToSkip(slot)); + self.safe_to_skip_sent = true; + stats.event_safe_to_skip = stats.event_safe_to_skip.saturating_add(1); + } + } + + fn is_safe_to_notar(&self, block_id: &Hash, stake: &Stake) -> bool { + // White paper v1.1 page 22: The event is only issued if the node voted in slot s already, + // but not to notarize b. 
Moreover: + // notar(b) >= 40% or (skip(s) + notar(b) >= 60% and notar(b) >= 20%) + if let Some(Vote::Notarize(my_vote)) = self.my_first_vote.as_ref() { + if my_vote.block_id() == block_id { + return false; // I voted for the same block, no need to send NotarizeFallback + } + } + let skip_ratio = self.skip_total as f64 / self.total_stake as f64; + let notarized_ratio = *stake as f64 / self.total_stake as f64; + trace!( + "safe_to_notar {:?} {} {}", + block_id, + skip_ratio, + notarized_ratio, + ); + // Check if the block fits condition (i) 40% of stake holders voted notarize + notarized_ratio >= SAFE_TO_NOTAR_MIN_NOTARIZE_ONLY + // Check if the block fits condition (ii) 20% notarized, and 60% notarized or skip + || (notarized_ratio >= SAFE_TO_NOTAR_MIN_NOTARIZE_FOR_NOTARIZE_OR_SKIP + && notarized_ratio + skip_ratio >= SAFE_TO_NOTAR_MIN_NOTARIZE_AND_SKIP) + } + + fn is_safe_to_skip(&self) -> bool { + // White paper v1.1 page 22: The event is only issued if the node voted in slot s already, + // but not to skip s. 
Moreover: + // skip(s) + Sum of all notarize - (max in notarize(b)) >= 40% + if let Some(Vote::Notarize(_)) = self.my_first_vote.as_ref() { + trace!( + "safe_to_skip {} {:?} {} {} {}", + self.my_first_vote.unwrap().slot(), + self.my_first_vote.unwrap().block_id(), + self.skip_total, + self.notarize_total, + self.top_notarized_stake + ); + self.skip_total + .saturating_add(self.notarize_total.saturating_sub(self.top_notarized_stake)) + as f64 + / self.total_stake as f64 + >= SAFE_TO_SKIP_THRESHOLD + } else { + false + } + } +} + +#[cfg(test)] +mod tests { + use {super::*, solana_votor_messages::vote::Vote}; + + #[test] + fn test_safe_to_notar() { + let mut counters = SlotStakeCounters::new(100); + + let mut events = vec![]; + let mut stats = CertificatePoolStats::default(); + let slot = 2; + // I voted for skip + counters.add_vote( + &Vote::new_skip_vote(slot), + 10, + true, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_notarize, 0); + + // 40% of stake holders voted notarize + counters.add_vote( + &Vote::new_notarization_vote(slot, Hash::default()), + 40, + false, + &mut events, + &mut stats, + ); + assert_eq!(events.len(), 1); + assert!( + matches!(events[0], VotorEvent::SafeToNotar((s, block_id)) if s == slot && block_id == Hash::default()) + ); + assert_eq!(stats.event_safe_to_notarize, 1); + events.clear(); + + // Adding more notarizations does not trigger more events + counters.add_vote( + &Vote::new_notarization_vote(slot, Hash::default()), + 20, + false, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_notarize, 1); + + // Reset counters + counters = SlotStakeCounters::new(100); + events.clear(); + stats = CertificatePoolStats::default(); + + // I voted for notarize b + let hash_1 = Hash::new_unique(); + counters.add_vote( + &Vote::new_notarization_vote(slot, hash_1), + 1, + true, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + 
assert_eq!(stats.event_safe_to_notarize, 0); + + // 25% of stake holders voted notarize b' + let hash_2 = Hash::new_unique(); + counters.add_vote( + &Vote::new_notarization_vote(slot, hash_2), + 25, + false, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_notarize, 0); + + // 35% more of stake holders voted skip + counters.add_vote( + &Vote::new_skip_vote(slot), + 35, + false, + &mut events, + &mut stats, + ); + assert_eq!(events.len(), 1); + assert!( + matches!(events[0], VotorEvent::SafeToNotar((s, block_id)) if s == slot && block_id == hash_2) + ); + assert_eq!(stats.event_safe_to_notarize, 1); + } + + #[test] + fn test_safe_to_skip() { + let mut counters = SlotStakeCounters::new(100); + + let mut events = vec![]; + let mut stats = CertificatePoolStats::default(); + let slot = 2; + // I voted for notarize b + counters.add_vote( + &Vote::new_notarization_vote(slot, Hash::default()), + 10, + true, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_skip, 0); + + // 40% of stake holders voted skip + counters.add_vote( + &Vote::new_skip_vote(slot), + 40, + false, + &mut events, + &mut stats, + ); + assert_eq!(events.len(), 1); + assert!(matches!(events[0], VotorEvent::SafeToSkip(s) if s == slot)); + assert_eq!(stats.event_safe_to_skip, 1); + events.clear(); + + // Adding more skips does not trigger more events + counters.add_vote( + &Vote::new_skip_vote(slot), + 20, + false, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_skip, 1); + + // Reset counters + counters = SlotStakeCounters::new(100); + events.clear(); + stats = CertificatePoolStats::default(); + + // I voted for notarize b, 10% of stake holders voted with me + let hash_1 = Hash::new_unique(); + counters.add_vote( + &Vote::new_notarization_vote(slot, hash_1), + 10, + true, + &mut events, + &mut stats, + ); + // 20% of stake holders voted a different notarization b' 
+ let hash_2 = Hash::new_unique(); + counters.add_vote( + &Vote::new_notarization_vote(slot, hash_2), + 20, + false, + &mut events, + &mut stats, + ); + // 30% of stake holders voted skip + counters.add_vote( + &Vote::new_skip_vote(slot), + 30, + false, + &mut events, + &mut stats, + ); + assert_eq!(events.len(), 1); + assert!(matches!(events[0], VotorEvent::SafeToSkip(s) if s == slot)); + assert_eq!(stats.event_safe_to_skip, 1); + events.clear(); + + // Adding more notarization on b does not trigger more events + counters.add_vote( + &Vote::new_notarization_vote(slot, hash_1), + 10, + false, + &mut events, + &mut stats, + ); + assert!(events.is_empty()); + assert_eq!(stats.event_safe_to_skip, 1); + } +} diff --git a/votor/src/certificate_pool/stats.rs b/votor/src/certificate_pool/stats.rs new file mode 100644 index 00000000000000..f4652be255b4d7 --- /dev/null +++ b/votor/src/certificate_pool/stats.rs @@ -0,0 +1,229 @@ +use { + crate::VoteType, + solana_metrics::datapoint_info, + solana_votor_messages::consensus_message::CertificateType, + std::time::{Duration, Instant}, +}; + +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); + +#[derive(Debug)] +pub(crate) struct CertificatePoolStats { + pub(crate) conflicting_votes: u32, + pub(crate) event_safe_to_notarize: u32, + pub(crate) event_safe_to_skip: u32, + pub(crate) exist_certs: u32, + pub(crate) exist_votes: u32, + pub(crate) incoming_certs: u32, + pub(crate) incoming_votes: u32, + pub(crate) out_of_range_certs: u32, + pub(crate) out_of_range_votes: u32, + + pub(crate) new_certs_generated: Vec, + pub(crate) new_certs_ingested: Vec, + pub(crate) ingested_votes: Vec, + + pub(crate) last_request_time: Instant, +} + +impl Default for CertificatePoolStats { + fn default() -> Self { + Self::new() + } +} + +impl CertificatePoolStats { + pub fn new() -> Self { + let num_vote_types = (VoteType::SkipFallback as usize).saturating_add(1); + let num_cert_types = (CertificateType::Skip as usize).saturating_add(1); 
+ Self { + conflicting_votes: 0, + event_safe_to_notarize: 0, + event_safe_to_skip: 0, + exist_certs: 0, + exist_votes: 0, + incoming_certs: 0, + incoming_votes: 0, + out_of_range_certs: 0, + out_of_range_votes: 0, + + new_certs_ingested: vec![0; num_cert_types], + new_certs_generated: vec![0; num_cert_types], + ingested_votes: vec![0; num_vote_types], + + last_request_time: Instant::now(), + } + } + + pub fn incr_ingested_vote_type(&mut self, vote_type: VoteType) { + let index = vote_type as usize; + + self.ingested_votes[index] = self.ingested_votes[index].saturating_add(1); + } + + pub fn incr_cert_type(&mut self, cert_type: CertificateType, is_generated: bool) { + let index = cert_type as usize; + let array = if is_generated { + &mut self.new_certs_generated + } else { + &mut self.new_certs_ingested + }; + + array[index] = array[index].saturating_add(1); + } + + fn report(&self) { + datapoint_info!( + "certificate_pool_stats", + ("conflicting_votes", self.conflicting_votes as i64, i64), + ("event_safe_to_skip", self.event_safe_to_skip as i64, i64), + ( + "event_safe_to_notarize", + self.event_safe_to_notarize as i64, + i64 + ), + ("exist_votes", self.exist_votes as i64, i64), + ("exist_certs", self.exist_certs as i64, i64), + ("incoming_votes", self.incoming_votes as i64, i64), + ("incoming_certs", self.incoming_certs as i64, i64), + ("out_of_range_votes", self.out_of_range_votes as i64, i64), + ("out_of_range_certs", self.out_of_range_certs as i64, i64), + ); + + datapoint_info!( + "certificate_pool_ingested_votes", + ( + "finalize", + *self + .ingested_votes + .get(VoteType::Finalize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize", + *self + .ingested_votes + .get(VoteType::Notarize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize_fallback", + *self + .ingested_votes + .get(VoteType::NotarizeFallback as usize) + .unwrap() as i64, + i64 + ), + ( + "skip", + *self.ingested_votes.get(VoteType::Skip as usize).unwrap() as i64, + i64 + ), + ( + 
"skip_fallback", + *self + .ingested_votes + .get(VoteType::SkipFallback as usize) + .unwrap() as i64, + i64 + ), + ); + + datapoint_info!( + "certfificate_pool_ingested_certs", + ( + "finalize", + *self + .new_certs_ingested + .get(CertificateType::Finalize as usize) + .unwrap() as i64, + i64 + ), + ( + "finalize_fast", + *self + .new_certs_ingested + .get(CertificateType::FinalizeFast as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize", + *self + .new_certs_ingested + .get(CertificateType::Notarize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize_fallback", + *self + .new_certs_ingested + .get(CertificateType::NotarizeFallback as usize) + .unwrap() as i64, + i64 + ), + ( + "skip", + *self + .new_certs_ingested + .get(CertificateType::Skip as usize) + .unwrap() as i64, + i64 + ), + ); + + datapoint_info!( + "certificate_pool_generated_certs", + ( + "finalize", + *self + .new_certs_generated + .get(CertificateType::Finalize as usize) + .unwrap() as i64, + i64 + ), + ( + "finalize_fast", + *self + .new_certs_generated + .get(CertificateType::FinalizeFast as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize", + *self + .new_certs_generated + .get(CertificateType::Notarize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize_fallback", + *self + .new_certs_generated + .get(CertificateType::NotarizeFallback as usize) + .unwrap() as i64, + i64 + ), + ( + "skip", + *self + .new_certs_generated + .get(CertificateType::Skip as usize) + .unwrap() as i64, + i64 + ), + ); + } + + pub fn maybe_report(&mut self) { + if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { + self.report(); + *self = Self::new(); + } + } +} diff --git a/votor/src/certificate_pool/vote_certificate_builder.rs b/votor/src/certificate_pool/vote_certificate_builder.rs new file mode 100644 index 00000000000000..2df0a03e8fafba --- /dev/null +++ b/votor/src/certificate_pool/vote_certificate_builder.rs @@ -0,0 +1,341 @@ +use { + crate::{certificate_limits_and_vote_types, 
VoteType}, + bitvec::prelude::*, + itertools::Itertools, + solana_bls_signatures::{BlsError, SignatureProjective}, + solana_signer_store::{decode, encode_base2, encode_base3, DecodeError, Decoded, EncodeError}, + solana_votor_messages::consensus_message::{Certificate, CertificateMessage, VoteMessage}, + thiserror::Error, +}; + +/// Maximum number of validators in a certificate +/// +/// There are around 1500 validators currently. For a clean power-of-two +/// implementation, we should choose either 2048 or 4096. Choose a more +/// conservative number 4096 for now. During build() we will cut off end +/// of the bitmaps if the tail contains only zeroes, so actual bitmap +/// length will be less than or equal to this number. +const MAXIMUM_VALIDATORS: usize = 4096; + +#[derive(Debug, Error, PartialEq)] +pub enum CertificateError { + #[error("BLS error: {0}")] + BlsError(#[from] BlsError), + #[error("solana-signer-store decode error: {0:?}")] + DecodeError(DecodeError), + #[error("solana-signer-store encode error: {0:?}")] + EncodeError(EncodeError), + #[error("Validator does not exist for given rank: {0}")] + ValidatorDoesNotExist(u16), +} + +/// A builder for creating a `CertificateMessage` by efficiently aggregating BLS signatures. +#[derive(Clone)] +pub struct VoteCertificateBuilder { + certificate: Certificate, + signature: SignatureProjective, + // For some certificates we need two bitmaps, for example, NotarizeFallback + // certificates have Notarize and NotarizeFallback votes, so we need two bitmaps + // to represent them. The order of the VoteType is defined in certificate_limits_and_vote_types. + // We normally put fallback votes in the second bitmap. + // The order of the VoteType is important, if you change it, you might interpret + // the bitmap incorrectly. + // Some certificates (like Finalize) only need one bitmap, then the second bitmap + // will be empty. 
+ input_bitmap_1: BitVec, + input_bitmap_2: BitVec, +} + +impl TryFrom for VoteCertificateBuilder { + type Error = CertificateError; + + fn try_from(message: CertificateMessage) -> Result { + let projective_signature = SignatureProjective::try_from(message.signature)?; + let decoded_bitmap = + decode(&message.bitmap, MAXIMUM_VALIDATORS).map_err(CertificateError::DecodeError)?; + let (mut input_bitmap_1, mut input_bitmap_2) = match decoded_bitmap { + Decoded::Base2(bitmap) => ( + bitmap, + BitVec::::repeat(false, MAXIMUM_VALIDATORS), + ), + Decoded::Base3(bitmap1, bitmap2) => (bitmap1, bitmap2), + }; + input_bitmap_1.resize(MAXIMUM_VALIDATORS, false); + input_bitmap_2.resize(MAXIMUM_VALIDATORS, false); + Ok(VoteCertificateBuilder { + certificate: message.certificate, + signature: projective_signature, + input_bitmap_1, + input_bitmap_2, + }) + } +} + +impl VoteCertificateBuilder { + pub fn new(certificate_id: Certificate) -> Self { + Self { + certificate: certificate_id, + signature: SignatureProjective::identity(), + input_bitmap_1: BitVec::repeat(false, MAXIMUM_VALIDATORS), + input_bitmap_2: BitVec::repeat(false, MAXIMUM_VALIDATORS), + } + } + + /// Aggregates a slice of `VoteMessage`s into the builder. 
+ pub fn aggregate(&mut self, messages: &[VoteMessage]) -> Result<(), CertificateError> { + let Some(vote_type) = messages.first().map(|m| VoteType::get_type(&m.vote)) else { + return Ok(()); + }; + let vote_types = certificate_limits_and_vote_types(self.certificate).1; + + let target_bitmap = if vote_type == vote_types[0] { + &mut self.input_bitmap_1 + } else { + &mut self.input_bitmap_2 + }; + + for vote_message in messages { + let rank = vote_message.rank as usize; + if MAXIMUM_VALIDATORS <= rank { + return Err(CertificateError::ValidatorDoesNotExist(vote_message.rank)); + } + target_bitmap.set(rank, true); + } + + let signature_iter = messages + .iter() + .map(|vote_message| &vote_message.signature) + .collect_vec(); + Ok(self.signature.aggregate_with(&signature_iter)?) + } + + pub fn build(self) -> Result { + let mut input_bitmap_1 = self.input_bitmap_1; + let mut input_bitmap_2 = self.input_bitmap_2; + + let last_one_1 = input_bitmap_1 // use local variable + .last_one() + .map_or(0, |i| i.saturating_add(1)); + let last_one_2 = input_bitmap_2 // use local variable + .last_one() + .map_or(0, |i| i.saturating_add(1)); + let new_length = last_one_1.max(last_one_2); + if new_length > MAXIMUM_VALIDATORS { + error!( + "Bitmap length exceeds maximum allowed: {} should be caught during aggregation", + MAXIMUM_VALIDATORS + ); + return Err(CertificateError::ValidatorDoesNotExist(new_length as u16)); + } + + input_bitmap_1.resize(new_length, false); + input_bitmap_2.resize(new_length, false); + let bitmap = if input_bitmap_2.count_ones() > 0 { + // If we have two bitmaps, use Base3 encoding + encode_base3(&input_bitmap_1, &input_bitmap_2).map_err(CertificateError::EncodeError)? + } else { + // If we only have one bitmap, use Base2 encoding + encode_base2(&input_bitmap_1).map_err(CertificateError::EncodeError)? 
+ }; + Ok(CertificateMessage { + certificate: self.certificate, + signature: self.signature.into(), + bitmap, + }) + } +} + +#[cfg(test)] +mod tests { + use { + super::*, + solana_bls_signatures::{Keypair as BLSKeypair, Signature as BLSSignature}, + solana_hash::Hash, + solana_votor_messages::{ + consensus_message::{Certificate, CertificateType, VoteMessage}, + vote::Vote, + }, + }; + + #[test] + fn test_normal_build() { + let hash = Hash::new_unique(); + let certificate = Certificate::new(CertificateType::NotarizeFallback, 1, Some(hash)); + let mut builder = VoteCertificateBuilder::new(certificate); + // Test building the certificate from Notarize and NotarizeFallback votes + // Create Notarize on validator 1, 4, 6 + let vote = Vote::new_notarization_vote(1, hash); + let rank_1 = [1, 4, 6]; + let messages_1 = rank_1 + .iter() + .map(|&rank| { + let keypair = BLSKeypair::new(); + let signature = keypair.sign(b"fake_vote_message"); + VoteMessage { + vote, + signature: signature.into(), + rank, + } + }) + .collect::>(); + builder + .aggregate(&messages_1) + .expect("Failed to aggregate notarization votes"); + // Create NotarizeFallback on validator 2, 3, 5, 7 + let vote = Vote::new_notarization_fallback_vote(1, hash); + let rank_2 = [2, 3, 5, 7]; + let messages_2 = rank_2 + .iter() + .map(|&rank| { + let keypair = BLSKeypair::new(); + let signature = keypair.sign(b"fake_vote_message_2"); + VoteMessage { + vote, + signature: signature.into(), + rank, + } + }) + .collect::>(); + builder + .aggregate(&messages_2) + .expect("Failed to aggregate notarization fallback votes"); + + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.certificate, certificate); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { + Decoded::Base3(bitmap1, bitmap2) => { + assert_eq!(bitmap1.len(), 8); + assert_eq!(bitmap2.len(), 8); + for i in rank_1 { + assert!(bitmap1[i as 
usize]); + } + assert_eq!(bitmap1.count_ones(), 3); + for i in rank_2 { + assert!(bitmap2[i as usize]); + } + assert_eq!(bitmap2.count_ones(), 4); + } + _ => panic!("Expected Base3 encoding"), + } + + // Build a new certificate with only Notarize votes, we should get Base2 encoding + let mut builder = VoteCertificateBuilder::new(certificate); + builder + .aggregate(&messages_1) + .expect("Failed to aggregate notarization votes"); + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.certificate, certificate); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { + Decoded::Base2(bitmap1) => { + assert_eq!(bitmap1.len(), 7); + for i in rank_1 { + assert!(bitmap1[i as usize]); + } + assert_eq!(bitmap1.count_ones(), 3); + } + _ => panic!("Expected Base2 encoding"), + } + + // Base2 encoding only applies when the first bitmap is non-empty, if we build another + // certificate with only NotarizeFallback votes, we should still get Base3 encoding + let mut builder = VoteCertificateBuilder::new(certificate); + builder + .aggregate(&messages_2) + .expect("Failed to aggregate notarization fallback votes"); + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.certificate, certificate); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { + Decoded::Base3(bitmap1, bitmap2) => { + assert_eq!(bitmap1.count_ones(), 0); + assert_eq!(bitmap2.len(), 8); + for i in rank_2 { + assert!(bitmap2[i as usize]); + } + assert_eq!(bitmap2.count_ones(), 4); + } + _ => panic!("Expected Base3 encoding"), + } + } + + #[test] + fn test_builder_with_errors() { + let hash = Hash::new_unique(); + let certificate = Certificate::new(CertificateType::NotarizeFallback, 1, Some(hash)); + let mut builder = VoteCertificateBuilder::new(certificate); + + // Test with a rank that 
exceeds the maximum allowed + let vote = Vote::new_notarization_vote(1, hash); + let vote2 = Vote::new_notarization_fallback_vote(1, hash); + let rank_out_of_bounds = MAXIMUM_VALIDATORS.saturating_add(1); // Exceeds MAXIMUM_VALIDATORS + let keypair = BLSKeypair::new(); + let signature = keypair.sign(b"fake_vote_message"); + let message_out_of_bounds = VoteMessage { + vote, + signature: signature.into(), + rank: rank_out_of_bounds as u16, + }; + assert_eq!( + builder.aggregate(&[message_out_of_bounds]), + Err(CertificateError::ValidatorDoesNotExist( + rank_out_of_bounds as u16 + )) + ); + + // Test bls error + let message_with_invalid_signature = VoteMessage { + vote, + signature: BLSSignature::default(), // Invalid signature + rank: 1, + }; + assert_eq!( + builder.aggregate(&[message_with_invalid_signature]), + Err(CertificateError::BlsError(BlsError::PointConversion)) + ); + + // Test encoding error + // Create two bitmaps with the same rank set + let signature = keypair.sign(b"fake_vote_message_2"); + let messages_1 = vec![VoteMessage { + vote, + signature: signature.into(), + rank: 1, + }]; + let mut builder = VoteCertificateBuilder::new(certificate); + builder + .aggregate(&messages_1) + .expect("Failed to aggregate notarization votes"); + let messages_2 = vec![VoteMessage { + vote: vote2, + signature: signature.into(), + rank: 1, // Same rank as in messages_1 + }]; + builder + .aggregate(&messages_2) + .expect("Failed to aggregate notarization fallback votes"); + assert_eq!( + builder.build(), + Err(CertificateError::EncodeError( + EncodeError::InvalidBitCombination + )) + ); + + // Test decoding error + let corrupt_certificate_message = CertificateMessage { + certificate: Certificate::new(CertificateType::NotarizeFallback, 1, Some(hash)), + signature: signature.into(), + bitmap: vec![0xFF; 100], // Corrupted bitmap + }; + assert_eq!( + VoteCertificateBuilder::try_from(corrupt_certificate_message).err(), + Some(CertificateError::DecodeError( + 
DecodeError::UnsupportedEncoding + )) + ); + } +} diff --git a/votor/src/certificate_pool/vote_pool.rs b/votor/src/certificate_pool/vote_pool.rs new file mode 100644 index 00000000000000..2a0b253cba1b1e --- /dev/null +++ b/votor/src/certificate_pool/vote_pool.rs @@ -0,0 +1,305 @@ +use { + crate::{certificate_pool::vote_certificate_builder::VoteCertificateBuilder, Stake}, + solana_hash::Hash, + solana_pubkey::Pubkey, + solana_votor_messages::consensus_message::VoteMessage, + std::collections::{HashMap, HashSet}, +}; + +#[derive(Debug)] +pub(crate) struct VoteEntry { + pub(crate) transactions: Vec, + pub(crate) total_stake_by_key: Stake, +} + +impl VoteEntry { + pub fn new() -> Self { + Self { + transactions: Vec::new(), + total_stake_by_key: 0, + } + } +} + +pub(crate) trait VotePool { + fn total_stake(&self) -> Stake; + fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool; +} + +/// There are two types of vote pools: +/// - SimpleVotePool: Tracks all votes of a specfic vote type made by validators for some slot N, but only one vote per block. +/// - DuplicateBlockVotePool: Tracks all votes of a specfic vote type made by validators for some slot N, +/// but allows votes for different blocks by the same validator. Only relevant for VotePool's that are of type +/// Notarization or NotarizationFallback +pub(crate) enum VotePoolType { + SimpleVotePool(SimpleVotePool), + DuplicateBlockVotePool(DuplicateBlockVotePool), +} + +pub(crate) struct SimpleVotePool { + /// Tracks all votes of a specfic vote type made by validators for some slot N. 
+ pub(crate) vote_entry: VoteEntry, + prev_voted_validators: HashSet, +} + +impl SimpleVotePool { + pub fn new() -> Self { + Self { + vote_entry: VoteEntry::new(), + prev_voted_validators: HashSet::new(), + } + } + + pub fn add_vote( + &mut self, + validator_vote_key: &Pubkey, + validator_stake: Stake, + transaction: &VoteMessage, + ) -> Option { + if self.prev_voted_validators.contains(validator_vote_key) { + return None; + } + self.prev_voted_validators.insert(*validator_vote_key); + self.vote_entry.transactions.push(*transaction); + self.vote_entry.total_stake_by_key = self + .vote_entry + .total_stake_by_key + .saturating_add(validator_stake); + Some(self.vote_entry.total_stake_by_key) + } + + pub fn add_to_certificate(&self, output: &mut VoteCertificateBuilder) { + output + .aggregate(&self.vote_entry.transactions) + .expect("Incoming vote message signatures are assumed to be valid") + } +} + +impl VotePool for SimpleVotePool { + fn total_stake(&self) -> Stake { + self.vote_entry.total_stake_by_key + } + fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_validators.contains(validator_vote_key) + } +} + +pub(crate) struct DuplicateBlockVotePool { + max_entries_per_pubkey: usize, + pub(crate) votes: HashMap, + total_stake: Stake, + prev_voted_block_ids: HashMap>, +} + +impl DuplicateBlockVotePool { + pub fn new(max_entries_per_pubkey: usize) -> Self { + Self { + max_entries_per_pubkey, + votes: HashMap::new(), + total_stake: 0, + prev_voted_block_ids: HashMap::new(), + } + } + + pub fn add_vote( + &mut self, + validator_vote_key: &Pubkey, + voted_block_id: Hash, + transaction: &VoteMessage, + validator_stake: Stake, + ) -> Option { + // Check whether the validator_vote_key already used the same voted_block_id or exceeded max_entries_per_pubkey + // If so, return false, otherwise add the voted_block_id to the prev_votes + let prev_voted_block_ids = self + .prev_voted_block_ids + .entry(*validator_vote_key) + .or_default(); 
+ if prev_voted_block_ids.contains(&voted_block_id) { + return None; + } + let inserted_first_time = prev_voted_block_ids.is_empty(); + if prev_voted_block_ids.len() >= self.max_entries_per_pubkey { + return None; + } + prev_voted_block_ids.push(voted_block_id); + + let vote_entry = self + .votes + .entry(voted_block_id) + .or_insert_with(VoteEntry::new); + vote_entry.transactions.push(*transaction); + vote_entry.total_stake_by_key = vote_entry + .total_stake_by_key + .saturating_add(validator_stake); + + if inserted_first_time { + self.total_stake = self.total_stake.saturating_add(validator_stake); + } + Some(vote_entry.total_stake_by_key) + } + + pub fn total_stake_by_block_id(&self, block_id: &Hash) -> Stake { + self.votes + .get(block_id) + .map_or(0, |vote_entries| vote_entries.total_stake_by_key) + } + + pub fn add_to_certificate(&self, block_id: &Hash, output: &mut VoteCertificateBuilder) { + if let Some(vote_entries) = self.votes.get(block_id) { + output + .aggregate(&vote_entries.transactions) + .expect("Incoming vote message signatures are assumed to be valid") + } + } + + pub fn has_prev_validator_vote_for_block( + &self, + validator_vote_key: &Pubkey, + block_id: &Hash, + ) -> bool { + self.prev_voted_block_ids + .get(validator_vote_key) + .is_some_and(|vs| vs.contains(block_id)) + } +} + +impl VotePool for DuplicateBlockVotePool { + fn total_stake(&self) -> Stake { + self.total_stake + } + fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_block_ids.contains_key(validator_vote_key) + } +} + +#[cfg(test)] +mod test { + use { + super::*, + solana_bls_signatures::Signature as BLSSignature, + solana_votor_messages::{consensus_message::VoteMessage, vote::Vote}, + }; + + #[test] + fn test_skip_vote_pool() { + let mut vote_pool = SimpleVotePool::new(); + let vote = Vote::new_skip_vote(5); + let transaction = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, + }; + let my_pubkey = 
Pubkey::new_unique(); + + assert_eq!(vote_pool.add_vote(&my_pubkey, 10, &transaction), Some(10)); + assert_eq!(vote_pool.total_stake(), 10); + + // Adding the same key again should fail + assert_eq!(vote_pool.add_vote(&my_pubkey, 10, &transaction), None); + assert_eq!(vote_pool.total_stake(), 10); + + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!(vote_pool.add_vote(&new_pubkey, 60, &transaction), Some(70)); + assert_eq!(vote_pool.total_stake(), 70); + } + + #[test] + fn test_notarization_pool() { + let mut vote_pool = DuplicateBlockVotePool::new(1); + let my_pubkey = Pubkey::new_unique(); + let block_id = Hash::new_unique(); + let vote = Vote::new_notarization_vote(3, block_id); + let transaction = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, + }; + assert_eq!( + vote_pool.add_vote(&my_pubkey, block_id, &transaction, 10), + Some(10) + ); + assert_eq!(vote_pool.total_stake(), 10); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 10); + + // Adding the same key again should fail + assert_eq!( + vote_pool.add_vote(&my_pubkey, block_id, &transaction, 10), + None + ); + assert_eq!(vote_pool.total_stake(), 10); + + // Adding a different bankhash should fail + assert_eq!( + vote_pool.add_vote(&my_pubkey, block_id, &transaction, 10), + None + ); + assert_eq!(vote_pool.total_stake(), 10); + + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!( + vote_pool.add_vote(&new_pubkey, block_id, &transaction, 60), + Some(70) + ); + assert_eq!(vote_pool.total_stake(), 70); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 70); + } + + #[test] + fn test_notarization_fallback_pool() { + solana_logger::setup(); + let mut vote_pool = DuplicateBlockVotePool::new(3); + let vote = Vote::new_notarization_fallback_vote(7, Hash::new_unique()); + let transaction = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, + }; + let my_pubkey = 
Pubkey::new_unique(); + + let block_ids: Vec = (0..4).map(|_| Hash::new_unique()).collect(); + + // Adding the first 3 votes should succeed, but total_stake should remain at 10 + for block_id in &block_ids[0..3] { + assert_eq!( + vote_pool.add_vote(&my_pubkey, *block_id, &transaction, 10), + Some(10) + ); + assert_eq!(vote_pool.total_stake(), 10); + assert_eq!(vote_pool.total_stake_by_block_id(block_id), 10); + } + // Adding the 4th vote should fail + assert_eq!( + vote_pool.add_vote(&my_pubkey, block_ids[3], &transaction, 10), + None + ); + assert_eq!(vote_pool.total_stake(), 10); + assert_eq!(vote_pool.total_stake_by_block_id(&block_ids[3]), 0); + + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + for block_id in &block_ids[1..3] { + assert_eq!( + vote_pool.add_vote(&new_pubkey, *block_id, &transaction, 60), + Some(70) + ); + assert_eq!(vote_pool.total_stake(), 70); + assert_eq!(vote_pool.total_stake_by_block_id(block_id), 70); + } + + // The new key only added 2 votes, so adding block_ids[3] should succeed + assert_eq!( + vote_pool.add_vote(&new_pubkey, block_ids[3], &transaction, 60), + Some(60) + ); + assert_eq!(vote_pool.total_stake(), 70); + assert_eq!(vote_pool.total_stake_by_block_id(&block_ids[3]), 60); + + // Now if adding the same key again, it should fail + assert_eq!( + vote_pool.add_vote(&new_pubkey, block_ids[0], &transaction, 60), + None + ); + } +} diff --git a/votor/src/certificate_pool_service.rs b/votor/src/certificate_pool_service.rs new file mode 100644 index 00000000000000..b5db3fd6e6b06a --- /dev/null +++ b/votor/src/certificate_pool_service.rs @@ -0,0 +1,419 @@ +//! Service in charge of ingesting new messages into the certificate pool +//! 
and notifying votor of new events that occur + +mod stats; + +use { + crate::{ + certificate_pool::{ + parent_ready_tracker::BlockProductionParent, AddVoteError, CertificatePool, + }, + commitment::{ + alpenglow_update_commitment_cache, AlpenglowCommitmentAggregationData, + AlpenglowCommitmentType, + }, + event::{LeaderWindowInfo, VotorEvent, VotorEventSender}, + voting_utils::BLSOp, + votor::Votor, + Certificate, DELTA_STANDSTILL, + }, + crossbeam_channel::{select, Receiver, Sender, TrySendError}, + solana_clock::Slot, + solana_gossip::cluster_info::ClusterInfo, + solana_ledger::{ + blockstore::Blockstore, leader_schedule_cache::LeaderScheduleCache, + leader_schedule_utils::last_of_consecutive_leader_slots, + }, + solana_pubkey::Pubkey, + solana_runtime::{bank::Bank, bank_forks::SharableBanks}, + solana_votor_messages::consensus_message::{CertificateMessage, ConsensusMessage}, + stats::CertificatePoolServiceStats, + std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Condvar, Mutex, + }, + thread::{self, Builder, JoinHandle}, + time::{Duration, Instant}, + }, +}; + +/// Inputs for the certificate pool thread +pub(crate) struct CertificatePoolContext { + pub(crate) exit: Arc, + pub(crate) start: Arc<(Mutex, Condvar)>, + + pub(crate) cluster_info: Arc, + pub(crate) my_vote_pubkey: Pubkey, + pub(crate) blockstore: Arc, + pub(crate) sharable_banks: SharableBanks, + pub(crate) leader_schedule_cache: Arc, + + // TODO: for now we ingest our own votes into the certificate pool + // just like regular votes. However do we need to convert + // Vote -> ConsensusMessage -> Vote? 
+ // consider adding a separate pathway in cert_pool.add_transaction for ingesting own votes + pub(crate) consensus_message_receiver: Receiver, + + pub(crate) bls_sender: Sender, + pub(crate) event_sender: VotorEventSender, + pub(crate) commitment_sender: Sender, + pub(crate) certificate_sender: Sender<(Certificate, CertificateMessage)>, +} + +pub(crate) struct CertificatePoolService { + t_ingest: JoinHandle<()>, +} + +impl CertificatePoolService { + pub(crate) fn new(ctx: CertificatePoolContext) -> Self { + let t_ingest = Builder::new() + .name("solCertPoolIngest".to_string()) + .spawn(move || { + if let Err(e) = Self::certificate_pool_ingest_loop(ctx) { + info!("Certificate pool service exited: {e:?}. Shutting down"); + } + }) + .unwrap(); + + Self { t_ingest } + } + + fn maybe_update_root_and_send_new_certificates( + cert_pool: &mut CertificatePool, + root_bank: &Bank, + bls_sender: &Sender, + new_finalized_slot: Option, + new_certificates_to_send: Vec>, + standstill_timer: &mut Instant, + stats: &mut CertificatePoolServiceStats, + ) -> Result<(), AddVoteError> { + // If we have a new finalized slot, update the root and send new certificates + if new_finalized_slot.is_some() { + // Reset standstill timer + *standstill_timer = Instant::now(); + CertificatePoolServiceStats::incr_u16(&mut stats.new_finalized_slot); + } + cert_pool.prune_old_state(root_bank.slot()); + CertificatePoolServiceStats::incr_u64(&mut stats.prune_old_state_called); + // Send new certificates to peers + Self::send_certificates(bls_sender, new_certificates_to_send, stats) + } + + fn send_certificates( + bls_sender: &Sender, + certificates_to_send: Vec>, + stats: &mut CertificatePoolServiceStats, + ) -> Result<(), AddVoteError> { + for (i, certificate) in certificates_to_send.iter().enumerate() { + // The buffer should normally be large enough, so we don't handle + // certificate re-send here. 
+ match bls_sender.try_send(BLSOp::PushCertificate { + certificate: certificate.clone(), + }) { + Ok(_) => { + CertificatePoolServiceStats::incr_u16(&mut stats.certificates_sent); + } + Err(TrySendError::Disconnected(_)) => { + return Err(AddVoteError::ChannelDisconnected( + "VotingService".to_string(), + )); + } + Err(TrySendError::Full(_)) => { + let dropped = certificates_to_send.len().saturating_sub(i) as u16; + stats.certificates_dropped = stats.certificates_dropped.saturating_add(dropped); + return Err(AddVoteError::VotingServiceQueueFull); + } + } + } + Ok(()) + } + + fn process_consensus_message( + ctx: &mut CertificatePoolContext, + my_pubkey: &Pubkey, + message: &ConsensusMessage, + cert_pool: &mut CertificatePool, + events: &mut Vec, + standstill_timer: &mut Instant, + stats: &mut CertificatePoolServiceStats, + ) -> Result<(), AddVoteError> { + match message { + ConsensusMessage::Certificate(_) => { + CertificatePoolServiceStats::incr_u32(&mut stats.received_certificates); + } + ConsensusMessage::Vote(_) => { + CertificatePoolServiceStats::incr_u32(&mut stats.received_votes); + } + } + let root_bank = ctx.sharable_banks.root(); + let (new_finalized_slot, new_certificates_to_send) = + Self::add_message_and_maybe_update_commitment( + &root_bank, + my_pubkey, + &ctx.my_vote_pubkey, + message, + cert_pool, + events, + &ctx.commitment_sender, + )?; + Self::maybe_update_root_and_send_new_certificates( + cert_pool, + &root_bank, + &ctx.bls_sender, + new_finalized_slot, + new_certificates_to_send, + standstill_timer, + stats, + ) + } + + fn handle_channel_disconnected( + ctx: &mut CertificatePoolContext, + channel_name: &str, + ) -> Result<(), ()> { + info!( + "{}: {} disconnected. 
Exiting", + ctx.cluster_info.id(), + channel_name + ); + ctx.exit.store(true, Ordering::Relaxed); + Err(()) + } + + // Main loop for the certificate pool service, it only exits when any channel is disconnected + fn certificate_pool_ingest_loop(mut ctx: CertificatePoolContext) -> Result<(), ()> { + let mut events = vec![]; + let mut my_pubkey = ctx.cluster_info.id(); + let root_bank = ctx.sharable_banks.root(); + let mut cert_pool = CertificatePool::new_from_root_bank( + my_pubkey, + &root_bank, + Some(ctx.certificate_sender.clone()), + ); + + // Wait until migration has completed + info!("{}: Certificate pool loop initialized", &my_pubkey); + Votor::wait_for_migration_or_exit(&ctx.exit, &ctx.start); + info!("{}: Certificate pool loop starting", &my_pubkey); + let mut stats = CertificatePoolServiceStats::new(); + + // Standstill tracking + let mut standstill_timer = Instant::now(); + + // Kick off parent ready + let root_bank = ctx.sharable_banks.root(); + let root_block = (root_bank.slot(), root_bank.block_id().unwrap_or_default()); + let mut highest_parent_ready = root_bank.slot(); + events.push(VotorEvent::ParentReady { + slot: root_bank.slot().checked_add(1).unwrap(), + parent_block: root_block, + }); + + // Ingest votes into certificate pool and notify voting loop of new events + while !ctx.exit.load(Ordering::Relaxed) { + // Update the current pubkey if it has changed + let new_pubkey = ctx.cluster_info.id(); + if my_pubkey != new_pubkey { + my_pubkey = new_pubkey; + cert_pool.update_pubkey(my_pubkey); + warn!("Certificate pool pubkey updated to {my_pubkey}"); + } + + Self::add_produce_block_event( + &mut highest_parent_ready, + &cert_pool, + &my_pubkey, + &mut ctx, + &mut events, + &mut stats, + ); + + if standstill_timer.elapsed() > DELTA_STANDSTILL { + events.push(VotorEvent::Standstill(cert_pool.highest_finalized_slot())); + stats.standstill = true; + standstill_timer = Instant::now(); + match Self::send_certificates( + &ctx.bls_sender, + 
cert_pool.get_certs_for_standstill(), + &mut stats, + ) { + Ok(()) => (), + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()); + } + Err(e) => { + trace!( + "{}: unable to push standstill certificates into pool {}", + my_pubkey, + e + ); + } + } + } + + if events + .drain(..) + .try_for_each(|event| ctx.event_sender.send(event)) + .is_err() + { + return Self::handle_channel_disconnected(&mut ctx, "Votor event receiver"); + } + + let messages: Vec = select! { + recv(ctx.consensus_message_receiver) -> msg => { + let Ok(first) = msg else { + return Self::handle_channel_disconnected(&mut ctx, "BLS receiver"); + }; + std::iter::once(first).chain(ctx.consensus_message_receiver.try_iter()).collect() + }, + default(Duration::from_secs(1)) => continue + }; + + for message in messages { + match Self::process_consensus_message( + &mut ctx, + &my_pubkey, + &message, + &mut cert_pool, + &mut events, + &mut standstill_timer, + &mut stats, + ) { + Ok(()) => {} + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()) + } + Err(e) => { + // This is a non critical error, a duplicate vote for example + trace!("{}: unable to push vote into pool {}", &my_pubkey, e); + CertificatePoolServiceStats::incr_u32(&mut stats.add_message_failed); + } + } + } + stats.maybe_report(); + cert_pool.maybe_report(); + } + Ok(()) + } + + /// Adds a vote to the certificate pool and updates the commitment cache if necessary + /// + /// If a new finalization slot was recognized, returns the slot + fn add_message_and_maybe_update_commitment( + root_bank: &Bank, + my_pubkey: &Pubkey, + my_vote_pubkey: &Pubkey, + message: &ConsensusMessage, + cert_pool: &mut CertificatePool, + votor_events: &mut Vec, + commitment_sender: &Sender, + ) -> Result<(Option, Vec>), AddVoteError> { + let (new_finalized_slot, new_certificates_to_send) = 
cert_pool.add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + my_vote_pubkey, + message, + votor_events, + )?; + let Some(new_finalized_slot) = new_finalized_slot else { + return Ok((None, new_certificates_to_send)); + }; + trace!("{my_pubkey}: new finalization certificate for {new_finalized_slot}"); + alpenglow_update_commitment_cache( + AlpenglowCommitmentType::Finalized, + new_finalized_slot, + commitment_sender, + )?; + Ok((Some(new_finalized_slot), new_certificates_to_send)) + } + + fn add_produce_block_event( + highest_parent_ready: &mut Slot, + cert_pool: &CertificatePool, + my_pubkey: &Pubkey, + ctx: &mut CertificatePoolContext, + events: &mut Vec, + stats: &mut CertificatePoolServiceStats, + ) { + let Some(new_highest_parent_ready) = events + .iter() + .filter_map(|event| match event { + VotorEvent::ParentReady { slot, .. } => Some(slot), + _ => None, + }) + .max() + .copied() + else { + return; + }; + + if new_highest_parent_ready <= *highest_parent_ready { + return; + } + *highest_parent_ready = new_highest_parent_ready; + + let root_bank = ctx.sharable_banks.root(); + let Some(leader_pubkey) = ctx + .leader_schedule_cache + .slot_leader_at(*highest_parent_ready, Some(&root_bank)) + else { + error!("Unable to compute the leader at slot {highest_parent_ready}. 
Something is wrong, exiting"); + ctx.exit.store(true, Ordering::Relaxed); + return; + }; + + if &leader_pubkey != my_pubkey { + return; + } + + let start_slot = *highest_parent_ready; + let end_slot = last_of_consecutive_leader_slots(start_slot); + + if (start_slot..=end_slot).any(|s| ctx.blockstore.has_existing_shreds_for_slot(s)) { + warn!( + "{}: We have already produced shreds in the window {start_slot}-{end_slot}, \ + skipping production of our leader window", + my_pubkey, + ); + return; + } + + match cert_pool + .parent_ready_tracker + .block_production_parent(start_slot) + { + BlockProductionParent::MissedWindow => { + warn!( + "{}: Leader slot {start_slot} has already been certified, \ + skipping production of {start_slot}-{end_slot}", + my_pubkey, + ); + CertificatePoolServiceStats::incr_u16(&mut stats.parent_ready_missed_window); + } + BlockProductionParent::ParentNotReady => { + // This can't happen, place holder depending on how we hook up optimistic + ctx.exit.store(true, Ordering::Relaxed); + panic!( + "Must have a block production parent: {:#?}", + cert_pool.parent_ready_tracker + ); + } + BlockProductionParent::Parent(parent_block) => { + events.push(VotorEvent::ProduceWindow(LeaderWindowInfo { + start_slot, + end_slot, + parent_block, + // TODO: we can just remove this + skip_timer: Instant::now(), + })); + CertificatePoolServiceStats::incr_u16(&mut stats.parent_ready_produce_window); + } + } + } + + pub(crate) fn join(self) -> thread::Result<()> { + self.t_ingest.join() + } +} diff --git a/votor/src/certificate_pool_service/stats.rs b/votor/src/certificate_pool_service/stats.rs new file mode 100644 index 00000000000000..d24a3599b5aa4a --- /dev/null +++ b/votor/src/certificate_pool_service/stats.rs @@ -0,0 +1,96 @@ +use { + solana_metrics::datapoint_info, + std::time::{Duration, Instant}, +}; + +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); + +#[derive(Debug)] +pub(crate) struct CertificatePoolServiceStats { + pub(crate) 
add_message_failed: u32, + pub(crate) certificates_sent: u16, + pub(crate) certificates_dropped: u16, + pub(crate) new_finalized_slot: u16, + pub(crate) parent_ready_missed_window: u16, + pub(crate) parent_ready_produce_window: u16, + pub(crate) received_votes: u32, + pub(crate) received_certificates: u32, + pub(crate) standstill: bool, + pub(crate) prune_old_state_called: u64, + last_request_time: Instant, +} + +impl CertificatePoolServiceStats { + pub fn new() -> Self { + Self { + add_message_failed: 0, + certificates_sent: 0, + certificates_dropped: 0, + new_finalized_slot: 0, + parent_ready_missed_window: 0, + parent_ready_produce_window: 0, + received_votes: 0, + received_certificates: 0, + standstill: false, + prune_old_state_called: 0, + last_request_time: Instant::now(), + } + } + + pub fn incr_u16(value: &mut u16) { + *value = value.saturating_add(1); + } + + pub fn incr_u32(value: &mut u32) { + *value = value.saturating_add(1); + } + + pub fn incr_u64(value: &mut u64) { + *value = value.saturating_add(1); + } + + fn reset(&mut self) { + self.add_message_failed = 0; + self.certificates_sent = 0; + self.certificates_dropped = 0; + self.new_finalized_slot = 0; + self.parent_ready_missed_window = 0; + self.parent_ready_produce_window = 0; + self.received_votes = 0; + self.received_certificates = 0; + self.standstill = false; + self.prune_old_state_called = 0; + self.last_request_time = Instant::now(); + } + + fn report(&self) { + datapoint_info!( + "cert_pool_service", + ("add_message_failed", self.add_message_failed, i64), + ("certificates_sent", self.certificates_sent, i64), + ("certificates_dropped", self.certificates_dropped, i64), + ("new_finalized_slot", self.new_finalized_slot, i64), + ( + "parent_ready_missed_window", + self.parent_ready_missed_window, + i64 + ), + ( + "parent_ready_produce_window", + self.parent_ready_produce_window, + i64 + ), + ("received_votes", self.received_votes, i64), + ("received_certificates", self.received_certificates, 
i64), + ("standstill", self.standstill, i64), + ("prune_old_state_called", self.prune_old_state_called, i64), + ); + } + + pub fn maybe_report(&mut self) { + if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { + self.report(); + self.reset(); + } + } +} diff --git a/votor/src/commitment.rs b/votor/src/commitment.rs new file mode 100644 index 00000000000000..3e96a831859dd4 --- /dev/null +++ b/votor/src/commitment.rs @@ -0,0 +1,42 @@ +use { + crossbeam_channel::{Sender, TrySendError}, + solana_clock::Slot, + thiserror::Error, +}; + +#[derive(Debug, Error)] +pub enum AlpenglowCommitmentError { + #[error("Failed to send commitment data, channel disconnected")] + ChannelDisconnected, +} + +pub enum AlpenglowCommitmentType { + /// Our node has voted notarize for the slot + Notarize, + /// We have observed a finalization certificate for the slot + Finalized, +} + +pub struct AlpenglowCommitmentAggregationData { + pub commitment_type: AlpenglowCommitmentType, + pub slot: Slot, +} + +pub fn alpenglow_update_commitment_cache( + commitment_type: AlpenglowCommitmentType, + slot: Slot, + commitment_sender: &Sender, +) -> Result<(), AlpenglowCommitmentError> { + match commitment_sender.try_send(AlpenglowCommitmentAggregationData { + commitment_type, + slot, + }) { + Err(TrySendError::Disconnected(_)) => { + info!("commitment_sender has disconnected"); + return Err(AlpenglowCommitmentError::ChannelDisconnected); + } + Err(TrySendError::Full(_)) => error!("commitment_sender is backed up, something is wrong"), + Ok(_) => (), + } + Ok(()) +} diff --git a/votor/src/event.rs b/votor/src/event.rs new file mode 100644 index 00000000000000..33ed8b6a7c16ce --- /dev/null +++ b/votor/src/event.rs @@ -0,0 +1,94 @@ +use { + crossbeam_channel::{Receiver, Sender}, + solana_clock::Slot, + solana_runtime::bank::Bank, + solana_votor_messages::consensus_message::Block, + std::{sync::Arc, time::Instant}, +}; + +#[derive(Debug, Clone)] +pub struct CompletedBlock { + pub slot: Slot, + // 
TODO: once we have the async execution changes this can be (block_id, parent_block_id) instead + pub bank: Arc, +} + +/// Context for the block creation loop to start a leader window +#[derive(Copy, Clone, Debug)] +pub struct LeaderWindowInfo { + pub start_slot: Slot, + pub end_slot: Slot, + pub parent_block: Block, + pub skip_timer: Instant, +} + +pub type VotorEventSender = Sender; +pub type VotorEventReceiver = Receiver; + +/// Events that trigger actions in Votor +/// TODO: remove bank hash once we update votes +#[derive(Debug, Clone)] +pub enum VotorEvent { + /// A block has completed replay and is ready for voting + Block(CompletedBlock), + + /// The block has received a notarization certificate + BlockNotarized(Block), + + /// Received the first shred for the slot. + FirstShred(Slot), + + /// The pool has marked the given block as a ready parent for `slot` + ParentReady { slot: Slot, parent_block: Block }, + + //// Timeout to early detect that a honest that has crashed and + /// if the leader window should be skipped. + TimeoutCrashedLeader(Slot), + + /// Timeout to inspect whether the remaining leader window should be skipped. 
+ Timeout(Slot), + + /// The given block has reached the safe to notar status + SafeToNotar(Block), + + /// The given slot has reached the safe to skip status + SafeToSkip(Slot), + + /// We are the leader for this window and have reached the parent ready status + /// Produce the window + ProduceWindow(LeaderWindowInfo), + + /// The block has received a slow or fast finalization certificate and is eligble for rooting + /// The second bool indicates whether the block is a fast finalization + Finalized(Block, bool), + + /// We have not observed a finalization and reached the standstill timeout + /// The slot is the highest finalized slot + Standstill(Slot), + + /// The identity keypair has changed due to an operator calling set-identity + SetIdentity, +} + +impl VotorEvent { + /// Ignore old events + pub(crate) fn should_ignore(&self, root: Slot) -> bool { + match self { + VotorEvent::Block(completed_block) => completed_block.slot <= root, + VotorEvent::Timeout(s) + | VotorEvent::SafeToSkip(s) + | VotorEvent::TimeoutCrashedLeader(s) + | VotorEvent::FirstShred(s) + | VotorEvent::SafeToNotar((s, _)) + | VotorEvent::Finalized((s, _), _) + | VotorEvent::BlockNotarized((s, _)) + | VotorEvent::ParentReady { + slot: s, + parent_block: _, + } => s <= &root, + VotorEvent::ProduceWindow(_) => false, + VotorEvent::Standstill(_) => false, + VotorEvent::SetIdentity => false, + } + } +} diff --git a/votor/src/event_handler.rs b/votor/src/event_handler.rs new file mode 100644 index 00000000000000..106507bfe59f23 --- /dev/null +++ b/votor/src/event_handler.rs @@ -0,0 +1,761 @@ +//! Handles incoming VotorEvents to take action or +//! 
notify block creation loop + +use { + crate::{ + commitment::{alpenglow_update_commitment_cache, AlpenglowCommitmentType}, + event::{CompletedBlock, VotorEvent, VotorEventReceiver}, + event_handler::stats::EventHandlerStats, + root_utils::{self, RootContext}, + timer_manager::TimerManager, + vote_history::{VoteHistory, VoteHistoryError}, + voting_utils::{generate_vote_message, BLSOp, VoteError, VotingContext}, + votor::{SharedContext, Votor}, + }, + crossbeam_channel::{select, RecvError, SendError}, + parking_lot::RwLock, + solana_clock::Slot, + solana_hash::Hash, + solana_ledger::leader_schedule_utils::{ + first_of_consecutive_leader_slots, last_of_consecutive_leader_slots, leader_slot_index, + }, + solana_measure::measure::Measure, + solana_pubkey::Pubkey, + solana_runtime::{bank::Bank, bank_forks::SetRootError}, + solana_signer::Signer, + solana_votor_messages::{consensus_message::Block, vote::Vote}, + std::{ + collections::{BTreeMap, BTreeSet}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Condvar, Mutex, + }, + thread::{self, Builder, JoinHandle}, + time::Duration, + }, + thiserror::Error, +}; + +mod stats; + +/// Banks that have completed replay, but are yet to be voted on +/// in the form of (block, parent block) +pub(crate) type PendingBlocks = BTreeMap>; + +/// Inputs for the event handler thread +pub(crate) struct EventHandlerContext { + pub(crate) exit: Arc, + pub(crate) start: Arc<(Mutex, Condvar)>, + + pub(crate) event_receiver: VotorEventReceiver, + pub(crate) timer_manager: Arc>, + + // Contexts + pub(crate) shared_context: SharedContext, + pub(crate) voting_context: VotingContext, + pub(crate) root_context: RootContext, +} + +#[derive(Debug, Error)] +enum EventLoopError { + #[error("Receiver is disconnected")] + ReceiverDisconnected(#[from] RecvError), + + #[error("Sender is disconnected")] + SenderDisconnected(#[from] SendError<()>), + + #[error("Error generating and inserting vote")] + VotingError(#[from] VoteError), + + #[error("Unable to 
set root")] + SetRootError(#[from] SetRootError), + + #[error("Set identity error")] + SetIdentityError(#[from] VoteHistoryError), +} + +pub(crate) struct EventHandler { + t_event_handler: JoinHandle<()>, +} + +struct LocalContext { + pub(crate) my_pubkey: Pubkey, + pub(crate) pending_blocks: PendingBlocks, + pub(crate) finalized_blocks: BTreeSet, + pub(crate) received_shred: BTreeSet, + pub(crate) stats: EventHandlerStats, +} + +impl EventHandler { + pub(crate) fn new(ctx: EventHandlerContext) -> Self { + let exit = ctx.exit.clone(); + let t_event_handler = Builder::new() + .name("solVotorEventLoop".to_string()) + .spawn(move || { + if let Err(e) = Self::event_loop(ctx) { + info!("Event loop exited: {e:?}. Shutting down"); + exit.store(true, Ordering::Relaxed); + } + }) + .unwrap(); + + Self { t_event_handler } + } + + fn event_loop(context: EventHandlerContext) -> Result<(), EventLoopError> { + let EventHandlerContext { + exit, + start, + event_receiver, + timer_manager, + shared_context: ctx, + voting_context: mut vctx, + root_context: rctx, + } = context; + let mut local_context = LocalContext { + my_pubkey: ctx.cluster_info.keypair().pubkey(), + pending_blocks: PendingBlocks::default(), + finalized_blocks: BTreeSet::default(), + received_shred: BTreeSet::default(), + stats: EventHandlerStats::new(), + }; + + // Wait until migration has completed + info!("{}: Event loop initialized", local_context.my_pubkey); + Votor::wait_for_migration_or_exit(&exit, &start); + info!("{}: Event loop starting", local_context.my_pubkey); + + if exit.load(Ordering::Relaxed) { + return Ok(()); + } + + // Check for set identity + if let Err(e) = Self::handle_set_identity(&mut local_context.my_pubkey, &ctx, &mut vctx) { + error!( + "Unable to load new vote history when attempting to change identity from {} \ + to {} on voting loop startup, Exiting: {}", + vctx.vote_history.node_pubkey, + ctx.cluster_info.id(), + e + ); + return Err(EventLoopError::SetIdentityError(e)); + } + + while 
!exit.load(Ordering::Relaxed) { + let mut receive_event_time = Measure::start("receive_event"); + let event = select! { + recv(event_receiver) -> msg => { + msg? + }, + default(Duration::from_secs(1)) => continue + }; + receive_event_time.stop(); + local_context.stats.receive_event_time_us = local_context + .stats + .receive_event_time_us + .saturating_add(receive_event_time.as_us() as u32); + + let root_bank = vctx.sharable_banks.root(); + if event.should_ignore(root_bank.slot()) { + local_context.stats.ignored = local_context.stats.ignored.saturating_add(1); + continue; + } + + let mut event_processing_time = Measure::start("event_processing"); + let stats_event = local_context.stats.handle_event_arrival(&event); + let votes = Self::handle_event( + event, + &timer_manager, + &ctx, + &mut vctx, + &rctx, + &mut local_context, + )?; + event_processing_time.stop(); + local_context + .stats + .incr_event_with_timing(stats_event, event_processing_time.as_us()); + + let mut send_vote_time = Measure::start("send_vote"); + for vote in votes { + local_context.stats.incr_vote(&vote); + vctx.bls_sender.send(vote).map_err(|_| SendError(()))?; + } + send_vote_time.stop(); + local_context.stats.send_vote_time_us = local_context + .stats + .send_vote_time_us + .saturating_add(send_vote_time.as_us() as u32); + local_context.stats.maybe_report(); + } + + Ok(()) + } + + fn handle_parent_ready_event( + slot: Slot, + parent_block: Block, + vctx: &mut VotingContext, + ctx: &SharedContext, + local_context: &mut LocalContext, + timer_manager: &RwLock, + votes: &mut Vec, + ) -> Result<(), EventLoopError> { + let my_pubkey = &local_context.my_pubkey; + info!("{my_pubkey}: Parent ready {slot} {parent_block:?}"); + let should_set_timeouts = vctx.vote_history.add_parent_ready(slot, parent_block); + Self::check_pending_blocks(my_pubkey, &mut local_context.pending_blocks, vctx, votes)?; + if should_set_timeouts { + timer_manager.write().set_timeouts(slot); + local_context.stats.timeout_set = 
local_context.stats.timeout_set.saturating_add(1); + } + let mut highest_parent_ready = ctx + .leader_window_notifier + .highest_parent_ready + .write() + .unwrap(); + + let (current_slot, _) = *highest_parent_ready; + + if slot > current_slot { + *highest_parent_ready = (slot, parent_block); + } + Ok(()) + } + + fn handle_event( + event: VotorEvent, + timer_manager: &RwLock, + ctx: &SharedContext, + vctx: &mut VotingContext, + rctx: &RootContext, + local_context: &mut LocalContext, + ) -> Result, EventLoopError> { + let mut votes = vec![]; + let LocalContext { + ref mut my_pubkey, + ref mut pending_blocks, + ref mut finalized_blocks, + ref mut received_shred, + ref mut stats, + } = local_context; + match event { + // Block has completed replay + VotorEvent::Block(CompletedBlock { slot, bank }) => { + debug_assert!(bank.is_frozen()); + let (block, parent_block) = Self::get_block_parent_block(&bank); + info!("{my_pubkey}: Block {block:?} parent {parent_block:?}"); + if Self::try_notar( + my_pubkey, + block, + parent_block, + pending_blocks, + vctx, + &mut votes, + )? 
{ + Self::check_pending_blocks(my_pubkey, pending_blocks, vctx, &mut votes)?; + } else if !vctx.vote_history.voted(slot) { + pending_blocks + .entry(slot) + .or_default() + .push((block, parent_block)); + } + Self::check_rootable_blocks( + my_pubkey, + ctx, + vctx, + rctx, + pending_blocks, + finalized_blocks, + received_shred, + stats, + )?; + if let Some((ready_slot, parent_block)) = + Self::add_missing_parent_ready(block, ctx, vctx, local_context) + { + Self::handle_parent_ready_event( + ready_slot, + parent_block, + vctx, + ctx, + local_context, + timer_manager, + &mut votes, + )?; + } + } + + // Block has received a notarization certificate + VotorEvent::BlockNotarized(block) => { + info!("{my_pubkey}: Block Notarized {block:?}"); + vctx.vote_history.add_block_notarized(block); + Self::try_final(my_pubkey, block, vctx, &mut votes)?; + } + + VotorEvent::FirstShred(slot) => { + info!("{my_pubkey}: First shred {slot}"); + received_shred.insert(slot); + } + + // Received a parent ready notification for `slot` + VotorEvent::ParentReady { slot, parent_block } => { + Self::handle_parent_ready_event( + slot, + parent_block, + vctx, + ctx, + local_context, + timer_manager, + &mut votes, + )?; + } + + VotorEvent::TimeoutCrashedLeader(slot) => { + info!("{my_pubkey}: TimeoutCrashedLeader {slot}"); + if vctx.vote_history.voted(slot) || received_shred.contains(&slot) { + return Ok(votes); + } + Self::try_skip_window(my_pubkey, slot, vctx, &mut votes)?; + } + + // Skip timer for the slot has fired + VotorEvent::Timeout(slot) => { + info!("{my_pubkey}: Timeout {slot}"); + if vctx.vote_history.voted(slot) { + return Ok(votes); + } + Self::try_skip_window(my_pubkey, slot, vctx, &mut votes)?; + } + + // We have observed the safe to notar condition, and can send a notar fallback vote + // TODO: update cert pool to check parent block id for intra window slots + VotorEvent::SafeToNotar(block @ (slot, block_id)) => { + info!("{my_pubkey}: SafeToNotar {block:?}"); + 
Self::try_skip_window(my_pubkey, slot, vctx, &mut votes)?; + if vctx.vote_history.its_over(slot) + || vctx.vote_history.voted_notar_fallback(slot, block_id) + { + return Ok(votes); + } + info!("{my_pubkey}: Voting notarize-fallback for {slot} {block_id}"); + if let Some(bls_op) = generate_vote_message( + Vote::new_notarization_fallback_vote(slot, block_id), + false, + vctx, + )? { + votes.push(bls_op); + } + } + + // We have observed the safe to skip condition, and can send a skip fallback vote + VotorEvent::SafeToSkip(slot) => { + info!("{my_pubkey}: SafeToSkip {slot}"); + Self::try_skip_window(my_pubkey, slot, vctx, &mut votes)?; + if vctx.vote_history.its_over(slot) || vctx.vote_history.voted_skip_fallback(slot) { + return Ok(votes); + } + info!("{my_pubkey}: Voting skip-fallback for {slot}"); + if let Some(bls_op) = + generate_vote_message(Vote::new_skip_fallback_vote(slot), false, vctx)? + { + votes.push(bls_op); + } + } + + // It is time to produce our leader window + VotorEvent::ProduceWindow(window_info) => { + info!("{my_pubkey}: ProduceWindow {window_info:?}"); + let mut l_window_info = ctx.leader_window_notifier.window_info.lock().unwrap(); + if let Some(old_window_info) = l_window_info.as_ref() { + stats.leader_window_replaced = stats.leader_window_replaced.saturating_add(1); + error!( + "{my_pubkey}: Attempting to start leader window for {}-{}, \ + however there is already a pending window to produce {}-{}. 
\ + Our production is lagging, discarding in favor of the newer window", + window_info.start_slot, + window_info.end_slot, + old_window_info.start_slot, + old_window_info.end_slot, + ); + } + *l_window_info = Some(window_info); + ctx.leader_window_notifier.window_notification.notify_one(); + } + + // We have finalized this block consider it for rooting + VotorEvent::Finalized(block, is_fast_finalization) => { + info!("{my_pubkey}: Finalized {block:?} fast: {is_fast_finalization}"); + finalized_blocks.insert(block); + Self::check_rootable_blocks( + my_pubkey, + ctx, + vctx, + rctx, + pending_blocks, + finalized_blocks, + received_shred, + stats, + )?; + if let Some((slot, block)) = + Self::add_missing_parent_ready(block, ctx, vctx, local_context) + { + Self::handle_parent_ready_event( + slot, + block, + vctx, + ctx, + local_context, + timer_manager, + &mut votes, + )?; + } + } + + // We have not observed a finalization certificate in a while, refresh our votes + VotorEvent::Standstill(highest_finalized_slot) => { + info!("{my_pubkey}: Standstill {highest_finalized_slot}"); + // certs refresh happens in CertificatePoolService + Self::refresh_votes(my_pubkey, highest_finalized_slot, vctx, &mut votes)?; + } + + // Operator called set identity make sure that our keypair is updated for voting + VotorEvent::SetIdentity => { + info!("{my_pubkey}: SetIdentity"); + if let Err(e) = Self::handle_set_identity(my_pubkey, ctx, vctx) { + error!( + "Unable to load new vote history when attempting to change identity from {} \ + to {} in voting loop, Exiting: {}", + vctx.vote_history.node_pubkey, + ctx.cluster_info.id(), + e + ); + return Err(EventLoopError::SetIdentityError(e)); + } + } + } + Ok(votes) + } + + /// Under normal cases we should have a parent ready for first slot of every window. 
+    /// But it could be that we joined when the later slots of the window were finalized; then
+    /// we never saw the parent ready for the first slot and haven't voted for the first slot,
+    /// so we can't keep processing the rest of the window. This is especially a problem for
+    /// cluster standstill.
+    /// For example:
+    /// A 40%
+    /// B 40%
+    /// C 30%
+    /// A and B finalize blocks together up to slot 9; now A exited and C joined.
+    /// C sees block 9 as finalized, but it never had parent ready triggered for slot 8.
+    /// C can't vote for any slot in the window because there is no parent ready for slot 8.
+    /// Meanwhile B is stuck because it is waiting for >60% of the votes to finalize slot 9.
+    /// The cluster will get stuck.
+    /// After we add the following function, C will see that block 9 is finalized yet
+    /// it never had parent ready for slot 9, so it will trigger parent ready for slot 9;
+    /// this means C will immediately vote Notarize for slot 9, then vote Notarize for
+    /// all later slots. So B and C together can keep finalizing blocks and unstick the
+    /// cluster. If we get a finalization cert for later slots of the window and we have the
+    /// block replayed, trace back to the first slot of the window and emit parent ready.
+ fn add_missing_parent_ready( + finalized_block: Block, + ctx: &SharedContext, + vctx: &mut VotingContext, + local_context: &mut LocalContext, + ) -> Option<(Slot, Block)> { + let (slot, block_id) = finalized_block; + let first_slot_of_window = first_of_consecutive_leader_slots(slot); + if first_slot_of_window == slot || first_slot_of_window == 0 { + // No need to trigger parent ready for the first slot of the window + return None; + } + if vctx.vote_history.highest_parent_ready_slot() >= Some(first_slot_of_window) + || !local_context.finalized_blocks.contains(&finalized_block) + { + return None; + } + // If the block is missing, we can't trigger parent ready + let bank = ctx.bank_forks.read().unwrap().get(slot)?; + if !bank.is_frozen() { + // We haven't finished replay for the block, so we can't trigger parent ready + return None; + } + if bank.block_id() != Some(block_id) { + // We have a different block id for the slot, repair should kick in later + return None; + } + let parent_bank = bank.parent()?; + let parent_slot = parent_bank.slot(); + let Some(parent_block_id) = parent_bank.block_id() else { + // Maybe this bank is set to root after we drop bank_forks. 
+ error!( + "{}: Unable to find block id for parent bank {parent_slot} to trigger parent ready", + local_context.my_pubkey + ); + return None; + }; + info!( + "{}: Triggering parent ready for slot {slot} with parent {parent_slot} {parent_block_id}", + local_context.my_pubkey + ); + Some((slot, (parent_slot, parent_block_id))) + } + + fn handle_set_identity( + my_pubkey: &mut Pubkey, + ctx: &SharedContext, + vctx: &mut VotingContext, + ) -> Result<(), VoteHistoryError> { + let new_identity = ctx.cluster_info.keypair(); + let new_pubkey = new_identity.pubkey(); + // This covers both: + // - startup set-identity so that vote_history is outdated but my_pubkey == new_pubkey + // - set-identity during normal operation, vote_history == my_pubkey != new_pubkey + if *my_pubkey != new_pubkey || vctx.vote_history.node_pubkey != new_pubkey { + let my_old_pubkey = vctx.vote_history.node_pubkey; + *my_pubkey = new_pubkey; + vctx.vote_history = VoteHistory::restore(ctx.vote_history_storage.as_ref(), my_pubkey)?; + vctx.identity_keypair = new_identity.clone(); + warn!("set-identity: from {my_old_pubkey} to {my_pubkey}"); + } + Ok(()) + } + + fn get_block_parent_block(bank: &Bank) -> (Block, Block) { + let slot = bank.slot(); + let block = ( + slot, + bank.block_id().expect("Block id must be set upstream"), + ); + let parent_slot = bank.parent_slot(); + let parent_block_id = bank.parent_block_id().unwrap_or_else(|| { + // To account for child of genesis and snapshots we insert a + // default block id here. Charlie is working on a SIMD to add block + // id to snapshots, which can allow us to remove this and update + // the default case in parent ready tracker. 
+ trace!("Using default block id for {slot} parent {parent_slot}"); + Hash::default() + }); + let parent_block = (parent_slot, parent_block_id); + (block, parent_block) + } + + /// Tries to vote notarize on `block`: + /// - We have not voted notarize or skip for `slot(block)` + /// - Either it's the first leader block of the window and we are parent ready + /// - or it's a consecutive slot and we have voted notarize on the parent + /// + /// The boolean in the Result indicates whether we actually voted notarize. + /// An error returned will cause the voting process to be aborted. + fn try_notar( + my_pubkey: &Pubkey, + (slot, block_id): Block, + parent_block @ (parent_slot, parent_block_id): Block, + pending_blocks: &mut PendingBlocks, + voting_context: &mut VotingContext, + votes: &mut Vec, + ) -> Result { + if voting_context.vote_history.voted(slot) { + return Ok(false); + } + + if leader_slot_index(slot) == 0 || slot == 1 { + if !voting_context + .vote_history + .is_parent_ready(slot, &parent_block) + { + // Need to ingest more certificates first + return Ok(false); + } + } else { + if parent_slot.saturating_add(1) != slot { + // Non consecutive + return Ok(false); + } + if voting_context.vote_history.voted_notar(parent_slot) != Some(parent_block_id) { + // Voted skip, or notarize on a different version of the parent + return Ok(false); + } + } + + info!("{my_pubkey}: Voting notarize for {slot} {block_id}"); + if let Some(bls_op) = generate_vote_message( + Vote::new_notarization_vote(slot, block_id), + false, + voting_context, + )? 
{ + votes.push(bls_op); + } + alpenglow_update_commitment_cache( + AlpenglowCommitmentType::Notarize, + slot, + &voting_context.commitment_sender, + )?; + pending_blocks.remove(&slot); + + Ok(true) + } + + /// Checks the pending blocks that have completed replay to see if they + /// are eligble to be voted on now + fn check_pending_blocks( + my_pubkey: &Pubkey, + pending_blocks: &mut PendingBlocks, + voting_context: &mut VotingContext, + votes: &mut Vec, + ) -> Result<(), VoteError> { + let blocks_to_check: Vec<(Block, Block)> = pending_blocks + .values() + .flat_map(|blocks| blocks.iter()) + .copied() + .collect(); + + for (block, parent_block) in blocks_to_check { + Self::try_notar( + my_pubkey, + block, + parent_block, + pending_blocks, + voting_context, + votes, + )?; + } + Ok(()) + } + + /// Tries to send a finalize vote for the block if + /// - the block has a notarization certificate + /// - we have not already voted finalize + /// - we voted notarize for the block + /// - we have not voted skip, notarize fallback or skip fallback in the slot (bad window) + /// + /// The boolean in the Result indicates whether we actually voted finalize. + /// An error returned will cause the voting process to be aborted. + fn try_final( + my_pubkey: &Pubkey, + block @ (slot, block_id): Block, + voting_context: &mut VotingContext, + votes: &mut Vec, + ) -> Result { + if !voting_context.vote_history.is_block_notarized(&block) + || voting_context.vote_history.its_over(slot) + || voting_context.vote_history.bad_window(slot) + { + return Ok(false); + } + + if voting_context + .vote_history + .voted_notar(slot) + .is_none_or(|bid| bid != block_id) + { + return Ok(false); + } + + info!("{my_pubkey}: Voting finalize for {slot}"); + if let Some(bls_op) = + generate_vote_message(Vote::new_finalization_vote(slot), false, voting_context)? 
+ { + votes.push(bls_op); + } + Ok(true) + } + + fn try_skip_window( + my_pubkey: &Pubkey, + slot: Slot, + voting_context: &mut VotingContext, + votes: &mut Vec, + ) -> Result<(), VoteError> { + // In case we set root in the middle of a leader window, + // it's not necessary to vote skip prior to it and we won't + // be able to check vote history if we've already voted on it + let root_bank = voting_context.sharable_banks.root(); + let start = first_of_consecutive_leader_slots(slot).max(root_bank.slot()); + for s in start..=last_of_consecutive_leader_slots(slot) { + if voting_context.vote_history.voted(s) { + continue; + } + info!("{my_pubkey}: Voting skip for {s}"); + if let Some(bls_op) = + generate_vote_message(Vote::new_skip_vote(s), false, voting_context)? + { + votes.push(bls_op); + } + } + Ok(()) + } + + /// Refresh all votes cast for slots > highest_finalized_slot + fn refresh_votes( + my_pubkey: &Pubkey, + highest_finalized_slot: Slot, + voting_context: &mut VotingContext, + votes: &mut Vec, + ) -> Result<(), VoteError> { + for vote in voting_context + .vote_history + .votes_cast_since(highest_finalized_slot) + { + info!("{my_pubkey}: Refreshing vote {vote:?}"); + if let Some(bls_op) = generate_vote_message(vote, true, voting_context)? 
{ + votes.push(bls_op); + } + } + Ok(()) + } + + /// Checks if we can set root on a new block + /// The block must be: + /// - Present in bank forks + /// - Newer than the current root + /// - We must have already voted on bank.slot() + /// - Bank is frozen and finished shredding + /// - Block has a finalization certificate + /// + /// If so set root on the highest block that fits these conditions + fn check_rootable_blocks( + my_pubkey: &Pubkey, + ctx: &SharedContext, + vctx: &mut VotingContext, + rctx: &RootContext, + pending_blocks: &mut PendingBlocks, + finalized_blocks: &mut BTreeSet, + received_shred: &mut BTreeSet, + stats: &mut EventHandlerStats, + ) -> Result<(), SetRootError> { + let bank_forks_r = ctx.bank_forks.read().unwrap(); + let old_root = bank_forks_r.root(); + let Some(new_root) = finalized_blocks + .iter() + .filter_map(|&(slot, block_id)| { + let bank = bank_forks_r.get(slot)?; + (slot > old_root + && vctx.vote_history.voted(slot) + && bank.is_frozen() + && bank.block_id().is_some_and(|bid| bid == block_id)) + .then_some(slot) + }) + .max() + else { + // No rootable banks + return Ok(()); + }; + drop(bank_forks_r); + root_utils::set_root( + my_pubkey, + new_root, + ctx, + vctx, + rctx, + pending_blocks, + finalized_blocks, + received_shred, + )?; + stats.set_root(new_root); + Ok(()) + } + + pub(crate) fn join(self) -> thread::Result<()> { + self.t_event_handler.join() + } +} diff --git a/votor/src/event_handler/stats.rs b/votor/src/event_handler/stats.rs new file mode 100644 index 00000000000000..8cf58eaf6059c8 --- /dev/null +++ b/votor/src/event_handler/stats.rs @@ -0,0 +1,308 @@ +use { + crate::{event::VotorEvent, voting_utils::BLSOp, VoteType}, + solana_clock::Slot, + solana_metrics::datapoint_info, + solana_votor_messages::consensus_message::ConsensusMessage, + std::{ + collections::{BTreeMap, HashMap}, + time::{Duration, Instant}, + }, +}; + +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); + +#[derive(Debug, Clone)] 
+struct SlotTracking { + /// The time when the slot tracking started + start: Instant, + /// The time when the first shred for this slot was received + first_shred: Option, + /// The time when the parent block for this slot was ready + parent_ready: Option, + /// The time when the notarization vote for this slot was sent + vote_notarize: Option, + /// The time when the skip vote for this slot was sent + vote_skip: Option, + /// If the slot was finalized, this is the time when it was finalized, + /// the bool indicates if it was fast finalized + finalized: Option<(Instant, bool)>, +} + +impl Default for SlotTracking { + fn default() -> Self { + Self { + start: Instant::now(), + first_shred: None, + parent_ready: None, + vote_notarize: None, + vote_skip: None, + finalized: None, + } + } +} + +#[derive(Debug, Default)] +struct EventCountAndTime { + count: u16, + time_us: u32, +} + +#[derive(Debug)] +pub(crate) struct EventHandlerStats { + // Number of events that were ignored. This includes events that were + // received but not processed due to various reasons (e.g., outdated, + // irrelevant). + pub(crate) ignored: u16, + + // Number of times where we are attempting to start a leader window but + // there is already a pending window to produce. The older window is + // discarded in favor of the newer one. + pub(crate) leader_window_replaced: u16, + + // Number of times we updated the root. + pub(crate) set_root_count: u16, + + // Number of times we setup timeouts for a new leader window. + pub(crate) timeout_set: u16, + + // Amount of time spent receiving events. Includes waiting for events. + pub(crate) receive_event_time_us: u32, + + // Amount of time spent sending votes. + pub(crate) send_vote_time_us: u32, + + // Number of times we saw each event and time spent processing the event. + received_events_count_and_timing: HashMap, + + // Number of votes sent for each vote type. + sent_votes: HashMap, + + // Timing information for major events for each slot. 
+ slot_tracking_map: BTreeMap, + + root_slot: Slot, + last_report_time: Instant, +} + +impl Default for EventHandlerStats { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum StatsEvent { + Block, + BlockNotarized, + FirstShred, + ParentReady, + TimeoutCrashedLeader, + Timeout, + SafeToNotar, + SafeToSkip, + ProduceWindow, + Finalized, + Standstill, + SetIdentity, +} + +impl StatsEvent { + pub fn new(event: &VotorEvent) -> Self { + match event { + VotorEvent::Block(_) => StatsEvent::Block, + VotorEvent::BlockNotarized(_) => StatsEvent::BlockNotarized, + VotorEvent::FirstShred(_) => StatsEvent::FirstShred, + VotorEvent::ParentReady { .. } => StatsEvent::ParentReady, + VotorEvent::TimeoutCrashedLeader(_) => StatsEvent::TimeoutCrashedLeader, + VotorEvent::Timeout(_) => StatsEvent::Timeout, + VotorEvent::SafeToNotar(_) => StatsEvent::SafeToNotar, + VotorEvent::SafeToSkip(_) => StatsEvent::SafeToSkip, + VotorEvent::ProduceWindow(_) => StatsEvent::ProduceWindow, + VotorEvent::Finalized(..) => StatsEvent::Finalized, + VotorEvent::Standstill(_) => StatsEvent::Standstill, + VotorEvent::SetIdentity => StatsEvent::SetIdentity, + } + } +} + +impl EventHandlerStats { + pub fn new() -> Self { + Self { + ignored: 0, + leader_window_replaced: 0, + set_root_count: 0, + timeout_set: 0, + receive_event_time_us: 0, + send_vote_time_us: 0, + received_events_count_and_timing: HashMap::new(), + sent_votes: HashMap::new(), + slot_tracking_map: BTreeMap::new(), + root_slot: 0, + last_report_time: Instant::now(), + } + } + + pub fn handle_event_arrival(&mut self, event: &VotorEvent) -> StatsEvent { + match event { + VotorEvent::FirstShred(slot) => { + let entry = self.slot_tracking_map.entry(*slot).or_default(); + entry.first_shred = Some(Instant::now()); + } + VotorEvent::ParentReady { slot, .. 
} => { + let entry = self.slot_tracking_map.entry(*slot).or_default(); + entry.parent_ready = Some(Instant::now()); + } + VotorEvent::Finalized((slot, _), is_fast_finalization) => { + let entry = self.slot_tracking_map.entry(*slot).or_default(); + if entry.finalized.is_none() { + entry.finalized = Some((Instant::now(), *is_fast_finalization)); + } else if *is_fast_finalization { + // We can accept Notarize and FastFinalization, never set the flag from true to false + if let Some((instant, false)) = entry.finalized { + entry.finalized = Some((instant, true)); + } + } + } + _ => (), + } + StatsEvent::new(event) + } + + pub fn set_root(&mut self, new_root: Slot) { + self.root_slot = new_root; + self.set_root_count = self.set_root_count.saturating_add(1); + } + + pub fn incr_event_with_timing(&mut self, stats_event: StatsEvent, time_us: u64) { + let entry = self + .received_events_count_and_timing + .entry(stats_event) + .or_default(); + entry.count = entry.count.saturating_add(1); + entry.time_us = entry.time_us.saturating_add(time_us as u32); + } + + pub fn incr_vote(&mut self, bls_op: &BLSOp) { + if let BLSOp::PushVote { message, .. 
} = bls_op { + let ConsensusMessage::Vote(vote) = **message else { + warn!("Unexpected BLS message type: {:?}", message); + return; + }; + let vote_type = VoteType::get_type(&vote.vote); + let entry = self.sent_votes.entry(vote_type).or_insert(0); + *entry = entry.saturating_add(1); + if vote_type == VoteType::Notarize { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_notarize = Some(Instant::now()); + } else if vote_type == VoteType::Skip { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_skip = Some(Instant::now()); + } + } else { + warn!("Unexpected BLS operation: {:?}", bls_op); + } + } + + pub fn maybe_report(&mut self) { + let now = Instant::now(); + if now.duration_since(self.last_report_time) < STATS_REPORT_INTERVAL { + return; + } + datapoint_info!( + "event_handler_stats", + ("ignored", self.ignored as i64, i64), + ( + "leader_window_replaced", + self.leader_window_replaced as i64, + i64 + ), + ("set_root_count", self.set_root_count as i64, i64), + ("timeout_set", self.timeout_set as i64, i64), + ); + for (event, EventCountAndTime { count, time_us }) in &self.received_events_count_and_timing + { + datapoint_info!( + "event_handler_received_event_count_and_timing", + ("event", format!("{:?}", event), String), + ("count", *count as i64, i64), + ("elapsed_us", *time_us as i64, i64) + ); + } + datapoint_info!( + "event_handler_timing", + ( + "receive_event_time_us", + self.receive_event_time_us as i64, + i64 + ), + ("send_vote_time_us", self.send_vote_time_us as i64, i64), + ); + for (vote_type, count) in &self.sent_votes { + datapoint_info!( + "event_handler_sent_vote_count", + ("vote", format!("{:?}", vote_type), String), + ("count", *count as i64, i64) + ); + } + // Only report if the slot is lower than root_slot + let split_off_map = self.slot_tracking_map.split_off(&self.root_slot); + for (slot, tracking) in &self.slot_tracking_map { + let start = tracking.start; + 
datapoint_info!( + "event_handler_slot_tracking", + ("slot", *slot as i64, i64), + ( + "first_shred", + tracking.first_shred.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "parent_ready", + tracking.parent_ready.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_notarize", + tracking.vote_notarize.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_skip", + tracking.vote_skip.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "finalized", + tracking.finalized.map(|t| { + t.0.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ("is_fast_finalization", tracking.finalized.map(|t| t.1), Option) + ); + } + self.last_report_time = now; + let root_slot = self.root_slot; + *self = EventHandlerStats::new(); + self.root_slot = root_slot; + self.slot_tracking_map = split_off_map; + } +} diff --git a/votor/src/lib.rs b/votor/src/lib.rs index 17cae4fcd509a1..74282d33faa3ec 100644 --- a/votor/src/lib.rs +++ b/votor/src/lib.rs @@ -1,3 +1,137 @@ #![cfg_attr(feature = "frozen-abi", feature(min_specialization))] -#[cfg(feature = "agave-unstable-api")] +use { + solana_votor_messages::{consensus_message::Certificate, vote::Vote}, + std::time::Duration, +}; + +pub mod certificate_pool; +mod certificate_pool_service; +pub mod commitment; +pub mod event; +mod event_handler; pub mod root_utils; +mod timer_manager; +pub mod vote_history; +pub mod vote_history_storage; +pub mod voting_utils; +pub mod votor; + +#[macro_use] +extern crate log; + +extern crate serde_derive; + +#[cfg_attr(feature = "frozen-abi", macro_use)] +#[cfg(feature = "frozen-abi")] +extern crate solana_frozen_abi_macro; + +// Core consensus types and constants +pub type Stake = u64; + 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum VoteType { + Finalize, + Notarize, + NotarizeFallback, + Skip, + SkipFallback, +} + +impl VoteType { + #[allow(dead_code)] + pub fn is_notarize_type(&self) -> bool { + matches!(self, Self::Notarize | Self::NotarizeFallback) + } +} + +pub const fn conflicting_types(vote_type: VoteType) -> &'static [VoteType] { + match vote_type { + VoteType::Finalize => &[VoteType::NotarizeFallback, VoteType::Skip], + VoteType::Notarize => &[VoteType::Skip, VoteType::NotarizeFallback], + VoteType::NotarizeFallback => &[VoteType::Finalize, VoteType::Notarize], + VoteType::Skip => &[ + VoteType::Finalize, + VoteType::Notarize, + VoteType::SkipFallback, + ], + VoteType::SkipFallback => &[VoteType::Skip], + } +} + +/// Lookup from `CertificateId` to the `VoteType`s that contribute, +/// as well as the stake fraction required for certificate completion. +/// +/// Must be in sync with `vote_to_certificate_ids` +pub const fn certificate_limits_and_vote_types( + cert_type: Certificate, +) -> (f64, &'static [VoteType]) { + match cert_type { + Certificate::Notarize(_, _) => (0.6, &[VoteType::Notarize]), + Certificate::NotarizeFallback(_, _) => { + (0.6, &[VoteType::Notarize, VoteType::NotarizeFallback]) + } + Certificate::FinalizeFast(_, _) => (0.8, &[VoteType::Notarize]), + Certificate::Finalize(_) => (0.6, &[VoteType::Finalize]), + Certificate::Skip(_) => (0.6, &[VoteType::Skip, VoteType::SkipFallback]), + } +} + +/// Lookup from `Vote` to the `CertificateId`s the vote accounts for +/// +/// Must be in sync with `certificate_limits_and_vote_types` and `VoteType::get_type` +pub fn vote_to_certificate_ids(vote: &Vote) -> Vec { + match vote { + Vote::Notarize(vote) => vec![ + Certificate::Notarize(vote.slot(), *vote.block_id()), + Certificate::NotarizeFallback(vote.slot(), *vote.block_id()), + Certificate::FinalizeFast(vote.slot(), *vote.block_id()), + ], + Vote::NotarizeFallback(vote) => { + 
vec![Certificate::NotarizeFallback(vote.slot(), *vote.block_id())] + } + Vote::Finalize(vote) => vec![Certificate::Finalize(vote.slot())], + Vote::Skip(vote) => vec![Certificate::Skip(vote.slot())], + Vote::SkipFallback(vote) => vec![Certificate::Skip(vote.slot())], + } +} + +pub const MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES: usize = 1; +pub const MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE: usize = 3; + +pub const SAFE_TO_NOTAR_MIN_NOTARIZE_ONLY: f64 = 0.4; +pub const SAFE_TO_NOTAR_MIN_NOTARIZE_FOR_NOTARIZE_OR_SKIP: f64 = 0.2; +pub const SAFE_TO_NOTAR_MIN_NOTARIZE_AND_SKIP: f64 = 0.6; + +pub const SAFE_TO_SKIP_THRESHOLD: f64 = 0.4; + +/// Time bound assumed on network transmission delays during periods of synchrony. +const DELTA: Duration = Duration::from_millis(250); + +/// Time the leader has for producing and sending the block. +const DELTA_BLOCK: Duration = Duration::from_millis(400); + +/// Base timeout for when leader's first slice should arrive if they sent it immediately. +const DELTA_TIMEOUT: Duration = DELTA.checked_mul(3).unwrap(); + +/// Timeout for standstill detection mechanism. +const DELTA_STANDSTILL: Duration = Duration::from_millis(10_000); + +/// Returns the Duration for when the `SkipTimer` should be set for for the given slot in the leader window. 
+#[inline] +pub fn skip_timeout(leader_block_index: usize) -> Duration { + DELTA_TIMEOUT + .saturating_add( + DELTA_BLOCK + .saturating_mul(leader_block_index as u32) + .saturating_add(DELTA_TIMEOUT), + ) + .saturating_add(DELTA) +} + +/// Block timeout, when we should publish the final shred for the leader block index +/// within the leader window +#[inline] +pub fn block_timeout(leader_block_index: usize) -> Duration { + // TODO: based on testing, perhaps adjust this + DELTA_BLOCK.saturating_mul((leader_block_index as u32).saturating_add(1)) +} diff --git a/votor/src/root_utils.rs b/votor/src/root_utils.rs index e26f2e0792ddd8..769a260cbab2e9 100644 --- a/votor/src/root_utils.rs +++ b/votor/src/root_utils.rs @@ -1,7 +1,8 @@ use { + crate::{event_handler::PendingBlocks, voting_utils::VotingContext, votor::SharedContext}, crossbeam_channel::Sender, - log::{info, warn}, solana_clock::Slot, + solana_hash::Hash, solana_ledger::{blockstore::Blockstore, leader_schedule_cache::LeaderScheduleCache}, solana_pubkey::Pubkey, solana_rpc::{ @@ -13,11 +14,88 @@ use { installed_scheduler_pool::BankWithScheduler, snapshot_controller::SnapshotController, }, - std::sync::{Arc, RwLock}, + solana_time_utils::timestamp, + solana_votor_messages::consensus_message::Block, + std::{ + collections::BTreeSet, + sync::{Arc, RwLock}, + }, }; +/// Structures that are not used in the event loop, but need to be updated +/// or notified when setting root +pub(crate) struct RootContext { + pub(crate) leader_schedule_cache: Arc, + pub(crate) snapshot_controller: Option>, + pub(crate) bank_notification_sender: Option, + pub(crate) drop_bank_sender: Sender>, +} + +/// Sets the root for the votor event handling loop. 
Handles rooting all things +/// except the certificate pool +pub(crate) fn set_root( + my_pubkey: &Pubkey, + new_root: Slot, + ctx: &SharedContext, + vctx: &mut VotingContext, + rctx: &RootContext, + pending_blocks: &mut PendingBlocks, + finalized_blocks: &mut BTreeSet, + received_shred: &mut BTreeSet, +) -> Result<(), SetRootError> { + info!("{my_pubkey}: setting root {new_root}"); + vctx.vote_history.set_root(new_root); + *pending_blocks = pending_blocks.split_off(&new_root); + *finalized_blocks = finalized_blocks.split_off(&(new_root, Hash::default())); + *received_shred = received_shred.split_off(&new_root); + + check_and_handle_new_root( + new_root, + new_root, + rctx.snapshot_controller.as_deref(), + Some(new_root), + &rctx.bank_notification_sender, + &rctx.drop_bank_sender, + &ctx.blockstore, + &rctx.leader_schedule_cache, + &ctx.bank_forks, + ctx.rpc_subscriptions.as_deref(), + my_pubkey, + |_| {}, + )?; + + // Distinguish between duplicate versions of same slot + let hash = ctx.bank_forks.read().unwrap().bank_hash(new_root).unwrap(); + if let Err(e) = + ctx.blockstore + .insert_optimistic_slot(new_root, &hash, timestamp().try_into().unwrap()) + { + error!( + "failed to record optimistic slot in blockstore: slot={}: {:?}", + new_root, &e + ); + } + + // It is critical to send the OC notification in order to keep compatibility with + // the RPC API. Additionally the PrioritizationFeeCache relies on this notification + // in order to perform cleanup. In the future we will look to deprecate OC and remove + // these code paths. 
+ if let Some(config) = &rctx.bank_notification_sender { + let dependency_work = config + .dependency_tracker + .as_ref() + .map(|s| s.get_current_declared_work()); + // TODO: propagate error + let _ = config.sender.send(( + BankNotification::OptimisticallyConfirmed(new_root), + dependency_work, + )); + } + Ok(()) +} + /// Sets the new root, additionally performs the callback after setting the bank forks root -/// During this transition period where both replay stage and votor can root depending on the feature flag we +/// During this transition period where both replay stage and voting loop can root depending on the feature flag we /// have a callback that cleans up progress map and other tower bft structures. Then the callgraph is /// /// ReplayStage::check_and_handle_new_root -> root_utils::check_and_handle_new_root(callback) diff --git a/votor/src/timer_manager.rs b/votor/src/timer_manager.rs new file mode 100644 index 00000000000000..9b0ce3c677bac7 --- /dev/null +++ b/votor/src/timer_manager.rs @@ -0,0 +1,66 @@ +//! Controls the queueing and firing of skip timer events for use +//! in the event loop. +// TODO: Make this mockable in event_handler for tests + +mod stats; +mod timers; + +use { + crate::{event::VotorEvent, DELTA_BLOCK, DELTA_TIMEOUT}, + crossbeam_channel::Sender, + parking_lot::RwLock, + solana_clock::Slot, + std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread::{self, JoinHandle}, + time::{Duration, Instant}, + }, + timers::Timers, +}; + +/// A manager of timer states. Uses a background thread to trigger next ready +/// timers and send events. 
+pub(crate) struct TimerManager {
+    timers: Arc<RwLock<Timers>>,
+    handle: JoinHandle<()>,
+}
+
+impl TimerManager {
+    pub(crate) fn new(event_sender: Sender<VotorEvent>, exit: Arc<AtomicBool>) -> Self {
+        let timers = Arc::new(RwLock::new(Timers::new(
+            DELTA_TIMEOUT,
+            DELTA_BLOCK,
+            event_sender,
+        )));
+        let handle = {
+            let timers = Arc::clone(&timers);
+            thread::spawn(move || {
+                while !exit.load(Ordering::Relaxed) {
+                    let duration = match timers.write().progress(Instant::now()) {
+                        None => {
+                            // No active timers, sleep for an arbitrary amount.
+                            // This should be smaller than the minimum amount
+                            // of time any newly added timers would take to expire.
+                            Duration::from_millis(100)
+                        }
+                        Some(next_fire) => next_fire.duration_since(Instant::now()),
+                    };
+                    thread::sleep(duration);
+                }
+            })
+        };
+
+        Self { timers, handle }
+    }
+
+    pub(crate) fn set_timeouts(&self, slot: Slot) {
+        self.timers.write().set_timeouts(slot, Instant::now());
+    }
+
+    pub(crate) fn join(self) {
+        self.handle.join().unwrap();
+    }
+}
diff --git a/votor/src/timer_manager/stats.rs b/votor/src/timer_manager/stats.rs
new file mode 100644
index 00000000000000..16f2b5647388c4
--- /dev/null
+++ b/votor/src/timer_manager/stats.rs
@@ -0,0 +1,71 @@
+use {
+    solana_metrics::datapoint_info,
+    std::time::{Duration, Instant},
+};
+
+const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10);
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct TimerManagerStats {
+    /// The maximum heap size of the timers since the last report
+    max_heap_size: u64,
+    /// The number of times `set_timeout` was called.
+    set_timeout_count: u64,
+    /// The number of times `set_timeout` was called, there was no
+    /// existing timer, so this operation succeeded.
+ set_timeout_succeed_count: u64, + /// The last time the stats were reported + last_report: Instant, +} + +impl TimerManagerStats { + pub fn new() -> Self { + Self { + max_heap_size: 0, + set_timeout_count: 0, + set_timeout_succeed_count: 0, + last_report: Instant::now(), + } + } + + #[cfg(test)] + pub fn max_heap_size(&self) -> u64 { + self.max_heap_size + } + + #[cfg(test)] + pub fn set_timeout_count(&self) -> u64 { + self.set_timeout_count + } + + #[cfg(test)] + pub fn set_timeout_succeed_count(&self) -> u64 { + self.set_timeout_succeed_count + } + + pub fn incr_timeout_count_with_heap_size(&mut self, size: usize, new_timer_inserted: bool) { + self.set_timeout_count = self.set_timeout_count.saturating_add(1); + self.max_heap_size = self.max_heap_size.max(size as u64); + if new_timer_inserted { + self.set_timeout_succeed_count = self.set_timeout_succeed_count.saturating_add(1); + } + self.maybe_report(); + } + + fn maybe_report(&mut self) { + if self.last_report.elapsed() < STATS_REPORT_INTERVAL { + return; + } + datapoint_info!( + "votor_timer_manager", + ("max_heap_size", self.max_heap_size as i64, i64), + ("set_timeout_count", self.set_timeout_count as i64, i64), + ( + "set_timeout_succeed_count", + self.set_timeout_succeed_count as i64, + i64 + ), + ); + *self = TimerManagerStats::new(); + } +} diff --git a/votor/src/timer_manager/timers.rs b/votor/src/timer_manager/timers.rs new file mode 100644 index 00000000000000..9a424bca411d32 --- /dev/null +++ b/votor/src/timer_manager/timers.rs @@ -0,0 +1,251 @@ +use { + crate::{event::VotorEvent, timer_manager::stats::TimerManagerStats}, + crossbeam_channel::Sender, + solana_clock::Slot, + solana_ledger::leader_schedule_utils::last_of_consecutive_leader_slots, + std::{ + cmp::Reverse, + collections::{BinaryHeap, HashMap, VecDeque}, + time::{Duration, Instant}, + }, +}; + +/// Encodes a basic state machine of the different stages involved in handling +/// timeouts for a window of slots. 
+enum TimerState {
+    /// Waiting for the DELTA_TIMEOUT stage.
+    WaitDeltaTimeout {
+        /// The slots in the window. Must not be empty.
+        window: VecDeque<Slot>,
+        /// Time when this stage will end.
+        timeout: Instant,
+    },
+    /// Waiting for the DELTA_BLOCK stage.
+    WaitDeltaBlock {
+        /// The slots in the window. Must not be empty.
+        window: VecDeque<Slot>,
+        /// Time when this stage will end.
+        timeout: Instant,
+    },
+    /// The state machine is done.
+    Done,
+}
+
+impl TimerState {
+    /// Creates a new instance of the state machine.
+    ///
+    /// Also returns the next time the timer should fire.
+    fn new(slot: Slot, delta_timeout: Duration, now: Instant) -> (Self, Instant) {
+        let window = (slot..=last_of_consecutive_leader_slots(slot)).collect::<VecDeque<Slot>>();
+        assert!(!window.is_empty());
+        let timeout = now.checked_add(delta_timeout).unwrap();
+        (Self::WaitDeltaTimeout { window, timeout }, timeout)
+    }
+
+    /// Call to make progress on the state machine.
+    ///
+    /// Returns a potentially empty list of events that should be sent.
+    fn progress(&mut self, delta_block: Duration, now: Instant) -> Option<VotorEvent> {
+        match self {
+            Self::WaitDeltaTimeout { window, timeout } => {
+                assert!(!window.is_empty());
+                if &now < timeout {
+                    return None;
+                }
+                let slot = *window.front().unwrap();
+                let timeout = now.checked_add(delta_block).unwrap();
+                *self = Self::WaitDeltaBlock {
+                    window: window.to_owned(),
+                    timeout,
+                };
+                Some(VotorEvent::TimeoutCrashedLeader(slot))
+            }
+            Self::WaitDeltaBlock { window, timeout } => {
+                assert!(!window.is_empty());
+                if &now < timeout {
+                    return None;
+                }
+
+                let ret = Some(VotorEvent::Timeout(window.pop_front().unwrap()));
+                if window.is_empty() {
+                    *self = Self::Done;
+                } else {
+                    *timeout = now.checked_add(delta_block).unwrap();
+                }
+                ret
+            }
+            Self::Done => None,
+        }
+    }
+
+    /// When would this state machine next be able to make progress.
+    fn next_fire(&self) -> Option<Instant> {
+        match self {
+            Self::WaitDeltaTimeout { window: _, timeout }
+            | Self::WaitDeltaBlock { window: _, timeout } => Some(*timeout),
+            Self::Done => None,
+        }
+    }
+}
+
+/// Maintains all active timer states for windows of slots.
+pub(super) struct Timers {
+    delta_timeout: Duration,
+    delta_block: Duration,
+    /// Timers are indexed by slots.
+    timers: HashMap<Slot, TimerState>,
+    /// A min heap based on the time the next timer state might be ready.
+    heap: BinaryHeap<Reverse<(Instant, Slot)>>,
+    /// Channel to send events on.
+    event_sender: Sender<VotorEvent>,
+    /// Stats for the timer manager.
+    stats: TimerManagerStats,
+}
+
+impl Timers {
+    pub(super) fn new(
+        delta_timeout: Duration,
+        delta_block: Duration,
+        event_sender: Sender<VotorEvent>,
+    ) -> Self {
+        Self {
+            delta_timeout,
+            delta_block,
+            timers: HashMap::new(),
+            heap: BinaryHeap::new(),
+            event_sender,
+            stats: TimerManagerStats::new(),
+        }
+    }
+
+    /// Call to set timeouts for a new window of slots.
+    pub(super) fn set_timeouts(&mut self, slot: Slot, now: Instant) {
+        assert_eq!(self.heap.len(), self.timers.len());
+        let (timer, next_fire) = TimerState::new(slot, self.delta_timeout, now);
+        // It is possible that this slot already has a timer set e.g. if there
+        // are multiple ParentReady for the same slot. Do not insert new timer then.
+        let mut new_timer_inserted = false;
+        self.timers.entry(slot).or_insert_with(|| {
+            self.heap.push(Reverse((next_fire, slot)));
+            new_timer_inserted = true;
+            timer
+        });
+        self.stats
+            .incr_timeout_count_with_heap_size(self.heap.len(), new_timer_inserted);
+    }
+
+    /// Call to make progress on the timer states. If there are still active
+    /// timer states, returns when the earliest one might become ready.
+    pub(super) fn progress(&mut self, now: Instant) -> Option<Instant> {
+        assert_eq!(self.heap.len(), self.timers.len());
+        let mut ret_timeout = None;
+        loop {
+            assert_eq!(self.heap.len(), self.timers.len());
+            match self.heap.pop() {
+                None => break,
+                Some(Reverse((next_fire, slot))) => {
+                    if now < next_fire {
+                        ret_timeout =
+                            Some(ret_timeout.map_or(next_fire, |r| std::cmp::min(r, next_fire)));
+                        self.heap.push(Reverse((next_fire, slot)));
+                        break;
+                    }
+
+                    let mut timer = self.timers.remove(&slot).unwrap();
+                    if let Some(event) = timer.progress(self.delta_block, now) {
+                        self.event_sender.send(event).unwrap();
+                    }
+                    if let Some(next_fire) = timer.next_fire() {
+                        self.heap.push(Reverse((next_fire, slot)));
+                        assert!(self.timers.insert(slot, timer).is_none());
+                        ret_timeout =
+                            Some(ret_timeout.map_or(next_fire, |r| std::cmp::min(r, next_fire)));
+                    }
+                }
+            }
+        }
+        ret_timeout
+    }
+
+    #[cfg(test)]
+    pub(super) fn stats(&self) -> TimerManagerStats {
+        self.stats.clone()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use {super::*, crossbeam_channel::unbounded};
+
+    #[test]
+    fn timer_state_machine() {
+        let one_micro = Duration::from_micros(1);
+        let now = Instant::now();
+        let slot = 0;
+        let (mut timer_state, next_fire) = TimerState::new(slot, one_micro, now);
+
+        assert!(matches!(
+            timer_state.progress(one_micro, next_fire).unwrap(),
+            VotorEvent::TimeoutCrashedLeader(0)
+        ));
+
+        assert!(matches!(
+            timer_state
+                .progress(one_micro, timer_state.next_fire().unwrap())
+                .unwrap(),
+            VotorEvent::Timeout(0)
+        ));
+
+        assert!(matches!(
+            timer_state
+                .progress(one_micro, timer_state.next_fire().unwrap())
+                .unwrap(),
+            VotorEvent::Timeout(1)
+        ));
+
+        assert!(matches!(
+            timer_state
+                .progress(one_micro, timer_state.next_fire().unwrap())
+                .unwrap(),
+            VotorEvent::Timeout(2)
+        ));
+
+        assert!(matches!(
+            timer_state
+                .progress(one_micro, timer_state.next_fire().unwrap())
+                .unwrap(),
+            VotorEvent::Timeout(3)
+        ));
+        assert!(timer_state.next_fire().is_none());
+    }
+
+    #[test]
+    fn timers_progress() {
+        let one_micro = Duration::from_micros(1);
+        let mut now = Instant::now();
+        let (sender, receiver) = unbounded();
+        let mut timers = Timers::new(one_micro, one_micro, sender);
+        assert!(timers.progress(now).is_none());
+        assert!(receiver.try_recv().unwrap_err().is_empty());
+
+        timers.set_timeouts(0, now);
+        while timers.progress(now).is_some() {
+            now = now.checked_add(one_micro).unwrap();
+        }
+        let mut events = receiver.try_iter().collect::<Vec<_>>();
+
+        assert!(matches!(
+            events.remove(0),
+            VotorEvent::TimeoutCrashedLeader(0)
+        ));
+        assert!(matches!(events.remove(0), VotorEvent::Timeout(0)));
+        assert!(matches!(events.remove(0), VotorEvent::Timeout(1)));
+        assert!(matches!(events.remove(0), VotorEvent::Timeout(2)));
+        assert!(matches!(events.remove(0), VotorEvent::Timeout(3)));
+        assert!(events.is_empty());
+        let stats = timers.stats();
+        assert_eq!(stats.set_timeout_count(), 1);
+        assert_eq!(stats.set_timeout_succeed_count(), 1);
+        assert_eq!(stats.max_heap_size(), 1);
+    }
+}
diff --git a/votor/src/vote_history.rs b/votor/src/vote_history.rs
new file mode 100644
index 00000000000000..70f17c9ffd92fa
--- /dev/null
+++ b/votor/src/vote_history.rs
@@ -0,0 +1,312 @@
+use {
+    super::vote_history_storage::{
+        Result, SavedVoteHistory, SavedVoteHistoryVersions, VoteHistoryStorage,
+    },
+    serde::{Deserialize, Serialize},
+    solana_clock::Slot,
+    solana_hash::Hash,
+    solana_keypair::Keypair,
+    solana_pubkey::Pubkey,
+    solana_votor_messages::{consensus_message::Block, vote::Vote},
+    std::collections::{hash_map::Entry, HashMap, HashSet},
+    thiserror::Error,
+};
+
+pub const VOTE_THRESHOLD_SIZE: f64 = 2f64 / 3f64;
+
+#[cfg_attr(feature = "frozen-abi", derive(AbiExample))]
+#[derive(PartialEq, Eq, Debug, Default, Clone, Copy, Serialize, Deserialize)]
+pub(crate) enum BlockhashStatus {
+    /// No vote since restart
+    #[default]
+    Uninitialized,
+    /// Non voting validator
+    NonVoting,
+    /// Hot spare validator
+    HotSpare,
+    /// Successfully generated vote tx
with blockhash
+    Blockhash(Slot, Hash),
+}
+
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+pub enum VoteHistoryVersions {
+    Current(VoteHistory),
+}
+impl VoteHistoryVersions {
+    pub fn new_current(vote_history: VoteHistory) -> Self {
+        Self::Current(vote_history)
+    }
+
+    pub fn convert_to_current(self) -> VoteHistory {
+        match self {
+            VoteHistoryVersions::Current(vote_history) => vote_history,
+        }
+    }
+}
+
+#[cfg_attr(
+    feature = "frozen-abi",
+    derive(AbiExample),
+    frozen_abi(digest = "H9oKKcWpebSTPtnXG6Aetwb7434CrW21pxnrrusYVEPy")
+)]
+#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Default)]
+pub struct VoteHistory {
+    /// The validator identity that cast votes
+    pub node_pubkey: Pubkey,
+
+    /// The slots which this node has cast either a notarization or skip vote
+    voted: HashSet<Slot>,
+
+    /// The blocks for which this node has cast a notarization vote
+    /// In the format of slot, block_id, bank_hash
+    voted_notar: HashMap<Slot, Hash>,
+
+    /// The blocks for which this node has cast a notarization fallback
+    /// vote in this slot
+    voted_notar_fallback: HashMap<Slot, HashSet<Hash>>,
+
+    /// The slots for which this node has cast a skip fallback vote
+    voted_skip_fallback: HashSet<Slot>,
+
+    /// The slots in which this node has cast at least one of:
+    /// - `SkipVote`
+    /// - `SkipFallback`
+    /// - `NotarizeFallback`
+    skipped: HashSet<Slot>,
+
+    /// The slots for which this node has cast a finalization vote. This node
+    /// will not cast any additional votes for these slots
+    its_over: HashSet<Slot>,
+
+    /// All votes cast for a `slot`, for use in refresh
+    votes_cast: HashMap<Slot, Vec<Vote>>,
+
+    /// Blocks which have a notarization certificate via the certificate pool
+    notarized_blocks: HashSet<Block>,
+
+    /// Slots which have a parent ready condition via the certificate pool
+    parent_ready_slots: HashMap<Slot, HashSet<Block>>,
+
+    /// The latest root set by the voting loop.
The above structures will not + /// contain votes for slots before `root` + root: Slot, +} + +impl VoteHistory { + pub fn new(node_pubkey: Pubkey, root: Slot) -> Self { + Self { + node_pubkey, + root, + ..Self::default() + } + } + + /// Have we cast a notarization or skip vote for `slot` + pub fn voted(&self, slot: Slot) -> bool { + assert!(slot >= self.root); + self.voted.contains(&slot) + } + + /// The block for which we voted notarize in slot `slot` + pub fn voted_notar(&self, slot: Slot) -> Option { + assert!(slot >= self.root); + self.voted_notar.get(&slot).copied() + } + + /// Whether we voted notarize fallback in `slot` for block `(block_id, bank_hash)` + pub fn voted_notar_fallback(&self, slot: Slot, block_id: Hash) -> bool { + assert!(slot >= self.root); + self.voted_notar_fallback + .get(&slot) + .is_some_and(|v| v.contains(&block_id)) + } + + /// Whether we voted skip fallback for `slot` + pub fn voted_skip_fallback(&self, slot: Slot) -> bool { + assert!(slot >= self.root); + self.voted_skip_fallback.contains(&slot) + } + + /// Have we cast any skip vote variation for `slot` + pub fn skipped(&self, slot: Slot) -> bool { + assert!(slot >= self.root); + self.skipped.contains(&slot) + } + + /// Have we casted a finalization vote for `slot` + pub fn its_over(&self, slot: Slot) -> bool { + assert!(slot >= self.root); + self.its_over.contains(&slot) + } + + /// All votes cast since `slot` excluding `slot`, for use in + /// refresh + pub fn votes_cast_since(&self, slot: Slot) -> Vec { + self.votes_cast + .iter() + .filter(|(&s, _)| s > slot) + .flat_map(|(_, votes)| votes.iter()) + .cloned() + .collect() + } + + /// Have we casted a bad window vote for `slot`: + /// - Skip + /// - Notarize fallback + /// - Skip fallback + pub fn bad_window(&self, slot: Slot) -> bool { + assert!(slot >= self.root); + self.skipped.contains(&slot) + || self.voted_notar_fallback.contains_key(&slot) + || self.voted_skip_fallback.contains(&slot) + } + + pub fn 
is_block_notarized(&self, block: &Block) -> bool { + self.notarized_blocks.contains(block) + } + + pub fn is_parent_ready(&self, slot: Slot, parent: &Block) -> bool { + self.parent_ready_slots + .get(&slot) + .is_some_and(|ps| ps.contains(parent)) + } + + /// The latest root slot set by the voting loop + pub fn root(&self) -> Slot { + self.root + } + + /// Add a new vote to the voting history + pub fn add_vote(&mut self, vote: Vote) { + assert!(vote.slot() >= self.root); + // TODO: these assert!s are for my debugging, can consider removing + // in final version + match vote { + Vote::Notarize(vote) => { + assert!(self.voted.insert(vote.slot())); + assert!(self + .voted_notar + .insert(vote.slot(), *vote.block_id()) + .is_none()); + } + Vote::Finalize(vote) => { + assert!(!self.skipped(vote.slot())); + self.its_over.insert(vote.slot()); + } + Vote::Skip(vote) => { + self.voted.insert(vote.slot()); + self.skipped.insert(vote.slot()); + } + Vote::NotarizeFallback(vote) => { + assert!(self.voted(vote.slot())); + assert!(!self.its_over(vote.slot())); + self.skipped.insert(vote.slot()); + self.voted_notar_fallback + .entry(vote.slot()) + .or_default() + .insert(*vote.block_id()); + } + Vote::SkipFallback(vote) => { + assert!(self.voted(vote.slot())); + assert!(!self.its_over(vote.slot())); + self.skipped.insert(vote.slot()); + self.voted_skip_fallback.insert(vote.slot()); + } + } + self.votes_cast.entry(vote.slot()).or_default().push(vote); + } + + /// Add a new notarized block + pub fn add_block_notarized(&mut self, block @ (slot, _): Block) { + if slot < self.root { + return; + } + self.notarized_blocks.insert(block); + } + + /// Add a new parent ready slot + /// + /// Returns true if the insertion was successful and this was the + /// first parent ready for this slot, indicating we should set timeouts. 
+ pub fn add_parent_ready(&mut self, slot: Slot, parent: Block) -> bool { + if slot < self.root { + return false; + } + match self.parent_ready_slots.entry(slot) { + Entry::Occupied(mut entry) => { + entry.get_mut().insert(parent); + false + } + Entry::Vacant(entry) => { + entry.insert(HashSet::from([parent])); + true + } + } + } + + pub fn highest_parent_ready_slot(&self) -> Option { + self.parent_ready_slots.keys().max().copied() + } + + /// Sets the new root slot and cleans up outdated slots < `root` + pub fn set_root(&mut self, root: Slot) { + self.root = root; + self.voted.retain(|s| *s >= root); + self.voted_notar.retain(|s, _| *s >= root); + self.voted_notar_fallback.retain(|s, _| *s >= root); + self.voted_skip_fallback.retain(|s| *s >= root); + self.skipped.retain(|s| *s >= root); + self.its_over.retain(|s| *s >= root); + self.votes_cast.retain(|s, _| *s >= root); + self.notarized_blocks.retain(|(s, _)| *s >= root); + self.parent_ready_slots.retain(|s, _| *s >= root); + } + + #[allow(dead_code)] + /// Save the vote history to `vote_history_storage` signed by `node_keypair` + pub fn save( + &self, + vote_history_storage: &dyn VoteHistoryStorage, + node_keypair: &Keypair, + ) -> Result<()> { + let saved_vote_history = SavedVoteHistory::new(self, node_keypair)?; + vote_history_storage.store(&SavedVoteHistoryVersions::from(saved_vote_history))?; + Ok(()) + } + + /// Restore the saved vote history from `vote_history_storage` for `node_pubkey` + pub fn restore( + vote_history_storage: &dyn VoteHistoryStorage, + node_pubkey: &Pubkey, + ) -> Result { + vote_history_storage.load(node_pubkey) + } +} + +#[derive(Error, Debug)] +pub enum VoteHistoryError { + #[error("IO Error: {0}")] + IoError(#[from] std::io::Error), + + #[error("Serialization Error: {0}")] + SerializeError(#[from] bincode::Error), + + #[error("The signature on the saved vote history is invalid")] + InvalidSignature, + + #[error("The vote history does not match this validator: {0}")] + 
WrongVoteHistory(String), + + #[error("The vote history is useless because of new hard fork: {0}")] + HardFork(Slot), +} + +impl VoteHistoryError { + pub fn is_file_missing(&self) -> bool { + if let VoteHistoryError::IoError(io_err) = &self { + io_err.kind() == std::io::ErrorKind::NotFound + } else { + false + } + } +} diff --git a/votor/src/vote_history_storage.rs b/votor/src/vote_history_storage.rs new file mode 100644 index 00000000000000..235f96c7a220e8 --- /dev/null +++ b/votor/src/vote_history_storage.rs @@ -0,0 +1,172 @@ +use { + super::vote_history::*, + log::trace, + serde::{Deserialize, Serialize}, + solana_pubkey::Pubkey, + solana_signature::Signature, + solana_signer::Signer, + std::{ + fs::{self, File}, + io::{self, BufReader}, + path::PathBuf, + }, +}; + +pub type Result = std::result::Result; + +#[cfg_attr(feature = "frozen-abi", derive(AbiExample))] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] +pub enum SavedVoteHistoryVersions { + Current(SavedVoteHistory), +} + +impl SavedVoteHistoryVersions { + fn try_into_vote_history(&self, node_pubkey: &Pubkey) -> Result { + // This method assumes that `self` was just deserialized + assert_eq!(self.pubkey(), Pubkey::default()); + + let vote_history = match self { + SavedVoteHistoryVersions::Current(t) => { + if !t.signature.verify(node_pubkey.as_ref(), &t.data) { + return Err(VoteHistoryError::InvalidSignature); + } + bincode::deserialize(&t.data).map(VoteHistoryVersions::Current) + } + }; + vote_history + .map_err(|e| e.into()) + .and_then(|vote_history: VoteHistoryVersions| { + let vote_history = vote_history.convert_to_current(); + if vote_history.node_pubkey != *node_pubkey { + return Err(VoteHistoryError::WrongVoteHistory(format!( + "node_pubkey is {:?} but found vote history for {:?}", + node_pubkey, vote_history.node_pubkey + ))); + } + Ok(vote_history) + }) + } + + fn serialize_into(&self, file: &mut File) -> Result<()> { + bincode::serialize_into(file, self).map_err(|e| e.into()) + 
} + + fn pubkey(&self) -> Pubkey { + match self { + SavedVoteHistoryVersions::Current(t) => t.node_pubkey, + } + } +} + +impl From for SavedVoteHistoryVersions { + fn from(vote_history: SavedVoteHistory) -> SavedVoteHistoryVersions { + SavedVoteHistoryVersions::Current(vote_history) + } +} + +#[cfg_attr( + feature = "frozen-abi", + derive(AbiExample), + frozen_abi(digest = "2kq63kt6dJvJaUG7c1jGazLKeGXZc5yN3GDocMz8c5jB") +)] +#[derive(Default, Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct SavedVoteHistory { + signature: Signature, + #[serde(with = "serde_bytes")] + data: Vec, + #[serde(skip)] + node_pubkey: Pubkey, +} + +impl SavedVoteHistory { + pub fn new(vote_history: &VoteHistory, keypair: &T) -> Result { + let node_pubkey = keypair.pubkey(); + if vote_history.node_pubkey != node_pubkey { + return Err(VoteHistoryError::WrongVoteHistory(format!( + "node_pubkey is {:?} but found vote history for {:?}", + node_pubkey, vote_history.node_pubkey + ))); + } + + let data = bincode::serialize(&vote_history)?; + let signature = keypair.sign_message(&data); + Ok(Self { + signature, + data, + node_pubkey, + }) + } +} + +pub trait VoteHistoryStorage: Sync + Send { + fn load(&self, node_pubkey: &Pubkey) -> Result; + fn store(&self, saved_vote_history: &SavedVoteHistoryVersions) -> Result<()>; +} + +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct NullVoteHistoryStorage {} + +impl VoteHistoryStorage for NullVoteHistoryStorage { + fn load(&self, _node_pubkey: &Pubkey) -> Result { + Err(VoteHistoryError::IoError(io::Error::new( + io::ErrorKind::Other, + "NullVoteHistoryStorage::load() not available", + ))) + } + + fn store(&self, _saved_vote_history: &SavedVoteHistoryVersions) -> Result<()> { + Ok(()) + } +} + +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct FileVoteHistoryStorage { + pub vote_history_path: PathBuf, +} + +impl FileVoteHistoryStorage { + pub fn new(vote_history_path: PathBuf) -> Self { + Self { vote_history_path } + } + 
+ pub fn filename(&self, node_pubkey: &Pubkey) -> PathBuf { + self.vote_history_path + .join(format!("vote_history-{node_pubkey}")) + .with_extension("bin") + } +} + +impl VoteHistoryStorage for FileVoteHistoryStorage { + fn load(&self, node_pubkey: &Pubkey) -> Result { + let filename = self.filename(node_pubkey); + trace!("load {}", filename.display()); + + // Ensure to create parent dir here, because restore() precedes save() always + fs::create_dir_all(filename.parent().unwrap())?; + + // New format + let file = File::open(&filename)?; + let mut stream = BufReader::new(file); + + bincode::deserialize_from(&mut stream) + .map_err(|e| e.into()) + .and_then(|t: SavedVoteHistoryVersions| t.try_into_vote_history(node_pubkey)) + } + + fn store(&self, saved_vote_history: &SavedVoteHistoryVersions) -> Result<()> { + let pubkey = saved_vote_history.pubkey(); + let filename = self.filename(&pubkey); + trace!("store: {}", filename.display()); + let new_filename = filename.with_extension("bin.new"); + + { + // overwrite anything if exists + let mut file = File::create(&new_filename)?; + saved_vote_history.serialize_into(&mut file)?; + // file.sync_all() hurts performance; pipeline sync-ing and submitting votes to the cluster! 
+ } + fs::rename(&new_filename, &filename)?; + // self.path.parent().sync_all() hurts performance same as the above sync + Ok(()) + } +} diff --git a/votor/src/voting_utils.rs b/votor/src/voting_utils.rs new file mode 100644 index 00000000000000..4b960f0b935c81 --- /dev/null +++ b/votor/src/voting_utils.rs @@ -0,0 +1,325 @@ +use { + crate::{ + commitment::{AlpenglowCommitmentAggregationData, AlpenglowCommitmentError}, + vote_history::{VoteHistory, VoteHistoryError}, + vote_history_storage::{SavedVoteHistory, SavedVoteHistoryVersions}, + }, + crossbeam_channel::{SendError, Sender}, + solana_bls_signatures::{keypair::Keypair as BLSKeypair, BlsError, Pubkey as BLSPubkey}, + solana_clock::Slot, + solana_keypair::Keypair, + solana_pubkey::Pubkey, + solana_runtime::{bank::Bank, bank_forks::SharableBanks}, + solana_signer::Signer, + solana_transaction::Transaction, + solana_votor_messages::{ + consensus_message::{ + CertificateMessage, ConsensusMessage, VoteMessage, BLS_KEYPAIR_DERIVE_SEED, + }, + vote::Vote, + }, + std::{collections::HashMap, sync::Arc}, + thiserror::Error, +}; + +#[derive(Debug)] +pub enum GenerateVoteTxResult { + // The following are transient errors + // non voting validator, not eligible for refresh + // until authorized keypair is overriden + NonVoting, + // hot spare validator, not eligble for refresh + // until set identity is invoked + HotSpare, + // The hash verification at startup has not completed + WaitForStartupVerification, + // Wait to vote slot is not reached + WaitToVoteSlot(Slot), + // no rank found, this can happen if the validator + // is not staked in the current epoch, but it may + // still be staked in future or past epochs, so this + // is considered a transient error + NoRankFound, + + // The following are misconfiguration errors + // The authorized voter for the given pubkey and Epoch does not exist + NoAuthorizedVoter(Pubkey, u64), + // The vote state associated with given pubkey does not exist + NoVoteState(Pubkey), + // The 
vote account associated with given pubkey does not exist + VoteAccountNotFound(Pubkey), + + // The following are the successful cases + // Generated a vote transaction + Tx(Transaction), + // Generated a ConsensusMessage + ConsensusMessage(ConsensusMessage), +} + +impl GenerateVoteTxResult { + pub fn is_non_voting(&self) -> bool { + matches!(self, Self::NonVoting) + } + + pub fn is_hot_spare(&self) -> bool { + matches!(self, Self::HotSpare) + } + + pub fn is_invalid_config(&self) -> bool { + match self { + Self::NoAuthorizedVoter(_, _) | Self::NoVoteState(_) | Self::VoteAccountNotFound(_) => { + true + } + Self::NonVoting + | Self::HotSpare + | Self::WaitForStartupVerification + | Self::WaitToVoteSlot(_) + | Self::NoRankFound => false, + Self::Tx(_) | Self::ConsensusMessage(_) => false, + } + } + + pub fn is_transient_error(&self) -> bool { + match self { + Self::NoAuthorizedVoter(_, _) | Self::NoVoteState(_) | Self::VoteAccountNotFound(_) => { + false + } + Self::NonVoting + | Self::HotSpare + | Self::WaitForStartupVerification + | Self::WaitToVoteSlot(_) + | Self::NoRankFound => true, + Self::Tx(_) | Self::ConsensusMessage(_) => false, + } + } +} + +#[derive(Debug)] +pub enum BLSOp { + PushVote { + message: Arc, + slot: Slot, + saved_vote_history: SavedVoteHistoryVersions, + }, + PushCertificate { + certificate: Arc, + }, +} + +#[derive(Debug, Error)] +pub enum VoteError { + #[error("Unable to generate bls vote message, transient error: {0:?}")] + TransientError(Box), + + #[error("Unable to generate bls vote message, configuration error: {0:?}")] + InvalidConfig(Box), + + #[error("Unable to send to certificate pool")] + CertificatePoolError(#[from] SendError<()>), + + #[error("Commitment sender error {0}")] + CommitmentSenderError(#[from] AlpenglowCommitmentError), + + #[error("Saved vote history error {0}")] + SavedVoteHistoryError(#[from] VoteHistoryError), +} + +/// Context required to construct vote transactions +pub struct VotingContext { + pub vote_history: 
VoteHistory, + pub vote_account_pubkey: Pubkey, + pub identity_keypair: Arc, + pub authorized_voter_keypairs: Arc>>>, + // The BLS keypair should always change with authorized_voter_keypairs. + pub derived_bls_keypairs: HashMap>, + pub has_new_vote_been_rooted: bool, + pub own_vote_sender: Sender, + pub bls_sender: Sender, + pub commitment_sender: Sender, + pub wait_to_vote_slot: Option, + pub sharable_banks: SharableBanks, +} + +pub fn get_bls_keypair( + context: &mut VotingContext, + authorized_voter_keypair: &Arc, +) -> Result, BlsError> { + let pubkey = authorized_voter_keypair.pubkey(); + if let Some(existing) = context.derived_bls_keypairs.get(&pubkey) { + return Ok(existing.clone()); + } + + let bls_keypair = Arc::new(BLSKeypair::derive_from_signer( + authorized_voter_keypair, + BLS_KEYPAIR_DERIVE_SEED, + )?); + + context + .derived_bls_keypairs + .insert(pubkey, bls_keypair.clone()); + + Ok(bls_keypair) +} + +pub fn generate_vote_tx( + vote: &Vote, + bank: &Bank, + context: &mut VotingContext, +) -> GenerateVoteTxResult { + let vote_account_pubkey = context.vote_account_pubkey; + let authorized_voter_keypair; + let bls_pubkey_in_vote_account; + { + let authorized_voter_keypairs = context.authorized_voter_keypairs.read().unwrap(); + if !bank.has_initial_accounts_hash_verification_completed() { + return GenerateVoteTxResult::WaitForStartupVerification; + } + if authorized_voter_keypairs.is_empty() { + return GenerateVoteTxResult::NonVoting; + } + if let Some(slot) = context.wait_to_vote_slot { + if vote.slot() < slot { + return GenerateVoteTxResult::WaitToVoteSlot(slot); + } + } + let Some(vote_account) = bank.get_vote_account(&vote_account_pubkey) else { + return GenerateVoteTxResult::VoteAccountNotFound(vote_account_pubkey); + }; + let vote_state = vote_account.vote_state_view(); + if *vote_state.node_pubkey() != context.identity_keypair.pubkey() { + info!( + "Vote account node_pubkey mismatch: {} (expected: {}). 
Unable to vote", + vote_state.node_pubkey(), + context.identity_keypair.pubkey() + ); + return GenerateVoteTxResult::HotSpare; + } + bls_pubkey_in_vote_account = match vote_account.bls_pubkey() { + None => { + panic!( + "No BLS pubkey in vote account {}", + context.identity_keypair.pubkey() + ); + } + Some(key) => key, + }; + + let Some(authorized_voter_pubkey) = vote_state.get_authorized_voter(bank.epoch()) else { + return GenerateVoteTxResult::NoAuthorizedVoter(vote_account_pubkey, bank.epoch()); + }; + + let Some(keypair) = authorized_voter_keypairs + .iter() + .find(|keypair| keypair.pubkey() == *authorized_voter_pubkey) + else { + warn!( + "The authorized keypair {authorized_voter_pubkey} for vote account \ + {vote_account_pubkey} is not available. Unable to vote" + ); + return GenerateVoteTxResult::NonVoting; + }; + + authorized_voter_keypair = keypair.clone(); + } + + let bls_keypair = get_bls_keypair(context, &authorized_voter_keypair) + .unwrap_or_else(|e| panic!("Failed to derive my own BLS keypair: {e:?}")); + let my_bls_pubkey: BLSPubkey = bls_keypair.public; + if my_bls_pubkey != bls_pubkey_in_vote_account { + panic!( + "Vote account bls_pubkey mismatch: {:?} (expected: {:?}). 
Unable to vote", + bls_pubkey_in_vote_account, my_bls_pubkey + ); + } + let vote_serialized = bincode::serialize(&vote).unwrap(); + + let Some(epoch_stakes) = bank.epoch_stakes(bank.epoch()) else { + panic!( + "The bank {} doesn't have its own epoch_stakes for {}", + bank.slot(), + bank.epoch() + ); + }; + let Some(my_rank) = epoch_stakes + .bls_pubkey_to_rank_map() + .get_rank(&my_bls_pubkey) + else { + return GenerateVoteTxResult::NoRankFound; + }; + GenerateVoteTxResult::ConsensusMessage(ConsensusMessage::Vote(VoteMessage { + vote: *vote, + signature: bls_keypair.sign(&vote_serialized).into(), + rank: *my_rank, + })) +} + +/// Send an alpenglow vote as a ConsensusMessage +/// `bank` will be used for: +/// - startup verification +/// - vote account checks +/// - authorized voter checks +/// +/// We also update the vote history and send the vote to +/// the certificate pool thread for ingestion. +/// +/// Returns false if we are currently a non-voting node +fn insert_vote_and_create_bls_message( + vote: Vote, + is_refresh: bool, + context: &mut VotingContext, +) -> Result { + // Update and save the vote history + if !is_refresh { + context.vote_history.add_vote(vote); + } + + let bank = context.sharable_banks.root(); + let message = match generate_vote_tx(&vote, &bank, context) { + GenerateVoteTxResult::ConsensusMessage(m) => m, + e => { + if e.is_transient_error() { + return Err(VoteError::TransientError(Box::new(e))); + } else { + return Err(VoteError::InvalidConfig(Box::new(e))); + } + } + }; + context + .own_vote_sender + .send(message.clone()) + .map_err(|_| SendError(()))?; + + // TODO: for refresh votes use a different BLSOp so we don't have to rewrite the same vote history to file + let saved_vote_history = + SavedVoteHistory::new(&context.vote_history, &context.identity_keypair)?; + + // Return vote for sending + Ok(BLSOp::PushVote { + message: Arc::new(message), + slot: vote.slot(), + saved_vote_history: 
SavedVoteHistoryVersions::from(saved_vote_history), + }) +} + +pub fn generate_vote_message( + vote: Vote, + is_refresh: bool, + vctx: &mut VotingContext, +) -> Result, VoteError> { + let bls_op = match insert_vote_and_create_bls_message(vote, is_refresh, vctx) { + Ok(bls_op) => bls_op, + Err(VoteError::InvalidConfig(e)) => { + warn!("Failed to generate vote and push to votes: {:?}", e); + // These are not fatal errors, just skip the vote for now. But they are misconfigurations + // that should be warned about. + return Ok(None); + } + Err(VoteError::TransientError(e)) => { + info!("Failed to generate vote and push to votes: {:?}", e); + // These are transient errors, just skip the vote for now. + return Ok(None); + } + Err(e) => return Err(e), + }; + Ok(Some(bls_op)) +} diff --git a/votor/src/votor.rs b/votor/src/votor.rs new file mode 100644 index 00000000000000..da76e95ec004cc --- /dev/null +++ b/votor/src/votor.rs @@ -0,0 +1,295 @@ +//! ```text +//! The entrypoint into votor the module responsible for voting, rooting, and notifying +//! the core to create a new block. +//! +//! Votor +//! ┌────────────────────────────────────────────────────────────────────────────┐ +//! │ │ +//! │ Push Certificate │ +//! │ ┌───────────────────────────────────────────────────────────────────│────────┐ +//! │ │ Parent Ready │ │ +//! │ │ Standstill │ │ +//! │ │ Finalized │ │ +//! │ │ Block Notarized │ │ +//! │ │ ┌─────────Safe To Notar/Skip───┐ Push │ │ +//! │ │ │ Produce Window │ Vote │ │ +//! │ │ │ │ ┌────────────────────────│──────┐ │ +//! │ │ │ │ │ │ ┌────▼─▼───────┐ +//! │ │ │ │ │ │ │Voting Service│ +//! │ │ │ │ │ │ └──────────────┘ +//! │ │ │ │ │ │ +//! │ ┌────┼─────────┼───────────────┐ │ │ │ +//! │ │ │ │ │ Block │ ┌────────────────────┐ +//! │ │ Certificate Pool Service │ │ │ ┌─────────────────────│─┼ Replay / Broadcast │ +//! │ │ │ │ │ │ │ └────────────────────┘ +//! │ │ ┌──────────────────────────┐ │ │ │ │ │ +//! │ │ │ │ │ │ │ │ │ +//! 
│ │ │ Certificate Pool │ │ │ │ │ │ +//! │ │ │ ┌────────────────────┐ │ │ ┌────▼─┼──▼───────┐ Start │ +//! │ │ │ │Parent ready tracker│ │ │ Vote │ │ Leader window ┌──────────────────────┐ +//! │ │ │ └────────────────────┘ │ ◄─────────┼ Event Handler ┼─────────────│─► Block creation loop │ +//! │ │ └──────────────────────────┘ │ │ │ │ └──────────────────────┘ +//! │ │ │ └─▲───────────┬───┘ │ +//! │ └──────────────────────────────┘ │ │ │ +//! │ Timeout │ │ │ +//! │ │ │ Set Timeouts │ +//! │ │ │ │ +//! │ ┌───────────────────┴┐ ┌────▼───────────────┐ │ +//! │ │ │ │ │ │ +//! │ │ Timer Service ┼─────┼ timer Manager │ │ +//! │ │ │ │ │ │ +//! │ └────────────────────┘ └────────────────────┘ │ +//! └────────────────────────────────────────────────────────────────────────────┘ +//! ``` +use { + crate::{ + certificate_pool_service::{CertificatePoolContext, CertificatePoolService}, + commitment::AlpenglowCommitmentAggregationData, + event::{LeaderWindowInfo, VotorEventReceiver, VotorEventSender}, + event_handler::{EventHandler, EventHandlerContext}, + root_utils::RootContext, + timer_manager::TimerManager, + vote_history::VoteHistory, + vote_history_storage::VoteHistoryStorage, + voting_utils::{BLSOp, VotingContext}, + }, + crossbeam_channel::{Receiver, Sender}, + parking_lot::RwLock as PlRwLock, + solana_clock::Slot, + solana_gossip::cluster_info::ClusterInfo, + solana_hash::Hash, + solana_keypair::Keypair, + solana_ledger::{blockstore::Blockstore, leader_schedule_cache::LeaderScheduleCache}, + solana_pubkey::Pubkey, + solana_rpc::{ + optimistically_confirmed_bank_tracker::BankNotificationSenderConfig, + rpc_subscriptions::RpcSubscriptions, + }, + solana_runtime::{ + bank_forks::BankForks, installed_scheduler_pool::BankWithScheduler, + snapshot_controller::SnapshotController, + }, + solana_votor_messages::consensus_message::{Certificate, CertificateMessage, ConsensusMessage}, + std::{ + collections::HashMap, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Condvar, Mutex, 
RwLock, + }, + thread, + time::Duration, + }, +}; + +/// Communication with the block creation loop to notify leader window +#[derive(Default)] +pub struct LeaderWindowNotifier { + pub window_info: Mutex>, + pub window_notification: Condvar, + pub highest_parent_ready: RwLock<(Slot, (Slot, Hash))>, +} + +/// Inputs to Votor +pub struct VotorConfig { + pub exit: Arc, + // Validator config + pub vote_account: Pubkey, + pub wait_to_vote_slot: Option, + pub wait_for_vote_to_start_leader: bool, + pub vote_history: VoteHistory, + pub vote_history_storage: Arc, + + // Shared state + pub authorized_voter_keypairs: Arc>>>, + pub blockstore: Arc, + pub bank_forks: Arc>, + pub cluster_info: Arc, + pub leader_schedule_cache: Arc, + pub rpc_subscriptions: Option>, + + // Senders / Notifiers + pub snapshot_controller: Option>, + pub bls_sender: Sender, + pub commitment_sender: Sender, + pub drop_bank_sender: Sender>, + pub bank_notification_sender: Option, + pub leader_window_notifier: Arc, + pub certificate_sender: Sender<(Certificate, CertificateMessage)>, + pub event_sender: VotorEventSender, + pub own_vote_sender: Sender, + + // Receivers + pub event_receiver: VotorEventReceiver, + pub consensus_message_receiver: Receiver, +} + +/// Context shared with block creation, replay, gossip, banking stage etc +pub(crate) struct SharedContext { + pub(crate) blockstore: Arc, + pub(crate) bank_forks: Arc>, + pub(crate) cluster_info: Arc, + pub(crate) rpc_subscriptions: Option>, + pub(crate) leader_window_notifier: Arc, + pub(crate) vote_history_storage: Arc, +} + +pub struct Votor { + // TODO: Just a placeholder for how migration could look like, + // will fix once we finish the strategy + #[allow(dead_code)] + start: Arc<(Mutex, Condvar)>, + + event_handler: EventHandler, + certificate_pool_service: CertificatePoolService, + timer_manager: Arc>, +} + +impl Votor { + pub fn new(config: VotorConfig) -> Self { + let VotorConfig { + exit, + vote_account, + wait_to_vote_slot, + 
wait_for_vote_to_start_leader, + vote_history, + vote_history_storage, + authorized_voter_keypairs, + blockstore, + bank_forks, + cluster_info, + leader_schedule_cache, + rpc_subscriptions, + snapshot_controller, + bls_sender, + commitment_sender, + drop_bank_sender, + bank_notification_sender, + leader_window_notifier, + certificate_sender, + event_sender, + event_receiver, + own_vote_sender, + consensus_message_receiver: bls_receiver, + } = config; + + let start = Arc::new((Mutex::new(false), Condvar::new())); + + let identity_keypair = cluster_info.keypair().clone(); + let has_new_vote_been_rooted = !wait_for_vote_to_start_leader; + + // Get the sharable root bank + let sharable_banks = bank_forks.read().unwrap().sharable_banks(); + + let shared_context = SharedContext { + blockstore: blockstore.clone(), + bank_forks: bank_forks.clone(), + cluster_info: cluster_info.clone(), + rpc_subscriptions, + leader_window_notifier, + vote_history_storage, + }; + + let voting_context = VotingContext { + vote_history, + vote_account_pubkey: vote_account, + identity_keypair: identity_keypair.clone(), + authorized_voter_keypairs, + derived_bls_keypairs: HashMap::new(), + has_new_vote_been_rooted, + own_vote_sender, + bls_sender: bls_sender.clone(), + commitment_sender: commitment_sender.clone(), + wait_to_vote_slot, + sharable_banks: sharable_banks.clone(), + }; + + let root_context = RootContext { + leader_schedule_cache: leader_schedule_cache.clone(), + snapshot_controller, + bank_notification_sender, + drop_bank_sender, + }; + + let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( + event_sender.clone(), + exit.clone(), + ))); + + let event_handler_context = EventHandlerContext { + exit: exit.clone(), + start: start.clone(), + event_receiver, + timer_manager: Arc::clone(&timer_manager), + shared_context, + voting_context, + root_context, + }; + + let cert_pool_context = CertificatePoolContext { + exit: exit.clone(), + start: start.clone(), + cluster_info: 
cluster_info.clone(), + my_vote_pubkey: vote_account, + blockstore, + sharable_banks, + leader_schedule_cache, + consensus_message_receiver: bls_receiver, + bls_sender, + event_sender, + commitment_sender, + certificate_sender, + }; + + let event_handler = EventHandler::new(event_handler_context); + let certificate_pool_service = CertificatePoolService::new(cert_pool_context); + + Self { + start, + event_handler, + certificate_pool_service, + timer_manager, + } + } + + pub fn start_migration(&self) { + // TODO: evaluate once we have actual migration logic + let (lock, cvar) = &*self.start; + let mut started = lock.lock().unwrap(); + *started = true; + cvar.notify_all(); + } + + pub(crate) fn wait_for_migration_or_exit( + exit: &AtomicBool, + (lock, cvar): &(Mutex, Condvar), + ) { + let mut started = lock.lock().unwrap(); + while !*started { + if exit.load(Ordering::Relaxed) { + return; + } + // Add timeout to check for exit flag + (started, _) = cvar.wait_timeout(started, Duration::from_secs(5)).unwrap(); + } + } + + pub fn join(self) -> thread::Result<()> { + self.certificate_pool_service.join()?; + + // Loop till we manage to unwrap the Arc and then we can join. + let mut timer_manager = self.timer_manager; + loop { + match Arc::try_unwrap(timer_manager) { + Ok(manager) => { + manager.into_inner().join(); + break; + } + Err(m) => { + timer_manager = m; + thread::sleep(Duration::from_millis(1)); + } + } + } + self.event_handler.join() + } +}