Skip to content

Commit

Permalink
consensus key rotation support (#13926)
Browse files Browse the repository at this point in the history
* debug

* consensus_key_rotation

* update

* update

* update

* update

* update

* update

* update

* update

* update

* persist idd file

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* write new key in

* find sk by pk in components

* debug

* update

* make the test a bit harder to pass

* update

* lint
  • Loading branch information
zjma authored Aug 22, 2024
1 parent f554a31 commit 55ff034
Show file tree
Hide file tree
Showing 44 changed files with 480 additions and 222 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion aptos-node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ aptos-peer-monitoring-service-client = { workspace = true }
aptos-peer-monitoring-service-server = { workspace = true }
aptos-peer-monitoring-service-types = { workspace = true }
aptos-runtimes = { workspace = true }
aptos-safety-rules = { workspace = true }
aptos-state-sync-driver = { workspace = true }
aptos-storage-interface = { workspace = true }
aptos-storage-service-client = { workspace = true }
Expand Down
25 changes: 6 additions & 19 deletions aptos-node/src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@ use aptos_event_notifications::{
DbBackedOnChainConfig, EventNotificationListener, ReconfigNotificationListener,
};
use aptos_jwk_consensus::{start_jwk_consensus_runtime, types::JWKConsensusMsg};
use aptos_logger::debug;
use aptos_mempool::QuorumStoreRequest;
use aptos_network::application::interface::{NetworkClient, NetworkServiceEvents};
use aptos_safety_rules::safety_rules_manager::load_consensus_key_from_secure_storage;
use aptos_storage_interface::DbReaderWriter;
use aptos_validator_transaction_pool::VTxnPoolState;
use futures::channel::mpsc::Sender;
Expand Down Expand Up @@ -73,13 +71,9 @@ pub fn create_dkg_runtime(
)>,
dkg_network_interfaces: Option<ApplicationNetworkInterfaces<DKGMessage>>,
) -> (VTxnPoolState, Option<Runtime>) {
let maybe_dkg_dealer_sk =
load_consensus_key_from_secure_storage(&node_config.consensus.safety_rules);
debug!("maybe_dkg_dealer_sk={:?}", maybe_dkg_dealer_sk);

let vtxn_pool = VTxnPoolState::default();
let dkg_runtime = match (dkg_network_interfaces, maybe_dkg_dealer_sk) {
(Some(interfaces), Ok(dkg_dealer_sk)) => {
let dkg_runtime = match dkg_network_interfaces {
Some(interfaces) => {
let ApplicationNetworkInterfaces {
network_client,
network_service_events,
Expand All @@ -90,7 +84,7 @@ pub fn create_dkg_runtime(
let rb_config = node_config.consensus.rand_rb_config.clone();
let dkg_runtime = start_dkg_runtime(
my_addr,
dkg_dealer_sk,
&node_config.consensus.safety_rules,
network_client,
network_service_events,
reconfig_events,
Expand All @@ -117,15 +111,8 @@ pub fn create_jwk_consensus_runtime(
jwk_consensus_network_interfaces: Option<ApplicationNetworkInterfaces<JWKConsensusMsg>>,
vtxn_pool: &VTxnPoolState,
) -> Option<Runtime> {
let maybe_jwk_consensus_key =
load_consensus_key_from_secure_storage(&node_config.consensus.safety_rules);
debug!(
"jwk_consensus_key_err={:?}",
maybe_jwk_consensus_key.as_ref().err()
);

let jwk_consensus_runtime = match (jwk_consensus_network_interfaces, maybe_jwk_consensus_key) {
(Some(interfaces), Ok(consensus_key)) => {
let jwk_consensus_runtime = match jwk_consensus_network_interfaces {
Some(interfaces) => {
let ApplicationNetworkInterfaces {
network_client,
network_service_events,
Expand All @@ -136,7 +123,7 @@ pub fn create_jwk_consensus_runtime(
let my_addr = node_config.validator_network.as_ref().unwrap().peer_id();
let jwk_consensus_runtime = start_jwk_consensus_runtime(
my_addr,
consensus_key,
&node_config.consensus.safety_rules,
network_client,
network_service_events,
reconfig_events,
Expand Down
15 changes: 1 addition & 14 deletions aptos-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ use anyhow::anyhow;
use aptos_admin_service::AdminService;
use aptos_api::bootstrap as bootstrap_api;
use aptos_build_info::build_information;
use aptos_config::config::{
merge_node_config, InitialSafetyRulesConfig, NodeConfig, PersistableConfig,
};
use aptos_config::config::{merge_node_config, NodeConfig, PersistableConfig};
use aptos_framework::ReleaseBundle;
use aptos_logger::{prelude::*, telemetry_log_writer::TelemetryLog, Level, LoggerFilterUpdater};
use aptos_state_sync_driver::driver_factory::StateSyncRuntimes;
Expand Down Expand Up @@ -703,17 +701,6 @@ pub fn setup_environment_and_start_node(
peers_and_metadata,
);

// Ensure consensus key in secure DB.
if !matches!(
node_config
.consensus
.safety_rules
.initial_safety_rules_config,
InitialSafetyRulesConfig::None
) {
aptos_safety_rules::safety_rules_manager::storage(&node_config.consensus.safety_rules);
}

// Create the DKG runtime and get the VTxn pool
let (vtxn_pool, dkg_runtime) =
consensus::create_dkg_runtime(&mut node_config, dkg_subscriptions, dkg_network_interfaces);
Expand Down
1 change: 1 addition & 0 deletions config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,6 @@ tempfile = { workspace = true }
default = []
failpoints = []
fuzzing = ["aptos-crypto/fuzzing", "aptos-types/fuzzing"]
smoke-test = []
testing = []
tokio-console = []
41 changes: 40 additions & 1 deletion config/src/config/safety_rules_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,22 @@ impl ConfigSanitizer for SafetyRulesConfig {
pub enum InitialSafetyRulesConfig {
FromFile {
identity_blob_path: PathBuf,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
overriding_identity_paths: Vec<PathBuf>,
waypoint: WaypointConfig,
},
None,
}

impl InitialSafetyRulesConfig {
pub fn from_file(identity_blob_path: PathBuf, waypoint: WaypointConfig) -> Self {
pub fn from_file(
identity_blob_path: PathBuf,
overriding_identity_paths: Vec<PathBuf>,
waypoint: WaypointConfig,
) -> Self {
Self::FromFile {
identity_blob_path,
overriding_identity_paths,
waypoint,
}
}
Expand Down Expand Up @@ -160,6 +167,38 @@ impl InitialSafetyRulesConfig {
},
}
}

pub fn overriding_identity_blobs(&self) -> anyhow::Result<Vec<IdentityBlob>> {
match self {
InitialSafetyRulesConfig::FromFile {
overriding_identity_paths,
..
} => {
let mut blobs = vec![];
for path in overriding_identity_paths {
let blob = IdentityBlob::from_file(path)?;
blobs.push(blob);
}
Ok(blobs)
},
InitialSafetyRulesConfig::None => {
bail!("loading overriding identity blobs failed with missing initial safety rules config")
},
}
}

#[cfg(feature = "smoke-test")]
pub fn overriding_identity_blob_paths_mut(&mut self) -> &mut Vec<PathBuf> {
match self {
InitialSafetyRulesConfig::FromFile {
overriding_identity_paths,
..
} => overriding_identity_paths,
InitialSafetyRulesConfig::None => {
unreachable!()
},
}
}
}

/// Defines how safety rules should be executed
Expand Down
2 changes: 1 addition & 1 deletion consensus/safety-rules/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ repository = { workspace = true }
rust-version = { workspace = true }

[dependencies]
anyhow = { workspace = true }
aptos-config = { workspace = true }
aptos-consensus-types = { workspace = true }
aptos-crypto = { workspace = true }
Expand All @@ -25,6 +24,7 @@ aptos-secure-net = { workspace = true }
aptos-secure-storage = { workspace = true }
aptos-types = { workspace = true }
aptos-vault-client = { workspace = true }
hex = { workspace = true }
once_cell = { workspace = true }
proptest = { workspace = true, optional = true }
rand = { workspace = true }
Expand Down
29 changes: 22 additions & 7 deletions consensus/safety-rules/src/persistent_safety_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,32 @@ impl PersistentSafetyStorage {
Ok(self.internal_store.get(OWNER_ACCOUNT).map(|v| v.value)?)
}

pub fn consensus_key_for_version(
pub fn consensus_sk_by_pk(
&self,
version: bls12381::PublicKey,
pk: bls12381::PublicKey,
) -> Result<bls12381::PrivateKey, Error> {
let _timer = counters::start_timer("get", CONSENSUS_KEY);
let key: bls12381::PrivateKey = self.internal_store.get(CONSENSUS_KEY).map(|v| v.value)?;
if key.public_key() != version {
let pk_hex = hex::encode(pk.to_bytes());
let explicit_storage_key = format!("{}_{}", CONSENSUS_KEY, pk_hex);
let explicit_sk = self
.internal_store
.get::<bls12381::PrivateKey>(explicit_storage_key.as_str())
.map(|v| v.value);
let default_sk = self
.internal_store
.get::<bls12381::PrivateKey>(CONSENSUS_KEY)
.map(|v| v.value);
let key = match (explicit_sk, default_sk) {
(Ok(sk_0), _) => sk_0,
(Err(_), Ok(sk_1)) => sk_1,
(Err(_), Err(_)) => {
return Err(Error::ValidatorKeyNotFound("not found!".to_string()));
},
};
if key.public_key() != pk {
return Err(Error::SecureStorageMissingDataError(format!(
"PrivateKey for {:?} not found",
version
"Incorrect sk saved for {:?} the expected pk",
pk
)));
}
Ok(key)
Expand Down Expand Up @@ -164,7 +180,6 @@ impl PersistentSafetyStorage {
Ok(())
}

#[cfg(any(test, feature = "testing"))]
pub fn internal_store(&mut self) -> &mut Storage {
&mut self.internal_store
}
Expand Down
9 changes: 3 additions & 6 deletions consensus/safety-rules/src/safety_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use aptos_types::{
waypoint::Waypoint,
};
use serde::Serialize;
use std::cmp::Ordering;
use std::{cmp::Ordering, sync::Arc};

pub(crate) fn next_round(round: Round) -> Result<Round, Error> {
u64::checked_add(round, 1).ok_or(Error::IncorrectRound(round))
Expand Down Expand Up @@ -316,13 +316,10 @@ impl SafetyRules {
Ok(())
} else {
// Try to export the consensus key directly from storage.
match self
.persistent_storage
.consensus_key_for_version(expected_key)
{
match self.persistent_storage.consensus_sk_by_pk(expected_key) {
Ok(consensus_key) => {
self.validator_signer =
Some(ValidatorSigner::new(author, consensus_key));
Some(ValidatorSigner::new(author, Arc::new(consensus_key)));
Ok(())
},
Err(Error::SecureStorageMissingDataError(error)) => {
Expand Down
45 changes: 29 additions & 16 deletions consensus/safety-rules/src/safety_rules_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use crate::{
thread::ThreadService,
SafetyRules, TSafetyRules,
};
use anyhow::anyhow;
use aptos_config::config::{InitialSafetyRulesConfig, SafetyRulesConfig, SafetyRulesService};
use aptos_crypto::bls12381::PrivateKey;
use aptos_crypto::bls12381::PublicKey;
use aptos_global_constants::CONSENSUS_KEY;
use aptos_infallible::RwLock;
use aptos_logger::{info, warn};
use aptos_secure_storage::{KVStorage, Storage};
use std::{net::SocketAddr, sync::Arc};
use std::{net::SocketAddr, sync::Arc, time::Instant};

pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
let backend = &config.backend;
Expand Down Expand Up @@ -45,8 +45,8 @@ pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
} else {
let storage =
PersistentSafetyStorage::new(internal_storage, config.enable_cached_safety_data);
// If it's initialized, then we can continue
if storage.author().is_ok() {

let mut storage = if storage.author().is_ok() {
storage
} else if !matches!(
config.initial_safety_rules_config,
Expand Down Expand Up @@ -75,19 +75,32 @@ pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
panic!(
"Safety rules storage is not initialized, provide an initial safety rules config"
)
};

// Ensuring all the overriding consensus keys are in the storage.
let timer = Instant::now();
for blob in config
.initial_safety_rules_config
.overriding_identity_blobs()
.unwrap_or_default()
{
if let Some(sk) = blob.consensus_private_key {
let pk_hex = hex::encode(PublicKey::from(&sk).to_bytes());
let storage_key = format!("{}_{}", CONSENSUS_KEY, pk_hex);
match storage.internal_store().set(storage_key.as_str(), sk) {
Ok(_) => {
info!("Setting {storage_key} succeeded.");
},
Err(e) => {
warn!("Setting {storage_key} failed with internal store set error: {e}");
},
}
}
}
}
}
info!("Overriding key work time: {:?}", timer.elapsed());

pub fn load_consensus_key_from_secure_storage(
config: &SafetyRulesConfig,
) -> anyhow::Result<PrivateKey> {
let storage: Storage = (&config.backend).into();
let storage = Box::new(storage);
let response = storage.get::<PrivateKey>(CONSENSUS_KEY).map_err(|e| {
anyhow!("load_consensus_key_from_secure_storage failed with storage read error: {e}")
})?;
Ok(response.value)
storage
}
}

enum SafetyRulesWrapper {
Expand Down
7 changes: 3 additions & 4 deletions consensus/src/consensus_observer/observer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1095,15 +1095,14 @@ impl ConsensusObserver {
};

// Start the new epoch
let signer = Arc::new(ValidatorSigner::new(
AccountAddress::ZERO,
bls12381::PrivateKey::genesis(),
));
let sk = Arc::new(bls12381::PrivateKey::genesis());
let signer = Arc::new(ValidatorSigner::new(AccountAddress::ZERO, sk.clone()));
let dummy_signer = Arc::new(DagCommitSigner::new(signer.clone()));
let (_, rand_msg_rx) =
aptos_channel::new::<AccountAddress, IncomingRandGenRequest>(QueueStyle::FIFO, 1, None);
self.execution_client
.start_epoch(
Some(sk),
epoch_state.clone(),
dummy_signer,
payload_manager,
Expand Down
Loading

0 comments on commit 55ff034

Please sign in to comment.