Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance consensus key rotation support #13926

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion aptos-node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ aptos-peer-monitoring-service-client = { workspace = true }
aptos-peer-monitoring-service-server = { workspace = true }
aptos-peer-monitoring-service-types = { workspace = true }
aptos-runtimes = { workspace = true }
aptos-safety-rules = { workspace = true }
aptos-state-sync-driver = { workspace = true }
aptos-storage-interface = { workspace = true }
aptos-storage-service-client = { workspace = true }
Expand Down
25 changes: 6 additions & 19 deletions aptos-node/src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@ use aptos_event_notifications::{
DbBackedOnChainConfig, EventNotificationListener, ReconfigNotificationListener,
};
use aptos_jwk_consensus::{start_jwk_consensus_runtime, types::JWKConsensusMsg};
use aptos_logger::debug;
use aptos_mempool::QuorumStoreRequest;
use aptos_network::application::interface::{NetworkClient, NetworkServiceEvents};
use aptos_safety_rules::safety_rules_manager::load_consensus_key_from_secure_storage;
use aptos_storage_interface::DbReaderWriter;
use aptos_validator_transaction_pool::VTxnPoolState;
use futures::channel::mpsc::Sender;
Expand Down Expand Up @@ -73,13 +71,9 @@ pub fn create_dkg_runtime(
)>,
dkg_network_interfaces: Option<ApplicationNetworkInterfaces<DKGMessage>>,
) -> (VTxnPoolState, Option<Runtime>) {
let maybe_dkg_dealer_sk =
load_consensus_key_from_secure_storage(&node_config.consensus.safety_rules);
debug!("maybe_dkg_dealer_sk={:?}", maybe_dkg_dealer_sk);

let vtxn_pool = VTxnPoolState::default();
let dkg_runtime = match (dkg_network_interfaces, maybe_dkg_dealer_sk) {
(Some(interfaces), Ok(dkg_dealer_sk)) => {
let dkg_runtime = match dkg_network_interfaces {
Some(interfaces) => {
let ApplicationNetworkInterfaces {
network_client,
network_service_events,
Expand All @@ -90,7 +84,7 @@ pub fn create_dkg_runtime(
let rb_config = node_config.consensus.rand_rb_config.clone();
let dkg_runtime = start_dkg_runtime(
my_addr,
dkg_dealer_sk,
&node_config.consensus.safety_rules,
network_client,
network_service_events,
reconfig_events,
Expand All @@ -117,15 +111,8 @@ pub fn create_jwk_consensus_runtime(
jwk_consensus_network_interfaces: Option<ApplicationNetworkInterfaces<JWKConsensusMsg>>,
vtxn_pool: &VTxnPoolState,
) -> Option<Runtime> {
let maybe_jwk_consensus_key =
load_consensus_key_from_secure_storage(&node_config.consensus.safety_rules);
debug!(
"jwk_consensus_key_err={:?}",
maybe_jwk_consensus_key.as_ref().err()
);

let jwk_consensus_runtime = match (jwk_consensus_network_interfaces, maybe_jwk_consensus_key) {
(Some(interfaces), Ok(consensus_key)) => {
let jwk_consensus_runtime = match jwk_consensus_network_interfaces {
Some(interfaces) => {
let ApplicationNetworkInterfaces {
network_client,
network_service_events,
Expand All @@ -136,7 +123,7 @@ pub fn create_jwk_consensus_runtime(
let my_addr = node_config.validator_network.as_ref().unwrap().peer_id();
let jwk_consensus_runtime = start_jwk_consensus_runtime(
my_addr,
consensus_key,
&node_config.consensus.safety_rules,
network_client,
network_service_events,
reconfig_events,
Expand Down
15 changes: 1 addition & 14 deletions aptos-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ use anyhow::anyhow;
use aptos_admin_service::AdminService;
use aptos_api::bootstrap as bootstrap_api;
use aptos_build_info::build_information;
use aptos_config::config::{
merge_node_config, InitialSafetyRulesConfig, NodeConfig, PersistableConfig,
};
use aptos_config::config::{merge_node_config, NodeConfig, PersistableConfig};
use aptos_framework::ReleaseBundle;
use aptos_logger::{prelude::*, telemetry_log_writer::TelemetryLog, Level, LoggerFilterUpdater};
use aptos_state_sync_driver::driver_factory::StateSyncRuntimes;
Expand Down Expand Up @@ -703,17 +701,6 @@ pub fn setup_environment_and_start_node(
peers_and_metadata,
);

// Ensure consensus key in secure DB.
if !matches!(
node_config
.consensus
.safety_rules
.initial_safety_rules_config,
InitialSafetyRulesConfig::None
) {
aptos_safety_rules::safety_rules_manager::storage(&node_config.consensus.safety_rules);
}

// Create the DKG runtime and get the VTxn pool
let (vtxn_pool, dkg_runtime) =
consensus::create_dkg_runtime(&mut node_config, dkg_subscriptions, dkg_network_interfaces);
Expand Down
1 change: 1 addition & 0 deletions config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,6 @@ tempfile = { workspace = true }
default = []
failpoints = []
fuzzing = ["aptos-crypto/fuzzing", "aptos-types/fuzzing"]
smoke-test = []
testing = []
tokio-console = []
41 changes: 40 additions & 1 deletion config/src/config/safety_rules_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,22 @@ impl ConfigSanitizer for SafetyRulesConfig {
pub enum InitialSafetyRulesConfig {
FromFile {
identity_blob_path: PathBuf,
#[serde(skip_serializing_if = "Vec::is_empty", default)]
overriding_identity_paths: Vec<PathBuf>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be a vector? Will 1 path not suffice (if the operator wants to rotate again, they need to move the new key to the old key location, and then add a single override). Would that work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory 2 slots (1 existing, 1 new) is enough. It's just i found n slots can also be supported for free by just replacing Option<PathBuf> with Vec<PathBuf>...

waypoint: WaypointConfig,
},
None,
}

impl InitialSafetyRulesConfig {
pub fn from_file(identity_blob_path: PathBuf, waypoint: WaypointConfig) -> Self {
pub fn from_file(
identity_blob_path: PathBuf,
overriding_identity_paths: Vec<PathBuf>,
waypoint: WaypointConfig,
) -> Self {
Self::FromFile {
identity_blob_path,
overriding_identity_paths,
waypoint,
}
}
Expand Down Expand Up @@ -160,6 +167,38 @@ impl InitialSafetyRulesConfig {
},
}
}

pub fn overriding_identity_blobs(&self) -> anyhow::Result<Vec<IdentityBlob>> {
match self {
InitialSafetyRulesConfig::FromFile {
overriding_identity_paths,
..
} => {
let mut blobs = vec![];
for path in overriding_identity_paths {
let blob = IdentityBlob::from_file(path)?;
blobs.push(blob);
}
Ok(blobs)
},
InitialSafetyRulesConfig::None => {
bail!("loading overriding identity blobs failed with missing initial safety rules config")
},
}
}

#[cfg(feature = "smoke-test")]
pub fn overriding_identity_blob_paths_mut(&mut self) -> &mut Vec<PathBuf> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Maybe make this test-only? It seems like it's only used in the smoke tests? Or better yet, don't expose this here if possible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed.

NOTE that we can't reuse feature testing as crate smoke-test needs it disabled in addition to overriding_identity_blob_paths_mut.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aah, in that case, don't worry about it -- you can remove the smoke-test feature. I was just wondering if it was a quick change 😄 Seems like overkill for a new feature.

match self {
InitialSafetyRulesConfig::FromFile {
overriding_identity_paths,
..
} => overriding_identity_paths,
InitialSafetyRulesConfig::None => {
unreachable!()
},
}
}
}

/// Defines how safety rules should be executed
Expand Down
2 changes: 1 addition & 1 deletion consensus/safety-rules/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ repository = { workspace = true }
rust-version = { workspace = true }

[dependencies]
anyhow = { workspace = true }
aptos-config = { workspace = true }
aptos-consensus-types = { workspace = true }
aptos-crypto = { workspace = true }
Expand All @@ -25,6 +24,7 @@ aptos-secure-net = { workspace = true }
aptos-secure-storage = { workspace = true }
aptos-types = { workspace = true }
aptos-vault-client = { workspace = true }
hex = { workspace = true }
once_cell = { workspace = true }
proptest = { workspace = true, optional = true }
rand = { workspace = true }
Expand Down
29 changes: 22 additions & 7 deletions consensus/safety-rules/src/persistent_safety_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,32 @@ impl PersistentSafetyStorage {
Ok(self.internal_store.get(OWNER_ACCOUNT).map(|v| v.value)?)
}

pub fn consensus_key_for_version(
pub fn consensus_sk_by_pk(
&self,
version: bls12381::PublicKey,
pk: bls12381::PublicKey,
) -> Result<bls12381::PrivateKey, Error> {
let _timer = counters::start_timer("get", CONSENSUS_KEY);
let key: bls12381::PrivateKey = self.internal_store.get(CONSENSUS_KEY).map(|v| v.value)?;
if key.public_key() != version {
let pk_hex = hex::encode(pk.to_bytes());
let explicit_storage_key = format!("{}_{}", CONSENSUS_KEY, pk_hex);
let explicit_sk = self
.internal_store
.get::<bls12381::PrivateKey>(explicit_storage_key.as_str())
.map(|v| v.value);
let default_sk = self
.internal_store
.get::<bls12381::PrivateKey>(CONSENSUS_KEY)
.map(|v| v.value);
let key = match (explicit_sk, default_sk) {
(Ok(sk_0), _) => sk_0,
(Err(_), Ok(sk_1)) => sk_1,
(Err(_), Err(_)) => {
return Err(Error::ValidatorKeyNotFound("not found!".to_string()));
},
};
if key.public_key() != pk {
return Err(Error::SecureStorageMissingDataError(format!(
"PrivateKey for {:?} not found",
version
"Incorrect sk saved for {:?} the expected pk",
pk
)));
}
Ok(key)
Expand Down Expand Up @@ -164,7 +180,6 @@ impl PersistentSafetyStorage {
Ok(())
}

#[cfg(any(test, feature = "testing"))]
pub fn internal_store(&mut self) -> &mut Storage {
&mut self.internal_store
}
Expand Down
9 changes: 3 additions & 6 deletions consensus/safety-rules/src/safety_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use aptos_types::{
waypoint::Waypoint,
};
use serde::Serialize;
use std::cmp::Ordering;
use std::{cmp::Ordering, sync::Arc};

pub(crate) fn next_round(round: Round) -> Result<Round, Error> {
u64::checked_add(round, 1).ok_or(Error::IncorrectRound(round))
Expand Down Expand Up @@ -316,13 +316,10 @@ impl SafetyRules {
Ok(())
} else {
// Try to export the consensus key directly from storage.
match self
.persistent_storage
.consensus_key_for_version(expected_key)
{
match self.persistent_storage.consensus_sk_by_pk(expected_key) {
Ok(consensus_key) => {
self.validator_signer =
Some(ValidatorSigner::new(author, consensus_key));
Some(ValidatorSigner::new(author, Arc::new(consensus_key)));
Ok(())
},
Err(Error::SecureStorageMissingDataError(error)) => {
Expand Down
45 changes: 29 additions & 16 deletions consensus/safety-rules/src/safety_rules_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use crate::{
thread::ThreadService,
SafetyRules, TSafetyRules,
};
use anyhow::anyhow;
use aptos_config::config::{InitialSafetyRulesConfig, SafetyRulesConfig, SafetyRulesService};
use aptos_crypto::bls12381::PrivateKey;
use aptos_crypto::bls12381::PublicKey;
use aptos_global_constants::CONSENSUS_KEY;
use aptos_infallible::RwLock;
use aptos_logger::{info, warn};
use aptos_secure_storage::{KVStorage, Storage};
use std::{net::SocketAddr, sync::Arc};
use std::{net::SocketAddr, sync::Arc, time::Instant};

pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
let backend = &config.backend;
Expand Down Expand Up @@ -45,8 +45,8 @@ pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
} else {
let storage =
PersistentSafetyStorage::new(internal_storage, config.enable_cached_safety_data);
// If it's initialized, then we can continue
if storage.author().is_ok() {

let mut storage = if storage.author().is_ok() {
storage
} else if !matches!(
config.initial_safety_rules_config,
Expand Down Expand Up @@ -75,19 +75,32 @@ pub fn storage(config: &SafetyRulesConfig) -> PersistentSafetyStorage {
panic!(
"Safety rules storage is not initialized, provide an initial safety rules config"
)
};

// Ensuring all the overriding consensus keys are in the storage.
let timer = Instant::now();
for blob in config
zjma marked this conversation as resolved.
Show resolved Hide resolved
.initial_safety_rules_config
.overriding_identity_blobs()
.unwrap_or_default()
{
if let Some(sk) = blob.consensus_private_key {
let pk_hex = hex::encode(PublicKey::from(&sk).to_bytes());
let storage_key = format!("{}_{}", CONSENSUS_KEY, pk_hex);
match storage.internal_store().set(storage_key.as_str(), sk) {
Ok(_) => {
info!("Setting {storage_key} succeeded.");
},
Err(e) => {
warn!("Setting {storage_key} failed with internal store set error: {e}");
},
}
}
}
}
}
info!("Overriding key work time: {:?}", timer.elapsed());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: do we think this is necessary to track/log?


pub fn load_consensus_key_from_secure_storage(
config: &SafetyRulesConfig,
) -> anyhow::Result<PrivateKey> {
let storage: Storage = (&config.backend).into();
let storage = Box::new(storage);
let response = storage.get::<PrivateKey>(CONSENSUS_KEY).map_err(|e| {
anyhow!("load_consensus_key_from_secure_storage failed with storage read error: {e}")
})?;
Ok(response.value)
storage
}
}

enum SafetyRulesWrapper {
Expand Down
7 changes: 3 additions & 4 deletions consensus/src/consensus_observer/observer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1095,15 +1095,14 @@ impl ConsensusObserver {
};

// Start the new epoch
let signer = Arc::new(ValidatorSigner::new(
AccountAddress::ZERO,
bls12381::PrivateKey::genesis(),
));
let sk = Arc::new(bls12381::PrivateKey::genesis());
let signer = Arc::new(ValidatorSigner::new(AccountAddress::ZERO, sk.clone()));
let dummy_signer = Arc::new(DagCommitSigner::new(signer.clone()));
let (_, rand_msg_rx) =
aptos_channel::new::<AccountAddress, IncomingRandGenRequest>(QueueStyle::FIFO, 1, None);
self.execution_client
.start_epoch(
Some(sk),
epoch_state.clone(),
dummy_signer,
payload_manager,
Expand Down
Loading
Loading