Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 89 additions & 20 deletions sled-agent/src/bootstrap/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ use super::ddm_admin_client::{DdmAdminClient, DdmError};
use super::params::SledAgentRequest;
use super::rss_handle::RssHandle;
use super::server::TrustQuorumMembership;
use super::trust_quorum::{RackSecret, ShareDistribution, TrustQuorumError};
use super::trust_quorum::{
RackSecret, SerializableShareDistribution, ShareDistribution,
TrustQuorumError,
};
use super::views::SledAgentResponse;
use crate::config::Config as SledConfig;
use crate::illumos::dladm::{self, Dladm, PhysicalLink};
Expand All @@ -22,7 +25,9 @@ use omicron_common::api::external::{Error as ExternalError, MacAddr};
use omicron_common::backoff::{
internal_service_policy, retry_notify, BackoffError,
};
use serde::{Deserialize, Serialize};
use slog::Logger;
use std::borrow::Cow;
use std::collections::HashSet;
use std::net::{Ipv6Addr, SocketAddrV6};
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -192,7 +197,7 @@ impl Agent {
let request_path = get_sled_agent_request_path();
let trust_quorum = if request_path.exists() {
info!(agent.log, "Sled already configured, loading sled agent");
let sled_request: SledAgentRequest = toml::from_str(
let sled_request: PersistentSledAgentRequest = toml::from_str(
&tokio::fs::read_to_string(&request_path).await.map_err(
|err| BootstrapError::Io {
message: format!(
Expand All @@ -203,10 +208,13 @@ impl Agent {
)?,
)
.map_err(|err| BootstrapError::Toml { path: request_path, err })?;
agent.request_agent(&sled_request).await?;
TrustQuorumMembership::Known(Arc::new(
sled_request.trust_quorum_share,
))

let trust_quorum_share =
sled_request.trust_quorum_share.map(ShareDistribution::from);
agent
.request_agent(&*sled_request.request, &trust_quorum_share)
.await?;
TrustQuorumMembership::Known(Arc::new(trust_quorum_share))
} else {
TrustQuorumMembership::Uninitialized
};
Expand All @@ -219,6 +227,7 @@ impl Agent {
pub async fn request_agent(
&self,
request: &SledAgentRequest,
trust_quorum_share: &Option<ShareDistribution>,
) -> Result<SledAgentResponse, BootstrapError> {
info!(&self.log, "Loading Sled Agent: {:?}", request);

Expand All @@ -243,7 +252,7 @@ impl Agent {
// partially-initialized rack where we may have a share from a
// previously-started-but-not-completed init process? If rerunning
// it produces different shares this check will fail.
if request.trust_quorum_share != *self.share.lock().await {
if *trust_quorum_share != *self.share.lock().await {
let err_str = concat!(
"Sled Agent already running with",
" a different trust quorum share"
Expand All @@ -270,23 +279,26 @@ impl Agent {
maybe_agent.replace(server);
info!(&self.log, "Sled Agent loaded; recording configuration");

*self.share.lock().await = request.trust_quorum_share.clone();
*self.share.lock().await = trust_quorum_share.clone();

// Record this request so the sled agent can be automatically
// initialized on the next boot.
//
// Danger: `serialized_request` contains our raw trust quorum share. We
// must never log it; it is written only to the designated path.
let serialized_request = PersistentSledAgentRequest {
request: Cow::Borrowed(request),
trust_quorum_share: trust_quorum_share.clone().map(Into::into),
}
.danger_serialize_as_toml()
.expect("Cannot serialize request");

let path = get_sled_agent_request_path();
tokio::fs::write(
&path,
&toml::to_string(
&toml::Value::try_from(&request)
.expect("Cannot serialize request"),
)
.expect("Cannot convert toml to string"),
)
.await
.map_err(|err| BootstrapError::Io {
message: format!("Recording Sled Agent request to {path:?}"),
err,
tokio::fs::write(&path, &serialized_request).await.map_err(|err| {
BootstrapError::Io {
message: format!("Recording Sled Agent request to {path:?}"),
err,
}
})?;

// Start trying to notify ddmd of our sled prefix so it can
Expand Down Expand Up @@ -481,10 +493,38 @@ impl Agent {
}
}

// We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues
// by which we may accidentally log the contents of our trust quorum share.
/// The form in which a sled agent request is recorded on disk so the sled
/// agent can be initialized again on the next boot.
///
/// It is parsed from TOML via `Deserialize`; writing it out goes through
/// `danger_serialize_as_toml()` rather than a `Serialize` impl.
#[derive(Deserialize, PartialEq)]
struct PersistentSledAgentRequest<'a> {
/// The request itself; a `Cow` so that it can be borrowed from the caller
/// when serializing without cloning it.
request: Cow<'a, SledAgentRequest>,
/// This sled's trust quorum share, if it has one. Secret material: it must
/// never be logged.
trust_quorum_share: Option<SerializableShareDistribution>,
}

impl PersistentSledAgentRequest<'_> {
/// On success, the returned string will contain our raw
/// `trust_quorum_share`. This method is named `danger_*` to remind the
/// caller that they must not log this string.
fn danger_serialize_as_toml(&self) -> Result<String, toml::ser::Error> {
#[derive(Serialize)]
#[serde(remote = "PersistentSledAgentRequest")]
struct PersistentSledAgentRequestDef<'a> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the Def suffix stand for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Definition", I think; this is from https://serde.rs/remote-derive.html. In this case we don't need remote derive because we control the original type, but we choose to use it because the original type intentionally doesn't derive Serialize.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, cool, it comes from the serde example. Stick with it then!

request: Cow<'a, SledAgentRequest>,
trust_quorum_share: Option<SerializableShareDistribution>,
}

let mut out = String::with_capacity(128);
let mut serializer = toml::Serializer::new(&mut out);
PersistentSledAgentRequestDef::serialize(self, &mut serializer)?;
Ok(out)
}
}

#[cfg(test)]
mod tests {
use super::*;
use macaddr::MacAddr6;
use uuid::Uuid;

#[test]
fn test_mac_to_socket_addr() {
Expand All @@ -495,4 +535,33 @@ mod tests {
&"fdb0:a840:2510:1::1".parse::<Ipv6Addr>().unwrap(),
);
}

/// Serializing a `PersistentSledAgentRequest` to TOML and parsing the result
/// back must produce a value equal to the original, trust quorum share
/// included.
#[test]
fn persistent_sled_agent_request_serialization_round_trips() {
    let secret = RackSecret::new();
    let (mut shares, verifier) = secret.split(2, 4).unwrap();

    let request = PersistentSledAgentRequest {
        request: Cow::Owned(SledAgentRequest {
            id: Uuid::new_v4(),
            subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST),
            rack_id: Uuid::new_v4(),
        }),
        trust_quorum_share: Some(
            ShareDistribution {
                threshold: 2,
                verifier,
                share: shares.pop().unwrap(),
                member_device_id_certs: vec![],
            }
            .into(),
        ),
    };

    let serialized = request.danger_serialize_as_toml().unwrap();

    // Parse with `toml::from_str`, the same entry point the agent uses when
    // it loads the persisted request, instead of detouring through bytes.
    let deserialized: PersistentSledAgentRequest =
        toml::from_str(&serialized).unwrap();

    // `PersistentSledAgentRequest` deliberately has no `Debug` impl, so
    // `assert_eq!` is not available here.
    assert!(request == deserialized, "serialization round trip failed");
}
}
16 changes: 12 additions & 4 deletions sled-agent/src/bootstrap/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use super::params::version;
use super::params::Request;
use super::params::RequestEnvelope;
use super::params::SledAgentRequest;
use super::trust_quorum::ShareDistribution;
use super::views::SledAgentResponse;
use crate::bootstrap::views::Response;
use crate::bootstrap::views::ResponseEnvelope;
Expand Down Expand Up @@ -90,8 +91,12 @@ impl<'a> Client<'a> {
pub(crate) async fn start_sled(
&self,
request: &SledAgentRequest,
trust_quorum_share: Option<ShareDistribution>,
) -> Result<SledAgentResponse, Error> {
let request = Request::SledAgentRequest(Cow::Borrowed(request));
let request = Request::SledAgentRequest(
Cow::Borrowed(request),
trust_quorum_share.map(Into::into),
);

match self.request_response(request).await? {
Response::SledAgentResponse(response) => Ok(response),
Expand Down Expand Up @@ -142,8 +147,11 @@ impl<'a> Client<'a> {

// Build and serialize our request.
let envelope = RequestEnvelope { version: version::V1, request };
let mut buf =
serde_json::to_vec(&envelope).map_err(Error::Serialize)?;

// "danger" note: `buf` contains a raw trust quorum share; we must not
// log or otherwise persist it! We only write it to `stream`.
let buf =
envelope.danger_serialize_as_json().map_err(Error::Serialize)?;
let request_length = u32::try_from(buf.len())
.expect("serialized bootstrap-agent request length overflowed u32");

Expand All @@ -163,7 +171,7 @@ impl<'a> Client<'a> {
return Err(Error::BadResponseLength(response_length));
}

buf.resize(response_length as usize, 0);
let mut buf = vec![0; response_length as usize];
stream.read_exact(&mut buf).await.map_err(Error::ReadResponse)?;

// Deserialize and handle the response.
Expand Down
104 changes: 92 additions & 12 deletions sled-agent/src/bootstrap/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//! Request types for the bootstrap agent

use super::trust_quorum::ShareDistribution;
use super::trust_quorum::SerializableShareDistribution;
use omicron_common::address::{Ipv6Subnet, SLED_PREFIX};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
Expand All @@ -16,38 +16,118 @@ pub struct SledAgentRequest {
/// Uuid of the Sled Agent to be created.
pub id: Uuid,

/// Portion of the IP space to be managed by the Sled Agent.
pub subnet: Ipv6Subnet<SLED_PREFIX>,

/// Uuid of the rack to which this sled agent belongs.
pub rack_id: Uuid,

/// Share of the rack secret for this Sled Agent.
// TODO-cleanup This is currently optional because we don't do trust quorum
// shares for single-node deployments (i.e., most dev/test environments),
// but eventually this should be required.
pub trust_quorum_share: Option<ShareDistribution>,
// Note: The order of these fields is load-bearing, because we serialize
// `SledAgentRequest`s as TOML. `subnet` serializes as a TOML table, so it
// must come after all non-table fields.
/// Portion of the IP space to be managed by the Sled Agent.
pub subnet: Ipv6Subnet<SLED_PREFIX>,
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
// We intentionally DO NOT derive `Debug` or `Serialize`; both provide avenues
// by which we may accidentally log the contents of our `share`. To serialize a
// request, use `RequestEnvelope::danger_serialize_as_json()`.
#[derive(Clone, Deserialize, PartialEq)]
// Clippy wants us to put the SledAgentRequest in a Box, but (a) it's not _that_
// big (a couple hundred bytes), and (b) that makes matching annoying.
// `Request`s are relatively rare over the life of a sled agent.
#[allow(clippy::large_enum_variant)]
pub enum Request<'a> {
/// Send configuration information for launching a Sled Agent.
SledAgentRequest(Cow<'a, SledAgentRequest>),
SledAgentRequest(
Cow<'a, SledAgentRequest>,
Option<SerializableShareDistribution>,
),

/// Request the sled's share of the rack secret.
ShareRequest,
}

#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[derive(Clone, Deserialize, PartialEq)]
pub struct RequestEnvelope<'a> {
pub version: u32,
pub request: Request<'a>,
}

impl RequestEnvelope<'_> {
    /// Encodes this envelope as JSON bytes.
    ///
    /// The returned buffer holds the raw trust quorum share whenever the
    /// request carries one. The `danger_` prefix is a reminder to callers
    /// that the bytes must never be logged.
    pub(crate) fn danger_serialize_as_json(
        &self,
    ) -> Result<Vec<u8>, serde_json::Error> {
        // Function-local serde "remote" mirrors: they give us a serializer
        // for `RequestEnvelope` / `Request` without those types having to
        // implement `Serialize` themselves.
        #[derive(Serialize)]
        #[serde(remote = "RequestEnvelope")]
        struct RequestEnvelopeDef<'a> {
            version: u32,
            #[serde(borrow, with = "RequestDef")]
            request: Request<'a>,
        }

        #[derive(Serialize)]
        #[serde(remote = "Request")]
        #[allow(clippy::large_enum_variant)]
        pub enum RequestDef<'a> {
            /// Send configuration information for launching a Sled Agent.
            SledAgentRequest(
                Cow<'a, SledAgentRequest>,
                Option<SerializableShareDistribution>,
            ),

            /// Request the sled's share of the rack secret.
            ShareRequest,
        }

        let mut json = Vec::with_capacity(128);
        RequestEnvelopeDef::serialize(
            self,
            &mut serde_json::Serializer::new(&mut json),
        )?;
        Ok(json)
    }
}

/// Values for the `version` field of a `RequestEnvelope`.
pub(super) mod version {
/// Version 1 of the request envelope.
pub(crate) const V1: u32 = 1;
}

#[cfg(test)]
mod tests {
    use std::net::Ipv6Addr;

    use super::*;
    use crate::bootstrap::trust_quorum::RackSecret;
    use crate::bootstrap::trust_quorum::ShareDistribution;

    /// A `RequestEnvelope` carrying a sled agent request and a trust quorum
    /// share must survive a JSON encode/decode cycle unchanged.
    #[test]
    fn json_serialization_round_trips() {
        let secret = RackSecret::new();
        let (mut shares, verifier) = secret.split(2, 4).unwrap();

        let sled_request = SledAgentRequest {
            id: Uuid::new_v4(),
            subnet: Ipv6Subnet::new(Ipv6Addr::LOCALHOST),
            rack_id: Uuid::new_v4(),
        };
        let share_distribution = ShareDistribution {
            threshold: 2,
            verifier,
            share: shares.pop().unwrap(),
            member_device_id_certs: vec![],
        };

        let original = RequestEnvelope {
            version: 1,
            request: Request::SledAgentRequest(
                Cow::Owned(sled_request),
                Some(share_distribution.into()),
            ),
        };

        let bytes = original.danger_serialize_as_json().unwrap();
        let round_tripped: RequestEnvelope =
            serde_json::from_slice(&bytes).unwrap();

        // The envelope types have no `Debug` impl, hence `assert!` with `==`
        // rather than `assert_eq!`.
        assert!(original == round_tripped, "serialization round trip failed");
    }
}
Loading