From 04ef3bdcb3ee5efabd6106ada3b3b28e9c7e3ef4 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Mar 2023 00:13:56 +0000 Subject: [PATCH 01/13] nexus: flesh out migration saga Flesh out the live migration saga. See RFD 361 and the theory statements in instance_migrate.rs for more details about the design. The new saga will start migration, but (yet) more changes are required for it to succeed end-to-end, since sled agent still needs to be taught to send the correct updates when a migration ends. Remove the portion of the migration saga that prospectively reprograms OPTE V2P mappings using the instance's destination sled. A future commit will update V2P mappings in response to the end-of-migration instance update that records an instance's new home sled. --- nexus/db-model/src/instance.rs | 5 +- nexus/src/app/instance.rs | 128 ++-- nexus/src/app/sagas/instance_create.rs | 6 +- nexus/src/app/sagas/instance_migrate.rs | 750 ++++++++++++++++++------ 4 files changed, 677 insertions(+), 212 deletions(-) diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 1ee25ec8b6e..5d722912da6 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -98,7 +98,10 @@ impl DatastoreAttachTargetConfig for Instance { Serialize, Deserialize, )] -#[diesel(table_name = instance)] +// N.B. Setting `treat_none_as_null` is required for these fields to be cleared +// properly during live migrations. See the documentation for +// `diesel::prelude::AsChangeset`. +#[diesel(table_name = instance, treat_none_as_null = true)] pub struct InstanceRuntimeState { /// The instance's current user-visible instance state. /// diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1248dbf0853..afac19c6e36 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -35,6 +35,8 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; +use sled_agent_client::types::InstanceMigrationSourceParams; +use sled_agent_client::types::InstancePutMigrationIdsBody; use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; @@ -267,13 +269,29 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, params: params::InstanceMigrate, ) -> UpdateResult { - let (.., authz_instance) = - instance_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_instance, db_instance) = + instance_lookup.fetch_for(authz::Action::Modify).await?; + + if db_instance.runtime().state.0 != InstanceState::Running { + return Err(Error::invalid_request( + "instance must be running before it can migrate", + )); + } + + if db_instance.runtime().sled_id == params.dst_sled_id { + return Err(Error::invalid_request( + "instance is already running on destination sled", + )); + } + + if db_instance.runtime().migration_id.is_some() { + return Err(Error::unavail("instance is already migrating")); + } // Kick off the migration saga let saga_params = sagas::instance_migrate::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), - instance_id: authz_instance.id(), + instance: db_instance, migrate_params: params, }; self.execute_saga::( @@ -287,39 +305,79 @@ impl super::Nexus { self.db_datastore.instance_refetch(opctx, &authz_instance).await } - /// Idempotently place the instance in a 'Migrating' state. 
- pub async fn instance_start_migrate( + /// Attempts to set the migration IDs for the supplied instance via the + /// instance's current sled. + /// + /// The caller is assumed to have fetched the current instance record from + /// the DB and verified either that the record has no migration IDs (in + /// the case where the caller is setting IDs) or that it has the expected + /// IDs (if the caller is clearing them). + /// + /// Returns `Ok` and the updated instance record if this call successfully + /// updated the instance with the sled agent and that update was + /// successfully reflected into CRDB. Returns `Err` with an appropriate + /// error otherwise. + /// + /// # Panics + /// + /// Raises an assertion failure if `migration_params` is `Some` and the + /// supplied `db_instance` already has a migration ID or destination + /// Propolis ID set. + /// + /// Raises an assertion failure if `migration_params` is `None` and the + /// supplied `db_instance`'s migration ID or destination Propolis ID is not + /// set. + pub async fn instance_set_migration_ids( &self, - _opctx: &OpContext, - _instance_id: Uuid, - _migration_id: Uuid, - _dst_propolis_id: Uuid, + opctx: &OpContext, + instance_id: Uuid, + db_instance: &db::model::Instance, + migration_params: Option, ) -> UpdateResult { - todo!("Migration endpoint not yet implemented in sled agent"); + if migration_params.is_some() { + assert!(db_instance.runtime().migration_id.is_none()); + assert!(db_instance.runtime().dst_propolis_id.is_none()); + } else { + assert!(db_instance.runtime().migration_id.is_some()); + assert!(db_instance.runtime().dst_propolis_id.is_some()); + } - /* - let (.., authz_instance, db_instance) = - LookupPath::new(opctx, &self.db_datastore) - .instance_id(instance_id) - .fetch() - .await - .unwrap(); - let requested = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Migrating, - migration_params: Some(InstanceRuntimeStateMigrateParams { - migration_id, - dst_propolis_id, - }), - }; - self.instance_set_runtime( - opctx, - &authz_instance, - &db_instance, - requested, - ) - .await?; - self.db_datastore.instance_refetch(opctx, &authz_instance).await - */ + let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + let sa = self.instance_sled(&db_instance).await?; + let instance_put_result = sa + .instance_put_migration_ids( + &instance_id, + &InstancePutMigrationIdsBody { + old_runtime: db_instance.runtime().clone().into(), + migration_params, + }, + ) + .await + .map(|res| Some(res.into_inner())); + + // Write the updated instance runtime state back to CRDB. If this + // outright fails, this operation fails. If the operation nominally + // succeeds but nothing was updated, this action is outdated and the + // caller should not proceed with migration. + let updated = self + .handle_instance_put_result(&db_instance, instance_put_result) + .await?; + + if updated { + Ok(self + .db_datastore + .instance_refetch(opctx, &authz_instance) + .await?) + } else { + Err(Error::internal_error( + "instance's Propolis generation is out of date", + )) + } } /// Reboot the specified instance. 
@@ -888,6 +946,10 @@ impl super::Nexus { ) -> Result<(), Error> { let log = &self.log; + slog::debug!(log, "received new runtime state from sled agent"; + "instance_id" => %id, + "runtime_state" => ?new_runtime_state); + let result = self .db_datastore .instance_update_runtime(id, &(new_runtime_state.clone().into())) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 58a75958e70..f438cd55055 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1075,10 +1075,9 @@ async fn ensure_instance_disk_attach_state( pub(super) async fn allocate_sled_ipv6( opctx: &OpContext, sagactx: NexusActionContext, - sled_id_name: &str, + sled_uuid: Uuid, ) -> Result { let osagactx = sagactx.user_data(); - let sled_uuid = sagactx.lookup::(sled_id_name)?; osagactx .datastore() .next_ipv6_address(opctx, sled_uuid) @@ -1145,7 +1144,8 @@ async fn sic_allocate_propolis_ip( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6(&opctx, sagactx, "server_id").await + let sled_uuid = sagactx.lookup::("server_id")?; + allocate_sled_ipv6(&opctx, sagactx, sled_uuid).await } async fn sic_create_instance_record( diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 7148a1b9764..c15dee456cc 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -5,12 +5,17 @@ use super::instance_create::allocate_sled_ipv6; use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::sagas::declare_saga_actions; -use crate::authn; -use crate::db::identity::Resource; +use crate::db::{identity::Resource, lookup::LookupPath}; use crate::external_api::params; +use crate::{authn, authz, db}; +use omicron_common::api::external::InstanceState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::{ + InstanceMigrationSourceParams, InstanceMigrationTargetParams, + InstanceStateRequested, +}; use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; @@ -21,32 +26,107 @@ use uuid::Uuid; #[derive(Debug, Deserialize, Serialize)] pub struct Params { pub serialized_authn: authn::saga::Serialized, - pub instance_id: Uuid, + pub instance: db::model::Instance, pub migrate_params: params::InstanceMigrate, } -// instance migrate saga: actions - +// The migration saga is similar to the instance creation saga: get a +// destination sled, allocate a Propolis process on it, and send it a request to +// initialize via migration, then wait (outside the saga) for this to resolve. +// +// Most of the complexity in this saga comes from the fact that during +// migration, there are two sleds with their own instance runtime states, and +// both the saga and the work that happen after it have to specify carefully +// which of the two participating VMMs is actually running the VM once the +// migration is over. declare_saga_actions! { instance_migrate; ALLOCATE_PROPOLIS_IP -> "dst_propolis_ip" { + sim_allocate_propolis_ip } - MIGRATE_PREP -> "migrate_instance" { - + sim_migrate_prep + + // This step sets the migration ID and destination Propolis ID fields in + // CRDB by asking the instance's current sled to paste them into its runtime + // state. 
Sled agent provides the synchronization here: while this operation + // is idempotent for any single transition between IDs, sled agent ensures + // that if multiple concurrent sagas try to set migration IDs at the same + // Propolis generation, then only one will win and get to proceed through + // the saga. + SET_MIGRATION_IDS -> "set_migration_ids" { + + sim_set_migration_ids } - INSTANCE_MIGRATE -> "instance_migrate" { - // TODO robustness: This needs an undo action - + sim_instance_migrate + + // The instance state on the destination looks like the instance state on + // the source, except that it bears all of the destination's "location" + // information--its Propolis ID, sled ID, and Propolis IP--with the same + // Propolis generation number as the source set in the previous step. + // Consider an example: + // + // - Before the saga begins, the instance has Propolis generation 10 and no + // migration IDs. + // - The previous step sets Propolis generation 11 and sets the instance's + // migration ID. + // - This step synthesizes a record with Propolis generation 11 and the same + // migration IDs, but with the Propolis identifiers set to the + // destination's IDs. + // + // When the migration resolves one way or the other, one or both of the + // participating sleds will update CRDB with Propolis generation 12 and the + // Propolis ID set to whichever Propolis ended up running the instance. + // + // This step must be infallible and does not have an undo action for reasons + // described below. + CREATE_DESTINATION_STATE -> "dst_runtime_state" { + + sim_create_destination_state } - V2P_ENSURE -> "v2p_ensure" { - // TODO robustness: This needs an undo action - + sim_v2p_ensure + + // The next three steps are organized so that if the saga unwinds after the + // destination Propolis is created, the "set migration IDs" step is undone + // before the "ensure destination Propolis" step. Consider again the example + // above, but this time, let the saga unwind after the destination Propolis + // starts. If sled agent changes an instance's Propolis generation when a + // Propolis is destroyed, and the undo steps are specified in the + // traditional way, the following can occur: + // + // - After the steps above, the instance is registered with the source and + // target sleds, both at Propolis generation 11. + // - Undoing the "ensure destination Propolis" step sets the Propolis + // generation to 12, but with the location information from the target! + // - When the "set migration IDs" step is undone, the source will publish + // Propolis generation 12, but this will be dropped because the generation + // was already incremented. + // + // Now CRDB is pointing at the wrong Propolis, and instance state updates + // from the former migration source will be ignored. Oops. + // + // Instead of asking sled agent to reason about what steps in a migration + // have and haven't been undertaken, the following steps are arranged so + // that the update that clears migration IDs happens before the one that + // destroys the destination Propolis, which unwinds the saga to the expected + // state. + // + // Note that this implies that `sim_ensure_destination_propolis_undo` must + // be able to succeed even if the "ensure destination Propolis" step was + // never reached. + ENSURE_DESTINATION_PROPOLIS_UNDO -> "unused_ensure_destination_undo" { + + sim_noop + - sim_ensure_destination_propolis_undo } - CLEANUP_SOURCE -> "cleanup_source" { - // TODO robustness: This needs an undo action. 
Is it even possible - // to undo at this point? - + sim_cleanup_source + SET_MIGRATION_IDS_UNDO -> "unused_set_ids_undo" { + + sim_noop + - sim_clear_migration_ids + } + ENSURE_DESTINATION_PROPOLIS -> "ensure_destination" { + + sim_ensure_destination_propolis + } + + // Note that this step only requests migration by sending a "migrate in" + // request to the destination sled. It does not wait for migration to + // finish. It cannot be unwound, either, because there is no way to cancel + // an in-progress migration (indeed, a requested migration might have + // finished entirely by the time the undo step runs). + INSTANCE_MIGRATE -> "instance_migrate" { + + sim_instance_migrate } } @@ -79,195 +159,180 @@ impl NexusSaga for SagaInstanceMigrate { )); builder.append(allocate_propolis_ip_action()); - builder.append(migrate_prep_action()); + builder.append(set_migration_ids_action()); + builder.append(create_destination_state_action()); + builder.append(ensure_destination_propolis_undo_action()); + builder.append(set_migration_ids_undo_action()); + builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); - builder.append(v2p_ensure_action()); - builder.append(cleanup_source_action()); Ok(builder.build()?) } } -async fn sim_migrate_prep( +/// A no-op forward action. +async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { + Ok(()) +} + +/// Allocates an IP address on the destination sled for the Propolis server. +async fn sim_allocate_propolis_ip( sagactx: NexusActionContext, -) -> Result<(Uuid, InstanceRuntimeState), ActionError> { - let osagactx = sagactx.user_data(); +) -> Result { let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); + allocate_sled_ipv6(&opctx, sagactx, params.migrate_params.dst_sled_id).await +} - let migrate_uuid = sagactx.lookup::("migrate_id")?; - let dst_propolis_uuid = sagactx.lookup::("dst_propolis_id")?; +async fn sim_set_migration_ids( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); - // We have sled-agent (via Nexus) attempt to place - // the instance in a "Migrating" state w/ the given - // migration id. This will also update the instance - // state in the db - let instance = osagactx + let db_instance = ¶ms.instance; + let migration_id = sagactx.lookup::("migrate_id")?; + let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; + let updated_record = osagactx .nexus() - .instance_start_migrate( + .instance_set_migration_ids( &opctx, - params.instance_id, - migrate_uuid, - dst_propolis_uuid, + db_instance.id(), + db_instance, + Some(InstanceMigrationSourceParams { + dst_propolis_id, + migration_id, + }), ) .await .map_err(ActionError::action_failed)?; - let instance_id = instance.id(); - Ok((instance_id, instance.runtime_state.into())) + Ok(updated_record) } -// Allocate an IP address on the destination sled for the Propolis server. 
-async fn sim_allocate_propolis_ip( +async fn sim_clear_migration_ids( sagactx: NexusActionContext, -) -> Result { +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6(&opctx, sagactx, "dst_sled_uuid").await + let db_instance = + sagactx.lookup::("set_migration_ids")?; + + // The instance may have moved to another Propolis generation by this point + // (if e.g. it was stopped and restarted), so this is not guaranteed to + // succeed. + // + // REVIEW(gjc): This callee has enough information to distinguish "outdated + // generation" failures from other errors. What is the right way to handle + // that class of other errors? For example, suppose CRDB is totally + // unavailable right now, but becomes available again later. The migration + // IDs in this step need to be cleared so that the instance can try to + // migrate again. How should this step achieve that? + let _ = osagactx + .nexus() + .instance_set_migration_ids( + &opctx, + db_instance.id(), + &db_instance, + None, + ) + .await; + + Ok(()) } -async fn sim_instance_migrate( - _sagactx: NexusActionContext, -) -> Result<(), ActionError> { - todo!("Migration action not yet implemented"); +async fn sim_create_destination_state( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let mut db_instance = + sagactx.lookup::("set_migration_ids")?; + let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; + let dst_propolis_ip = sagactx.lookup::("dst_propolis_ip")?; + + // Update the runtime state to refer to the new Propolis. + let new_runtime = db::model::InstanceRuntimeState { + state: db::model::InstanceState::new(InstanceState::Creating), + sled_id: params.migrate_params.dst_sled_id, + propolis_id: dst_propolis_id, + propolis_ip: Some(ipnetwork::Ipv6Network::from(dst_propolis_ip).into()), + ..db_instance.runtime_state + }; + + db_instance.runtime_state = new_runtime; + Ok(db_instance) +} - /* +async fn sim_ensure_destination_propolis( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - - let migration_id = sagactx.lookup::("migrate_id")?; - let dst_sled_id = params.migrate_params.dst_sled_id; - let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; - let (instance_id, old_runtime) = - sagactx.lookup::<(Uuid, InstanceRuntimeState)>("migrate_instance")?; - - // Allocate an IP address the destination sled for the new Propolis server. - let propolis_addr = osagactx - .datastore() - .next_ipv6_address(&opctx, dst_sled_id) + let db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; - let runtime = InstanceRuntimeState { - sled_id: dst_sled_id, - propolis_id: dst_propolis_id, - propolis_addr: Some(std::net::SocketAddr::new( - propolis_addr.into(), - 12400, - )), - ..old_runtime - }; - - // Collect the external IPs for the instance. 
- // https://github.com/oxidecomputer/omicron/issues/1467 - // TODO-correctness: Handle Floating IPs, see - // https://github.com/oxidecomputer/omicron/issues/1334 - let (snat_ip, external_ips): (Vec<_>, Vec<_>) = osagactx - .datastore() - .instance_lookup_external_ips(&opctx, instance_id) + osagactx + .nexus() + .instance_ensure_registered(&opctx, &authz_instance, &db_instance) .await - .map_err(ActionError::action_failed)? - .into_iter() - .partition(|ip| ip.kind == IpKind::SNat); - - // Sanity checks on the number and kind of each IP address. - if external_ips.len() > crate::app::MAX_EXTERNAL_IPS_PER_INSTANCE { - return Err(ActionError::action_failed(Error::internal_error( - format!( - "Expected the number of external IPs to be limited to \ - {}, but found {}", - crate::app::MAX_EXTERNAL_IPS_PER_INSTANCE, - external_ips.len(), - ) - .as_str(), - ))); - } - let external_ips = - external_ips.into_iter().map(|model| model.ip.ip()).collect(); - if snat_ip.len() != 1 { - return Err(ActionError::action_failed(Error::internal_error( - "Expected exactly one SNAT IP address for an instance", - ))); - } - let source_nat = SourceNatConfig::from(snat_ip.into_iter().next().unwrap()); - - // The TODO items below are tracked in - // https://github.com/oxidecomputer/omicron/issues/1783 - let instance_hardware = InstanceHardware { - runtime: runtime.into(), - // TODO: populate NICs - nics: vec![], - source_nat, - external_ips, - // TODO: populate firewall rules - firewall_rules: vec![], - // TODO: populate disks - disks: vec![], - // TODO: populate cloud init bytes - cloud_init_bytes: None, - }; - let target = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Migrating, - migration_params: Some(InstanceRuntimeStateMigrateParams { - migration_id, - dst_propolis_id, - }), - }; + .map_err(ActionError::action_failed)?; - let src_propolis_id = old_runtime.propolis_id; - let src_propolis_addr = old_runtime.propolis_addr.ok_or_else(|| { - ActionError::action_failed(Error::invalid_request( - "expected source propolis-addr", - )) - })?; + Ok(()) +} - let dst_sa = osagactx - .sled_client(&dst_sled_id) +async fn sim_ensure_destination_propolis_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; - let new_runtime_state: InstanceRuntimeState = dst_sa - .instance_put( - &instance_id, - &InstanceEnsureBody { - initial: instance_hardware, - target, - migrate: Some(InstanceMigrationTargetParams { - src_propolis_addr: src_propolis_addr.to_string(), - src_propolis_id, - }), - }, - ) - .await - .map_err(omicron_common::api::external::Error::from) - .map_err(ActionError::action_failed)? - .into_inner() - .into(); - + // Ensure that the destination sled has no Propolis matching the description + // the saga previously generated. + // + // Note that this step can run before the instance was actually ensured. + // This is OK; sled agent will quietly succeed if asked to unregister an + // unregistered instance. 
osagactx - .datastore() - .instance_update_runtime(&instance_id, &new_runtime_state.into()) + .nexus() + .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) .await .map_err(ActionError::action_failed)?; Ok(()) - */ } -/// Add V2P mappings for the destination instance -// Note this must run after sled_id of the instance is set to the destination -// sled! -async fn sim_v2p_ensure( +async fn sim_instance_migrate( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); @@ -276,35 +341,370 @@ async fn sim_v2p_ensure( &sagactx, ¶ms.serialized_authn, ); - let (instance_id, _) = - sagactx.lookup::<(Uuid, InstanceRuntimeState)>("migrate_instance")?; - - // TODO-performance the instance_put in sim_instance_migrate will *start* a - // migration, but the source and destination propolis servers will perform - // it asynchronously. If this step occurs before the source instance vCPUs - // are paused, updating the mappings here will briefly "disconnect" the - // source instance in the sense that it will be able to send packets out (as - // other instance's V2P mappings will be untouched) but will not be able to - // receive any packets (other instances will send packets to the destination - // propolis' sled but the destination instance vCPUs may not have started - // yet). Until the destination propolis takes over, there will be a inbound - // network outage for the instance. + let src_runtime: InstanceRuntimeState = sagactx + .lookup::("set_migration_ids")? + .runtime() + .clone() + .into(); + let dst_db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(dst_db_instance.id()) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + // TODO-correctness: This needs to be retried if a transient error occurs to + // avoid a problem like the following: + // + // 1. The saga executor runs this step and successfully starts migration. + // 2. The executor crashes. + // 3. Migration completes. + // 4. The executor restarts, runs this step, encounters a transient error, + // and then tries to unwind the saga. + // + // Now the "ensure destination" undo step will tear down the (running) + // migration target. // - // TODO-correctness if the migration fails, there's nothing that will unwind - // this and restore the original V2P mappings + // Possibly sled agent can help with this by using state or Propolis + // generation numbers to filter out stale destruction requests. 
osagactx .nexus() - .create_instance_v2p_mappings(&opctx, instance_id) + .instance_request_state( + &opctx, + &authz_instance, + &dst_db_instance, + InstanceStateRequested::MigrationTarget( + InstanceMigrationTargetParams { + src_propolis_addr: src_runtime + .propolis_addr + .unwrap() + .to_string(), + src_propolis_id: src_runtime.propolis_id, + }, + ), + ) .await .map_err(ActionError::action_failed)?; Ok(()) } -async fn sim_cleanup_source( - _sagactx: NexusActionContext, -) -> Result<(), ActionError> { - // TODO: clean up the previous instance whether it's on the same sled or a - // different one - Ok(()) +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::{ + app::{saga::create_saga_dag, sagas::instance_create}, + Nexus, TestInterfaces as _, + }; + + use dropshot::test_util::ClientTestContext; + use http::{method::Method, StatusCode}; + use nexus_test_interface::NexusServer; + use nexus_test_utils::{ + http_testing::{AuthnMode, NexusRequest, RequestBuilder}, + resource_helpers::{create_project, object_create, populate_ip_pool}, + start_sled_agent, + }; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, + }; + use omicron_sled_agent::sim::Server; + use sled_agent_client::TestInterfaces as _; + + use super::*; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "test-project"; + const INSTANCE_NAME: &str = "test-instance"; + + async fn setup_test_project(client: &ClientTestContext) -> Uuid { + populate_ip_pool(&client, "default", None).await; + let project = create_project(&client, PROJECT_NAME).await; + project.identity.id + } + + async fn add_sleds( + cptestctx: &ControlPlaneTestContext, + num_sleds: usize, + ) -> Vec<(Uuid, Server)> { + let mut sas = Vec::with_capacity(num_sleds); + for _ in 0..num_sleds { + let sa_id = Uuid::new_v4(); + let log = + cptestctx.logctx.log.new(o!("sled_id" => sa_id.to_string())); + let addr = + cptestctx.server.get_http_server_internal_address().await; + + info!(&cptestctx.logctx.log, "Adding simulated sled"; "sled_id" => %sa_id); + let update_dir = std::path::Path::new("/should/be/unused"); + let sa = start_sled_agent( + log, + addr, + sa_id, + &update_dir, + omicron_sled_agent::sim::SimMode::Explicit, + ) + .await + .unwrap(); + sas.push((sa_id, sa)); + } + + sas + } + + async fn create_instance( + client: &ClientTestContext, + ) -> omicron_common::api::external::Instance { + let instances_url = format!("/v1/instances?project={}", PROJECT_NAME); + object_create( + client, + &instances_url, + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: INSTANCE_NAME.parse().unwrap(), + description: format!("instance {:?}", INSTANCE_NAME), + }, + ncpus: InstanceCpuCount(2), + memory: ByteCount::from_gibibytes_u32(2), + hostname: String::from(INSTANCE_NAME), + user_data: b"#cloud-config".to_vec(), + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + external_ips: vec![], + disks: vec![], + start: true, + }, + ) + .await + } + + async fn instance_simulate( + cptestctx: &ControlPlaneTestContext, + nexus: &Arc, + instance_id: &Uuid, + ) { + info!(&cptestctx.logctx.log, "Poking simulated instance"; + "instance_id" => %instance_id); + let sa = nexus.instance_sled_by_id(instance_id).await.unwrap(); + sa.instance_finish_transition(*instance_id).await; + } + + async fn fetch_db_instance( + cptestctx: &ControlPlaneTestContext, + opctx: 
&nexus_db_queries::context::OpContext, + id: Uuid, + ) -> nexus_db_model::Instance { + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let (.., db_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(id) + .fetch() + .await + .expect("test instance should be present in datastore"); + + info!(&cptestctx.logctx.log, "refetched instance from db"; + "instance" => ?db_instance); + + db_instance + } + + async fn instance_start(cptestctx: &ControlPlaneTestContext, id: &Uuid) { + let client = &cptestctx.external_client; + let instance_stop_url = format!("/v1/instances/{}/start", id); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to start instance"); + } + + async fn instance_stop(cptestctx: &ControlPlaneTestContext, id: &Uuid) { + let client = &cptestctx.external_client; + let instance_stop_url = format!("/v1/instances/{}/stop", id); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to stop instance"); + } + + fn select_first_alternate_sled( + db_instance: &db::model::Instance, + other_sleds: &[(Uuid, Server)], + ) -> Uuid { + let default_sled_uuid = + Uuid::parse_str(nexus_test_utils::SLED_AGENT_UUID).unwrap(); + if other_sleds.is_empty() { + panic!("need at least one other sled"); + } + + if other_sleds.iter().any(|sled| sled.0 == default_sled_uuid) { + panic!("default test sled agent was in other_sleds"); + } + + if db_instance.runtime().sled_id == default_sled_uuid { + other_sleds[0].0 + } else { + default_sled_uuid + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let other_sleds = add_sleds(cptestctx, 1).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let _project_id = setup_test_project(&client).await; + + let opctx = instance_create::test::test_opctx(cptestctx); + let instance = create_instance(client).await; + + // Poke the instance to get it into the Running state. + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + + let db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + let old_runtime = db_instance.runtime().clone(); + let dst_sled_id = + select_first_alternate_sled(&db_instance, &other_sleds); + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + instance: db_instance, + migrate_params: params::InstanceMigrate { dst_sled_id }, + }; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Migration saga should succeed"); + + // Merely running the migration saga (without simulating any completion + // steps in the simulated agents) should not change where the instance + // is running. 
+ let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + assert_eq!(new_db_instance.runtime().sled_id, old_runtime.sled_id); + assert_eq!( + new_db_instance.runtime().propolis_id, + old_runtime.propolis_id + ); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let other_sleds = add_sleds(cptestctx, 1).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let _project_id = setup_test_project(&client).await; + + let opctx = instance_create::test::test_opctx(cptestctx); + let instance = create_instance(client).await; + + // Poke the instance to get it into the Running state. + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + + let db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + let old_runtime = db_instance.runtime().clone(); + let dst_sled_id = + select_first_alternate_sled(&db_instance, &other_sleds); + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + instance: db_instance, + migrate_params: params::InstanceMigrate { dst_sled_id }, + }; + + let dag = create_saga_dag::(params).unwrap(); + + // Some of the nodes in the DAG are expected to be infallible to allow + // certain undo steps to be reordered. Specify these nodes here, then + // verify that every infallible node actually appears in the DAG (to try + // to detect drift between the saga specification and the test). + let infallible_nodes = vec![ + "dst_runtime_state", + "unused_ensure_destination_undo", + "unused_set_ids_undo", + ]; + let dag_node_names: Vec = + dag.get_nodes().map(|n| n.name().as_ref().to_owned()).collect(); + assert!(infallible_nodes + .iter() + .all(|n| dag_node_names.iter().any(|d| d == n))); + + for node in dag.get_nodes() { + if infallible_nodes.contains(&node.name().as_ref()) { + info!(log, "Skipping infallible node"; + "node_name" => node.name().as_ref()); + continue; + } + + info!( + log, + "Creating new saga which will fail at index {:?}", node.index(); + "node_name" => node.name().as_ref(), + "label" => node.label(), + ); + + let runnable_saga = + nexus.create_runnable_saga(dag.clone()).await.unwrap(); + nexus + .sec() + .saga_inject_error(runnable_saga.id(), node.index()) + .await + .unwrap(); + nexus + .run_saga(runnable_saga) + .await + .expect_err("Saga should have failed"); + + // Unwinding at any step should clear the migration IDs from the + // instance record and leave the instance's location otherwise + // untouched. + let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id) + .await; + + assert!(new_db_instance.runtime().migration_id.is_none()); + assert!(new_db_instance.runtime().dst_propolis_id.is_none()); + assert_eq!(new_db_instance.runtime().sled_id, old_runtime.sled_id); + assert_eq!( + new_db_instance.runtime().propolis_id, + old_runtime.propolis_id + ); + + // Ensure the instance can stop. This helps to check that destroying + // the migration destination (if one was ensured) doesn't advance + // the Propolis ID generation in a way that prevents the source from + // issuing further state updates. 
+ instance_stop(cptestctx, &instance.identity.id).await; + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id) + .await; + assert_eq!( + new_db_instance.runtime().state.0, + InstanceState::Stopped + ); + + // Restart the instance for the next iteration. + instance_start(cptestctx, &instance.identity.id).await; + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + } + } } From 2dfe1b736d5ed78ea05c537f80f46ebad8027854 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Mon, 17 Apr 2023 23:35:16 +0000 Subject: [PATCH 02/13] reserve resources on migration destination sleds Add the ability to constrain sled selection to a specific set of desired target sleds. This uses a builder that can be extended in the future for other purposes (e.g. switching to "don't schedule to this sled," "schedule only to sleds having such-and-such property," and so forth). Use this to add a resource-reservation step to the migration saga. Also fix a simulated sled agent bug that made simulated unregistration not idempotent. --- nexus/db-model/src/sled.rs | 38 +++++++++++++++++ nexus/db-queries/src/db/datastore/sled.rs | 22 ++++++++-- nexus/src/app/sagas/instance_create.rs | 1 + nexus/src/app/sagas/instance_migrate.rs | 52 +++++++++++++++++++++++ nexus/src/app/sled.rs | 2 + sled-agent/src/sim/sled_agent.rs | 10 ++++- 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 68387625431..c6666a5b4e5 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -143,3 +143,41 @@ impl DatastoreCollectionConfig for Sled { type CollectionTimeDeletedColumn = sled::dsl::time_deleted; type CollectionIdColumn = service::dsl::sled_id; } + +/// A set of constraints that can be placed on operations that select a sled. 
+#[derive(Debug)] +pub struct SledReservationConstraints { + must_select_from: Vec, +} + +impl SledReservationConstraints { + pub fn none() -> Self { + Self { must_select_from: Vec::new() } + } + + pub fn must_select_from(&self) -> &[Uuid] { + &self.must_select_from + } +} + +#[derive(Debug)] +pub struct SledReservationConstraintBuilder { + constraints: SledReservationConstraints, +} + +impl SledReservationConstraintBuilder { + pub fn new() -> Self { + SledReservationConstraintBuilder { + constraints: SledReservationConstraints::none(), + } + } + + pub fn must_select_from(mut self, sled_ids: &[Uuid]) -> Self { + self.constraints.must_select_from.extend(sled_ids); + self + } + + pub fn build(self) -> SledReservationConstraints { + self.constraints + } +} diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 94a1334f19c..59b6569b3cb 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -76,6 +76,7 @@ impl DataStore { resource_id: Uuid, resource_kind: db::model::SledResourceKind, resources: db::model::Resources, + constraints: db::model::SledReservationConstraints, ) -> CreateResult { #[derive(Debug)] enum SledReservationError { @@ -120,10 +121,10 @@ impl DataStore { resource_dsl::rss_ram::NAME )) + resources.rss_ram) .le(sled_dsl::usable_physical_ram); - sql_function!(fn random() -> diesel::sql_types::Float); - let sled_targets = sled_dsl::sled - // LEFT JOIN so we can observe sleds with no - // currently-allocated resources as potential targets + + // Generate a query describing all of the sleds that have space + // for this reservation. + let mut sled_targets = sled_dsl::sled .left_join( resource_dsl::sled_resource .on(resource_dsl::sled_id.eq(sled_dsl::id)), @@ -135,6 +136,19 @@ impl DataStore { ) .filter(sled_dsl::time_deleted.is_null()) .select(sled_dsl::id) + .into_boxed(); + + // Further constrain the sled IDs according to any caller- + // supplied constraints. + if !constraints.must_select_from().is_empty() { + sled_targets = sled_targets.filter( + sled_dsl::id + .eq_any(constraints.must_select_from().to_vec()), + ); + } + + sql_function!(fn random() -> diesel::sql_types::Float); + let sled_targets = sled_targets .order(random()) .limit(1) .get_results_async::(&conn) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index f438cd55055..7481564ded3 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -615,6 +615,7 @@ async fn sic_alloc_server( propolis_id, db::model::SledResourceKind::Instance, resources, + db::model::SledReservationConstraints::none(), ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index c15dee456cc..d17c6d15f0c 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -41,6 +41,12 @@ pub struct Params { // migration is over. declare_saga_actions! 
{ instance_migrate; + + RESERVE_RESOURCES -> "server_id" { + + sim_reserve_sled_resources + - sim_release_sled_resources + } + ALLOCATE_PROPOLIS_IP -> "dst_propolis_ip" { + sim_allocate_propolis_ip } @@ -158,6 +164,7 @@ impl NexusSaga for SagaInstanceMigrate { ACTION_GENERATE_ID.as_ref(), )); + builder.append(reserve_resources_action()); builder.append(allocate_propolis_ip_action()); builder.append(set_migration_ids_action()); builder.append(create_destination_state_action()); @@ -175,6 +182,51 @@ async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { Ok(()) } +/// Reserves resources for the destination on the specified target sled. +async fn sim_reserve_sled_resources( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + // N.B. This assumes that the instance's shape (CPU/memory allotment) is + // immutable despite being in the instance's "runtime" state. + let resources = db::model::Resources::new( + params.instance.runtime_state.ncpus.0 .0.into(), + params.instance.runtime_state.memory.into(), + // TODO(#2804): Properly specify reservoir size. + omicron_common::api::external::ByteCount::from(0).into(), + ); + + // Add a constraint that the only allowed sled is the one specified in the + // parameters. + let constraints = db::model::SledReservationConstraintBuilder::new() + .must_select_from(&[params.migrate_params.dst_sled_id]) + .build(); + + let propolis_id = sagactx.lookup::("dst_propolis_id")?; + let resource = osagactx + .nexus() + .reserve_on_random_sled( + propolis_id, + db::model::SledResourceKind::Instance, + resources, + constraints, + ) + .await + .map_err(ActionError::action_failed)?; + Ok(resource.sled_id) +} + +async fn sim_release_sled_resources( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let propolis_id = sagactx.lookup::("dst_propolis_id")?; + osagactx.nexus().delete_sled_reservation(propolis_id).await?; + Ok(()) +} + /// Allocates an IP address on the destination sled for the Propolis server. async fn sim_allocate_propolis_ip( sagactx: NexusActionContext, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 3ba687fc912..8188a118c7c 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -113,6 +113,7 @@ impl super::Nexus { resource_id: Uuid, resource_kind: db::model::SledResourceKind, resources: db::model::Resources, + constraints: db::model::SledReservationConstraints, ) -> Result { self.db_datastore .sled_reservation_create( @@ -120,6 +121,7 @@ impl super::Nexus { resource_id, resource_kind, resources, + constraints, ) .await } diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index a05f68e2fdf..b8b50c0122e 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -325,7 +325,15 @@ impl SledAgent { instance_id: Uuid, ) -> Result { let instance = - self.instances.sim_get_cloned_object(&instance_id).await?; + match self.instances.sim_get_cloned_object(&instance_id).await { + Ok(instance) => instance, + Err(Error::ObjectNotFound { .. 
}) => { + return Ok(InstanceUnregisterResponse { + updated_runtime: None, + }) + } + Err(e) => return Err(e), + }; self.detach_disks_from_instance(instance_id).await?; Ok(InstanceUnregisterResponse { From 4d2f664730275d52dcb4cf6bfd3467cfaa768537 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 20 Apr 2023 18:31:35 +0000 Subject: [PATCH 03/13] clippy i love you but you're bringing me down --- nexus/src/app/sagas/instance_migrate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index d17c6d15f0c..49011c4fa78 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -193,7 +193,7 @@ async fn sim_reserve_sled_resources( // immutable despite being in the instance's "runtime" state. let resources = db::model::Resources::new( params.instance.runtime_state.ncpus.0 .0.into(), - params.instance.runtime_state.memory.into(), + params.instance.runtime_state.memory, // TODO(#2804): Properly specify reservoir size. omicron_common::api::external::ByteCount::from(0).into(), ); From b0ef001736cb3b4320f49b55109ec3495148cff0 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 21 Apr 2023 15:36:08 +0000 Subject: [PATCH 04/13] split setting/clearing migration IDs into two operations --- nexus/src/app/instance.rs | 68 +++++++++++++++++-------- nexus/src/app/sagas/instance_migrate.rs | 55 ++++++++++---------- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index afac19c6e36..8c974da6f78 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -306,12 +306,10 @@ impl super::Nexus { } /// Attempts to set the migration IDs for the supplied instance via the - /// instance's current sled. + /// sled specified in `db_instance`. /// /// The caller is assumed to have fetched the current instance record from - /// the DB and verified either that the record has no migration IDs (in - /// the case where the caller is setting IDs) or that it has the expected - /// IDs (if the caller is clearing them). + /// the DB and verified that the record has no migration IDs. /// /// Returns `Ok` and the updated instance record if this call successfully /// updated the instance with the sled agent and that update was @@ -320,27 +318,17 @@ impl super::Nexus { /// /// # Panics /// - /// Raises an assertion failure if `migration_params` is `Some` and the - /// supplied `db_instance` already has a migration ID or destination - /// Propolis ID set. - /// - /// Raises an assertion failure if `migration_params` is `None` and the - /// supplied `db_instance`'s migration ID or destination Propolis ID is not - /// set. + /// Asserts that `db_instance` has no migration ID or destination Propolis + /// ID set. 
pub async fn instance_set_migration_ids( &self, opctx: &OpContext, instance_id: Uuid, db_instance: &db::model::Instance, - migration_params: Option, + migration_params: InstanceMigrationSourceParams, ) -> UpdateResult { - if migration_params.is_some() { - assert!(db_instance.runtime().migration_id.is_none()); - assert!(db_instance.runtime().dst_propolis_id.is_none()); - } else { - assert!(db_instance.runtime().migration_id.is_some()); - assert!(db_instance.runtime().dst_propolis_id.is_some()); - } + assert!(db_instance.runtime().migration_id.is_none()); + assert!(db_instance.runtime().dst_propolis_id.is_none()); let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) @@ -354,7 +342,7 @@ impl super::Nexus { &instance_id, &InstancePutMigrationIdsBody { old_runtime: db_instance.runtime().clone().into(), - migration_params, + migration_params: Some(migration_params), }, ) .await @@ -380,6 +368,46 @@ impl super::Nexus { } } + /// Attempts to clear the migration IDs for the supplied instance via the + /// sled specified in `db_instance`. + /// + /// The supplied instance record must contain valid migration IDs. + /// + /// Returns `Ok` if sled agent accepted the request to clear migration IDs + /// and the resulting attempt to write instance runtime state back to CRDB + /// succeeded. This routine returns `Ok` even if the update was not actually + /// applied (due to a separate generation number change). + /// + /// # Panics + /// + /// Asserts that `db_instance` has a migration ID and destination Propolis + /// ID set. + pub async fn instance_clear_migration_ids( + &self, + instance_id: Uuid, + db_instance: &db::model::Instance, + ) -> Result<(), Error> { + assert!(db_instance.runtime().migration_id.is_some()); + assert!(db_instance.runtime().dst_propolis_id.is_some()); + + let sa = self.instance_sled(&db_instance).await?; + let instance_put_result = sa + .instance_put_migration_ids( + &instance_id, + &InstancePutMigrationIdsBody { + old_runtime: db_instance.runtime().clone().into(), + migration_params: None, + }, + ) + .await + .map(|res| Some(res.into_inner())); + + self.handle_instance_put_result(&db_instance, instance_put_result) + .await?; + + Ok(()) + } + /// Reboot the specified instance. pub async fn instance_reboot( &self, diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 49011c4fa78..2bab7ecbf37 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -16,6 +16,7 @@ use sled_agent_client::types::{ InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, }; +use slog::warn; use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; @@ -136,8 +137,6 @@ declare_saga_actions! 
{ } } -// instance migrate saga: definition - #[derive(Debug)] pub struct SagaInstanceMigrate; impl NexusSaga for SagaInstanceMigrate { @@ -258,10 +257,7 @@ async fn sim_set_migration_ids( &opctx, db_instance.id(), db_instance, - Some(InstanceMigrationSourceParams { - dst_propolis_id, - migration_id, - }), + InstanceMigrationSourceParams { dst_propolis_id, migration_id }, ) .await .map_err(ActionError::action_failed)?; @@ -273,33 +269,36 @@ async fn sim_clear_migration_ids( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); let db_instance = sagactx.lookup::("set_migration_ids")?; - // The instance may have moved to another Propolis generation by this point - // (if e.g. it was stopped and restarted), so this is not guaranteed to - // succeed. + // If this call to clear migration IDs failed, one of the following things + // must have happened: + // + // 1. Sled agent's view of the instance changed in a way that invalidated + // this request. Sled agent is expected to push its own state updates on + // these transitions and properly manage migration IDs when they occur. + // 2. Nexus failed to contact sled agent entirely. + // 3. Nexus talked to sled agent but then failed to reach CRDB when trying + // to write back the instance state with the cleared IDs. (Note that + // when clearing migration IDs, failing to update CRDB because the + // current generation number is too far advanced is not actually treated + // as a failure.) // - // REVIEW(gjc): This callee has enough information to distinguish "outdated - // generation" failures from other errors. What is the right way to handle - // that class of other errors? For example, suppose CRDB is totally - // unavailable right now, but becomes available again later. The migration - // IDs in this step need to be cleared so that the instance can try to - // migrate again. How should this step achieve that? - let _ = osagactx + // In case 1, the instance should already be updated properly (or will be + // updated properly soon), and in cases 2 and 3 there's nothing that can + // reliably be done (the error may not be transient), so just swallow all + // errors here (but warn that they occurred). 
+ if let Err(e) = osagactx .nexus() - .instance_set_migration_ids( - &opctx, - db_instance.id(), - &db_instance, - None, - ) - .await; + .instance_clear_migration_ids(db_instance.id(), &db_instance) + .await + { + warn!(osagactx.log(), + "Error clearing migration IDs during rollback"; + "instance_id" => %db_instance.id(), + "error" => ?e); + } Ok(()) } From e988665e552ddcc6535269aa59d9fbf4e6384fbb Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 20:55:24 +0000 Subject: [PATCH 05/13] eliminate undo shenanigans & improve comments --- nexus/src/app/instance.rs | 18 ++- nexus/src/app/sagas/instance_create.rs | 8 +- nexus/src/app/sagas/instance_migrate.rs | 165 ++++++++++++------------ 3 files changed, 108 insertions(+), 83 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 8c974da6f78..83ea3b5f0e7 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -53,6 +53,11 @@ use uuid::Uuid; const MAX_KEYS_PER_INSTANCE: u32 = 8; +pub(crate) enum WriteBackUpdatedInstance { + WriteBack, + Drop, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -488,6 +493,7 @@ impl super::Nexus { opctx: &OpContext, authz_instance: &authz::Instance, db_instance: &db::model::Instance, + write_back: WriteBackUpdatedInstance, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; let sa = self.instance_sled(&db_instance).await?; @@ -495,7 +501,17 @@ impl super::Nexus { .instance_unregister(&db_instance.id()) .await .map(|res| res.into_inner().updated_runtime); - self.handle_instance_put_result(db_instance, result).await.map(|_| ()) + + match write_back { + WriteBackUpdatedInstance::WriteBack => self + .handle_instance_put_result(db_instance, result) + .await + .map(|_| ()), + WriteBackUpdatedInstance::Drop => { + result?; + Ok(()) + } + } } /// Returns the SledAgentClient for the host where this Instance is running. diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 7481564ded3..a3b8a4eae14 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; +use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::app::sagas::disk_create::{self, SagaDiskCreate}; use crate::app::{ @@ -1401,7 +1402,12 @@ async fn sic_instance_ensure_registered_undo( osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) + .instance_ensure_unregistered( + &opctx, + &authz_instance, + &db_instance, + WriteBackUpdatedInstance::WriteBack, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 2bab7ecbf37..3ab4ca24f2f 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,6 +4,7 @@ use super::instance_create::allocate_sled_ipv6; use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; +use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::db::{identity::Resource, lookup::LookupPath}; use crate::external_api::params; @@ -40,6 +41,19 @@ pub struct Params { // both the saga and the work that happen after it have to specify carefully // which of the two participating VMMs is actually running the VM once the // migration is over. +// +// At the start of the saga, the participating sleds have the following +// information about the instance's location. (Instance runtime states include +// other information, like per-Propolis states, that's not relevant here and +// is ignored.) +// +// | Item | Source | Dest | CRDB | +// |--------------|--------|------|------| +// | Propolis gen | G | None | G | +// | Propolis ID | P1 | None | P1 | +// | Sled ID | S1 | None | S1 | +// | Dst Prop. ID | None | None | None | +// | Migration ID | None | None | None | declare_saga_actions! { instance_migrate; @@ -59,72 +73,85 @@ declare_saga_actions! { // that if multiple concurrent sagas try to set migration IDs at the same // Propolis generation, then only one will win and get to proceed through // the saga. + // + // Once this update completes, the sleds have the following states, and the + // source sled's state will be stored in CRDB: + // + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | None | G+1 | + // | Propolis ID | P1 | None | P1 | + // | Sled ID | S1 | None | S1 | + // | Dst Prop. ID | P2 | None | P2 | + // | Migration ID | M | None | M | + // + // Unwinding this step clears the migration IDs using the source sled: + // + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+2 | None | G+2 | + // | Propolis ID | P1 | None | P1 | + // | Sled ID | S1 | None | S1 | + // | Dst Prop. ID | None | None | None | + // | Migration ID | None | None | None | SET_MIGRATION_IDS -> "set_migration_ids" { + sim_set_migration_ids + - sim_clear_migration_ids } // The instance state on the destination looks like the instance state on // the source, except that it bears all of the destination's "location" // information--its Propolis ID, sled ID, and Propolis IP--with the same // Propolis generation number as the source set in the previous step. - // Consider an example: - // - // - Before the saga begins, the instance has Propolis generation 10 and no - // migration IDs. - // - The previous step sets Propolis generation 11 and sets the instance's - // migration ID. 
- // - This step synthesizes a record with Propolis generation 11 and the same - // migration IDs, but with the Propolis identifiers set to the - // destination's IDs. - // - // When the migration resolves one way or the other, one or both of the - // participating sleds will update CRDB with Propolis generation 12 and the - // Propolis ID set to whichever Propolis ended up running the instance. - // - // This step must be infallible and does not have an undo action for reasons - // described below. CREATE_DESTINATION_STATE -> "dst_runtime_state" { + sim_create_destination_state } - // The next three steps are organized so that if the saga unwinds after the - // destination Propolis is created, the "set migration IDs" step is undone - // before the "ensure destination Propolis" step. Consider again the example - // above, but this time, let the saga unwind after the destination Propolis - // starts. If sled agent changes an instance's Propolis generation when a - // Propolis is destroyed, and the undo steps are specified in the - // traditional way, the following can occur: + // Instantiate the new Propolis on the destination sled. This uses the + // record created in the previous step, so the sleds end up with the + // following state: // - // - After the steps above, the instance is registered with the source and - // target sleds, both at Propolis generation 11. - // - Undoing the "ensure destination Propolis" step sets the Propolis - // generation to 12, but with the location information from the target! - // - When the "set migration IDs" step is undone, the source will publish - // Propolis generation 12, but this will be dropped because the generation - // was already incremented. + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | G+1 | G+1 | + // | Propolis ID | P1 | P2 | P1 | + // | Sled ID | S1 | S2 | S1 | + // | Dst Prop. ID | P2 | P2 | P2 | + // | Migration ID | M | M | M | // - // Now CRDB is pointing at the wrong Propolis, and instance state updates - // from the former migration source will be ignored. Oops. + // Note that, because the source and destination have the same Propolis + // generation, the destination's record will not be written back to CRDB. // - // Instead of asking sled agent to reason about what steps in a migration - // have and haven't been undertaken, the following steps are arranged so - // that the update that clears migration IDs happens before the one that - // destroys the destination Propolis, which unwinds the saga to the expected - // state. + // Once the migration completes (whether successfully or not), the sled that + // ends up with the instance will publish an update that clears the + // generation numbers and (on success) updates the Propolis ID pointer. If + // migration succeeds, this produces the following: // - // Note that this implies that `sim_ensure_destination_propolis_undo` must - // be able to succeed even if the "ensure destination Propolis" step was - // never reached. - ENSURE_DESTINATION_PROPOLIS_UNDO -> "unused_ensure_destination_undo" { - + sim_noop - - sim_ensure_destination_propolis_undo - } - SET_MIGRATION_IDS_UNDO -> "unused_set_ids_undo" { - + sim_noop - - sim_clear_migration_ids - } + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | G+2 | G+2 | + // | Propolis ID | P1 | P2 | P2 | + // | Sled ID | S1 | S2 | S2 | + // | Dst Prop. 
ID | P2 | None | None | + // | Migration ID | M | None | None | + // + // The undo step for this node requires special care. Unregistering a + // Propolis from a sled typically increments its Propolis generation number. + // (This is so that Nexus can rudely terminate a Propolis via unregistration + // and end up with the state it would have gotten if the Propolis had shut + // down normally.) If this step unwinds, this will produce the same state + // on the destination as in the previous table, even though no migration + // has started yet. If that update gets written back, then it will write + // Propolis generation G+2 to CRDB (as in the table above) with the wrong + // Propolis ID, and the subsequent request to clear migration IDs will not + // fix it (because the source sled's generation number is still at G+1 and + // will move to G+2, which is not recent enough to push another update). + // + // To avoid this problem, this undo step takes special care not to write + // back the updated record the destination sled returns to it. ENSURE_DESTINATION_PROPOLIS -> "ensure_destination" { + sim_ensure_destination_propolis + - sim_ensure_destination_propolis_undo } // Note that this step only requests migration by sending a "migrate in" @@ -167,8 +194,6 @@ impl NexusSaga for SagaInstanceMigrate { builder.append(allocate_propolis_ip_action()); builder.append(set_migration_ids_action()); builder.append(create_destination_state_action()); - builder.append(ensure_destination_propolis_undo_action()); - builder.append(set_migration_ids_undo_action()); builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); @@ -176,11 +201,6 @@ impl NexusSaga for SagaInstanceMigrate { } } -/// A no-op forward action. -async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - /// Reserves resources for the destination on the specified target sled. async fn sim_reserve_sled_resources( sagactx: NexusActionContext, @@ -371,12 +391,17 @@ async fn sim_ensure_destination_propolis_undo( // Ensure that the destination sled has no Propolis matching the description // the saga previously generated. // - // Note that this step can run before the instance was actually ensured. - // This is OK; sled agent will quietly succeed if asked to unregister an - // unregistered instance. + // The updated instance record from this undo action must be dropped so + // that a later undo action (clearing migration IDs) can update the record + // instead. See the saga definition for more details. osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) + .instance_ensure_unregistered( + &opctx, + &authz_instance, + &db_instance, + WriteBackUpdatedInstance::Drop, + ) .await .map_err(ActionError::action_failed)?; @@ -682,29 +707,7 @@ mod tests { }; let dag = create_saga_dag::(params).unwrap(); - - // Some of the nodes in the DAG are expected to be infallible to allow - // certain undo steps to be reordered. Specify these nodes here, then - // verify that every infallible node actually appears in the DAG (to try - // to detect drift between the saga specification and the test). 
- let infallible_nodes = vec![ - "dst_runtime_state", - "unused_ensure_destination_undo", - "unused_set_ids_undo", - ]; - let dag_node_names: Vec = - dag.get_nodes().map(|n| n.name().as_ref().to_owned()).collect(); - assert!(infallible_nodes - .iter() - .all(|n| dag_node_names.iter().any(|d| d == n))); - for node in dag.get_nodes() { - if infallible_nodes.contains(&node.name().as_ref()) { - info!(log, "Skipping infallible node"; - "node_name" => node.name().as_ref()); - continue; - } - info!( log, "Creating new saga which will fail at index {:?}", node.index(); From 444b05cec355b5217ca1b23bcb83b0b68fe99cb2 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 21:06:05 +0000 Subject: [PATCH 06/13] switch error type & improve message --- nexus/src/app/instance.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 83ea3b5f0e7..483664bdadc 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -367,8 +367,9 @@ impl super::Nexus { .instance_refetch(opctx, &authz_instance) .await?) } else { - Err(Error::internal_error( - "instance's Propolis generation is out of date", + Err(Error::unavail( + "instance is already migrating, or underwent an operation that \ + prevented this migration from proceeding" )) } } From ce760e91b0357e3f125e9e7cce52448f9de6b3fe Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 21:21:01 +0000 Subject: [PATCH 07/13] clarify semantics of must_select_from --- nexus/db-model/src/sled.rs | 17 +++++++++++++++-- nexus/db-queries/src/db/datastore/sled.rs | 8 +++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index c6666a5b4e5..22d1b1ddd35 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -151,12 +151,21 @@ pub struct SledReservationConstraints { } impl SledReservationConstraints { + /// Creates a constraint set with no constraints in it. pub fn none() -> Self { Self { must_select_from: Vec::new() } } - pub fn must_select_from(&self) -> &[Uuid] { - &self.must_select_from + /// If the constraints include a set of sleds that the caller must select + /// from, returns `Some` and a slice containing the members of that set. + /// + /// If no "must select from these" constraint exists, returns None. + pub fn must_select_from(&self) -> Option<&[Uuid]> { + if self.must_select_from.is_empty() { + None + } else { + Some(&self.must_select_from) + } } } @@ -172,11 +181,15 @@ impl SledReservationConstraintBuilder { } } + /// Adds a "must select from the following sled IDs" constraint. If such a + /// constraint already exists, appends the supplied sled IDs to the "must + /// select from" list. pub fn must_select_from(mut self, sled_ids: &[Uuid]) -> Self { self.constraints.must_select_from.extend(sled_ids); self } + /// Builds a set of constraints from this builder's current state. pub fn build(self) -> SledReservationConstraints { self.constraints } diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 59b6569b3cb..9ab3e026d18 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -140,11 +140,9 @@ impl DataStore { // Further constrain the sled IDs according to any caller- // supplied constraints. 
- if !constraints.must_select_from().is_empty() { - sled_targets = sled_targets.filter( - sled_dsl::id - .eq_any(constraints.must_select_from().to_vec()), - ); + if let Some(must_select_from) = constraints.must_select_from() { + sled_targets = sled_targets + .filter(sled_dsl::id.eq_any(must_select_from.to_vec())); } sql_function!(fn random() -> diesel::sql_types::Float); From 77863ca3efd615eb718ebed7d301569ce4b8bb04 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 27 Apr 2023 23:57:00 +0000 Subject: [PATCH 08/13] use try semantics instead of unwrapping --- nexus/src/app/instance.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 483664bdadc..d37a27c7294 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -338,8 +338,7 @@ impl super::Nexus { let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) .lookup_for(authz::Action::Modify) - .await - .unwrap(); + .await?; let sa = self.instance_sled(&db_instance).await?; let instance_put_result = sa From 10cadec2eb11e4afb01dc18e547918c06d193fda Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 18:44:57 +0000 Subject: [PATCH 09/13] add & make use of 409 Conflict error type --- common/src/api/external/error.rs | 29 ++++++++++++++++++++++++++++- nexus/src/app/instance.rs | 2 +- nexus/src/app/session.rs | 3 ++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index dc1459f46aa..e508b7ecba2 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -61,6 +61,9 @@ pub enum Error { #[error("Type version mismatch! {internal_message}")] TypeVersionMismatch { internal_message: String }, + + #[error("Conflict: {internal_message}")] + Conflict { internal_message: String }, } /// Indicates how an object was looked up (for an `ObjectNotFound` error) @@ -118,7 +121,8 @@ impl Error { | Error::Forbidden | Error::MethodNotAllowed { .. } | Error::InternalError { .. } - | Error::TypeVersionMismatch { .. } => false, + | Error::TypeVersionMismatch { .. } + | Error::Conflict { .. } => false, } } @@ -174,6 +178,18 @@ impl Error { Error::TypeVersionMismatch { internal_message: message.to_owned() } } + /// Generates an [`Error::Conflict`] with a specific message. + /// + /// This is used in cases where a request cannot proceed because the target + /// resource is currently in a state that's incompatible with that request, + /// but where the request might succeed if it is retried or modified and + /// retried. The internal message should provide more information about the + /// source of the conflict and possible actions the caller can take to + /// resolve it (if any). 
+ pub fn conflict(message: &str) -> Error { + Error::Conflict { internal_message: message.to_owned() } + } + /// Given an [`Error`] with an internal message, return the same error with /// `context` prepended to it to provide more context /// @@ -223,6 +239,9 @@ impl Error { ), } } + Error::Conflict { internal_message } => Error::Conflict { + internal_message: format!("{}: {}", context, internal_message), + }, } } } @@ -317,6 +336,14 @@ impl From for HttpError { Error::TypeVersionMismatch { internal_message } => { HttpError::for_internal_error(internal_message) } + + Error::Conflict { internal_message } => { + HttpError::for_client_error( + Some(String::from("Conflict")), + http::StatusCode::CONFLICT, + internal_message, + ) + } } } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index d37a27c7294..ac8c912c7e3 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -366,7 +366,7 @@ impl super::Nexus { .instance_refetch(opctx, &authz_instance) .await?) } else { - Err(Error::unavail( + Err(Error::conflict( "instance is already migrating, or underwent an operation that \ prevented this migration from proceeding" )) diff --git a/nexus/src/app/session.rs b/nexus/src/app/session.rs index 00de241321d..04a9170c7d8 100644 --- a/nexus/src/app/session.rs +++ b/nexus/src/app/session.rs @@ -155,7 +155,8 @@ impl super::Nexus { | Error::InternalError { .. } | Error::ServiceUnavailable { .. } | Error::MethodNotAllowed { .. } - | Error::TypeVersionMismatch { .. } => { + | Error::TypeVersionMismatch { .. } + | Error::Conflict { .. } => { Reason::UnknownError { source: error } } })?; From 4c55cfc68b0ffa1a76d6e8347ce8bd30a3ef90df Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 20:26:35 +0000 Subject: [PATCH 10/13] improve saga comments --- nexus/src/app/sagas/instance_migrate.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 3ab4ca24f2f..eeccfd0dac9 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -42,10 +42,12 @@ pub struct Params { // which of the two participating VMMs is actually running the VM once the // migration is over. // -// At the start of the saga, the participating sleds have the following -// information about the instance's location. (Instance runtime states include -// other information, like per-Propolis states, that's not relevant here and -// is ignored.) +// Only active instances can migrate. While an instance is active on some sled +// (and isn't migrating), that sled's sled agent maintains the instance's +// runtime state and sends updated state to Nexus when it changes. At the start +// of this saga, the participating sled agents and CRDB have the following +// runtime states (note that some fields, like the actual Propolis state, are +// not relevant to migration and are omitted here): // // | Item | Source | Dest | CRDB | // |--------------|--------|------|------| @@ -66,11 +68,15 @@ declare_saga_actions! { + sim_allocate_propolis_ip } - // This step sets the migration ID and destination Propolis ID fields in - // CRDB by asking the instance's current sled to paste them into its runtime - // state. 
Sled agent provides the synchronization here: while this operation - // is idempotent for any single transition between IDs, sled agent ensures - // that if multiple concurrent sagas try to set migration IDs at the same + // This step sets the instance's migration ID and destination Propolis ID + // fields. Because the instance is active, its current sled agent maintains + // the most recent runtime state, so to update it, the saga calls into the + // sled and asks it to produce an updated record with the appropriate + // migration IDs and a new generation number. + // + // Sled agent provides the synchronization here: while this operation is + // idempotent for any single transition between IDs, sled agent ensures that + // if multiple concurrent sagas try to set migration IDs at the same // Propolis generation, then only one will win and get to proceed through // the saga. // From 32ac89df208d130ec4d12748d85537237699008a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 21:27:39 +0000 Subject: [PATCH 11/13] fix comment in sim_clear_migration_ids --- nexus/src/app/sagas/instance_migrate.rs | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index eeccfd0dac9..b4e87d60a18 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -298,23 +298,15 @@ async fn sim_clear_migration_ids( let db_instance = sagactx.lookup::("set_migration_ids")?; - // If this call to clear migration IDs failed, one of the following things - // must have happened: + // Because the migration never actually started (and thus didn't finish), + // the instance should be at the same Propolis generation as it was when + // migration IDs were set, which means sled agent should accept a request to + // clear them. The only exception is if the instance stopped, but that also + // clears its migration IDs; in that case there is no work to do here. // - // 1. Sled agent's view of the instance changed in a way that invalidated - // this request. Sled agent is expected to push its own state updates on - // these transitions and properly manage migration IDs when they occur. - // 2. Nexus failed to contact sled agent entirely. - // 3. Nexus talked to sled agent but then failed to reach CRDB when trying - // to write back the instance state with the cleared IDs. (Note that - // when clearing migration IDs, failing to update CRDB because the - // current generation number is too far advanced is not actually treated - // as a failure.) - // - // In case 1, the instance should already be updated properly (or will be - // updated properly soon), and in cases 2 and 3 there's nothing that can - // reliably be done (the error may not be transient), so just swallow all - // errors here (but warn that they occurred). + // Other error cases (e.g. an unreachable sled agent) are handled by this + // callee using the standard discipline for handling failed requests to + // change an instance's state. Warn for these errors. 
if let Err(e) = osagactx .nexus() .instance_clear_migration_ids(db_instance.id(), &db_instance) From 15b8377cce960b05f070ba078f0f9d296436042e Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 23:16:28 +0000 Subject: [PATCH 12/13] further correct comment --- nexus/src/app/sagas/instance_migrate.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index b4e87d60a18..b6df0554546 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -304,9 +304,12 @@ async fn sim_clear_migration_ids( // clear them. The only exception is if the instance stopped, but that also // clears its migration IDs; in that case there is no work to do here. // - // Other error cases (e.g. an unreachable sled agent) are handled by this - // callee using the standard discipline for handling failed requests to - // change an instance's state. Warn for these errors. + // Other failures to clear migration IDs are handled like any other failure + // to update an instance's state: the callee attempts to mark the instance + // as failed; if the failure occurred because the instance changed state + // such that sled agent could not fulfill the request, the callee will + // produce a stale generation number and will not actually mark the instance + // as failed. if let Err(e) = osagactx .nexus() .instance_clear_migration_ids(db_instance.id(), &db_instance) From 2b5106a9b5ba6eb60f3534170a4be21bd6e23205 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 23:16:48 +0000 Subject: [PATCH 13/13] don't convert migration ID conflicts into 500 errors --- nexus/src/app/instance.rs | 8 +++++--- sled-agent/src/instance.rs | 4 ++-- sled-agent/src/sled_agent.rs | 8 ++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ac8c912c7e3..803269b311b 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -828,9 +828,11 @@ impl super::Nexus { // what to do with status codes. error!(self.log, "saw {} from instance_put!", e); - // this is unfortunate, but sled_agent_client::Error doesn't - // implement Copy, and can't be match'ed upon below without this - // line. + // Convert to the Omicron API error type. + // + // N.B. The match below assumes that this conversion will turn + // any 400-level error status from sled agent into an + // `Error::InvalidRequest`. let e = e.into(); match &e { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 18a408241b1..0f0977f8fab 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -729,8 +729,8 @@ impl Instance { } return Err(Error::Transition( - omicron_common::api::external::Error::InvalidRequest { - message: format!( + omicron_common::api::external::Error::Conflict { + internal_message: format!( "wrong Propolis ID generation: expected {}, got {}", inner.state.current().propolis_gen, old_runtime.propolis_gen diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 9ae0a2a2b99..efd7a8c576d 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -119,10 +119,14 @@ impl From for dropshot::HttpError { } } } - + crate::instance::Error::Transition(omicron_error) => { + // Preserve the status associated with the wrapped + // Omicron error so that Nexus will see it in the + // Progenitor client error it gets back. 
+ HttpError::from(omicron_error) + } e => HttpError::for_internal_error(e.to_string()), }, - e => HttpError::for_internal_error(e.to_string()), } }
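
As a closing illustration of the intent of patches 09 and 13, here is a minimal sketch. It is not part of the series; it assumes the `omicron_common`, `dropshot`, and `http` crates are available as dependencies and uses only APIs that appear in the diffs above. The point it demonstrates: a migration-ID conflict now surfaces to API clients as a 409 Conflict rather than a generic 500.

    use omicron_common::api::external::Error;

    fn main() {
        // Nexus (or sled agent, via `Error::Transition`) reports the conflict
        // using the new typed variant instead of an internal error.
        let err = Error::conflict(
            "instance is already migrating, or underwent an operation that \
             prevented this migration from proceeding",
        );

        // The `From<Error> for HttpError` conversion extended in patch 09
        // maps the variant to a client error with status 409.
        let http_err = dropshot::HttpError::from(err);
        assert_eq!(http_err.status_code, http::StatusCode::CONFLICT);
    }

This is also why patch 13 forwards the wrapped Omicron error with `HttpError::from` instead of flattening it with `for_internal_error`: the conflict status assigned by the typed error survives the trip through sled agent's Dropshot server and back through the Progenitor client to Nexus.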