From 04ef3bdcb3ee5efabd6106ada3b3b28e9c7e3ef4 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 30 Mar 2023 00:13:56 +0000 Subject: [PATCH 01/13] nexus: flesh out migration saga Flesh out the live migration saga. See RFD 361 and the theory statements in instance_migrate.rs for more details about the design. The new saga will start migration, but (yet) more changes are required for it to succeed end-to-end, since sled agent still needs to be taught to send the correct updates when a migration ends. Remove the portion of the migration saga that prospectively reprograms OPTE V2P mappings using the instance's destination sled. A future commit will update V2P mappings in response to the end-of-migration instance update that records an instance's new home sled. --- nexus/db-model/src/instance.rs | 5 +- nexus/src/app/instance.rs | 128 ++-- nexus/src/app/sagas/instance_create.rs | 6 +- nexus/src/app/sagas/instance_migrate.rs | 750 ++++++++++++++++++------ 4 files changed, 677 insertions(+), 212 deletions(-) diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 1ee25ec8b6e..5d722912da6 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -98,7 +98,10 @@ impl DatastoreAttachTargetConfig for Instance { Serialize, Deserialize, )] -#[diesel(table_name = instance)] +// N.B. Setting `treat_none_as_null` is required for these fields to be cleared +// properly during live migrations. See the documentation for +// `diesel::prelude::AsChangeset`. +#[diesel(table_name = instance, treat_none_as_null = true)] pub struct InstanceRuntimeState { /// The instance's current user-visible instance state. /// diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1248dbf0853..afac19c6e36 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -35,6 +35,8 @@ use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; +use sled_agent_client::types::InstanceMigrationSourceParams; +use sled_agent_client::types::InstancePutMigrationIdsBody; use sled_agent_client::types::InstancePutStateBody; use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; @@ -267,13 +269,29 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, params: params::InstanceMigrate, ) -> UpdateResult { - let (.., authz_instance) = - instance_lookup.lookup_for(authz::Action::Modify).await?; + let (.., authz_instance, db_instance) = + instance_lookup.fetch_for(authz::Action::Modify).await?; + + if db_instance.runtime().state.0 != InstanceState::Running { + return Err(Error::invalid_request( + "instance must be running before it can migrate", + )); + } + + if db_instance.runtime().sled_id == params.dst_sled_id { + return Err(Error::invalid_request( + "instance is already running on destination sled", + )); + } + + if db_instance.runtime().migration_id.is_some() { + return Err(Error::unavail("instance is already migrating")); + } // Kick off the migration saga let saga_params = sagas::instance_migrate::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), - instance_id: authz_instance.id(), + instance: db_instance, migrate_params: params, }; self.execute_saga::( @@ -287,39 +305,79 @@ impl super::Nexus { self.db_datastore.instance_refetch(opctx, &authz_instance).await } - /// Idempotently place the instance in a 'Migrating' state. 
- pub async fn instance_start_migrate( + /// Attempts to set the migration IDs for the supplied instance via the + /// instance's current sled. + /// + /// The caller is assumed to have fetched the current instance record from + /// the DB and verified either that the record has no migration IDs (in + /// the case where the caller is setting IDs) or that it has the expected + /// IDs (if the caller is clearing them). + /// + /// Returns `Ok` and the updated instance record if this call successfully + /// updated the instance with the sled agent and that update was + /// successfully reflected into CRDB. Returns `Err` with an appropriate + /// error otherwise. + /// + /// # Panics + /// + /// Raises an assertion failure if `migration_params` is `Some` and the + /// supplied `db_instance` already has a migration ID or destination + /// Propolis ID set. + /// + /// Raises an assertion failure if `migration_params` is `None` and the + /// supplied `db_instance`'s migration ID or destination Propolis ID is not + /// set. + pub async fn instance_set_migration_ids( &self, - _opctx: &OpContext, - _instance_id: Uuid, - _migration_id: Uuid, - _dst_propolis_id: Uuid, + opctx: &OpContext, + instance_id: Uuid, + db_instance: &db::model::Instance, + migration_params: Option, ) -> UpdateResult { - todo!("Migration endpoint not yet implemented in sled agent"); + if migration_params.is_some() { + assert!(db_instance.runtime().migration_id.is_none()); + assert!(db_instance.runtime().dst_propolis_id.is_none()); + } else { + assert!(db_instance.runtime().migration_id.is_some()); + assert!(db_instance.runtime().dst_propolis_id.is_some()); + } - /* - let (.., authz_instance, db_instance) = - LookupPath::new(opctx, &self.db_datastore) - .instance_id(instance_id) - .fetch() - .await - .unwrap(); - let requested = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Migrating, - migration_params: Some(InstanceRuntimeStateMigrateParams { - migration_id, - dst_propolis_id, - }), - }; - self.instance_set_runtime( - opctx, - &authz_instance, - &db_instance, - requested, - ) - .await?; - self.db_datastore.instance_refetch(opctx, &authz_instance).await - */ + let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + let sa = self.instance_sled(&db_instance).await?; + let instance_put_result = sa + .instance_put_migration_ids( + &instance_id, + &InstancePutMigrationIdsBody { + old_runtime: db_instance.runtime().clone().into(), + migration_params, + }, + ) + .await + .map(|res| Some(res.into_inner())); + + // Write the updated instance runtime state back to CRDB. If this + // outright fails, this operation fails. If the operation nominally + // succeeds but nothing was updated, this action is outdated and the + // caller should not proceed with migration. + let updated = self + .handle_instance_put_result(&db_instance, instance_put_result) + .await?; + + if updated { + Ok(self + .db_datastore + .instance_refetch(opctx, &authz_instance) + .await?) + } else { + Err(Error::internal_error( + "instance's Propolis generation is out of date", + )) + } } /// Reboot the specified instance. 
@@ -888,6 +946,10 @@ impl super::Nexus { ) -> Result<(), Error> { let log = &self.log; + slog::debug!(log, "received new runtime state from sled agent"; + "instance_id" => %id, + "runtime_state" => ?new_runtime_state); + let result = self .db_datastore .instance_update_runtime(id, &(new_runtime_state.clone().into())) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 58a75958e70..f438cd55055 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1075,10 +1075,9 @@ async fn ensure_instance_disk_attach_state( pub(super) async fn allocate_sled_ipv6( opctx: &OpContext, sagactx: NexusActionContext, - sled_id_name: &str, + sled_uuid: Uuid, ) -> Result { let osagactx = sagactx.user_data(); - let sled_uuid = sagactx.lookup::(sled_id_name)?; osagactx .datastore() .next_ipv6_address(opctx, sled_uuid) @@ -1145,7 +1144,8 @@ async fn sic_allocate_propolis_ip( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6(&opctx, sagactx, "server_id").await + let sled_uuid = sagactx.lookup::("server_id")?; + allocate_sled_ipv6(&opctx, sagactx, sled_uuid).await } async fn sic_create_instance_record( diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 7148a1b9764..c15dee456cc 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -5,12 +5,17 @@ use super::instance_create::allocate_sled_ipv6; use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::sagas::declare_saga_actions; -use crate::authn; -use crate::db::identity::Resource; +use crate::db::{identity::Resource, lookup::LookupPath}; use crate::external_api::params; +use crate::{authn, authz, db}; +use omicron_common::api::external::InstanceState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::{ + InstanceMigrationSourceParams, InstanceMigrationTargetParams, + InstanceStateRequested, +}; use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; @@ -21,32 +26,107 @@ use uuid::Uuid; #[derive(Debug, Deserialize, Serialize)] pub struct Params { pub serialized_authn: authn::saga::Serialized, - pub instance_id: Uuid, + pub instance: db::model::Instance, pub migrate_params: params::InstanceMigrate, } -// instance migrate saga: actions - +// The migration saga is similar to the instance creation saga: get a +// destination sled, allocate a Propolis process on it, and send it a request to +// initialize via migration, then wait (outside the saga) for this to resolve. +// +// Most of the complexity in this saga comes from the fact that during +// migration, there are two sleds with their own instance runtime states, and +// both the saga and the work that happen after it have to specify carefully +// which of the two participating VMMs is actually running the VM once the +// migration is over. declare_saga_actions! { instance_migrate; ALLOCATE_PROPOLIS_IP -> "dst_propolis_ip" { + sim_allocate_propolis_ip } - MIGRATE_PREP -> "migrate_instance" { - + sim_migrate_prep + + // This step sets the migration ID and destination Propolis ID fields in + // CRDB by asking the instance's current sled to paste them into its runtime + // state. 
Sled agent provides the synchronization here: while this operation + // is idempotent for any single transition between IDs, sled agent ensures + // that if multiple concurrent sagas try to set migration IDs at the same + // Propolis generation, then only one will win and get to proceed through + // the saga. + SET_MIGRATION_IDS -> "set_migration_ids" { + + sim_set_migration_ids } - INSTANCE_MIGRATE -> "instance_migrate" { - // TODO robustness: This needs an undo action - + sim_instance_migrate + + // The instance state on the destination looks like the instance state on + // the source, except that it bears all of the destination's "location" + // information--its Propolis ID, sled ID, and Propolis IP--with the same + // Propolis generation number as the source set in the previous step. + // Consider an example: + // + // - Before the saga begins, the instance has Propolis generation 10 and no + // migration IDs. + // - The previous step sets Propolis generation 11 and sets the instance's + // migration ID. + // - This step synthesizes a record with Propolis generation 11 and the same + // migration IDs, but with the Propolis identifiers set to the + // destination's IDs. + // + // When the migration resolves one way or the other, one or both of the + // participating sleds will update CRDB with Propolis generation 12 and the + // Propolis ID set to whichever Propolis ended up running the instance. + // + // This step must be infallible and does not have an undo action for reasons + // described below. + CREATE_DESTINATION_STATE -> "dst_runtime_state" { + + sim_create_destination_state } - V2P_ENSURE -> "v2p_ensure" { - // TODO robustness: This needs an undo action - + sim_v2p_ensure + + // The next three steps are organized so that if the saga unwinds after the + // destination Propolis is created, the "set migration IDs" step is undone + // before the "ensure destination Propolis" step. Consider again the example + // above, but this time, let the saga unwind after the destination Propolis + // starts. If sled agent changes an instance's Propolis generation when a + // Propolis is destroyed, and the undo steps are specified in the + // traditional way, the following can occur: + // + // - After the steps above, the instance is registered with the source and + // target sleds, both at Propolis generation 11. + // - Undoing the "ensure destination Propolis" step sets the Propolis + // generation to 12, but with the location information from the target! + // - When the "set migration IDs" step is undone, the source will publish + // Propolis generation 12, but this will be dropped because the generation + // was already incremented. + // + // Now CRDB is pointing at the wrong Propolis, and instance state updates + // from the former migration source will be ignored. Oops. + // + // Instead of asking sled agent to reason about what steps in a migration + // have and haven't been undertaken, the following steps are arranged so + // that the update that clears migration IDs happens before the one that + // destroys the destination Propolis, which unwinds the saga to the expected + // state. + // + // Note that this implies that `sim_ensure_destination_propolis_undo` must + // be able to succeed even if the "ensure destination Propolis" step was + // never reached. + ENSURE_DESTINATION_PROPOLIS_UNDO -> "unused_ensure_destination_undo" { + + sim_noop + - sim_ensure_destination_propolis_undo } - CLEANUP_SOURCE -> "cleanup_source" { - // TODO robustness: This needs an undo action. 
Is it even possible - // to undo at this point? - + sim_cleanup_source + SET_MIGRATION_IDS_UNDO -> "unused_set_ids_undo" { + + sim_noop + - sim_clear_migration_ids + } + ENSURE_DESTINATION_PROPOLIS -> "ensure_destination" { + + sim_ensure_destination_propolis + } + + // Note that this step only requests migration by sending a "migrate in" + // request to the destination sled. It does not wait for migration to + // finish. It cannot be unwound, either, because there is no way to cancel + // an in-progress migration (indeed, a requested migration might have + // finished entirely by the time the undo step runs). + INSTANCE_MIGRATE -> "instance_migrate" { + + sim_instance_migrate } } @@ -79,195 +159,180 @@ impl NexusSaga for SagaInstanceMigrate { )); builder.append(allocate_propolis_ip_action()); - builder.append(migrate_prep_action()); + builder.append(set_migration_ids_action()); + builder.append(create_destination_state_action()); + builder.append(ensure_destination_propolis_undo_action()); + builder.append(set_migration_ids_undo_action()); + builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); - builder.append(v2p_ensure_action()); - builder.append(cleanup_source_action()); Ok(builder.build()?) } } -async fn sim_migrate_prep( +/// A no-op forward action. +async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { + Ok(()) +} + +/// Allocates an IP address on the destination sled for the Propolis server. +async fn sim_allocate_propolis_ip( sagactx: NexusActionContext, -) -> Result<(Uuid, InstanceRuntimeState), ActionError> { - let osagactx = sagactx.user_data(); +) -> Result { let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); + allocate_sled_ipv6(&opctx, sagactx, params.migrate_params.dst_sled_id).await +} - let migrate_uuid = sagactx.lookup::("migrate_id")?; - let dst_propolis_uuid = sagactx.lookup::("dst_propolis_id")?; +async fn sim_set_migration_ids( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); - // We have sled-agent (via Nexus) attempt to place - // the instance in a "Migrating" state w/ the given - // migration id. This will also update the instance - // state in the db - let instance = osagactx + let db_instance = ¶ms.instance; + let migration_id = sagactx.lookup::("migrate_id")?; + let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; + let updated_record = osagactx .nexus() - .instance_start_migrate( + .instance_set_migration_ids( &opctx, - params.instance_id, - migrate_uuid, - dst_propolis_uuid, + db_instance.id(), + db_instance, + Some(InstanceMigrationSourceParams { + dst_propolis_id, + migration_id, + }), ) .await .map_err(ActionError::action_failed)?; - let instance_id = instance.id(); - Ok((instance_id, instance.runtime_state.into())) + Ok(updated_record) } -// Allocate an IP address on the destination sled for the Propolis server. 
-async fn sim_allocate_propolis_ip( +async fn sim_clear_migration_ids( sagactx: NexusActionContext, -) -> Result { +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6(&opctx, sagactx, "dst_sled_uuid").await + let db_instance = + sagactx.lookup::("set_migration_ids")?; + + // The instance may have moved to another Propolis generation by this point + // (if e.g. it was stopped and restarted), so this is not guaranteed to + // succeed. + // + // REVIEW(gjc): This callee has enough information to distinguish "outdated + // generation" failures from other errors. What is the right way to handle + // that class of other errors? For example, suppose CRDB is totally + // unavailable right now, but becomes available again later. The migration + // IDs in this step need to be cleared so that the instance can try to + // migrate again. How should this step achieve that? + let _ = osagactx + .nexus() + .instance_set_migration_ids( + &opctx, + db_instance.id(), + &db_instance, + None, + ) + .await; + + Ok(()) } -async fn sim_instance_migrate( - _sagactx: NexusActionContext, -) -> Result<(), ActionError> { - todo!("Migration action not yet implemented"); +async fn sim_create_destination_state( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let mut db_instance = + sagactx.lookup::("set_migration_ids")?; + let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; + let dst_propolis_ip = sagactx.lookup::("dst_propolis_ip")?; + + // Update the runtime state to refer to the new Propolis. + let new_runtime = db::model::InstanceRuntimeState { + state: db::model::InstanceState::new(InstanceState::Creating), + sled_id: params.migrate_params.dst_sled_id, + propolis_id: dst_propolis_id, + propolis_ip: Some(ipnetwork::Ipv6Network::from(dst_propolis_ip).into()), + ..db_instance.runtime_state + }; + + db_instance.runtime_state = new_runtime; + Ok(db_instance) +} - /* +async fn sim_ensure_destination_propolis( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - - let migration_id = sagactx.lookup::("migrate_id")?; - let dst_sled_id = params.migrate_params.dst_sled_id; - let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; - let (instance_id, old_runtime) = - sagactx.lookup::<(Uuid, InstanceRuntimeState)>("migrate_instance")?; - - // Allocate an IP address the destination sled for the new Propolis server. - let propolis_addr = osagactx - .datastore() - .next_ipv6_address(&opctx, dst_sled_id) + let db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; - let runtime = InstanceRuntimeState { - sled_id: dst_sled_id, - propolis_id: dst_propolis_id, - propolis_addr: Some(std::net::SocketAddr::new( - propolis_addr.into(), - 12400, - )), - ..old_runtime - }; - - // Collect the external IPs for the instance. 
- // https://github.com/oxidecomputer/omicron/issues/1467 - // TODO-correctness: Handle Floating IPs, see - // https://github.com/oxidecomputer/omicron/issues/1334 - let (snat_ip, external_ips): (Vec<_>, Vec<_>) = osagactx - .datastore() - .instance_lookup_external_ips(&opctx, instance_id) + osagactx + .nexus() + .instance_ensure_registered(&opctx, &authz_instance, &db_instance) .await - .map_err(ActionError::action_failed)? - .into_iter() - .partition(|ip| ip.kind == IpKind::SNat); - - // Sanity checks on the number and kind of each IP address. - if external_ips.len() > crate::app::MAX_EXTERNAL_IPS_PER_INSTANCE { - return Err(ActionError::action_failed(Error::internal_error( - format!( - "Expected the number of external IPs to be limited to \ - {}, but found {}", - crate::app::MAX_EXTERNAL_IPS_PER_INSTANCE, - external_ips.len(), - ) - .as_str(), - ))); - } - let external_ips = - external_ips.into_iter().map(|model| model.ip.ip()).collect(); - if snat_ip.len() != 1 { - return Err(ActionError::action_failed(Error::internal_error( - "Expected exactly one SNAT IP address for an instance", - ))); - } - let source_nat = SourceNatConfig::from(snat_ip.into_iter().next().unwrap()); - - // The TODO items below are tracked in - // https://github.com/oxidecomputer/omicron/issues/1783 - let instance_hardware = InstanceHardware { - runtime: runtime.into(), - // TODO: populate NICs - nics: vec![], - source_nat, - external_ips, - // TODO: populate firewall rules - firewall_rules: vec![], - // TODO: populate disks - disks: vec![], - // TODO: populate cloud init bytes - cloud_init_bytes: None, - }; - let target = InstanceRuntimeStateRequested { - run_state: InstanceStateRequested::Migrating, - migration_params: Some(InstanceRuntimeStateMigrateParams { - migration_id, - dst_propolis_id, - }), - }; + .map_err(ActionError::action_failed)?; - let src_propolis_id = old_runtime.propolis_id; - let src_propolis_addr = old_runtime.propolis_addr.ok_or_else(|| { - ActionError::action_failed(Error::invalid_request( - "expected source propolis-addr", - )) - })?; + Ok(()) +} - let dst_sa = osagactx - .sled_client(&dst_sled_id) +async fn sim_ensure_destination_propolis_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(db_instance.id()) + .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; - let new_runtime_state: InstanceRuntimeState = dst_sa - .instance_put( - &instance_id, - &InstanceEnsureBody { - initial: instance_hardware, - target, - migrate: Some(InstanceMigrationTargetParams { - src_propolis_addr: src_propolis_addr.to_string(), - src_propolis_id, - }), - }, - ) - .await - .map_err(omicron_common::api::external::Error::from) - .map_err(ActionError::action_failed)? - .into_inner() - .into(); - + // Ensure that the destination sled has no Propolis matching the description + // the saga previously generated. + // + // Note that this step can run before the instance was actually ensured. + // This is OK; sled agent will quietly succeed if asked to unregister an + // unregistered instance. 
osagactx - .datastore() - .instance_update_runtime(&instance_id, &new_runtime_state.into()) + .nexus() + .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) .await .map_err(ActionError::action_failed)?; Ok(()) - */ } -/// Add V2P mappings for the destination instance -// Note this must run after sled_id of the instance is set to the destination -// sled! -async fn sim_v2p_ensure( +async fn sim_instance_migrate( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); @@ -276,35 +341,370 @@ async fn sim_v2p_ensure( &sagactx, ¶ms.serialized_authn, ); - let (instance_id, _) = - sagactx.lookup::<(Uuid, InstanceRuntimeState)>("migrate_instance")?; - - // TODO-performance the instance_put in sim_instance_migrate will *start* a - // migration, but the source and destination propolis servers will perform - // it asynchronously. If this step occurs before the source instance vCPUs - // are paused, updating the mappings here will briefly "disconnect" the - // source instance in the sense that it will be able to send packets out (as - // other instance's V2P mappings will be untouched) but will not be able to - // receive any packets (other instances will send packets to the destination - // propolis' sled but the destination instance vCPUs may not have started - // yet). Until the destination propolis takes over, there will be a inbound - // network outage for the instance. + let src_runtime: InstanceRuntimeState = sagactx + .lookup::("set_migration_ids")? + .runtime() + .clone() + .into(); + let dst_db_instance = + sagactx.lookup::("dst_runtime_state")?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(dst_db_instance.id()) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + // TODO-correctness: This needs to be retried if a transient error occurs to + // avoid a problem like the following: + // + // 1. The saga executor runs this step and successfully starts migration. + // 2. The executor crashes. + // 3. Migration completes. + // 4. The executor restarts, runs this step, encounters a transient error, + // and then tries to unwind the saga. + // + // Now the "ensure destination" undo step will tear down the (running) + // migration target. // - // TODO-correctness if the migration fails, there's nothing that will unwind - // this and restore the original V2P mappings + // Possibly sled agent can help with this by using state or Propolis + // generation numbers to filter out stale destruction requests. 
osagactx .nexus() - .create_instance_v2p_mappings(&opctx, instance_id) + .instance_request_state( + &opctx, + &authz_instance, + &dst_db_instance, + InstanceStateRequested::MigrationTarget( + InstanceMigrationTargetParams { + src_propolis_addr: src_runtime + .propolis_addr + .unwrap() + .to_string(), + src_propolis_id: src_runtime.propolis_id, + }, + ), + ) .await .map_err(ActionError::action_failed)?; Ok(()) } -async fn sim_cleanup_source( - _sagactx: NexusActionContext, -) -> Result<(), ActionError> { - // TODO: clean up the previous instance whether it's on the same sled or a - // different one - Ok(()) +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::{ + app::{saga::create_saga_dag, sagas::instance_create}, + Nexus, TestInterfaces as _, + }; + + use dropshot::test_util::ClientTestContext; + use http::{method::Method, StatusCode}; + use nexus_test_interface::NexusServer; + use nexus_test_utils::{ + http_testing::{AuthnMode, NexusRequest, RequestBuilder}, + resource_helpers::{create_project, object_create, populate_ip_pool}, + start_sled_agent, + }; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{ + ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, + }; + use omicron_sled_agent::sim::Server; + use sled_agent_client::TestInterfaces as _; + + use super::*; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "test-project"; + const INSTANCE_NAME: &str = "test-instance"; + + async fn setup_test_project(client: &ClientTestContext) -> Uuid { + populate_ip_pool(&client, "default", None).await; + let project = create_project(&client, PROJECT_NAME).await; + project.identity.id + } + + async fn add_sleds( + cptestctx: &ControlPlaneTestContext, + num_sleds: usize, + ) -> Vec<(Uuid, Server)> { + let mut sas = Vec::with_capacity(num_sleds); + for _ in 0..num_sleds { + let sa_id = Uuid::new_v4(); + let log = + cptestctx.logctx.log.new(o!("sled_id" => sa_id.to_string())); + let addr = + cptestctx.server.get_http_server_internal_address().await; + + info!(&cptestctx.logctx.log, "Adding simulated sled"; "sled_id" => %sa_id); + let update_dir = std::path::Path::new("/should/be/unused"); + let sa = start_sled_agent( + log, + addr, + sa_id, + &update_dir, + omicron_sled_agent::sim::SimMode::Explicit, + ) + .await + .unwrap(); + sas.push((sa_id, sa)); + } + + sas + } + + async fn create_instance( + client: &ClientTestContext, + ) -> omicron_common::api::external::Instance { + let instances_url = format!("/v1/instances?project={}", PROJECT_NAME); + object_create( + client, + &instances_url, + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: INSTANCE_NAME.parse().unwrap(), + description: format!("instance {:?}", INSTANCE_NAME), + }, + ncpus: InstanceCpuCount(2), + memory: ByteCount::from_gibibytes_u32(2), + hostname: String::from(INSTANCE_NAME), + user_data: b"#cloud-config".to_vec(), + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + external_ips: vec![], + disks: vec![], + start: true, + }, + ) + .await + } + + async fn instance_simulate( + cptestctx: &ControlPlaneTestContext, + nexus: &Arc, + instance_id: &Uuid, + ) { + info!(&cptestctx.logctx.log, "Poking simulated instance"; + "instance_id" => %instance_id); + let sa = nexus.instance_sled_by_id(instance_id).await.unwrap(); + sa.instance_finish_transition(*instance_id).await; + } + + async fn fetch_db_instance( + cptestctx: &ControlPlaneTestContext, + opctx: 
&nexus_db_queries::context::OpContext, + id: Uuid, + ) -> nexus_db_model::Instance { + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let (.., db_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(id) + .fetch() + .await + .expect("test instance should be present in datastore"); + + info!(&cptestctx.logctx.log, "refetched instance from db"; + "instance" => ?db_instance); + + db_instance + } + + async fn instance_start(cptestctx: &ControlPlaneTestContext, id: &Uuid) { + let client = &cptestctx.external_client; + let instance_stop_url = format!("/v1/instances/{}/start", id); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to start instance"); + } + + async fn instance_stop(cptestctx: &ControlPlaneTestContext, id: &Uuid) { + let client = &cptestctx.external_client; + let instance_stop_url = format!("/v1/instances/{}/stop", id); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to stop instance"); + } + + fn select_first_alternate_sled( + db_instance: &db::model::Instance, + other_sleds: &[(Uuid, Server)], + ) -> Uuid { + let default_sled_uuid = + Uuid::parse_str(nexus_test_utils::SLED_AGENT_UUID).unwrap(); + if other_sleds.is_empty() { + panic!("need at least one other sled"); + } + + if other_sleds.iter().any(|sled| sled.0 == default_sled_uuid) { + panic!("default test sled agent was in other_sleds"); + } + + if db_instance.runtime().sled_id == default_sled_uuid { + other_sleds[0].0 + } else { + default_sled_uuid + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let other_sleds = add_sleds(cptestctx, 1).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let _project_id = setup_test_project(&client).await; + + let opctx = instance_create::test::test_opctx(cptestctx); + let instance = create_instance(client).await; + + // Poke the instance to get it into the Running state. + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + + let db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + let old_runtime = db_instance.runtime().clone(); + let dst_sled_id = + select_first_alternate_sled(&db_instance, &other_sleds); + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + instance: db_instance, + migrate_params: params::InstanceMigrate { dst_sled_id }, + }; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Migration saga should succeed"); + + // Merely running the migration saga (without simulating any completion + // steps in the simulated agents) should not change where the instance + // is running. 
+ let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + assert_eq!(new_db_instance.runtime().sled_id, old_runtime.sled_id); + assert_eq!( + new_db_instance.runtime().propolis_id, + old_runtime.propolis_id + ); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let other_sleds = add_sleds(cptestctx, 1).await; + let client = &cptestctx.external_client; + let nexus = &cptestctx.server.apictx().nexus; + let _project_id = setup_test_project(&client).await; + + let opctx = instance_create::test::test_opctx(cptestctx); + let instance = create_instance(client).await; + + // Poke the instance to get it into the Running state. + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + + let db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + let old_runtime = db_instance.runtime().clone(); + let dst_sled_id = + select_first_alternate_sled(&db_instance, &other_sleds); + let params = Params { + serialized_authn: authn::saga::Serialized::for_opctx(&opctx), + instance: db_instance, + migrate_params: params::InstanceMigrate { dst_sled_id }, + }; + + let dag = create_saga_dag::(params).unwrap(); + + // Some of the nodes in the DAG are expected to be infallible to allow + // certain undo steps to be reordered. Specify these nodes here, then + // verify that every infallible node actually appears in the DAG (to try + // to detect drift between the saga specification and the test). + let infallible_nodes = vec![ + "dst_runtime_state", + "unused_ensure_destination_undo", + "unused_set_ids_undo", + ]; + let dag_node_names: Vec = + dag.get_nodes().map(|n| n.name().as_ref().to_owned()).collect(); + assert!(infallible_nodes + .iter() + .all(|n| dag_node_names.iter().any(|d| d == n))); + + for node in dag.get_nodes() { + if infallible_nodes.contains(&node.name().as_ref()) { + info!(log, "Skipping infallible node"; + "node_name" => node.name().as_ref()); + continue; + } + + info!( + log, + "Creating new saga which will fail at index {:?}", node.index(); + "node_name" => node.name().as_ref(), + "label" => node.label(), + ); + + let runnable_saga = + nexus.create_runnable_saga(dag.clone()).await.unwrap(); + nexus + .sec() + .saga_inject_error(runnable_saga.id(), node.index()) + .await + .unwrap(); + nexus + .run_saga(runnable_saga) + .await + .expect_err("Saga should have failed"); + + // Unwinding at any step should clear the migration IDs from the + // instance record and leave the instance's location otherwise + // untouched. + let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id) + .await; + + assert!(new_db_instance.runtime().migration_id.is_none()); + assert!(new_db_instance.runtime().dst_propolis_id.is_none()); + assert_eq!(new_db_instance.runtime().sled_id, old_runtime.sled_id); + assert_eq!( + new_db_instance.runtime().propolis_id, + old_runtime.propolis_id + ); + + // Ensure the instance can stop. This helps to check that destroying + // the migration destination (if one was ensured) doesn't advance + // the Propolis ID generation in a way that prevents the source from + // issuing further state updates. 
+ instance_stop(cptestctx, &instance.identity.id).await; + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + let new_db_instance = + fetch_db_instance(cptestctx, &opctx, instance.identity.id) + .await; + assert_eq!( + new_db_instance.runtime().state.0, + InstanceState::Stopped + ); + + // Restart the instance for the next iteration. + instance_start(cptestctx, &instance.identity.id).await; + instance_simulate(cptestctx, nexus, &instance.identity.id).await; + } + } } From 2dfe1b736d5ed78ea05c537f80f46ebad8027854 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Mon, 17 Apr 2023 23:35:16 +0000 Subject: [PATCH 02/13] reserve resources on migration destination sleds Add the ability to constrain sled selection to a specific set of desired target sleds. This uses a builder that can be extended in the future for other purposes (e.g. switching to "don't schedule to this sled," "schedule only to sleds having such-and-such property," and so forth). Use this to add a resource-reservation step to the migration saga. Also fix a simulated sled agent bug that made simulated unregistration not idempotent. --- nexus/db-model/src/sled.rs | 38 +++++++++++++++++ nexus/db-queries/src/db/datastore/sled.rs | 22 ++++++++-- nexus/src/app/sagas/instance_create.rs | 1 + nexus/src/app/sagas/instance_migrate.rs | 52 +++++++++++++++++++++++ nexus/src/app/sled.rs | 2 + sled-agent/src/sim/sled_agent.rs | 10 ++++- 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 68387625431..c6666a5b4e5 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -143,3 +143,41 @@ impl DatastoreCollectionConfig for Sled { type CollectionTimeDeletedColumn = sled::dsl::time_deleted; type CollectionIdColumn = service::dsl::sled_id; } + +/// A set of constraints that can be placed on operations that select a sled. 
+#[derive(Debug)] +pub struct SledReservationConstraints { + must_select_from: Vec, +} + +impl SledReservationConstraints { + pub fn none() -> Self { + Self { must_select_from: Vec::new() } + } + + pub fn must_select_from(&self) -> &[Uuid] { + &self.must_select_from + } +} + +#[derive(Debug)] +pub struct SledReservationConstraintBuilder { + constraints: SledReservationConstraints, +} + +impl SledReservationConstraintBuilder { + pub fn new() -> Self { + SledReservationConstraintBuilder { + constraints: SledReservationConstraints::none(), + } + } + + pub fn must_select_from(mut self, sled_ids: &[Uuid]) -> Self { + self.constraints.must_select_from.extend(sled_ids); + self + } + + pub fn build(self) -> SledReservationConstraints { + self.constraints + } +} diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 94a1334f19c..59b6569b3cb 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -76,6 +76,7 @@ impl DataStore { resource_id: Uuid, resource_kind: db::model::SledResourceKind, resources: db::model::Resources, + constraints: db::model::SledReservationConstraints, ) -> CreateResult { #[derive(Debug)] enum SledReservationError { @@ -120,10 +121,10 @@ impl DataStore { resource_dsl::rss_ram::NAME )) + resources.rss_ram) .le(sled_dsl::usable_physical_ram); - sql_function!(fn random() -> diesel::sql_types::Float); - let sled_targets = sled_dsl::sled - // LEFT JOIN so we can observe sleds with no - // currently-allocated resources as potential targets + + // Generate a query describing all of the sleds that have space + // for this reservation. + let mut sled_targets = sled_dsl::sled .left_join( resource_dsl::sled_resource .on(resource_dsl::sled_id.eq(sled_dsl::id)), @@ -135,6 +136,19 @@ impl DataStore { ) .filter(sled_dsl::time_deleted.is_null()) .select(sled_dsl::id) + .into_boxed(); + + // Further constrain the sled IDs according to any caller- + // supplied constraints. + if !constraints.must_select_from().is_empty() { + sled_targets = sled_targets.filter( + sled_dsl::id + .eq_any(constraints.must_select_from().to_vec()), + ); + } + + sql_function!(fn random() -> diesel::sql_types::Float); + let sled_targets = sled_targets .order(random()) .limit(1) .get_results_async::(&conn) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index f438cd55055..7481564ded3 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -615,6 +615,7 @@ async fn sic_alloc_server( propolis_id, db::model::SledResourceKind::Instance, resources, + db::model::SledReservationConstraints::none(), ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index c15dee456cc..d17c6d15f0c 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -41,6 +41,12 @@ pub struct Params { // migration is over. declare_saga_actions! 
{ instance_migrate; + + RESERVE_RESOURCES -> "server_id" { + + sim_reserve_sled_resources + - sim_release_sled_resources + } + ALLOCATE_PROPOLIS_IP -> "dst_propolis_ip" { + sim_allocate_propolis_ip } @@ -158,6 +164,7 @@ impl NexusSaga for SagaInstanceMigrate { ACTION_GENERATE_ID.as_ref(), )); + builder.append(reserve_resources_action()); builder.append(allocate_propolis_ip_action()); builder.append(set_migration_ids_action()); builder.append(create_destination_state_action()); @@ -175,6 +182,51 @@ async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { Ok(()) } +/// Reserves resources for the destination on the specified target sled. +async fn sim_reserve_sled_resources( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + // N.B. This assumes that the instance's shape (CPU/memory allotment) is + // immutable despite being in the instance's "runtime" state. + let resources = db::model::Resources::new( + params.instance.runtime_state.ncpus.0 .0.into(), + params.instance.runtime_state.memory.into(), + // TODO(#2804): Properly specify reservoir size. + omicron_common::api::external::ByteCount::from(0).into(), + ); + + // Add a constraint that the only allowed sled is the one specified in the + // parameters. + let constraints = db::model::SledReservationConstraintBuilder::new() + .must_select_from(&[params.migrate_params.dst_sled_id]) + .build(); + + let propolis_id = sagactx.lookup::("dst_propolis_id")?; + let resource = osagactx + .nexus() + .reserve_on_random_sled( + propolis_id, + db::model::SledResourceKind::Instance, + resources, + constraints, + ) + .await + .map_err(ActionError::action_failed)?; + Ok(resource.sled_id) +} + +async fn sim_release_sled_resources( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let propolis_id = sagactx.lookup::("dst_propolis_id")?; + osagactx.nexus().delete_sled_reservation(propolis_id).await?; + Ok(()) +} + /// Allocates an IP address on the destination sled for the Propolis server. async fn sim_allocate_propolis_ip( sagactx: NexusActionContext, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 3ba687fc912..8188a118c7c 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -113,6 +113,7 @@ impl super::Nexus { resource_id: Uuid, resource_kind: db::model::SledResourceKind, resources: db::model::Resources, + constraints: db::model::SledReservationConstraints, ) -> Result { self.db_datastore .sled_reservation_create( @@ -120,6 +121,7 @@ impl super::Nexus { resource_id, resource_kind, resources, + constraints, ) .await } diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index a05f68e2fdf..b8b50c0122e 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -325,7 +325,15 @@ impl SledAgent { instance_id: Uuid, ) -> Result { let instance = - self.instances.sim_get_cloned_object(&instance_id).await?; + match self.instances.sim_get_cloned_object(&instance_id).await { + Ok(instance) => instance, + Err(Error::ObjectNotFound { .. 
}) => { + return Ok(InstanceUnregisterResponse { + updated_runtime: None, + }) + } + Err(e) => return Err(e), + }; self.detach_disks_from_instance(instance_id).await?; Ok(InstanceUnregisterResponse { From 4d2f664730275d52dcb4cf6bfd3467cfaa768537 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 20 Apr 2023 18:31:35 +0000 Subject: [PATCH 03/13] clippy i love you but you're bringing me down --- nexus/src/app/sagas/instance_migrate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index d17c6d15f0c..49011c4fa78 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -193,7 +193,7 @@ async fn sim_reserve_sled_resources( // immutable despite being in the instance's "runtime" state. let resources = db::model::Resources::new( params.instance.runtime_state.ncpus.0 .0.into(), - params.instance.runtime_state.memory.into(), + params.instance.runtime_state.memory, // TODO(#2804): Properly specify reservoir size. omicron_common::api::external::ByteCount::from(0).into(), ); From b0ef001736cb3b4320f49b55109ec3495148cff0 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 21 Apr 2023 15:36:08 +0000 Subject: [PATCH 04/13] split setting/clearing migration IDs into two operations --- nexus/src/app/instance.rs | 68 +++++++++++++++++-------- nexus/src/app/sagas/instance_migrate.rs | 55 ++++++++++---------- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index afac19c6e36..8c974da6f78 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -306,12 +306,10 @@ impl super::Nexus { } /// Attempts to set the migration IDs for the supplied instance via the - /// instance's current sled. + /// sled specified in `db_instance`. /// /// The caller is assumed to have fetched the current instance record from - /// the DB and verified either that the record has no migration IDs (in - /// the case where the caller is setting IDs) or that it has the expected - /// IDs (if the caller is clearing them). + /// the DB and verified that the record has no migration IDs. /// /// Returns `Ok` and the updated instance record if this call successfully /// updated the instance with the sled agent and that update was @@ -320,27 +318,17 @@ impl super::Nexus { /// /// # Panics /// - /// Raises an assertion failure if `migration_params` is `Some` and the - /// supplied `db_instance` already has a migration ID or destination - /// Propolis ID set. - /// - /// Raises an assertion failure if `migration_params` is `None` and the - /// supplied `db_instance`'s migration ID or destination Propolis ID is not - /// set. + /// Asserts that `db_instance` has no migration ID or destination Propolis + /// ID set. 
pub async fn instance_set_migration_ids( &self, opctx: &OpContext, instance_id: Uuid, db_instance: &db::model::Instance, - migration_params: Option, + migration_params: InstanceMigrationSourceParams, ) -> UpdateResult { - if migration_params.is_some() { - assert!(db_instance.runtime().migration_id.is_none()); - assert!(db_instance.runtime().dst_propolis_id.is_none()); - } else { - assert!(db_instance.runtime().migration_id.is_some()); - assert!(db_instance.runtime().dst_propolis_id.is_some()); - } + assert!(db_instance.runtime().migration_id.is_none()); + assert!(db_instance.runtime().dst_propolis_id.is_none()); let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) @@ -354,7 +342,7 @@ impl super::Nexus { &instance_id, &InstancePutMigrationIdsBody { old_runtime: db_instance.runtime().clone().into(), - migration_params, + migration_params: Some(migration_params), }, ) .await @@ -380,6 +368,46 @@ impl super::Nexus { } } + /// Attempts to clear the migration IDs for the supplied instance via the + /// sled specified in `db_instance`. + /// + /// The supplied instance record must contain valid migration IDs. + /// + /// Returns `Ok` if sled agent accepted the request to clear migration IDs + /// and the resulting attempt to write instance runtime state back to CRDB + /// succeeded. This routine returns `Ok` even if the update was not actually + /// applied (due to a separate generation number change). + /// + /// # Panics + /// + /// Asserts that `db_instance` has a migration ID and destination Propolis + /// ID set. + pub async fn instance_clear_migration_ids( + &self, + instance_id: Uuid, + db_instance: &db::model::Instance, + ) -> Result<(), Error> { + assert!(db_instance.runtime().migration_id.is_some()); + assert!(db_instance.runtime().dst_propolis_id.is_some()); + + let sa = self.instance_sled(&db_instance).await?; + let instance_put_result = sa + .instance_put_migration_ids( + &instance_id, + &InstancePutMigrationIdsBody { + old_runtime: db_instance.runtime().clone().into(), + migration_params: None, + }, + ) + .await + .map(|res| Some(res.into_inner())); + + self.handle_instance_put_result(&db_instance, instance_put_result) + .await?; + + Ok(()) + } + /// Reboot the specified instance. pub async fn instance_reboot( &self, diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 49011c4fa78..2bab7ecbf37 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -16,6 +16,7 @@ use sled_agent_client::types::{ InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, }; +use slog::warn; use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; @@ -136,8 +137,6 @@ declare_saga_actions! 
{ } } -// instance migrate saga: definition - #[derive(Debug)] pub struct SagaInstanceMigrate; impl NexusSaga for SagaInstanceMigrate { @@ -258,10 +257,7 @@ async fn sim_set_migration_ids( &opctx, db_instance.id(), db_instance, - Some(InstanceMigrationSourceParams { - dst_propolis_id, - migration_id, - }), + InstanceMigrationSourceParams { dst_propolis_id, migration_id }, ) .await .map_err(ActionError::action_failed)?; @@ -273,33 +269,36 @@ async fn sim_clear_migration_ids( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); let db_instance = sagactx.lookup::("set_migration_ids")?; - // The instance may have moved to another Propolis generation by this point - // (if e.g. it was stopped and restarted), so this is not guaranteed to - // succeed. + // If this call to clear migration IDs failed, one of the following things + // must have happened: + // + // 1. Sled agent's view of the instance changed in a way that invalidated + // this request. Sled agent is expected to push its own state updates on + // these transitions and properly manage migration IDs when they occur. + // 2. Nexus failed to contact sled agent entirely. + // 3. Nexus talked to sled agent but then failed to reach CRDB when trying + // to write back the instance state with the cleared IDs. (Note that + // when clearing migration IDs, failing to update CRDB because the + // current generation number is too far advanced is not actually treated + // as a failure.) // - // REVIEW(gjc): This callee has enough information to distinguish "outdated - // generation" failures from other errors. What is the right way to handle - // that class of other errors? For example, suppose CRDB is totally - // unavailable right now, but becomes available again later. The migration - // IDs in this step need to be cleared so that the instance can try to - // migrate again. How should this step achieve that? - let _ = osagactx + // In case 1, the instance should already be updated properly (or will be + // updated properly soon), and in cases 2 and 3 there's nothing that can + // reliably be done (the error may not be transient), so just swallow all + // errors here (but warn that they occurred). 
+ if let Err(e) = osagactx .nexus() - .instance_set_migration_ids( - &opctx, - db_instance.id(), - &db_instance, - None, - ) - .await; + .instance_clear_migration_ids(db_instance.id(), &db_instance) + .await + { + warn!(osagactx.log(), + "Error clearing migration IDs during rollback"; + "instance_id" => %db_instance.id(), + "error" => ?e); + } Ok(()) } From e988665e552ddcc6535269aa59d9fbf4e6384fbb Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 20:55:24 +0000 Subject: [PATCH 05/13] eliminate undo shenanigans & improve comments --- nexus/src/app/instance.rs | 18 ++- nexus/src/app/sagas/instance_create.rs | 8 +- nexus/src/app/sagas/instance_migrate.rs | 165 ++++++++++++------------ 3 files changed, 108 insertions(+), 83 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 8c974da6f78..83ea3b5f0e7 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -53,6 +53,11 @@ use uuid::Uuid; const MAX_KEYS_PER_INSTANCE: u32 = 8; +pub(crate) enum WriteBackUpdatedInstance { + WriteBack, + Drop, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -488,6 +493,7 @@ impl super::Nexus { opctx: &OpContext, authz_instance: &authz::Instance, db_instance: &db::model::Instance, + write_back: WriteBackUpdatedInstance, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; let sa = self.instance_sled(&db_instance).await?; @@ -495,7 +501,17 @@ impl super::Nexus { .instance_unregister(&db_instance.id()) .await .map(|res| res.into_inner().updated_runtime); - self.handle_instance_put_result(db_instance, result).await.map(|_| ()) + + match write_back { + WriteBackUpdatedInstance::WriteBack => self + .handle_instance_put_result(db_instance, result) + .await + .map(|_| ()), + WriteBackUpdatedInstance::Drop => { + result?; + Ok(()) + } + } } /// Returns the SledAgentClient for the host where this Instance is running. diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 7481564ded3..a3b8a4eae14 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; +use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::app::sagas::disk_create::{self, SagaDiskCreate}; use crate::app::{ @@ -1401,7 +1402,12 @@ async fn sic_instance_ensure_registered_undo( osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) + .instance_ensure_unregistered( + &opctx, + &authz_instance, + &db_instance, + WriteBackUpdatedInstance::WriteBack, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 2bab7ecbf37..3ab4ca24f2f 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,6 +4,7 @@ use super::instance_create::allocate_sled_ipv6; use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; +use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::db::{identity::Resource, lookup::LookupPath}; use crate::external_api::params; @@ -40,6 +41,19 @@ pub struct Params { // both the saga and the work that happen after it have to specify carefully // which of the two participating VMMs is actually running the VM once the // migration is over. +// +// At the start of the saga, the participating sleds have the following +// information about the instance's location. (Instance runtime states include +// other information, like per-Propolis states, that's not relevant here and +// is ignored.) +// +// | Item | Source | Dest | CRDB | +// |--------------|--------|------|------| +// | Propolis gen | G | None | G | +// | Propolis ID | P1 | None | P1 | +// | Sled ID | S1 | None | S1 | +// | Dst Prop. ID | None | None | None | +// | Migration ID | None | None | None | declare_saga_actions! { instance_migrate; @@ -59,72 +73,85 @@ declare_saga_actions! { // that if multiple concurrent sagas try to set migration IDs at the same // Propolis generation, then only one will win and get to proceed through // the saga. + // + // Once this update completes, the sleds have the following states, and the + // source sled's state will be stored in CRDB: + // + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | None | G+1 | + // | Propolis ID | P1 | None | P1 | + // | Sled ID | S1 | None | S1 | + // | Dst Prop. ID | P2 | None | P2 | + // | Migration ID | M | None | M | + // + // Unwinding this step clears the migration IDs using the source sled: + // + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+2 | None | G+2 | + // | Propolis ID | P1 | None | P1 | + // | Sled ID | S1 | None | S1 | + // | Dst Prop. ID | None | None | None | + // | Migration ID | None | None | None | SET_MIGRATION_IDS -> "set_migration_ids" { + sim_set_migration_ids + - sim_clear_migration_ids } // The instance state on the destination looks like the instance state on // the source, except that it bears all of the destination's "location" // information--its Propolis ID, sled ID, and Propolis IP--with the same // Propolis generation number as the source set in the previous step. - // Consider an example: - // - // - Before the saga begins, the instance has Propolis generation 10 and no - // migration IDs. - // - The previous step sets Propolis generation 11 and sets the instance's - // migration ID. 
- // - This step synthesizes a record with Propolis generation 11 and the same - // migration IDs, but with the Propolis identifiers set to the - // destination's IDs. - // - // When the migration resolves one way or the other, one or both of the - // participating sleds will update CRDB with Propolis generation 12 and the - // Propolis ID set to whichever Propolis ended up running the instance. - // - // This step must be infallible and does not have an undo action for reasons - // described below. CREATE_DESTINATION_STATE -> "dst_runtime_state" { + sim_create_destination_state } - // The next three steps are organized so that if the saga unwinds after the - // destination Propolis is created, the "set migration IDs" step is undone - // before the "ensure destination Propolis" step. Consider again the example - // above, but this time, let the saga unwind after the destination Propolis - // starts. If sled agent changes an instance's Propolis generation when a - // Propolis is destroyed, and the undo steps are specified in the - // traditional way, the following can occur: + // Instantiate the new Propolis on the destination sled. This uses the + // record created in the previous step, so the sleds end up with the + // following state: // - // - After the steps above, the instance is registered with the source and - // target sleds, both at Propolis generation 11. - // - Undoing the "ensure destination Propolis" step sets the Propolis - // generation to 12, but with the location information from the target! - // - When the "set migration IDs" step is undone, the source will publish - // Propolis generation 12, but this will be dropped because the generation - // was already incremented. + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | G+1 | G+1 | + // | Propolis ID | P1 | P2 | P1 | + // | Sled ID | S1 | S2 | S1 | + // | Dst Prop. ID | P2 | P2 | P2 | + // | Migration ID | M | M | M | // - // Now CRDB is pointing at the wrong Propolis, and instance state updates - // from the former migration source will be ignored. Oops. + // Note that, because the source and destination have the same Propolis + // generation, the destination's record will not be written back to CRDB. // - // Instead of asking sled agent to reason about what steps in a migration - // have and haven't been undertaken, the following steps are arranged so - // that the update that clears migration IDs happens before the one that - // destroys the destination Propolis, which unwinds the saga to the expected - // state. + // Once the migration completes (whether successfully or not), the sled that + // ends up with the instance will publish an update that clears the + // generation numbers and (on success) updates the Propolis ID pointer. If + // migration succeeds, this produces the following: // - // Note that this implies that `sim_ensure_destination_propolis_undo` must - // be able to succeed even if the "ensure destination Propolis" step was - // never reached. - ENSURE_DESTINATION_PROPOLIS_UNDO -> "unused_ensure_destination_undo" { - + sim_noop - - sim_ensure_destination_propolis_undo - } - SET_MIGRATION_IDS_UNDO -> "unused_set_ids_undo" { - + sim_noop - - sim_clear_migration_ids - } + // | Item | Source | Dest | CRDB | + // |--------------|--------|------|------| + // | Propolis gen | G+1 | G+2 | G+2 | + // | Propolis ID | P1 | P2 | P2 | + // | Sled ID | S1 | S2 | S2 | + // | Dst Prop. 
ID | P2 | None | None | + // | Migration ID | M | None | None | + // + // The undo step for this node requires special care. Unregistering a + // Propolis from a sled typically increments its Propolis generation number. + // (This is so that Nexus can rudely terminate a Propolis via unregistration + // and end up with the state it would have gotten if the Propolis had shut + // down normally.) If this step unwinds, this will produce the same state + // on the destination as in the previous table, even though no migration + // has started yet. If that update gets written back, then it will write + // Propolis generation G+2 to CRDB (as in the table above) with the wrong + // Propolis ID, and the subsequent request to clear migration IDs will not + // fix it (because the source sled's generation number is still at G+1 and + // will move to G+2, which is not recent enough to push another update). + // + // To avoid this problem, this undo step takes special care not to write + // back the updated record the destination sled returns to it. ENSURE_DESTINATION_PROPOLIS -> "ensure_destination" { + sim_ensure_destination_propolis + - sim_ensure_destination_propolis_undo } // Note that this step only requests migration by sending a "migrate in" @@ -167,8 +194,6 @@ impl NexusSaga for SagaInstanceMigrate { builder.append(allocate_propolis_ip_action()); builder.append(set_migration_ids_action()); builder.append(create_destination_state_action()); - builder.append(ensure_destination_propolis_undo_action()); - builder.append(set_migration_ids_undo_action()); builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); @@ -176,11 +201,6 @@ impl NexusSaga for SagaInstanceMigrate { } } -/// A no-op forward action. -async fn sim_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - /// Reserves resources for the destination on the specified target sled. async fn sim_reserve_sled_resources( sagactx: NexusActionContext, @@ -371,12 +391,17 @@ async fn sim_ensure_destination_propolis_undo( // Ensure that the destination sled has no Propolis matching the description // the saga previously generated. // - // Note that this step can run before the instance was actually ensured. - // This is OK; sled agent will quietly succeed if asked to unregister an - // unregistered instance. + // The updated instance record from this undo action must be dropped so + // that a later undo action (clearing migration IDs) can update the record + // instead. See the saga definition for more details. osagactx .nexus() - .instance_ensure_unregistered(&opctx, &authz_instance, &db_instance) + .instance_ensure_unregistered( + &opctx, + &authz_instance, + &db_instance, + WriteBackUpdatedInstance::Drop, + ) .await .map_err(ActionError::action_failed)?; @@ -682,29 +707,7 @@ mod tests { }; let dag = create_saga_dag::(params).unwrap(); - - // Some of the nodes in the DAG are expected to be infallible to allow - // certain undo steps to be reordered. Specify these nodes here, then - // verify that every infallible node actually appears in the DAG (to try - // to detect drift between the saga specification and the test). 
- let infallible_nodes = vec![ - "dst_runtime_state", - "unused_ensure_destination_undo", - "unused_set_ids_undo", - ]; - let dag_node_names: Vec = - dag.get_nodes().map(|n| n.name().as_ref().to_owned()).collect(); - assert!(infallible_nodes - .iter() - .all(|n| dag_node_names.iter().any(|d| d == n))); - for node in dag.get_nodes() { - if infallible_nodes.contains(&node.name().as_ref()) { - info!(log, "Skipping infallible node"; - "node_name" => node.name().as_ref()); - continue; - } - info!( log, "Creating new saga which will fail at index {:?}", node.index(); From 444b05cec355b5217ca1b23bcb83b0b68fe99cb2 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 21:06:05 +0000 Subject: [PATCH 06/13] switch error type & improve message --- nexus/src/app/instance.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 83ea3b5f0e7..483664bdadc 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -367,8 +367,9 @@ impl super::Nexus { .instance_refetch(opctx, &authz_instance) .await?) } else { - Err(Error::internal_error( - "instance's Propolis generation is out of date", + Err(Error::unavail( + "instance is already migrating, or underwent an operation that \ + prevented this migration from proceeding" )) } } From ce760e91b0357e3f125e9e7cce52448f9de6b3fe Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 26 Apr 2023 21:21:01 +0000 Subject: [PATCH 07/13] clarify semantics of must_select_from --- nexus/db-model/src/sled.rs | 17 +++++++++++++++-- nexus/db-queries/src/db/datastore/sled.rs | 8 +++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index c6666a5b4e5..22d1b1ddd35 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -151,12 +151,21 @@ pub struct SledReservationConstraints { } impl SledReservationConstraints { + /// Creates a constraint set with no constraints in it. pub fn none() -> Self { Self { must_select_from: Vec::new() } } - pub fn must_select_from(&self) -> &[Uuid] { - &self.must_select_from + /// If the constraints include a set of sleds that the caller must select + /// from, returns `Some` and a slice containing the members of that set. + /// + /// If no "must select from these" constraint exists, returns None. + pub fn must_select_from(&self) -> Option<&[Uuid]> { + if self.must_select_from.is_empty() { + None + } else { + Some(&self.must_select_from) + } } } @@ -172,11 +181,15 @@ impl SledReservationConstraintBuilder { } } + /// Adds a "must select from the following sled IDs" constraint. If such a + /// constraint already exists, appends the supplied sled IDs to the "must + /// select from" list. pub fn must_select_from(mut self, sled_ids: &[Uuid]) -> Self { self.constraints.must_select_from.extend(sled_ids); self } + /// Builds a set of constraints from this builder's current state. pub fn build(self) -> SledReservationConstraints { self.constraints } diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 59b6569b3cb..9ab3e026d18 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -140,11 +140,9 @@ impl DataStore { // Further constrain the sled IDs according to any caller- // supplied constraints. 
- if !constraints.must_select_from().is_empty() { - sled_targets = sled_targets.filter( - sled_dsl::id - .eq_any(constraints.must_select_from().to_vec()), - ); + if let Some(must_select_from) = constraints.must_select_from() { + sled_targets = sled_targets + .filter(sled_dsl::id.eq_any(must_select_from.to_vec())); } sql_function!(fn random() -> diesel::sql_types::Float); From 77863ca3efd615eb718ebed7d301569ce4b8bb04 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 27 Apr 2023 23:57:00 +0000 Subject: [PATCH 08/13] use try semantics instead of unwrapping --- nexus/src/app/instance.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 483664bdadc..d37a27c7294 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -338,8 +338,7 @@ impl super::Nexus { let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) .lookup_for(authz::Action::Modify) - .await - .unwrap(); + .await?; let sa = self.instance_sled(&db_instance).await?; let instance_put_result = sa From 10cadec2eb11e4afb01dc18e547918c06d193fda Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 18:44:57 +0000 Subject: [PATCH 09/13] add & make use of 409 Conflict error type --- common/src/api/external/error.rs | 29 ++++++++++++++++++++++++++++- nexus/src/app/instance.rs | 2 +- nexus/src/app/session.rs | 3 ++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index dc1459f46aa..e508b7ecba2 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -61,6 +61,9 @@ pub enum Error { #[error("Type version mismatch! {internal_message}")] TypeVersionMismatch { internal_message: String }, + + #[error("Conflict: {internal_message}")] + Conflict { internal_message: String }, } /// Indicates how an object was looked up (for an `ObjectNotFound` error) @@ -118,7 +121,8 @@ impl Error { | Error::Forbidden | Error::MethodNotAllowed { .. } | Error::InternalError { .. } - | Error::TypeVersionMismatch { .. } => false, + | Error::TypeVersionMismatch { .. } + | Error::Conflict { .. } => false, } } @@ -174,6 +178,18 @@ impl Error { Error::TypeVersionMismatch { internal_message: message.to_owned() } } + /// Generates an [`Error::Conflict`] with a specific message. + /// + /// This is used in cases where a request cannot proceed because the target + /// resource is currently in a state that's incompatible with that request, + /// but where the request might succeed if it is retried or modified and + /// retried. The internal message should provide more information about the + /// source of the conflict and possible actions the caller can take to + /// resolve it (if any). 
+ pub fn conflict(message: &str) -> Error { + Error::Conflict { internal_message: message.to_owned() } + } + /// Given an [`Error`] with an internal message, return the same error with /// `context` prepended to it to provide more context /// @@ -223,6 +239,9 @@ impl Error { ), } } + Error::Conflict { internal_message } => Error::Conflict { + internal_message: format!("{}: {}", context, internal_message), + }, } } } @@ -317,6 +336,14 @@ impl From for HttpError { Error::TypeVersionMismatch { internal_message } => { HttpError::for_internal_error(internal_message) } + + Error::Conflict { internal_message } => { + HttpError::for_client_error( + Some(String::from("Conflict")), + http::StatusCode::CONFLICT, + internal_message, + ) + } } } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index d37a27c7294..ac8c912c7e3 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -366,7 +366,7 @@ impl super::Nexus { .instance_refetch(opctx, &authz_instance) .await?) } else { - Err(Error::unavail( + Err(Error::conflict( "instance is already migrating, or underwent an operation that \ prevented this migration from proceeding" )) diff --git a/nexus/src/app/session.rs b/nexus/src/app/session.rs index 00de241321d..04a9170c7d8 100644 --- a/nexus/src/app/session.rs +++ b/nexus/src/app/session.rs @@ -155,7 +155,8 @@ impl super::Nexus { | Error::InternalError { .. } | Error::ServiceUnavailable { .. } | Error::MethodNotAllowed { .. } - | Error::TypeVersionMismatch { .. } => { + | Error::TypeVersionMismatch { .. } + | Error::Conflict { .. } => { Reason::UnknownError { source: error } } })?; From 4c55cfc68b0ffa1a76d6e8347ce8bd30a3ef90df Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 20:26:35 +0000 Subject: [PATCH 10/13] improve saga comments --- nexus/src/app/sagas/instance_migrate.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 3ab4ca24f2f..eeccfd0dac9 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -42,10 +42,12 @@ pub struct Params { // which of the two participating VMMs is actually running the VM once the // migration is over. // -// At the start of the saga, the participating sleds have the following -// information about the instance's location. (Instance runtime states include -// other information, like per-Propolis states, that's not relevant here and -// is ignored.) +// Only active instances can migrate. While an instance is active on some sled +// (and isn't migrating), that sled's sled agent maintains the instance's +// runtime state and sends updated state to Nexus when it changes. At the start +// of this saga, the participating sled agents and CRDB have the following +// runtime states (note that some fields, like the actual Propolis state, are +// not relevant to migration and are omitted here): // // | Item | Source | Dest | CRDB | // |--------------|--------|------|------| @@ -66,11 +68,15 @@ declare_saga_actions! { + sim_allocate_propolis_ip } - // This step sets the migration ID and destination Propolis ID fields in - // CRDB by asking the instance's current sled to paste them into its runtime - // state. 
Sled agent provides the synchronization here: while this operation - // is idempotent for any single transition between IDs, sled agent ensures - // that if multiple concurrent sagas try to set migration IDs at the same + // This step sets the instance's migration ID and destination Propolis ID + // fields. Because the instance is active, its current sled agent maintains + // the most recent runtime state, so to update it, the saga calls into the + // sled and asks it to produce an updated record with the appropriate + // migration IDs and a new generation number. + // + // Sled agent provides the synchronization here: while this operation is + // idempotent for any single transition between IDs, sled agent ensures that + // if multiple concurrent sagas try to set migration IDs at the same // Propolis generation, then only one will win and get to proceed through // the saga. // From 32ac89df208d130ec4d12748d85537237699008a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 21:27:39 +0000 Subject: [PATCH 11/13] fix comment in sim_clear_migration_ids --- nexus/src/app/sagas/instance_migrate.rs | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index eeccfd0dac9..b4e87d60a18 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -298,23 +298,15 @@ async fn sim_clear_migration_ids( let db_instance = sagactx.lookup::("set_migration_ids")?; - // If this call to clear migration IDs failed, one of the following things - // must have happened: + // Because the migration never actually started (and thus didn't finish), + // the instance should be at the same Propolis generation as it was when + // migration IDs were set, which means sled agent should accept a request to + // clear them. The only exception is if the instance stopped, but that also + // clears its migration IDs; in that case there is no work to do here. // - // 1. Sled agent's view of the instance changed in a way that invalidated - // this request. Sled agent is expected to push its own state updates on - // these transitions and properly manage migration IDs when they occur. - // 2. Nexus failed to contact sled agent entirely. - // 3. Nexus talked to sled agent but then failed to reach CRDB when trying - // to write back the instance state with the cleared IDs. (Note that - // when clearing migration IDs, failing to update CRDB because the - // current generation number is too far advanced is not actually treated - // as a failure.) - // - // In case 1, the instance should already be updated properly (or will be - // updated properly soon), and in cases 2 and 3 there's nothing that can - // reliably be done (the error may not be transient), so just swallow all - // errors here (but warn that they occurred). + // Other error cases (e.g. an unreachable sled agent) are handled by this + // callee using the standard discipline for handling failed requests to + // change an instance's state. Warn for these errors. 
if let Err(e) = osagactx .nexus() .instance_clear_migration_ids(db_instance.id(), &db_instance) From 15b8377cce960b05f070ba078f0f9d296436042e Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 23:16:28 +0000 Subject: [PATCH 12/13] further correct comment --- nexus/src/app/sagas/instance_migrate.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index b4e87d60a18..b6df0554546 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -304,9 +304,12 @@ async fn sim_clear_migration_ids( // clear them. The only exception is if the instance stopped, but that also // clears its migration IDs; in that case there is no work to do here. // - // Other error cases (e.g. an unreachable sled agent) are handled by this - // callee using the standard discipline for handling failed requests to - // change an instance's state. Warn for these errors. + // Other failures to clear migration IDs are handled like any other failure + // to update an instance's state: the callee attempts to mark the instance + // as failed; if the failure occurred because the instance changed state + // such that sled agent could not fulfill the request, the callee will + // produce a stale generation number and will not actually mark the instance + // as failed. if let Err(e) = osagactx .nexus() .instance_clear_migration_ids(db_instance.id(), &db_instance) From 2b5106a9b5ba6eb60f3534170a4be21bd6e23205 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Fri, 28 Apr 2023 23:16:48 +0000 Subject: [PATCH 13/13] don't convert migration ID conflicts into 500 errors --- nexus/src/app/instance.rs | 8 +++++--- sled-agent/src/instance.rs | 4 ++-- sled-agent/src/sled_agent.rs | 8 ++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ac8c912c7e3..803269b311b 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -828,9 +828,11 @@ impl super::Nexus { // what to do with status codes. error!(self.log, "saw {} from instance_put!", e); - // this is unfortunate, but sled_agent_client::Error doesn't - // implement Copy, and can't be match'ed upon below without this - // line. + // Convert to the Omicron API error type. + // + // N.B. The match below assumes that this conversion will turn + // any 400-level error status from sled agent into an + // `Error::InvalidRequest`. let e = e.into(); match &e { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 18a408241b1..0f0977f8fab 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -729,8 +729,8 @@ impl Instance { } return Err(Error::Transition( - omicron_common::api::external::Error::InvalidRequest { - message: format!( + omicron_common::api::external::Error::Conflict { + internal_message: format!( "wrong Propolis ID generation: expected {}, got {}", inner.state.current().propolis_gen, old_runtime.propolis_gen diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 9ae0a2a2b99..efd7a8c576d 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -119,10 +119,14 @@ impl From for dropshot::HttpError { } } } - + crate::instance::Error::Transition(omicron_error) => { + // Preserve the status associated with the wrapped + // Omicron error so that Nexus will see it in the + // Progenitor client error it gets back. 
+ HttpError::from(omicron_error) + } e => HttpError::for_internal_error(e.to_string()), }, - e => HttpError::for_internal_error(e.to_string()), } }
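
As a closing illustration of the intent of patches 09 and 13, here is a minimal sketch. It is not part of the series; it assumes the `omicron_common`, `dropshot`, and `http` crates are available as dependencies and uses only APIs that appear in the diffs above. The point it demonstrates: a migration-ID conflict now surfaces to API clients as a 409 Conflict rather than a generic 500.

    use omicron_common::api::external::Error;

    fn main() {
        // Nexus (or sled agent, via `Error::Transition`) reports the conflict
        // using the new typed variant instead of an internal error.
        let err = Error::conflict(
            "instance is already migrating, or underwent an operation that \
             prevented this migration from proceeding",
        );

        // The `From<Error> for HttpError` conversion extended in patch 09
        // maps the variant to a client error with status 409.
        let http_err = dropshot::HttpError::from(err);
        assert_eq!(http_err.status_code, http::StatusCode::CONFLICT);
    }

This is also why patch 13 forwards the wrapped Omicron error with `HttpError::from` instead of flattening it with `for_internal_error`: the conflict status assigned by the typed error survives the trip through sled agent's Dropshot server and back through the Progenitor client to Nexus.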