diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out
index 97187d11e80..710f7318fac 100644
--- a/dev-tools/omdb/tests/usage_errors.out
+++ b/dev-tools/omdb/tests/usage_errors.out
@@ -508,19 +508,22 @@ Options:
           Show sleds that match the given filter
 
           Possible values:
-          - all:             All sleds in the system, regardless of policy or state
-          - commissioned:    All sleds that are currently part of the control plane cluster
-          - decommissioned:  All sleds that were previously part of the control plane
+          - all:                All sleds in the system, regardless of policy or state
+          - commissioned:       All sleds that are currently part of the control plane
+            cluster
+          - decommissioned:     All sleds that were previously part of the control plane
             cluster but have been decommissioned
-          - discretionary:   Sleds that are eligible for discretionary services
-          - in-service:      Sleds that are in service (even if they might not be eligible
-            for discretionary services)
-          - query-during-inventory: Sleds whose sled agents should be queried for inventory
-          - reservation-create: Sleds on which reservations can be created
-          - vpc-routing:     Sleds which should be sent OPTE V2P mappings and Routing rules
-          - vpc-firewall:    Sleds which should be sent VPC firewall rules
-          - tuf-artifact-replication: Sleds which should have TUF repo artifacts replicated onto
-            them
+          - discretionary:      Sleds that are eligible for discretionary services
+          - in-service:         Sleds that are in service (even if they might not be
+            eligible for discretionary services)
+          - query-during-inventory: Sleds whose sled agents should be queried for inventory
+          - reservation-create: Sleds on which reservations can be created
+          - vpc-routing:        Sleds which should be sent OPTE V2P mappings and Routing
+            rules
+          - vpc-firewall:       Sleds which should be sent VPC firewall rules
+          - tuf-artifact-replication: Sleds which should have TUF repo artifacts replicated
+            onto them
+          - sps-updated-by-reconfigurator: Sleds whose SPs should be updated by Reconfigurator
 
       --log-level
           log level filter
diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
index c77200a56b1..eee75c23d89 100644
--- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
+++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
@@ -497,6 +497,7 @@ WARN failed to place all new desired InternalDns zones, placed: 0, wanted_to_pla
 INFO sufficient ExternalDns zones exist in plan, desired_count: 0, current_count: 0
 WARN failed to place all new desired Nexus zones, placed: 0, wanted_to_place: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
+WARN cannot issue more SP updates (no current artifacts)
 INFO some zones not yet up-to-date, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a
diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout
index 07b3e6c10cd..a2cafb89b43 100644
--- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout
+++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout
@@ -974,6 +974,7 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count
 INFO added zone to sled, sled_id: a88790de-5962-4871-8686-61c1fd5b7094, kind: ExternalDns
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
+WARN cannot issue more SP updates (no current artifacts)
 INFO some zones not yet up-to-date, sled_id: a88790de-5962-4871-8686-61c1fd5b7094
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 based on parent blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0
diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout
index ad0fbd0baa8..f3995b05d93 100644
--- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout
+++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout
@@ -1004,6 +1004,7 @@ INFO added zone to sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, kind: In
 INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
+WARN cannot issue more SP updates (no current artifacts)
 INFO some zones not yet up-to-date, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763
 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
 generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4
diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs
index 9a1793cf3cd..93a71bec015 100644
--- a/nexus/db-queries/src/db/datastore/deployment.rs
+++ b/nexus/db-queries/src/db/datastore/deployment.rs
@@ -1884,6 +1884,7 @@ mod tests {
     use nexus_types::external_api::views::PhysicalDiskState;
     use nexus_types::external_api::views::SledPolicy;
     use nexus_types::external_api::views::SledState;
+    use nexus_types::inventory::BaseboardId;
     use nexus_types::inventory::Collection;
     use omicron_common::address::IpRange;
     use omicron_common::address::Ipv6Subnet;
@@ -2002,6 +2003,10 @@ mod tests {
             policy: SledPolicy::provisionable(),
             state: SledState::Active,
             resources,
+            baseboard_id: BaseboardId {
+                part_number: String::from("unused"),
+                serial_number: String::from("unused"),
+            },
         }
     }
 
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index ed416e4c8bd..3a4bfde7f7c 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -1960,6 +1960,13 @@ impl<'a> BlueprintBuilder<'a> {
         Ok(self.resource_allocator()?.inject_untracked_external_dns_ip(addr)?)
     }
 
+    pub fn pending_mgs_updates_replace_all(
+        &mut self,
+        updates: nexus_types::deployment::PendingMgsUpdates,
+    ) {
+        self.pending_mgs_updates = updates;
+    }
+
     pub fn pending_mgs_update_insert(
         &mut self,
         update: nexus_types::deployment::PendingMgsUpdate,
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index c109ef30078..9dab7e10139 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -13,7 +13,9 @@ use crate::blueprint_builder::Error;
 use crate::blueprint_builder::Operation;
 use crate::blueprint_editor::DisksEditError;
 use crate::blueprint_editor::SledEditError;
+use crate::mgs_updates::plan_mgs_updates;
 use crate::planner::omicron_zone_placement::PlacementError;
+use gateway_client::types::SpType;
 use nexus_sled_agent_shared::inventory::OmicronZoneType;
 use nexus_sled_agent_shared::inventory::ZoneKind;
 use nexus_types::deployment::Blueprint;
@@ -50,6 +52,37 @@ pub use self::rng::SledPlannerRng;
 
 mod omicron_zone_placement;
 pub(crate) mod rng;
+
+/// Maximum number of MGS-managed updates (updates to SP, RoT, RoT bootloader,
+/// or host OS) that we allow to be pending across the whole system at one time
+///
+/// For now, we limit this to 1 for safety. That's for a few reasons:
+///
+/// - SP updates reboot the corresponding host. Thus, if we have one of these
+///   updates outstanding, we should assume that host may be offline. Most
+///   control plane services are designed to survive multiple failures (e.g.,
+///   the Cockroach cluster can sustain two failures and stay online), but
+///   having one sled offline eats into that margin. And some services like
+///   Crucible volumes can only sustain one failure. Taking down two sleds
+///   would render unavailable any Crucible volumes with regions on those two
+///   sleds.
+///
+/// - There is unfortunately some risk in updating the RoT bootloader, in that
+///   there's a window where a failure could render the device unbootable. See
+///   oxidecomputer/omicron#7819 for more on this. Updating only one at a time
+///   helps mitigate this risk.
+///
+/// More sophisticated schemes are certainly possible (e.g., allocate Crucible
+/// regions in such a way that there are at least pairs of sleds we could update
+/// concurrently without taking volumes down; and/or be willing to update
+/// multiple sleds as long as they don't have overlapping control plane
+/// services, etc.).
+const NUM_CONCURRENT_MGS_UPDATES: usize = 1;
+
+enum UpdateStepResult {
+    ContinueToNextStep,
+    Waiting,
+}
+
 pub struct Planner<'a> {
     log: Logger,
     input: &'a PlanningInput,
@@ -115,7 +148,10 @@ impl<'a> Planner<'a> {
         self.do_plan_expunge()?;
         self.do_plan_add()?;
         self.do_plan_decommission()?;
-        self.do_plan_zone_updates()?;
+        if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates()
+        {
+            self.do_plan_zone_updates()?;
+        }
         self.do_plan_cockroachdb_settings();
         Ok(())
     }
@@ -901,6 +937,63 @@ impl<'a> Planner<'a> {
         Ok(())
     }
 
+    /// Update at most one MGS-managed device (SP, RoT, etc.), if any are out of
+    /// date.
+    fn do_plan_mgs_updates(&mut self) -> UpdateStepResult {
+        // Determine which baseboards we will consider updating.
+        //
+        // Sleds may be present but not adopted as part of the control plane.
+        // In deployed systems, this would probably only happen if a sled was
+        // about to be added. In dev/test environments, it's common to leave
+        // some number of sleds out of the control plane for various reasons.
+        // Inventory will still report them, but we don't want to touch them.
+        //
+        // For better or worse, switches and PSCs do not have the same idea of
+        // being adopted into the control plane. If they're present, they're
+        // part of the system, and we will update them.
+        let included_sled_baseboards: BTreeSet<_> = self
+            .input
+            .all_sleds(SledFilter::SpsUpdatedByReconfigurator)
+            .map(|(_sled_id, details)| &details.baseboard_id)
+            .collect();
+        let included_baseboards =
+            self.inventory
+                .sps
+                .iter()
+                .filter_map(|(baseboard_id, sp_state)| {
+                    let do_include = match sp_state.sp_type {
+                        SpType::Sled => included_sled_baseboards
+                            .contains(baseboard_id.as_ref()),
+                        SpType::Power => true,
+                        SpType::Switch => true,
+                    };
+                    do_include.then_some(baseboard_id.clone())
+                })
+                .collect();
+
+        // Compute the new set of PendingMgsUpdates.
+        let current_updates =
+            &self.blueprint.parent_blueprint().pending_mgs_updates;
+        let current_artifacts = self.input.tuf_repo();
+        let next = plan_mgs_updates(
+            &self.log,
+            &self.inventory,
+            &included_baseboards,
+            &current_updates,
+            current_artifacts,
+            NUM_CONCURRENT_MGS_UPDATES,
+        );
+
+        // TODO This is not quite right. See oxidecomputer/omicron#8285.
+        let rv = if next.is_empty() {
+            UpdateStepResult::ContinueToNextStep
+        } else {
+            UpdateStepResult::Waiting
+        };
+        self.blueprint.pending_mgs_updates_replace_all(next);
+        rv
+    }
+
     /// Update at most one existing zone to use a new image source.
     fn do_plan_zone_updates(&mut self) -> Result<(), Error> {
         // We are only interested in non-decommissioned sleds.
diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs
index fd277b77582..dc2fd474492 100644
--- a/nexus/reconfigurator/planning/src/system.rs
+++ b/nexus/reconfigurator/planning/src/system.rs
@@ -491,6 +491,18 @@ impl SystemDescription {
                 policy: sled.policy,
                 state: sled.state,
                 resources: sled.resources.clone(),
+                baseboard_id: BaseboardId {
+                    part_number: sled
+                        .inventory_sled_agent
+                        .baseboard
+                        .model()
+                        .to_owned(),
+                    serial_number: sled
+                        .inventory_sled_agent
+                        .baseboard
+                        .identifier()
+                        .to_owned(),
+                },
             };
             builder.add_sled(sled.sled_id, sled_details)?;
         }
diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs
index 59e6a8ab6aa..b0fe41ce1c1 100644
--- a/nexus/reconfigurator/preparation/src/lib.rs
+++ b/nexus/reconfigurator/preparation/src/lib.rs
@@ -33,6 +33,7 @@ use nexus_types::deployment::SledResources;
 use nexus_types::deployment::UnstableReconfiguratorState;
 use nexus_types::identity::Asset;
 use nexus_types::identity::Resource;
+use nexus_types::inventory::BaseboardId;
 use nexus_types::inventory::Collection;
 use omicron_common::address::IpRange;
 use omicron_common::address::Ipv6Subnet;
@@ -274,6 +275,10 @@ impl PlanningInputFromDb<'_> {
                 policy: sled_row.policy(),
                 state: sled_row.state().into(),
                 resources: SledResources { subnet, zpools },
+                baseboard_id: BaseboardId {
+                    part_number: sled_row.part_number().to_owned(),
+                    serial_number: sled_row.serial_number().to_owned(),
+                },
             };
             // TODO-cleanup use `TypedUuid` everywhere
             let sled_id = SledUuid::from_untyped_uuid(sled_id);
diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs
index 86e9fb02d94..51ff668b0d2 100644
--- a/nexus/types/src/deployment/planning_input.rs
+++ b/nexus/types/src/deployment/planning_input.rs
@@ -14,6 +14,7 @@ use crate::external_api::views::PhysicalDiskState;
 use crate::external_api::views::SledPolicy;
 use crate::external_api::views::SledProvisionPolicy;
 use crate::external_api::views::SledState;
+use crate::inventory::BaseboardId;
 use chrono::DateTime;
 use chrono::Utc;
 use clap::ValueEnum;
@@ -724,6 +725,9 @@ pub enum SledFilter {
 
     /// Sleds which should have TUF repo artifacts replicated onto them.
     TufArtifactReplication,
+
+    /// Sleds whose SPs should be updated by Reconfigurator
+    SpsUpdatedByReconfigurator,
 }
 
 impl SledFilter {
@@ -780,6 +784,7 @@ impl SledPolicy {
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
                 SledFilter::TufArtifactReplication => true,
+                SledFilter::SpsUpdatedByReconfigurator => true,
             },
             SledPolicy::InService {
                 provision_policy: SledProvisionPolicy::NonProvisionable,
@@ -794,6 +799,7 @@ impl SledPolicy {
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
                 SledFilter::TufArtifactReplication => true,
+                SledFilter::SpsUpdatedByReconfigurator => true,
             },
             SledPolicy::Expunged => match filter {
                 SledFilter::All => true,
@@ -806,6 +812,7 @@ impl SledPolicy {
                 SledFilter::VpcRouting => false,
                 SledFilter::VpcFirewall => false,
                 SledFilter::TufArtifactReplication => false,
+                SledFilter::SpsUpdatedByReconfigurator => false,
             },
         }
     }
@@ -840,6 +847,7 @@ impl SledState {
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
                 SledFilter::TufArtifactReplication => true,
+                SledFilter::SpsUpdatedByReconfigurator => true,
             },
             SledState::Decommissioned => match filter {
                 SledFilter::All => true,
@@ -852,6 +860,7 @@ impl SledState {
                 SledFilter::VpcRouting => false,
                 SledFilter::VpcFirewall => false,
                 SledFilter::TufArtifactReplication => false,
+                SledFilter::SpsUpdatedByReconfigurator => false,
             },
         }
     }
@@ -1058,6 +1067,8 @@ pub struct SledDetails {
     pub state: SledState,
     /// current resources allocated to this sled
     pub resources: SledResources,
+    /// baseboard id for this sled
+    pub baseboard_id: BaseboardId,
 }
 
 #[derive(Debug, thiserror::Error)]