diff --git a/Cargo.lock b/Cargo.lock index d5c2342d227..f8e7b4808ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6557,6 +6557,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-error-chain", + "sp-sim", "static_assertions", "strum", "test-strategy", diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 97187d11e80..710f7318fac 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -508,19 +508,22 @@ Options: Show sleds that match the given filter Possible values: - - all: All sleds in the system, regardless of policy or state - - commissioned: All sleds that are currently part of the control plane cluster - - decommissioned: All sleds that were previously part of the control plane cluster but have been decommissioned - - discretionary: Sleds that are eligible for discretionary services - - in-service: Sleds that are in service (even if they might not be eligible for discretionary services) - - query-during-inventory: Sleds whose sled agents should be queried for inventory - - reservation-create: Sleds on which reservations can be created - - vpc-routing: Sleds which should be sent OPTE V2P mappings and Routing rules - - vpc-firewall: Sleds which should be sent VPC firewall rules - - tuf-artifact-replication: Sleds which should have TUF repo artifacts replicated onto them + - all: All sleds in the system, regardless of policy or state + - commissioned: All sleds that are currently part of the control plane + cluster + - decommissioned: All sleds that were previously part of the control plane cluster but have been decommissioned + - discretionary: Sleds that are eligible for discretionary services + - in-service: Sleds that are in service (even if they might not be + eligible for discretionary services) + - query-during-inventory: Sleds whose sled agents should be queried for inventory + - reservation-create: Sleds on which reservations can be created + - vpc-routing: Sleds which should be sent OPTE V2P mappings and Routing + rules + - vpc-firewall: Sleds which should be sent VPC firewall rules + - tuf-artifact-replication: Sleds which should have TUF repo artifacts replicated + onto them + - sps-updated-by-reconfigurator: Sleds whose SPs should be updated by Reconfigurator --log-level <LOG_LEVEL> log level filter diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs index 3bcbf07789f..85de6045b98 100644 --- a/dev-tools/reconfigurator-cli/src/lib.rs +++ b/dev-tools/reconfigurator-cli/src/lib.rs @@ -208,6 +208,7 @@ fn process_command( Commands::SledRemove(args) => cmd_sled_remove(sim, args), Commands::SledShow(args) => cmd_sled_show(sim, args), Commands::SledSetPolicy(args) => cmd_sled_set_policy(sim, args), + Commands::SledUpdateSp(args) => cmd_sled_update_sp(sim, args), Commands::SiloList => cmd_silo_list(sim), Commands::SiloAdd(args) => cmd_silo_add(sim, args), Commands::SiloRemove(args) => cmd_silo_remove(sim, args), @@ -261,6 +262,8 @@ enum Commands { SledShow(SledArgs), /// set a sled's policy SledSetPolicy(SledSetPolicyArgs), + /// simulate updating the sled's SP versions + SledUpdateSp(SledUpdateSpArgs), /// list silos SiloList, @@ -372,6 +375,20 @@ impl From<SledPolicyOpt> for SledPolicy { } } +#[derive(Debug, Args)] +struct SledUpdateSpArgs { + /// id of the sled + sled_id: SledUuid, + + /// sets the version reported for the SP active slot + #[clap(long, required_unless_present_any = &["inactive"])] + active: Option<ArtifactVersion>, + + /// sets the version reported for the SP inactive slot + #[clap(long, 
required_unless_present_any = &["active"])] + inactive: Option, +} + #[derive(Debug, Args)] struct SledRemoveArgs { /// id of the sled @@ -885,18 +902,22 @@ fn cmd_sled_show( args: SledArgs, ) -> anyhow::Result> { let state = sim.current_state(); - let planning_input = state - .system() - .description() + let description = state.system().description(); + let sled_id = args.sled_id; + let sp_active_version = description.sled_sp_active_version(sled_id)?; + let sp_inactive_version = description.sled_sp_inactive_version(sled_id)?; + let planning_input = description .to_planning_input_builder() .context("failed to generate planning_input builder")? .build(); - let sled_id = args.sled_id; - let sled_resources = - &planning_input.sled_lookup(args.filter, sled_id)?.resources; + let sled = planning_input.sled_lookup(args.filter, sled_id)?; + let sled_resources = &sled.resources; let mut s = String::new(); swriteln!(s, "sled {}", sled_id); + swriteln!(s, "serial {}", sled.baseboard_id.serial_number); swriteln!(s, "subnet {}", sled_resources.subnet.net()); + swriteln!(s, "SP active version: {:?}", sp_active_version); + swriteln!(s, "SP inactive version: {:?}", sp_inactive_version); swriteln!(s, "zpools ({}):", sled_resources.zpools.len()); for (zpool, disk) in &sled_resources.zpools { swriteln!(s, " {:?}", zpool); @@ -924,6 +945,46 @@ fn cmd_sled_set_policy( Ok(Some(format!("set sled {} policy to {}", args.sled_id, args.policy))) } +fn cmd_sled_update_sp( + sim: &mut ReconfiguratorSim, + args: SledUpdateSpArgs, +) -> anyhow::Result> { + let mut labels = Vec::new(); + if let Some(active) = &args.active { + labels.push(format!("active -> {}", active)); + } + if let Some(inactive) = &args.inactive { + labels.push(format!("inactive -> {}", inactive)); + } + + assert!( + !labels.is_empty(), + "clap configuration requires that at least one argument is specified" + ); + + let mut state = sim.current_state().to_mut(); + state.system_mut().description_mut().sled_update_sp_versions( + args.sled_id, + args.active, + args.inactive, + )?; + + sim.commit_and_bump( + format!( + "reconfigurator-cli sled-update-sp: {}: {}", + args.sled_id, + labels.join(", "), + ), + state, + ); + + Ok(Some(format!( + "set sled {} SP versions: {}", + args.sled_id, + labels.join(", ") + ))) +} + fn cmd_inventory_list( sim: &mut ReconfiguratorSim, ) -> anyhow::Result> { diff --git a/dev-tools/reconfigurator-cli/tests/input/cmds.txt b/dev-tools/reconfigurator-cli/tests/input/cmds.txt index 12e4245dc23..d8d15011399 100644 --- a/dev-tools/reconfigurator-cli/tests/input/cmds.txt +++ b/dev-tools/reconfigurator-cli/tests/input/cmds.txt @@ -13,6 +13,18 @@ sled-add 90c1102a-b9f5-4d88-92a2-60d54a2d98cc sled-add 04ef3330-c682-4a08-8def-fcc4bef31bcd sled-list +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 1.0.0 +sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --inactive 2.0.0 +sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 3.0.0 +sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 4.0.0 --inactive invalid +sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 4.0.0 --inactive 5.0.0 +sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a + inventory-generate inventory-list diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout 
b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index c77200a56b1..e15f7087686 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -37,7 +37,10 @@ T ENA ID PARENT > sled-show 2eb69596-f081-4e2d-9425-9994926e0832 sled 2eb69596-f081-4e2d-9425-9994926e0832 +serial serial1 subnet fd00:1122:3344:102::/64 +SP active version: Some("0.0.1") +SP inactive version: None zpools (10): 088ed702-551e-453b-80d7-57700372a844 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-088ed702-551e-453b-80d7-57700372a844" }, disk_id: b2850ccb-4ac7-4034-aeab-b1cd582d407b (physical_disk), policy: InService, state: Active } @@ -395,7 +398,10 @@ T ENA ID PARENT > sled-show 89d02b1b-478c-401a-8e28-7a26f74fa41b sled 89d02b1b-478c-401a-8e28-7a26f74fa41b +serial serial0 subnet fd00:1122:3344:101::/64 +SP active version: Some("0.0.1") +SP inactive version: None zpools (4): 44fa7024-c2bc-4d2c-b478-c4997e4aece8 (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-44fa7024-c2bc-4d2c-b478-c4997e4aece8" }, disk_id: 2a15b33c-dd0e-45b7-aba9-d05f40f030ff (physical_disk), policy: InService, state: Active } @@ -497,6 +503,7 @@ WARN failed to place all new desired InternalDns zones, placed: 0, wanted_to_pla INFO sufficient ExternalDns zones exist in plan, desired_count: 0, current_count: 0 WARN failed to place all new desired Nexus zones, placed: 0, wanted_to_place: 3 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 +WARN cannot issue more SP updates (no current artifacts) INFO some zones not yet up-to-date, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout index 07b3e6c10cd..a2cafb89b43 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout @@ -974,6 +974,7 @@ INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count INFO added zone to sled, sled_id: a88790de-5962-4871-8686-61c1fd5b7094, kind: ExternalDns INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 +WARN cannot issue more SP updates (no current artifacts) INFO some zones not yet up-to-date, sled_id: a88790de-5962-4871-8686-61c1fd5b7094 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 based on parent blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout index ad0fbd0baa8..f3995b05d93 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout @@ 
-1004,6 +1004,7 @@ INFO added zone to sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, kind: In INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 +WARN cannot issue more SP updates (no current artifacts) INFO some zones not yet up-to-date, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763 INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-stderr b/dev-tools/reconfigurator-cli/tests/output/cmds-stderr index e69de29bb2d..cf184b190ab 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-stderr +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-stderr @@ -0,0 +1,7 @@ +error: the following required arguments were not provided: + --active <ACTIVE> + --inactive <INACTIVE> + +Usage: sled-update-sp --active <ACTIVE> --inactive <INACTIVE> <SLED_ID> + +For more information, try '--help'. diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-stdout index 2ad9fd53316..412e0baa5d0 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-stdout @@ -15,7 +15,7 @@ T ENA ID PARENT TIME_CREATED > sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a -error: sled dde1c0e2-b10d-4621-b420-f179f7a7a00a was not found in the planning input +error: attempted to access sled dde1c0e2-b10d-4621-b420-f179f7a7a00a not found in system > sled-add dde1c0e2-b10d-4621-b420-f179f7a7a00a added sled dde1c0e2-b10d-4621-b420-f179f7a7a00a @@ -26,7 +26,10 @@ dde1c0e2-b10d-4621-b420-f179f7a7a00a 10 fd00:1122:3344:101::/64 > sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 subnet fd00:1122:3344:101::/64 +SP active version: Some("0.0.1") +SP inactive version: None zpools (10): 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } @@ -63,6 +66,169 @@ ID NZPOOLS SUBNET dde1c0e2-b10d-4621-b420-f179f7a7a00a 10 fd00:1122:3344:101::/64 +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a + +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 1.0.0 +set sled dde1c0e2-b10d-4621-b420-f179f7a7a00a SP versions: active -> 1.0.0 + +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 +subnet fd00:1122:3344:101::/64 +SP active version: Some("1.0.0") +SP inactive version: None +zpools (10): + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + 
SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --inactive 2.0.0 +set sled dde1c0e2-b10d-4621-b420-f179f7a7a00a SP versions: inactive -> 2.0.0 + +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 +subnet fd00:1122:3344:101::/64 +SP active version: Some("1.0.0") +SP inactive version: Some("2.0.0") +zpools (10): + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 
(zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 3.0.0 +set sled dde1c0e2-b10d-4621-b420-f179f7a7a00a SP versions: active -> 3.0.0 + +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 +subnet fd00:1122:3344:101::/64 +SP active version: Some("3.0.0") +SP inactive version: Some("2.0.0") +zpools (10): + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 
658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 4.0.0 --inactive invalid +set sled dde1c0e2-b10d-4621-b420-f179f7a7a00a SP versions: active -> 4.0.0, inactive -> invalid + +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 +subnet fd00:1122:3344:101::/64 +SP active version: Some("4.0.0") +SP inactive version: None +zpools (10): + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), 
policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + +> sled-update-sp dde1c0e2-b10d-4621-b420-f179f7a7a00a --active 4.0.0 --inactive 5.0.0 +set sled dde1c0e2-b10d-4621-b420-f179f7a7a00a SP versions: active -> 4.0.0, inactive -> 5.0.0 + +> sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a +sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 +subnet fd00:1122:3344:101::/64 +SP active version: Some("4.0.0") +SP inactive version: Some("5.0.0") +zpools (10): + 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } + 104f891f-e018-4787-a346-3cfaa6cc7e9d (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-104f891f-e018-4787-a346-3cfaa6cc7e9d" }, disk_id: 301ab9e6-bdc1-4287-a37d-2604893712f8 (physical_disk), policy: InService, state: Active } + 111f7a4e-5696-4be8-b13d-8ef314bc83e0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-111f7a4e-5696-4be8-b13d-8ef314bc83e0" }, disk_id: 1f77c099-8205-41b3-ac34-3807f3bbaf56 (physical_disk), policy: InService, state: Active } + 5a1786e9-770d-4ac9-b291-4501398170b5 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-5a1786e9-770d-4ac9-b291-4501398170b5" }, disk_id: b111a961-be34-4ede-80e2-ef92af5e0a1f (physical_disk), policy: InService, state: Active } + 658fef3f-c3cd-4e6d-8823-79f9a0bec4c0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-658fef3f-c3cd-4e6d-8823-79f9a0bec4c0" }, disk_id: b3a01997-9894-4abd-83ad-e2d520d4c3a0 (physical_disk), policy: InService, state: Active } + 73ce66f5-a39a-4dd1-ad84-5647a5038d35 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-73ce66f5-a39a-4dd1-ad84-5647a5038d35" }, disk_id: 
48568b33-8f21-4537-b330-666aa3334236 (physical_disk), policy: InService, state: Active } + 7480aa69-3a3d-478d-bbdb-ba1fb74752ef (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-7480aa69-3a3d-478d-bbdb-ba1fb74752ef" }, disk_id: 9a968677-4da7-40b3-9579-9c54a7620b58 (physical_disk), policy: InService, state: Active } + 9ff438c6-00bb-4daf-9013-87969c892b02 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-9ff438c6-00bb-4daf-9013-87969c892b02" }, disk_id: cc22404e-8a30-4b98-9552-790e84a162bd (physical_disk), policy: InService, state: Active } + ad0602bf-f577-401a-a28b-687c3d86f6bb (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-ad0602bf-f577-401a-a28b-687c3d86f6bb" }, disk_id: 32baf388-4cd9-4435-b70b-d8b2e515d918 (physical_disk), policy: InService, state: Active } + da6e6a21-8d32-46f9-a2b3-635f6700c3f0 (zpool) + SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-da6e6a21-8d32-46f9-a2b3-635f6700c3f0" }, disk_id: 1e7ee543-fe10-4ba7-b8f3-d579e8e0803a (physical_disk), policy: InService, state: Active } + + + > inventory-generate generated inventory collection 6e066695-94bc-4250-bd63-fd799c166cc1 from configured sleds @@ -99,7 +265,10 @@ result: > sled-show dde1c0e2-b10d-4621-b420-f179f7a7a00a sled dde1c0e2-b10d-4621-b420-f179f7a7a00a +serial serial0 subnet fd00:1122:3344:101::/64 +SP active version: Some("4.0.0") +SP inactive version: Some("5.0.0") zpools (10): 0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa (zpool) SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", model: "fake-model", serial: "serial-0f3f1de0-7e5a-4032-a73a-74fbdabbd2fa" }, disk_id: 2dbf19d4-7b7d-48d5-9d1c-64ac2922093b (physical_disk), policy: InService, state: Active } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 9a1793cf3cd..93a71bec015 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1884,6 +1884,7 @@ mod tests { use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; + use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; @@ -2002,6 +2003,10 @@ mod tests { policy: SledPolicy::provisionable(), state: SledState::Active, resources, + baseboard_id: BaseboardId { + part_number: String::from("unused"), + serial_number: String::from("unused"), + }, } } diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index e7520fc8e3b..756d7dd604c 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -33,6 +33,7 @@ semver.workspace = true sled-agent-client.workspace = true slog.workspace = true slog-error-chain.workspace = true +sp-sim.workspace = true static_assertions.workspace = true strum.workspace = true thiserror.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index ed416e4c8bd..3a4bfde7f7c 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -1960,6 +1960,13 @@ impl<'a> 
BlueprintBuilder<'a> { Ok(self.resource_allocator()?.inject_untracked_external_dns_ip(addr)?) } + pub fn pending_mgs_updates_replace_all( + &mut self, + updates: nexus_types::deployment::PendingMgsUpdates, + ) { + self.pending_mgs_updates = updates; + } + pub fn pending_mgs_update_insert( &mut self, update: nexus_types::deployment::PendingMgsUpdate, diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index c109ef30078..9dab7e10139 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -13,7 +13,9 @@ use crate::blueprint_builder::Error; use crate::blueprint_builder::Operation; use crate::blueprint_editor::DisksEditError; use crate::blueprint_editor::SledEditError; +use crate::mgs_updates::plan_mgs_updates; use crate::planner::omicron_zone_placement::PlacementError; +use gateway_client::types::SpType; use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::Blueprint; @@ -50,6 +52,37 @@ pub use self::rng::SledPlannerRng; mod omicron_zone_placement; pub(crate) mod rng; +/// Maximum number of MGS-managed updates (updates to SP, RoT, RoT bootloader, +/// or host OS) that we allow to be pending across the whole system at one time +/// +/// For now, we limit this to 1 for safety. That's for a few reasons: +/// +/// - SP updates reboot the corresponding host. Thus, if we have one of these +/// updates outstanding, we should assume that host may be offline. Most +/// control plane services are designed to survive multiple failures (e.g., +/// the Cockroach cluster can sustain two failures and stay online), but +/// having one sled offline eats into that margin. And some services like +/// Crucible volumes can only sustain one failure. Taking down two sleds +/// would render unavailable any Crucible volumes with regions on those two +/// sleds. +/// +/// - There is unfortunately some risk in updating the RoT bootloader, in that +/// there's a window where a failure could render the device unbootable. See +/// oxidecomputer/omicron#7819 for more on this. Updating only one at a time +/// helps mitigate this risk. +/// +/// More sophisticated schemes are certainly possible (e.g., allocate Crucible +/// regions in such a way that there are at least pairs of sleds we could update +/// concurrently without taking volumes down; and/or be willing to update +/// multiple sleds as long as they don't have overlapping control plane +/// services, etc.). +const NUM_CONCURRENT_MGS_UPDATES: usize = 1; + +enum UpdateStepResult { + ContinueToNextStep, + Waiting, +} + pub struct Planner<'a> { log: Logger, input: &'a PlanningInput, @@ -115,7 +148,10 @@ impl<'a> Planner<'a> { self.do_plan_expunge()?; self.do_plan_add()?; self.do_plan_decommission()?; - self.do_plan_zone_updates()?; + if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates() + { + self.do_plan_zone_updates()?; + } self.do_plan_cockroachdb_settings(); Ok(()) } @@ -901,6 +937,63 @@ impl<'a> Planner<'a> { Ok(()) } + /// Update at most one MGS-managed device (SP, RoT, etc.), if any are out of + /// date. + fn do_plan_mgs_updates(&mut self) -> UpdateStepResult { + // Determine which baseboards we will consider updating. + // + // Sleds may be present but not adopted as part of the control plane. + // In deployed systems, this would probably only happen if a sled was + // about to be added. 
In dev/test environments, it's common to leave + // some number of sleds out of the control plane for various reasons. + // Inventory will still report them, but we don't want to touch them. + // + // For better or worse, switches and PSCs do not have the same idea of + // being adopted into the control plane. If they're present, they're + // part of the system, and we will update them. + let included_sled_baseboards: BTreeSet<_> = self + .input + .all_sleds(SledFilter::SpsUpdatedByReconfigurator) + .map(|(_sled_id, details)| &details.baseboard_id) + .collect(); + let included_baseboards = + self.inventory + .sps + .iter() + .filter_map(|(baseboard_id, sp_state)| { + let do_include = match sp_state.sp_type { + SpType::Sled => included_sled_baseboards + .contains(baseboard_id.as_ref()), + SpType::Power => true, + SpType::Switch => true, + }; + do_include.then_some(baseboard_id.clone()) + }) + .collect(); + + // Compute the new set of PendingMgsUpdates. + let current_updates = + &self.blueprint.parent_blueprint().pending_mgs_updates; + let current_artifacts = self.input.tuf_repo(); + let next = plan_mgs_updates( + &self.log, + &self.inventory, + &included_baseboards, + &current_updates, + current_artifacts, + NUM_CONCURRENT_MGS_UPDATES, + ); + + // TODO This is not quite right. See oxidecomputer/omicron#8285. + let rv = if next.is_empty() { + UpdateStepResult::ContinueToNextStep + } else { + UpdateStepResult::Waiting + }; + self.blueprint.pending_mgs_updates_replace_all(next); + rv + } + /// Update at most one existing zone to use a new image source. fn do_plan_zone_updates(&mut self) -> Result<(), Error> { // We are only interested in non-decommissioned sleds. diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index fd277b77582..c07ce554944 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -8,6 +8,7 @@ use anyhow::{Context, anyhow, bail, ensure}; use chrono::Utc; use gateway_client::types::RotState; +use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use indexmap::IndexMap; use ipnet::Ipv6Net; @@ -25,6 +26,7 @@ use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbSettings; +use nexus_types::deployment::ExpectedVersion; use nexus_types::deployment::OximeterReadPolicy; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; @@ -37,6 +39,8 @@ use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; use nexus_types::inventory::BaseboardId; +use nexus_types::inventory::Caboose; +use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::PowerState; use nexus_types::inventory::RotSlot; use nexus_types::inventory::SpType; @@ -61,6 +65,7 @@ use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::sync::Arc; use std::time::Duration; +use tufaceous_artifact::ArtifactVersion; /// Describes an actual or synthetic Oxide rack for planning and testing /// @@ -425,6 +430,43 @@ impl SystemDescription { Ok(self) } + /// Update the SP versions reported for a sled. + /// + /// Where `None` is provided, no changes are made. 
+ pub fn sled_update_sp_versions( + &mut self, + sled_id: SledUuid, + active_version: Option<ArtifactVersion>, + inactive_version: Option<ExpectedVersion>, + ) -> anyhow::Result<&mut Self> { + let sled = self.sleds.get_mut(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + let sled = Arc::make_mut(sled); + sled.set_sp_versions(active_version, inactive_version); + Ok(self) + } + + pub fn sled_sp_active_version( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<Option<&str>> { + let sled = self.sleds.get(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + Ok(sled.sp_active_caboose().map(|c| c.version.as_ref())) + } + + pub fn sled_sp_inactive_version( + &self, + sled_id: SledUuid, + ) -> anyhow::Result<Option<&str>> { + let sled = self.sleds.get(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + Ok(sled.sp_inactive_caboose().map(|c| c.version.as_ref())) + } + pub fn to_collection_builder(&self) -> anyhow::Result<CollectionBuilder> { let collector_label = self .collector @@ -443,6 +485,46 @@ impl SystemDescription { sp_state.clone(), ) .context("recording SP state")?; + + let baseboard_id = BaseboardId { + part_number: sp_state.model.clone(), + serial_number: sp_state.serial_number.clone(), + }; + if let Some(active) = &s.sp_active_caboose() { + builder + .found_caboose( + &baseboard_id, + CabooseWhich::SpSlot0, + "fake MGS 1", + SpComponentCaboose { + board: active.board.clone(), + epoch: None, + git_commit: active.git_commit.clone(), + name: active.name.clone(), + sign: active.sign.clone(), + version: active.version.clone(), + }, + ) + .context("recording SP active caboose")?; + } + + if let Some(inactive) = &s.sp_inactive_caboose() { + builder + .found_caboose( + &baseboard_id, + CabooseWhich::SpSlot1, + "fake MGS 1", + SpComponentCaboose { + board: inactive.board.clone(), + epoch: None, + git_commit: inactive.git_commit.clone(), + name: inactive.name.clone(), + sign: inactive.sign.clone(), + version: inactive.version.clone(), + }, + ) + .context("recording SP inactive caboose")?; + } } builder @@ -491,6 +573,18 @@ impl SystemDescription { policy: sled.policy, state: sled.state, resources: sled.resources.clone(), + baseboard_id: BaseboardId { + part_number: sled + .inventory_sled_agent + .baseboard + .model() + .to_owned(), + serial_number: sled + .inventory_sled_agent + .baseboard + .identifier() + .to_owned(), + }, }; builder.add_sled(sled.sled_id, sled_details)?; } @@ -595,6 +689,8 @@ pub struct SledHwInventory<'a> { pub baseboard_id: &'a BaseboardId, pub sp: &'a nexus_types::inventory::ServiceProcessor, pub rot: &'a nexus_types::inventory::RotState, + pub sp_active: Option<Arc<Caboose>>, + pub sp_inactive: Option<Arc<Caboose>>, } /// Our abstract description of a `Sled` @@ -609,6 +705,8 @@ pub struct Sled { policy: SledPolicy, state: SledState, resources: SledResources, + sp_active_caboose: Option<Arc<Caboose>>, + sp_inactive_caboose: Option<Arc<Caboose>>, } impl Sled { @@ -755,6 +853,10 @@ impl Sled { }, state: SledState::Active, resources: SledResources { subnet: sled_subnet, zpools }, + sp_active_caboose: Some(Arc::new(Self::default_sp_caboose( + String::from("0.0.1"), + ))), + sp_inactive_caboose: None, } } @@ -785,6 +887,10 @@ impl Sled { }) .unwrap_or(Baseboard::Unknown); + let sp_active_caboose = + inventory_sp.as_ref().and_then(|hw| hw.sp_active.clone()); + let sp_inactive_caboose = + inventory_sp.as_ref().and_then(|hw| hw.sp_inactive.clone()); let inventory_sp = inventory_sp.map(|sledhw| { // RotStateV3 unconditionally sets all of these 
let sp_state = if sledhw.rot.slot_a_sha3_256_digest.is_some() @@ -892,6 +998,8 @@ impl Sled { policy: sled_policy, state: sled_state, resources: sled_resources, + sp_active_caboose, + sp_inactive_caboose, } } @@ -921,6 +1029,68 @@ impl Sled { fn sled_agent_inventory(&self) -> &Inventory { &self.inventory_sled_agent } + + fn sp_active_caboose(&self) -> Option<&Caboose> { + self.sp_active_caboose.as_deref() + } + + fn sp_inactive_caboose(&self) -> Option<&Caboose> { + self.sp_inactive_caboose.as_deref() + } + + /// Update the reported SP versions + /// + /// If either field is `None`, that field is _unchanged_. + // Note that this means there's no way to _unset_ the version. + fn set_sp_versions( + &mut self, + active_version: Option<ArtifactVersion>, + inactive_version: Option<ExpectedVersion>, + ) { + if let Some(active_version) = active_version { + match &mut self.sp_active_caboose { + Some(caboose) => { + Arc::make_mut(caboose).version = active_version.to_string() + } + new @ None => { + *new = Some(Arc::new(Self::default_sp_caboose( + active_version.to_string(), + ))); + } + } + } + + if let Some(inactive_version) = inactive_version { + match inactive_version { + ExpectedVersion::NoValidVersion => { + self.sp_inactive_caboose = None; + } + ExpectedVersion::Version(v) => { + match &mut self.sp_inactive_caboose { + Some(caboose) => { + Arc::make_mut(caboose).version = v.to_string() + } + new @ None => { + *new = Some(Arc::new(Self::default_sp_caboose( + v.to_string(), + ))); + } + } + } + } + } + } + + fn default_sp_caboose(version: String) -> Caboose { + let board = sp_sim::SIM_GIMLET_BOARD.to_string(); + Caboose { + board: board.clone(), + git_commit: String::from("unknown"), + name: board, + version: version.to_string(), + sign: None, + } + } } #[derive(Clone, Copy, Debug)] diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 59e6a8ab6aa..b0fe41ce1c1 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,6 +33,7 @@ use nexus_types::deployment::SledResources; use nexus_types::deployment::UnstableReconfiguratorState; use nexus_types::identity::Asset; use nexus_types::identity::Resource; +use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; @@ -274,6 +275,10 @@ impl PlanningInputFromDb<'_> { policy: sled_row.policy(), state: sled_row.state().into(), resources: SledResources { subnet, zpools }, + baseboard_id: BaseboardId { + part_number: sled_row.part_number().to_owned(), + serial_number: sled_row.serial_number().to_owned(), + }, }; // TODO-cleanup use `TypedUuid` everywhere let sled_id = SledUuid::from_untyped_uuid(sled_id); diff --git a/nexus/reconfigurator/simulation/src/system.rs b/nexus/reconfigurator/simulation/src/system.rs index 137b7026abb..5b5662d5cb5 100644 --- a/nexus/reconfigurator/simulation/src/system.rs +++ b/nexus/reconfigurator/simulation/src/system.rs @@ -17,7 +17,7 @@ use nexus_types::{ Blueprint, BlueprintTarget, SledFilter, UnstableReconfiguratorState, }, internal_api::params::{DnsConfigParams, DnsConfigZone}, - inventory::Collection, + inventory::{CabooseWhich, Collection}, }; use omicron_common::{address::IpRange, api::external::Generation}; use omicron_uuid_kinds::{BlueprintUuid, CollectionUuid, SledUuid}; @@ -687,7 +687,7 @@ impl SimSystemBuilderInner { else { res.warnings.push(format!( "sled {}: skipped (no inventory found for sled agent in \ - collection {}", + collection 
{})", sled_id, primary_collection_id )); continue; @@ -699,11 +699,19 @@ impl SimSystemBuilderInner { .and_then(|baseboard_id| { let inv_sp = primary_collection.sps.get(baseboard_id); let inv_rot = primary_collection.rots.get(baseboard_id); + let sp_active = primary_collection + .caboose_for(CabooseWhich::SpSlot0, baseboard_id) + .map(|c| c.caboose.clone()); + let sp_inactive = primary_collection + .caboose_for(CabooseWhich::SpSlot1, baseboard_id) + .map(|c| c.caboose.clone()); if let (Some(inv_sp), Some(inv_rot)) = (inv_sp, inv_rot) { Some(SledHwInventory { baseboard_id: &baseboard_id, sp: inv_sp, rot: inv_rot, + sp_active, + sp_inactive, }) } else { None diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 270a3e20e96..b97a8276c92 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -1428,6 +1428,15 @@ impl FromStr for ExpectedVersion { } } +impl fmt::Display for ExpectedVersion { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ExpectedVersion::NoValidVersion => f.write_str("invalid"), + ExpectedVersion::Version(v) => v.fmt(f), + } + } +} + /// Describes the expected active RoT slot, and the version we expect to find for it #[derive( Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize, Diffable, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 86e9fb02d94..51ff668b0d2 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -14,6 +14,7 @@ use crate::external_api::views::PhysicalDiskState; use crate::external_api::views::SledPolicy; use crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; +use crate::inventory::BaseboardId; use chrono::DateTime; use chrono::Utc; use clap::ValueEnum; @@ -724,6 +725,9 @@ pub enum SledFilter { /// Sleds which should have TUF repo artifacts replicated onto them. 
TufArtifactReplication, + + /// Sleds whose SPs should be updated by Reconfigurator + SpsUpdatedByReconfigurator, } impl SledFilter { @@ -780,6 +784,7 @@ impl SledPolicy { SledFilter::VpcRouting => true, SledFilter::VpcFirewall => true, SledFilter::TufArtifactReplication => true, + SledFilter::SpsUpdatedByReconfigurator => true, }, SledPolicy::InService { provision_policy: SledProvisionPolicy::NonProvisionable, @@ -794,6 +799,7 @@ impl SledPolicy { SledFilter::VpcRouting => true, SledFilter::VpcFirewall => true, SledFilter::TufArtifactReplication => true, + SledFilter::SpsUpdatedByReconfigurator => true, }, SledPolicy::Expunged => match filter { SledFilter::All => true, @@ -806,6 +812,7 @@ impl SledPolicy { SledFilter::VpcRouting => false, SledFilter::VpcFirewall => false, SledFilter::TufArtifactReplication => false, + SledFilter::SpsUpdatedByReconfigurator => false, }, } } @@ -840,6 +847,7 @@ impl SledState { SledFilter::VpcRouting => true, SledFilter::VpcFirewall => true, SledFilter::TufArtifactReplication => true, + SledFilter::SpsUpdatedByReconfigurator => true, }, SledState::Decommissioned => match filter { SledFilter::All => true, @@ -852,6 +860,7 @@ impl SledState { SledFilter::VpcRouting => false, SledFilter::VpcFirewall => false, SledFilter::TufArtifactReplication => false, + SledFilter::SpsUpdatedByReconfigurator => false, }, } } @@ -1058,6 +1067,8 @@ pub struct SledDetails { pub state: SledState, /// current resources allocated to this sled pub resources: SledResources, + /// baseboard id for this sled + pub baseboard_id: BaseboardId, } #[derive(Debug, thiserror::Error)]
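
A note for readers on the `ExpectedVersion` semantics exercised by `sled-update-sp` above: `--active` takes a plain `ArtifactVersion` and always overwrites the active slot, while `--inactive` takes an `ExpectedVersion`, whose literal `invalid` (`ExpectedVersion::NoValidVersion`) clears the inactive slot entirely; that is why the transcript's `--active 4.0.0 --inactive invalid` step reports `SP inactive version: None`. Below is a minimal, self-contained sketch of that update rule. The types here are local stand-ins for illustration, not omicron's actual definitions.

    // Illustrative model of the `set_sp_versions` rule in this change:
    // - `None` for either argument leaves that slot unchanged
    // - `ExpectedVersion::NoValidVersion` ("invalid" on the command line)
    //   clears the inactive slot; the active slot can never be unset
    #[derive(Clone, Debug, PartialEq)]
    enum ExpectedVersion {
        NoValidVersion,  // parsed from the literal string "invalid"
        Version(String), // any other input parses as a version
    }

    #[derive(Debug, PartialEq)]
    struct SpVersions {
        active: Option<String>,   // version reported for the active slot
        inactive: Option<String>, // version reported for the inactive slot
    }

    impl SpVersions {
        fn set(
            &mut self,
            active: Option<String>,
            inactive: Option<ExpectedVersion>,
        ) {
            if let Some(v) = active {
                self.active = Some(v); // always overwritten, never cleared
            }
            match inactive {
                None => (), // unchanged
                Some(ExpectedVersion::NoValidVersion) => self.inactive = None,
                Some(ExpectedVersion::Version(v)) => self.inactive = Some(v),
            }
        }
    }

    fn main() {
        // Replays the cmds-stdout transcript above.
        let mut sp = SpVersions { active: Some("0.0.1".into()), inactive: None };
        sp.set(Some("1.0.0".into()), None);
        sp.set(None, Some(ExpectedVersion::Version("2.0.0".into())));
        sp.set(Some("4.0.0".into()), Some(ExpectedVersion::NoValidVersion));
        assert_eq!(
            sp,
            SpVersions { active: Some("4.0.0".into()), inactive: None }
        );
    }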