diff --git a/Cargo.lock b/Cargo.lock index 73f5a4a2ca1..78c7feed520 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6830,6 +6830,7 @@ dependencies = [ "internal-dns-resolver", "internal-dns-types", "ipnet", + "ipnetwork", "newtype-uuid", "nexus-config", "nexus-db-lookup", diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 2fc349f2d83..e3e35414ef2 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -23,7 +23,6 @@ use nexus_db_model::DbMetadataNexusState; use nexus_db_model::EARLIEST_SUPPORTED_VERSION; use nexus_db_model::SchemaUpgradeStep; use nexus_db_model::SchemaVersion; -use nexus_types::deployment::BlueprintZoneDisposition; use omicron_common::api::external::Error; use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::GenericUuid; @@ -830,24 +829,19 @@ impl DataStore { pub async fn database_nexus_access_create( &self, opctx: &OpContext, - blueprint: &nexus_types::deployment::Blueprint, + blueprint_id: BlueprintUuid, + active: &BTreeSet, + not_yet: &BTreeSet, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - // TODO: Without https://github.com/oxidecomputer/omicron/pull/8863, we - // treat all Nexuses as active. Some will become "not_yet", depending on - // the Nexus Generation, once it exists. 
- let active_nexus_zones = blueprint - .all_omicron_zones(BlueprintZoneDisposition::is_in_service) - .filter_map(|(_sled, zone_cfg)| { - if zone_cfg.zone_type.is_nexus() { - Some(zone_cfg) - } else { - None - } - }); - let new_nexuses = active_nexus_zones - .map(|z| DbMetadataNexus::new(z.id, DbMetadataNexusState::Active)) + let active_nexuses = active + .into_iter() + .map(|id| DbMetadataNexus::new(*id, DbMetadataNexusState::Active)) + .collect::>(); + let not_yet_nexuses = not_yet + .into_iter() + .map(|id| DbMetadataNexus::new(*id, DbMetadataNexusState::NotYet)) .collect::>(); let conn = &*self.pool_connection_authorized(&opctx).await?; @@ -855,14 +849,15 @@ impl DataStore { &conn, "database_nexus_access_create", opctx, - blueprint.id, + blueprint_id, |conn| { - let new_nexuses = new_nexuses.clone(); + let nexus_records = + [&active_nexuses[..], ¬_yet_nexuses[..]].concat(); async move { use nexus_db_schema::schema::db_metadata_nexus::dsl; diesel::insert_into(dsl::db_metadata_nexus) - .values(new_nexuses) + .values(nexus_records) .on_conflict(dsl::nexus_id) .do_nothing() .execute_async(conn) @@ -1209,35 +1204,8 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use camino::Utf8Path; use camino_tempfile::Utf8TempDir; - use id_map::IdMap; use nexus_db_model::SCHEMA_VERSION; - use nexus_inventory::now_db_precision; - use nexus_types::deployment::Blueprint; - use nexus_types::deployment::BlueprintHostPhase2DesiredSlots; - use nexus_types::deployment::BlueprintSledConfig; - use nexus_types::deployment::BlueprintTarget; - use nexus_types::deployment::BlueprintZoneConfig; - use nexus_types::deployment::BlueprintZoneDisposition; - use nexus_types::deployment::BlueprintZoneImageSource; - use nexus_types::deployment::BlueprintZoneType; - use nexus_types::deployment::CockroachDbPreserveDowngrade; - use nexus_types::deployment::OximeterReadMode; - use nexus_types::deployment::PendingMgsUpdates; - use nexus_types::deployment::PlanningReport; - use 
nexus_types::deployment::blueprint_zone_type; - use nexus_types::external_api::views::SledState; - use nexus_types::inventory::NetworkInterface; - use nexus_types::inventory::NetworkInterfaceKind; - use omicron_common::api::external::Generation; - use omicron_common::api::external::MacAddr; - use omicron_common::api::external::Vni; - use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev; - use omicron_uuid_kinds::BlueprintUuid; - use omicron_uuid_kinds::ExternalIpUuid; - use omicron_uuid_kinds::SledUuid; - use omicron_uuid_kinds::ZpoolUuid; - use std::collections::BTreeMap; // Confirms that calling the internal "ensure_schema" function can succeed // when the database is already at that version. @@ -2109,334 +2077,6 @@ mod test { logctx.cleanup_successful(); } - fn create_test_blueprint( - nexus_zones: Vec<(OmicronZoneUuid, BlueprintZoneDisposition)>, - ) -> Blueprint { - let blueprint_id = BlueprintUuid::new_v4(); - let sled_id = SledUuid::new_v4(); - - let zones: IdMap = nexus_zones - .into_iter() - .map(|(zone_id, disposition)| BlueprintZoneConfig { - disposition, - id: zone_id, - filesystem_pool: ZpoolName::new_external(ZpoolUuid::new_v4()), - zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { - internal_address: "[::1]:0".parse().unwrap(), - external_dns_servers: Vec::new(), - external_ip: nexus_types::deployment::OmicronZoneExternalFloatingIp { - id: ExternalIpUuid::new_v4(), - ip: std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST), - }, - external_tls: true, - nic: NetworkInterface { - id: uuid::Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { - id: zone_id.into_untyped_uuid(), - }, - name: "test-nic".parse().unwrap(), - ip: "192.168.1.1".parse().unwrap(), - mac: MacAddr::random_system(), - subnet: ipnetwork::IpNetwork::V4( - "192.168.1.0/24".parse().unwrap() - ).into(), - vni: Vni::try_from(100).unwrap(), - primary: true, - slot: 0, - transit_ips: Vec::new(), - }, - nexus_generation: Generation::new(), - }), - 
image_source: BlueprintZoneImageSource::InstallDataset, - }) - .collect(); - - let mut sleds = BTreeMap::new(); - sleds.insert( - sled_id, - BlueprintSledConfig { - state: SledState::Active, - sled_agent_generation: Generation::new(), - zones, - disks: IdMap::new(), - datasets: IdMap::new(), - remove_mupdate_override: None, - host_phase_2: BlueprintHostPhase2DesiredSlots::current_contents( - ), - }, - ); - - Blueprint { - id: blueprint_id, - sleds, - pending_mgs_updates: PendingMgsUpdates::new(), - parent_blueprint_id: None, - internal_dns_version: Generation::new(), - external_dns_version: Generation::new(), - target_release_minimum_generation: Generation::new(), - nexus_generation: Generation::new(), - cockroachdb_fingerprint: String::new(), - cockroachdb_setting_preserve_downgrade: - CockroachDbPreserveDowngrade::DoNotModify, - clickhouse_cluster_config: None, - oximeter_read_mode: OximeterReadMode::SingleNode, - oximeter_read_version: Generation::new(), - time_created: now_db_precision(), - creator: "test suite".to_string(), - comment: "test blueprint".to_string(), - report: PlanningReport::new(blueprint_id), - } - } - - #[tokio::test] - async fn test_database_nexus_access_create() { - let logctx = dev::test_setup_log("test_database_nexus_access_create"); - let db = TestDatabase::new_with_datastore(&logctx.log).await; - let datastore = db.datastore(); - let opctx = db.opctx(); - - // Create a blueprint with two in-service Nexus zones, - // and one expunged Nexus. 
- let nexus1_id = OmicronZoneUuid::new_v4(); - let nexus2_id = OmicronZoneUuid::new_v4(); - let expunged_nexus = OmicronZoneUuid::new_v4(); - let blueprint = create_test_blueprint(vec![ - (nexus1_id, BlueprintZoneDisposition::InService), - (nexus2_id, BlueprintZoneDisposition::InService), - ( - expunged_nexus, - BlueprintZoneDisposition::Expunged { - as_of_generation: Generation::new(), - ready_for_cleanup: true, - }, - ), - ]); - - // Insert the blueprint and make it the target - datastore - .blueprint_insert(&opctx, &blueprint) - .await - .expect("Failed to insert blueprint"); - datastore - .blueprint_target_set_current( - &opctx, - BlueprintTarget { - target_id: blueprint.id, - enabled: false, - time_made_target: chrono::Utc::now(), - }, - ) - .await - .expect("Failed to set blueprint target"); - - // Create nexus access records - datastore - .database_nexus_access_create(&opctx, &blueprint) - .await - .expect("Failed to create nexus access"); - - // Verify records were created with Active state - let nexus1_access = datastore - .database_nexus_access(nexus1_id) - .await - .expect("Failed to get nexus1 access"); - let nexus2_access = datastore - .database_nexus_access(nexus2_id) - .await - .expect("Failed to get nexus2 access"); - let expunged_access = datastore - .database_nexus_access(expunged_nexus) - .await - .expect("Failed to get expunged access"); - - assert!(nexus1_access.is_some(), "nexus1 should have access record"); - assert!(nexus2_access.is_some(), "nexus2 should have access record"); - assert!( - expunged_access.is_none(), - "expunged nexus should not have access record" - ); - - let nexus1_record = nexus1_access.unwrap(); - let nexus2_record = nexus2_access.unwrap(); - assert_eq!(nexus1_record.state(), DbMetadataNexusState::Active); - assert_eq!(nexus2_record.state(), DbMetadataNexusState::Active); - - db.terminate().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_database_nexus_access_create_idempotent() { - let 
logctx = - dev::test_setup_log("test_database_nexus_access_create_idempotent"); - let db = TestDatabase::new_with_datastore(&logctx.log).await; - let datastore = db.datastore(); - let opctx = db.opctx(); - - // Create a blueprint with one Nexus zone - let nexus_id = OmicronZoneUuid::new_v4(); - let blueprint = create_test_blueprint(vec![( - nexus_id, - BlueprintZoneDisposition::InService, - )]); - - // Insert the blueprint and make it the target - datastore - .blueprint_insert(&opctx, &blueprint) - .await - .expect("Failed to insert blueprint"); - datastore - .blueprint_target_set_current( - &opctx, - BlueprintTarget { - target_id: blueprint.id, - enabled: false, - time_made_target: chrono::Utc::now(), - }, - ) - .await - .expect("Failed to set blueprint target"); - - // Create nexus access records (first time) - datastore - .database_nexus_access_create(&opctx, &blueprint) - .await - .expect("Failed to create nexus access (first time)"); - - // Verify record was created - async fn confirm_state( - datastore: &DataStore, - nexus_id: OmicronZoneUuid, - expected_state: DbMetadataNexusState, - ) { - let state = datastore - .database_nexus_access(nexus_id) - .await - .expect("Failed to get nexus access after first create") - .expect("Entry for Nexus should have been inserted"); - assert_eq!(state.state(), expected_state); - } - - confirm_state(datastore, nexus_id, DbMetadataNexusState::Active).await; - - // Creating the record again: not an error. - datastore - .database_nexus_access_create(&opctx, &blueprint) - .await - .expect("Failed to create nexus access (first time)"); - confirm_state(datastore, nexus_id, DbMetadataNexusState::Active).await; - - // Manually make the record "Quiesced". 
- use nexus_db_schema::schema::db_metadata_nexus::dsl; - diesel::update(dsl::db_metadata_nexus) - .filter(dsl::nexus_id.eq(nexus_id.into_untyped_uuid())) - .set(dsl::state.eq(DbMetadataNexusState::Quiesced)) - .execute_async( - &*datastore.pool_connection_unauthorized().await.unwrap(), - ) - .await - .expect("Failed to update record"); - confirm_state(datastore, nexus_id, DbMetadataNexusState::Quiesced) - .await; - - // Create nexus access records another time - should be idempotent, - // but should be "on-conflict, ignore". - datastore - .database_nexus_access_create(&opctx, &blueprint) - .await - .expect("Failed to create nexus access (second time)"); - confirm_state(datastore, nexus_id, DbMetadataNexusState::Quiesced) - .await; - - db.terminate().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_database_nexus_access_create_fails_wrong_target_blueprint() { - let logctx = dev::test_setup_log( - "test_database_nexus_access_create_fails_wrong_target_blueprint", - ); - let db = TestDatabase::new_with_datastore(&logctx.log).await; - let datastore = db.datastore(); - let opctx = db.opctx(); - - // Create two different blueprints - let nexus_id = OmicronZoneUuid::new_v4(); - let target_blueprint = create_test_blueprint(vec![( - nexus_id, - BlueprintZoneDisposition::InService, - )]); - let non_target_blueprint = create_test_blueprint(vec![( - nexus_id, - BlueprintZoneDisposition::InService, - )]); - - // Insert both blueprints - datastore - .blueprint_insert(&opctx, &target_blueprint) - .await - .expect("Failed to insert target blueprint"); - datastore - .blueprint_insert(&opctx, &non_target_blueprint) - .await - .expect("Failed to insert non-target blueprint"); - - // Set the first blueprint as the target - datastore - .blueprint_target_set_current( - &opctx, - BlueprintTarget { - target_id: target_blueprint.id, - enabled: false, - time_made_target: chrono::Utc::now(), - }, - ) - .await - .expect("Failed to set target blueprint"); - - // Try 
to create nexus access records using the non-target blueprint. - // This should fail because the transaction should check if the - // blueprint is the current target - let result = datastore - .database_nexus_access_create(&opctx, &non_target_blueprint) - .await; - assert!( - result.is_err(), - "Creating nexus access with wrong target blueprint should fail" - ); - - // Verify no records were created for the nexus - let access = datastore - .database_nexus_access(nexus_id) - .await - .expect("Failed to get nexus access"); - assert!( - access.is_none(), - "No access record should exist when wrong blueprint is used" - ); - - // Verify that using the correct target blueprint works - datastore - .database_nexus_access_create(&opctx, &target_blueprint) - .await - .expect( - "Creating nexus access with correct blueprint should succeed", - ); - - let access_after_correct = datastore - .database_nexus_access(nexus_id) - .await - .expect("Failed to get nexus access after correct blueprint"); - assert!( - access_after_correct.is_some(), - "Access record should exist after using correct target blueprint" - ); - - db.terminate().await; - logctx.cleanup_successful(); - } - #[tokio::test] async fn test_database_nexus_access_delete() { let logctx = dev::test_setup_log("test_database_nexus_access_delete"); diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index 43f92b80113..1ed22ffa34d 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -55,6 +55,7 @@ async-bb8-diesel.workspace = true diesel.workspace = true httptest.workspace = true ipnet.workspace = true +ipnetwork.workspace = true nexus-db-queries = { workspace = true, features = ["testing"] } nexus-db-schema.workspace = true nexus-reconfigurator-planning.workspace = true diff --git a/nexus/reconfigurator/execution/src/database.rs b/nexus/reconfigurator/execution/src/database.rs index 9652e53ec1a..1ba534ed785 100644 --- 
a/nexus/reconfigurator/execution/src/database.rs +++ b/nexus/reconfigurator/execution/src/database.rs @@ -8,6 +8,9 @@ use anyhow::anyhow; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneDisposition; +use omicron_uuid_kinds::OmicronZoneUuid; +use std::collections::BTreeSet; /// Idempotently ensure that the Nexus records for the zones are populated /// in the database. @@ -15,10 +18,658 @@ pub(crate) async fn deploy_db_metadata_nexus_records( opctx: &OpContext, datastore: &DataStore, blueprint: &Blueprint, + nexus_id: OmicronZoneUuid, ) -> Result<(), anyhow::Error> { + // To determine what state to use for new records, we need to know which is + // the currently active Nexus generation. This is not quite the same as the + // blueprint's `nexus_generation`. That field describes which generation + // the system is *trying* to put in control. It gets bumped in order to + // trigger the handoff process. But between when it gets bumped and when + // the handoff has finished, that generation number is ahead of the one + // currently in control. + // + // The actual generation number that's currently active is necessarily the + // generation number of the Nexus instance that's doing the execution. 
+ let active_generation = blueprint + .all_nexus_zones(BlueprintZoneDisposition::is_in_service) + .find_map(|(_sled_id, zone_cfg, nexus_config)| { + (zone_cfg.id == nexus_id).then_some(nexus_config.nexus_generation) + }) + .ok_or_else(|| { + anyhow!( + "did not find nexus generation for current \ + Nexus zone ({nexus_id})" + ) + })?; + + let mut active = BTreeSet::new(); + let mut not_yet = BTreeSet::new(); + for (_sled_id, zone_config, nexus_config) in + blueprint.all_nexus_zones(BlueprintZoneDisposition::is_in_service) + { + if nexus_config.nexus_generation == active_generation { + active.insert(zone_config.id); + } else if nexus_config.nexus_generation > active_generation { + not_yet.insert(zone_config.id); + } + } + datastore - .database_nexus_access_create(opctx, blueprint) + .database_nexus_access_create(opctx, blueprint.id, &active, ¬_yet) .await .map_err(|err| anyhow!(err))?; Ok(()) } + +#[cfg(test)] +mod test { + use super::*; + use id_map::IdMap; + use nexus_db_model::DbMetadataNexus; + use nexus_db_model::DbMetadataNexusState; + use nexus_db_queries::db::pub_test_utils::TestDatabase; + use nexus_inventory::now_db_precision; + use nexus_types::deployment::Blueprint; + use nexus_types::deployment::BlueprintHostPhase2DesiredSlots; + use nexus_types::deployment::BlueprintSledConfig; + use nexus_types::deployment::BlueprintTarget; + use nexus_types::deployment::BlueprintZoneConfig; + use nexus_types::deployment::BlueprintZoneDisposition; + use nexus_types::deployment::BlueprintZoneImageSource; + use nexus_types::deployment::BlueprintZoneType; + use nexus_types::deployment::CockroachDbPreserveDowngrade; + use nexus_types::deployment::OximeterReadMode; + use nexus_types::deployment::PendingMgsUpdates; + use nexus_types::deployment::PlanningReport; + use nexus_types::deployment::blueprint_zone_type; + use nexus_types::external_api::views::SledState; + use nexus_types::inventory::NetworkInterface; + use nexus_types::inventory::NetworkInterfaceKind; + use 
omicron_common::api::external::Error; + use omicron_common::api::external::Generation; + use omicron_common::api::external::MacAddr; + use omicron_common::api::external::Vni; + use omicron_common::zpool_name::ZpoolName; + use omicron_test_utils::dev; + use omicron_uuid_kinds::BlueprintUuid; + use omicron_uuid_kinds::ExternalIpUuid; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::OmicronZoneUuid; + use omicron_uuid_kinds::SledUuid; + use omicron_uuid_kinds::ZpoolUuid; + use std::collections::BTreeMap; + + fn create_test_blueprint( + top_level_nexus_generation: Generation, + nexus_zones: Vec<( + OmicronZoneUuid, + BlueprintZoneDisposition, + Generation, + )>, + ) -> Blueprint { + let blueprint_id = BlueprintUuid::new_v4(); + let sled_id = SledUuid::new_v4(); + + let zones: IdMap = nexus_zones + .into_iter() + .map(|(zone_id, disposition, nexus_generation)| BlueprintZoneConfig { + disposition, + id: zone_id, + filesystem_pool: ZpoolName::new_external(ZpoolUuid::new_v4()), + zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address: "[::1]:0".parse().unwrap(), + external_dns_servers: Vec::new(), + external_ip: nexus_types::deployment::OmicronZoneExternalFloatingIp { + id: ExternalIpUuid::new_v4(), + ip: std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST), + }, + external_tls: true, + nic: NetworkInterface { + id: uuid::Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: zone_id.into_untyped_uuid(), + }, + name: "test-nic".parse().unwrap(), + ip: "192.168.1.1".parse().unwrap(), + mac: MacAddr::random_system(), + subnet: ipnetwork::IpNetwork::V4( + "192.168.1.0/24".parse().unwrap() + ).into(), + vni: Vni::try_from(100).unwrap(), + primary: true, + slot: 0, + transit_ips: Vec::new(), + }, + nexus_generation, + }), + image_source: BlueprintZoneImageSource::InstallDataset, + }) + .collect(); + + let mut sleds = BTreeMap::new(); + sleds.insert( + sled_id, + BlueprintSledConfig { + state: SledState::Active, + 
sled_agent_generation: Generation::new(), + zones, + disks: IdMap::new(), + datasets: IdMap::new(), + remove_mupdate_override: None, + host_phase_2: BlueprintHostPhase2DesiredSlots::current_contents( + ), + }, + ); + + Blueprint { + id: blueprint_id, + sleds, + pending_mgs_updates: PendingMgsUpdates::new(), + parent_blueprint_id: None, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), + target_release_minimum_generation: Generation::new(), + nexus_generation: top_level_nexus_generation, + cockroachdb_fingerprint: String::new(), + cockroachdb_setting_preserve_downgrade: + CockroachDbPreserveDowngrade::DoNotModify, + clickhouse_cluster_config: None, + oximeter_read_mode: OximeterReadMode::SingleNode, + oximeter_read_version: Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), + } + } + + async fn database_nexus_access( + opctx: &OpContext, + datastore: &DataStore, + nexus_id: OmicronZoneUuid, + ) -> Result, Error> { + datastore + .database_nexus_access_all( + &opctx, + &std::iter::once(nexus_id).collect(), + ) + .await + .map(|v| v.into_iter().next()) + } + + #[tokio::test] + async fn test_database_nexus_access_create() { + let logctx = dev::test_setup_log("test_database_nexus_access_create"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create a blueprint with in-service Nexus zones, and one expunged + // Nexus. 
+ let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let expunged_nexus = OmicronZoneUuid::new_v4(); + + // Our currently-running Nexus must already have a record + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .initialize_nexus_access_from_blueprint_on_connection( + &conn, + vec![nexus1_id], + ) + .await + .unwrap(); + + let blueprint = create_test_blueprint( + Generation::new(), + vec![ + // This nexus matches the top-level generation, and already + // exists as "active". + ( + nexus1_id, + BlueprintZoneDisposition::InService, + Generation::new(), + ), + // This nexus is ahead of the top-level nexus generation, + // and will be created as "not yet". + ( + nexus2_id, + BlueprintZoneDisposition::InService, + Generation::new().next(), + ), + ( + expunged_nexus, + BlueprintZoneDisposition::Expunged { + as_of_generation: Generation::new(), + ready_for_cleanup: true, + }, + Generation::new(), + ), + ], + ); + + // Insert the blueprint and make it the target + datastore + .blueprint_insert(&opctx, &blueprint) + .await + .expect("Failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set blueprint target"); + + // Create nexus access records + deploy_db_metadata_nexus_records( + &opctx, datastore, &blueprint, nexus1_id, + ) + .await + .expect("Failed to create nexus access"); + + // Verify records were created for in-service Nexuses. 
+ let nexus1_access = database_nexus_access(&opctx, datastore, nexus1_id) + .await + .expect("Failed to get nexus1 access"); + let nexus2_access = database_nexus_access(opctx, datastore, nexus2_id) + .await + .expect("Failed to get nexus2 access"); + let expunged_access = + database_nexus_access(opctx, datastore, expunged_nexus) + .await + .expect("Failed to get expunged access"); + + assert!(nexus1_access.is_some(), "nexus1 should have access record"); + assert!(nexus2_access.is_some(), "nexus2 should have access record"); + assert!( + expunged_access.is_none(), + "expunged nexus should not have access record" + ); + + // See above for the rationale here: + // + // Nexus 1 already existed, and was active. + // Nexus 2 has a higher generation number (e.g., it represents + // a new deployment that has not yet been activated). + // The expunged Nexus was ignored. + let nexus1_record = nexus1_access.unwrap(); + let nexus2_record = nexus2_access.unwrap(); + assert_eq!(nexus1_record.state(), DbMetadataNexusState::Active); + assert_eq!(nexus2_record.state(), DbMetadataNexusState::NotYet); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_create_during_quiesce() { + let logctx = dev::test_setup_log( + "test_database_nexus_access_create_during_quiesce", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create a blueprint with in-service Nexus zones, and one expunged + // Nexus. 
+ let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let nexus3_id = OmicronZoneUuid::new_v4(); + + // Our currently-running Nexus must already have a record + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .initialize_nexus_access_from_blueprint_on_connection( + &conn, + vec![nexus1_id], + ) + .await + .unwrap(); + + let blueprint = create_test_blueprint( + // NOTE: This is using a "Generation = 2", implying that all + // nexuses using "Generation = 1" should start quiescing. + Generation::new().next(), + vec![ + // This Nexus already exists as active - even though it's + // quiescing currently. + ( + nexus1_id, + BlueprintZoneDisposition::InService, + Generation::new(), + ), + // This Nexus matches the top-level nexus generation, + // and will be created as "not yet", because "nexus1" is still + // running. + ( + nexus2_id, + BlueprintZoneDisposition::InService, + Generation::new().next(), + ), + // This Nexus will quiesce soon after starting, but can still be + // created as active. + ( + nexus3_id, + BlueprintZoneDisposition::InService, + Generation::new(), + ), + ], + ); + + // Insert the blueprint and make it the target + datastore + .blueprint_insert(&opctx, &blueprint) + .await + .expect("Failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set blueprint target"); + + // Create nexus access records + deploy_db_metadata_nexus_records( + &opctx, datastore, &blueprint, nexus1_id, + ) + .await + .expect("Failed to create nexus access"); + + // Verify records were created for in-service Nexuses. 
+ let nexus1_access = database_nexus_access(opctx, datastore, nexus1_id) + .await + .expect("Failed to get nexus1 access"); + let nexus2_access = database_nexus_access(opctx, datastore, nexus2_id) + .await + .expect("Failed to get nexus2 access"); + let nexus3_access = database_nexus_access(opctx, datastore, nexus3_id) + .await + .expect("Failed to get nexus3 access"); + + assert!(nexus1_access.is_some(), "nexus1 should have access record"); + assert!(nexus2_access.is_some(), "nexus2 should have access record"); + assert!(nexus3_access.is_some(), "nexus3 should have access record"); + + // See above for the rationale here: + // + // Nexus 1 already existed, and was active. + // Nexus 2 has a higher generation number (e.g., it represents + // a new deployment that has not yet been activated). + // Nexus 3 is getting a new record, but using the old generation number. + // It'll be treated as active. + let nexus1_record = nexus1_access.unwrap(); + let nexus2_record = nexus2_access.unwrap(); + let nexus3_record = nexus3_access.unwrap(); + assert_eq!(nexus1_record.state(), DbMetadataNexusState::Active); + assert_eq!(nexus2_record.state(), DbMetadataNexusState::NotYet); + assert_eq!(nexus3_record.state(), DbMetadataNexusState::Active); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_create_idempotent() { + let logctx = + dev::test_setup_log("test_database_nexus_access_create_idempotent"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create a blueprint with a couple Nexus zones + let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let blueprint = create_test_blueprint( + Generation::new(), + vec![ + ( + nexus1_id, + BlueprintZoneDisposition::InService, + Generation::new(), + ), + ( + nexus2_id, + BlueprintZoneDisposition::InService, + Generation::new(), + ), + ], + ); + + // Insert
the blueprint and make it the target + datastore + .blueprint_insert(&opctx, &blueprint) + .await + .expect("Failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set blueprint target"); + + // Create nexus access records (first time) + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .initialize_nexus_access_from_blueprint_on_connection( + &conn, + vec![nexus1_id, nexus2_id], + ) + .await + .unwrap(); + + // Verify record was created + async fn confirm_state( + opctx: &OpContext, + datastore: &DataStore, + nexus_id: OmicronZoneUuid, + expected_state: DbMetadataNexusState, + ) { + let state = database_nexus_access(opctx, datastore, nexus_id) + .await + .expect("Failed to get nexus access after first create") + .expect("Entry for Nexus should have been inserted"); + assert_eq!(state.state(), expected_state); + } + + confirm_state( + opctx, + datastore, + nexus1_id, + DbMetadataNexusState::Active, + ) + .await; + confirm_state( + opctx, + datastore, + nexus2_id, + DbMetadataNexusState::Active, + ) + .await; + + // Creating the record again: not an error. + deploy_db_metadata_nexus_records( + &opctx, datastore, &blueprint, nexus1_id, + ) + .await + .expect("Failed to create nexus access"); + confirm_state( + opctx, + datastore, + nexus1_id, + DbMetadataNexusState::Active, + ) + .await; + confirm_state( + opctx, + datastore, + nexus2_id, + DbMetadataNexusState::Active, + ) + .await; + + // Manually make the record "Quiesced". 
+ datastore + .database_nexus_access_update_quiesced(nexus1_id) + .await + .unwrap(); + confirm_state( + opctx, + datastore, + nexus1_id, + DbMetadataNexusState::Quiesced, + ) + .await; + confirm_state( + opctx, + datastore, + nexus2_id, + DbMetadataNexusState::Active, + ) + .await; + + // Create nexus access records another time - should be idempotent, + // but should be "on-conflict, ignore". + deploy_db_metadata_nexus_records( + &opctx, datastore, &blueprint, nexus1_id, + ) + .await + .expect("Failed to create nexus access"); + confirm_state( + opctx, + datastore, + nexus1_id, + DbMetadataNexusState::Quiesced, + ) + .await; + confirm_state( + opctx, + datastore, + nexus2_id, + DbMetadataNexusState::Active, + ) + .await; + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_nexus_access_create_fails_wrong_target_blueprint() { + let logctx = dev::test_setup_log( + "test_database_nexus_access_create_fails_wrong_target_blueprint", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let datastore = db.datastore(); + let opctx = db.opctx(); + + // Create two different blueprints, each with two Nexuses. + // + // One of these Nexuses will have a "db_metadata_nexus" record + // for bootstrapping, the other won't exist (yet). 
+ let nexus1_id = OmicronZoneUuid::new_v4(); + let nexus2_id = OmicronZoneUuid::new_v4(); + let both_nexuses = vec![ + (nexus1_id, BlueprintZoneDisposition::InService, Generation::new()), + (nexus2_id, BlueprintZoneDisposition::InService, Generation::new()), + ]; + + let target_blueprint = + create_test_blueprint(Generation::new(), both_nexuses.clone()); + let non_target_blueprint = + create_test_blueprint(Generation::new(), both_nexuses); + + // Initialize the "db_metadata_nexus" record for one of the Nexuses + let conn = datastore.pool_connection_for_tests().await.unwrap(); + datastore + .initialize_nexus_access_from_blueprint_on_connection( + &conn, + vec![nexus1_id], + ) + .await + .unwrap(); + + // Insert both blueprints + datastore + .blueprint_insert(&opctx, &target_blueprint) + .await + .expect("Failed to insert target blueprint"); + datastore + .blueprint_insert(&opctx, &non_target_blueprint) + .await + .expect("Failed to insert non-target blueprint"); + + // Set the first blueprint as the target + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: target_blueprint.id, + enabled: false, + time_made_target: chrono::Utc::now(), + }, + ) + .await + .expect("Failed to set target blueprint"); + + // Try to create nexus access records using the non-target blueprint. 
+ // This should fail because the transaction should check if the + // blueprint is the current target + let result = deploy_db_metadata_nexus_records( + &opctx, + datastore, + &non_target_blueprint, + nexus1_id, + ) + .await; + assert!( + result.is_err(), + "Creating nexus access with wrong target blueprint should fail" + ); + + // Verify no records were created for the second nexus + let access = database_nexus_access(opctx, datastore, nexus2_id) + .await + .expect("Failed to get nexus access"); + assert!( + access.is_none(), + "No access record should exist when wrong blueprint is used" + ); + + // Verify that using the correct target blueprint works + deploy_db_metadata_nexus_records( + &opctx, + datastore, + &target_blueprint, + nexus1_id, + ) + .await + .expect("Failed to create nexus access"); + + let access_after_correct = + database_nexus_access(opctx, datastore, nexus2_id) + .await + .expect("Failed to get nexus access after correct blueprint"); + assert!( + access_after_correct.is_some(), + "Access record should exist after using correct target blueprint" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 46070623181..413c651401e 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -204,6 +204,7 @@ pub async fn realize_blueprint( &opctx, datastore, blueprint, + nexus_id, ); register_deploy_sled_configs_step( @@ -405,22 +406,30 @@ fn register_deploy_db_metadata_nexus_records_step<'a>( opctx: &'a OpContext, datastore: &'a DataStore, blueprint: &'a Blueprint, + nexus_id: Option, ) { registrar .new_step( ExecutionStepId::Ensure, "Ensure db_metadata_nexus_state records exist", - async move |_cx| match database::deploy_db_metadata_nexus_records( - opctx, &datastore, &blueprint, - ) - .await - { - Ok(()) => StepSuccess::new(()).into(), - Err(err) => StepWarning::new( - (), - 
err.context("ensuring db_metadata_nexus_state").to_string(), + async move |_cx| { + let Some(nexus_id) = nexus_id else { + return StepSkipped::new((), "not running as Nexus").into(); + }; + + match database::deploy_db_metadata_nexus_records( + opctx, &datastore, &blueprint, nexus_id, ) - .into(), + .await + { + Ok(()) => StepSuccess::new(()).into(), + Err(err) => StepWarning::new( + (), + err.context("ensuring db_metadata_nexus_state") + .to_string(), + ) + .into(), + } }, ) .register(); diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3b103a7b6c4..fb602899709 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -310,6 +310,25 @@ impl Blueprint { ) } + /// Iterate over all Nexus zones that match the provided filter. + pub fn all_nexus_zones( + &self, + filter: F, + ) -> impl Iterator< + Item = (SledUuid, &BlueprintZoneConfig, &blueprint_zone_type::Nexus), + > + where + F: FnMut(BlueprintZoneDisposition) -> bool, + { + self.all_omicron_zones(filter).filter_map(|(sled_id, zone)| { + if let BlueprintZoneType::Nexus(nexus_config) = &zone.zone_type { + Some((sled_id, zone, nexus_config)) + } else { + None + } + }) + } + /// Iterate over the [`BlueprintZoneConfig`] instances that match the /// provided filter, along with the associated sled id. //