diff --git a/Cargo.lock b/Cargo.lock index 6f7c63b771f..48f1a6a7e07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3073,6 +3073,7 @@ dependencies = [ "httptest", "hyper", "ipnetwork", + "itertools", "lazy_static", "libc", "macaddr", diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index df130713c84..f1dcf9d54ba 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -983,7 +983,9 @@ CREATE TABLE omicron.public.ip_pool_range ( first_address INET NOT NULL, /* The range is inclusive of the last address. */ last_address INET NOT NULL, - ip_pool_id UUID NOT NULL + ip_pool_id UUID NOT NULL, + /* Tracks child resources, IP addresses allocated out of this range. */ + rcgen INT8 NOT NULL ); /* @@ -1002,6 +1004,66 @@ CREATE UNIQUE INDEX ON omicron.public.ip_pool_range ( STORING (first_address) WHERE time_deleted IS NULL; +/* + * External IP addresses used for instance source NAT. + * + * NOTE: This currently stores only address and port information for the + * automatic source NAT supplied for all guest instances. It does not currently + * store information about ephemeral or floating IPs. + */ +CREATE TABLE omicron.public.instance_external_ip ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + + /* FK to the `ip_pool` table. */ + ip_pool_id UUID NOT NULL, + + /* FK to the `ip_pool_range` table. */ + ip_pool_range_id UUID NOT NULL, + + /* FK to the `instance` table. */ + instance_id UUID NOT NULL, + + /* The actual external IP address. */ + ip INET NOT NULL, + + /* The first port in the allowed range, inclusive. */ + first_port INT4 NOT NULL, + + /* The last port in the allowed range, also inclusive. */ + last_port INT4 NOT NULL +); + +/* + * Index used to support quickly looking up children of the IP Pool range table, + * when checking for allocated addresses during deletion. + */ +CREATE INDEX ON omicron.public.instance_external_ip ( + ip_pool_id, + ip_pool_range_id +) + WHERE time_deleted IS NULL; + +/* + * Index used to enforce uniqueness of external IPs + * + * NOTE: This relies on the uniqueness constraint of IP addresses across all + * pools, _and_ on the fact that the number of ports assigned to each instance + * is fixed at compile time. + */ +CREATE UNIQUE INDEX ON omicron.public.instance_external_ip ( + ip, + first_port +) + WHERE time_deleted IS NULL; + +CREATE INDEX ON omicron.public.instance_external_ip ( + instance_id +) + WHERE time_deleted IS NULL; + /*******************************************************************/ /* diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 47f05cca7e5..600e8af7ed4 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -151,7 +151,24 @@ command line interface. Note that the `jq` command is required. In addition, th oxide org create myorg oxide project create -o myorg myproj -2. Define a global image that will be used as initial disk contents. +2. Create an IP Pool, for providing external connectivity to the instance later. +We need to create an IP Pool itself, and a range of IP addresses in that pool. + + oxide api /ip-pools --method POST --input - < SagaTemplate { new_action_noop_undo(sic_create_network_interfaces), ); + // Grab an external IP address and port range for the guest's Internet + // Gateway, allowing external connectivity. 
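+    // The allocation is paired with an explicit undo action so that, if a
+    // later node in this saga fails, the address and port range are released
+    // back to the pool rather than leaked.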
+ template_builder.append( + "external_ip", + "ExternalIp", + ActionFunc::new_action( + sic_allocate_external_ip, + sic_allocate_external_ip_undo, + ), + ); + // Saga actions must be atomic - they have to fully complete or fully abort. // This is because Steno assumes that the saga actions are atomic and // therefore undo actions are *not* run for the failing node. @@ -470,6 +481,40 @@ async fn sic_create_network_interfaces_undo( Ok(()) } +/// Create an external IP address for the instance. +async fn sic_allocate_external_ip( + sagactx: ActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let saga_params = sagactx.saga_params(); + let opctx = + OpContext::for_saga_action(&sagactx, &saga_params.serialized_authn); + let instance_id = sagactx.lookup::("instance_id")?; + let external_ip = datastore + .allocate_instance_external_ip(&opctx, instance_id) + .await + .map_err(ActionError::action_failed)?; + Ok(external_ip.id) +} + +/// Destroy / release an external IP address allocated for the instance. +async fn sic_allocate_external_ip_undo( + sagactx: ActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let saga_params = sagactx.saga_params(); + let opctx = + OpContext::for_saga_action(&sagactx, &saga_params.serialized_authn); + let ip_id = sagactx.lookup::("external_ip")?; + datastore + .deallocate_instance_external_ip(&opctx, ip_id) + .await + .map_err(ActionError::action_failed)?; + Ok(()) +} + /// Create disks during instance creation, and return a list of disk names // TODO implement async fn sic_create_disks_for_instance( diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 8f944e84111..4433160aa2f 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -14,6 +14,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::InstanceRuntimeState; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::ExternalIp; use sled_agent_client::types::InstanceEnsureBody; use sled_agent_client::types::InstanceHardware; use sled_agent_client::types::InstanceMigrateParams; @@ -162,10 +163,17 @@ async fn sim_instance_migrate( )), ..old_runtime }; + let external_ip = osagactx + .datastore() + .instance_lookup_external_ip(&opctx, instance_id) + .await + .map_err(ActionError::action_failed) + .map(ExternalIp::from)?; let instance_hardware = InstanceHardware { runtime: runtime.into(), // TODO: populate NICs nics: vec![], + external_ip, // TODO: populate disks disks: vec![], // TODO: populate cloud init bytes diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index d1a4dee06a0..4304fbe66b1 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -38,12 +38,15 @@ use crate::db::fixed_data::role_builtin::BUILTIN_ROLES; use crate::db::fixed_data::silo::DEFAULT_SILO; use crate::db::lookup::LookupPath; use crate::db::model::DatabaseString; +use crate::db::model::IncompleteInstanceExternalIp; use crate::db::model::IncompleteVpc; +use crate::db::model::InstanceExternalIp; use crate::db::model::IpPool; use crate::db::model::IpPoolRange; use crate::db::model::IpPoolUpdate; use crate::db::model::NetworkInterfaceUpdate; use crate::db::model::Vpc; +use crate::db::queries::external_ip::NextExternalIp; use crate::db::queries::ip_pool::FilterOverlappingIpRanges; use crate::db::queries::network_interface; use 
crate::db::queries::vpc::InsertVpcQuery; @@ -1267,41 +1270,181 @@ impl DataStore { authz_pool: &authz::IpPool, range: &IpRange, ) -> DeleteResult { + use db::schema::instance_external_ip; use db::schema::ip_pool_range::dsl; opctx.authorize(authz::Action::Modify, authz_pool).await?; - let now = Utc::now(); - // We can just delete the range, provided that it the exact first/last - // address pair exists. We are guaranteed that concurrent modifications - // don't affect this, since this query will be serialized with any - // requests to insert a new range, which must be non-overlapping. + let pool_id = authz_pool.id(); let first_address = range.first_address(); let last_address = range.last_address(); let first_net = ipnetwork::IpNetwork::from(first_address); let last_net = ipnetwork::IpNetwork::from(last_address); - let updated_rows = diesel::update(dsl::ip_pool_range) - .filter(dsl::first_address.eq(first_net)) - .filter(dsl::last_address.eq(last_net)) - .filter(dsl::time_deleted.is_null()) - .set(dsl::time_deleted.eq(now)) - .execute_async(self.pool_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel_pool(e, ErrorHandler::Server) - })?; - if updated_rows == 1 { - Ok(()) - } else { - Err(Error::invalid_request( + + // Fetch the range itself, if it exists. We'll need to protect against + // concurrent inserts of new external IPs from the target range by + // comparing the rcgen. + let range = diesel_pool_result_optional( + dsl::ip_pool_range + .filter(dsl::ip_pool_id.eq(pool_id)) + .filter(dsl::first_address.eq(first_net)) + .filter(dsl::last_address.eq(last_net)) + .filter(dsl::time_deleted.is_null()) + .select(IpPoolRange::as_select()) + .get_result_async::( + self.pool_authorized(opctx).await?, + ) + .await, + ) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))? + .ok_or_else(|| { + Error::invalid_request( format!( "The provided range {}-{} does not exist", first_address, last_address, ) .as_str(), + ) + })?; + + // Find external IPs allocated out of this pool and range. + let range_id = range.id; + let has_children = diesel::dsl::select(diesel::dsl::exists( + instance_external_ip::table + .filter(instance_external_ip::dsl::ip_pool_id.eq(pool_id)) + .filter( + instance_external_ip::dsl::ip_pool_range_id.eq(range_id), + ) + .filter(instance_external_ip::dsl::time_deleted.is_null()), + )) + .get_result_async::(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))?; + if has_children { + return Err(Error::invalid_request( + "IP pool ranges cannot be deleted while \ + external IP addresses are allocated from them", + )); + } + + // Delete the range, conditional on the rcgen not having changed. This + // protects the delete from occuring if clients allocated a new external + // IP address in between the above check for children and this query. + let rcgen = range.rcgen; + let now = Utc::now(); + let updated_rows = diesel::update( + dsl::ip_pool_range + .find(range_id) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::rcgen.eq(rcgen)), + ) + .set(dsl::time_deleted.eq(now)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))?; + if updated_rows == 1 { + Ok(()) + } else { + Err(Error::invalid_request( + "IP range deletion failed due to concurrent modification", )) } } + // External IP addresses + + /// Create an external IP address for an instance. + // TODO-correctness: This should be made idempotent. 
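+    // As written, each call builds a fresh candidate row with a new UUID, so
+    // a replayed saga node can allocate a second external IP for the same
+    // instance instead of returning the one it already holds.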
+ pub async fn allocate_instance_external_ip( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> CreateResult { + let query = + NextExternalIp::new(IncompleteInstanceExternalIp::new(instance_id)); + query + .get_result_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + use async_bb8_diesel::ConnectionError::Query; + use async_bb8_diesel::PoolError::Connection; + use diesel::result::Error::NotFound; + match e { + Connection(Query(NotFound)) => Error::invalid_request( + "No external IP addresses available for new instance", + ), + _ => public_error_from_diesel_pool(e, ErrorHandler::Server), + } + }) + } + + /// Deallocate the external IP address with the provided ID. + /// + /// To support idempotency, such as in saga operations, this method returns + /// an extra boolean, rather than the usual `DeleteResult`. The meaning of + /// return values are: + /// + /// - `Ok(true)`: The record was deleted during this call + /// - `Ok(false)`: The record was already deleted, such as by a previous + /// call + /// - `Err(_)`: Any other condition, including a non-existent record. + pub async fn deallocate_instance_external_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + ) -> Result { + use db::schema::instance_external_ip::dsl; + let now = Utc::now(); + diesel::update(dsl::instance_external_ip) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(ip_id)) + .set(dsl::time_deleted.eq(now)) + .check_if_exists::(ip_id) + .execute_and_check(self.pool_authorized(opctx).await?) + .await + .map(|r| match r.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + /// Delete all external IP addresses associated with the provided instance + /// ID. + // TODO-correctness: This should be made idempotent. + pub async fn deallocate_instance_external_ip_by_instance_id( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> DeleteResult { + use db::schema::instance_external_ip::dsl; + let now = Utc::now(); + diesel::update(dsl::instance_external_ip) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::instance_id.eq(instance_id)) + .set(dsl::time_deleted.eq(now)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + Ok(()) + } + + pub async fn instance_lookup_external_ip( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> LookupResult { + use db::schema::instance_external_ip::dsl; + dsl::instance_external_ip + .filter(dsl::instance_id.eq(instance_id)) + .filter(dsl::time_deleted.is_null()) + .select(InstanceExternalIp::as_select()) + .get_result_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + // Instances /// Idempotently insert a database record for an Instance @@ -2148,6 +2291,7 @@ impl DataStore { ipv4_block: db::model::Ipv4Net, ipv6_block: db::model::Ipv6Net, vni: db::model::Vni, + primary: bool, slot: i16, } @@ -2164,6 +2308,7 @@ impl DataStore { mac: sled_client_types::MacAddr::from(nic.mac.0), subnet: sled_client_types::IpNet::from(ip_subnet), vni: sled_client_types::Vni::from(nic.vni.0), + primary: nic.primary, slot: u8::try_from(nic.slot).unwrap(), } } @@ -2188,6 +2333,7 @@ impl DataStore { vpc_subnet::ipv4_block, vpc_subnet::ipv6_block, vpc::vni, + network_interface::is_primary, network_interface::slot, )) .get_results_async::(self.pool_authorized(opctx).await?) 
@@ -4192,6 +4338,7 @@ mod test { use crate::db::fixed_data::silo::SILO_ID; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; + use crate::db::model::InstanceExternalIp; use crate::db::model::{ConsoleSession, DatasetKind, Project, ServiceKind}; use crate::external_api::params; use chrono::{Duration, Utc}; @@ -4934,4 +5081,64 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_deallocate_instance_external_ip_is_idempotent() { + use crate::db::schema::instance_external_ip::dsl; + + let logctx = dev::test_setup_log("test_table_scan"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a record. + let now = Utc::now(); + let ip = InstanceExternalIp { + id: Uuid::new_v4(), + time_created: now, + time_modified: now, + time_deleted: None, + ip_pool_id: Uuid::new_v4(), + ip_pool_range_id: Uuid::new_v4(), + instance_id: Uuid::new_v4(), + ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( + 10, 0, 0, 1, + ))), + first_port: crate::db::model::SqlU16(0), + last_port: crate::db::model::SqlU16(10), + }; + diesel::insert_into(dsl::instance_external_ip) + .values(ip) + .execute_async(datastore.pool()) + .await + .unwrap(); + + // Delete it twice, make sure we get the right sentinel return values. + let deleted = datastore + .deallocate_instance_external_ip(&opctx, ip.id) + .await + .unwrap(); + assert!( + deleted, + "Got unexpected sentinel value back when \ + deleting external IP the first time" + ); + let deleted = datastore + .deallocate_instance_external_ip(&opctx, ip.id) + .await + .unwrap(); + assert!( + !deleted, + "Got unexpected sentinel value back when \ + deleting external IP the second time" + ); + + // Deleting a non-existing record fails + assert!(datastore + .deallocate_instance_external_ip(&opctx, Uuid::nil()) + .await + .is_err()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/db/model/external_ip.rs b/nexus/src/db/model/external_ip.rs new file mode 100644 index 00000000000..d670e02aedc --- /dev/null +++ b/nexus/src/db/model/external_ip.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Model types for external IPs, both for instances and externally-facing +//! services. 
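+//!
+//! `InstanceExternalIp` mirrors a row of the `instance_external_ip` table.
+//! `IncompleteInstanceExternalIp` carries only the caller-chosen fields (ID,
+//! timestamps, and instance ID); the allocation query fills in the pool,
+//! range, address, and port range.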
+ +use crate::db::model::SqlU16; +use crate::db::schema::instance_external_ip; +use chrono::DateTime; +use chrono::Utc; +use diesel::Queryable; +use diesel::Selectable; +use ipnetwork::IpNetwork; +use uuid::Uuid; + +#[derive(Debug, Clone, Copy, Selectable, Queryable, Insertable)] +#[diesel(table_name = instance_external_ip)] +pub struct InstanceExternalIp { + pub id: Uuid, + pub time_created: DateTime, + pub time_modified: DateTime, + pub time_deleted: Option>, + pub ip_pool_id: Uuid, + pub ip_pool_range_id: Uuid, + pub instance_id: Uuid, + pub ip: IpNetwork, + pub first_port: SqlU16, + pub last_port: SqlU16, +} + +impl From for sled_agent_client::types::ExternalIp { + fn from(eip: InstanceExternalIp) -> Self { + Self { + ip: eip.ip.ip().to_string(), + first_port: eip.first_port.0, + last_port: eip.last_port.0, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub struct IncompleteInstanceExternalIp { + pub id: Uuid, + pub time_created: DateTime, + pub time_modified: DateTime, + pub time_deleted: Option>, + pub instance_id: Uuid, +} + +impl IncompleteInstanceExternalIp { + pub fn new(instance_id: Uuid) -> Self { + let now = Utc::now(); + Self { + id: Uuid::new_v4(), + time_created: now, + time_modified: now, + time_deleted: None, + instance_id, + } + } +} diff --git a/nexus/src/db/model/ip_pool.rs b/nexus/src/db/model/ip_pool.rs index a0a93531961..f28b78b5966 100644 --- a/nexus/src/db/model/ip_pool.rs +++ b/nexus/src/db/model/ip_pool.rs @@ -70,9 +70,15 @@ pub struct IpPoolRange { pub time_created: DateTime, pub time_modified: DateTime, pub time_deleted: Option>, + /// First (lowest) address in the range, inclusive. pub first_address: IpNetwork, + /// Last (highest) address in the range, inclusive. pub last_address: IpNetwork, + /// Foreign-key to the `ip_pool` table with the parent pool for this range pub ip_pool_id: Uuid, + /// The child resource generation number, tracking IP addresses allocated or + /// used from this range. + pub rcgen: i64, } impl IpPoolRange { @@ -94,6 +100,7 @@ impl IpPoolRange { first_address: IpNetwork::from(range.first_address()), last_address: IpNetwork::from(range.last_address()), ip_pool_id, + rcgen: 0, } } } diff --git a/nexus/src/db/model/mod.rs b/nexus/src/db/model/mod.rs index c5785c120c3..b741cfa3413 100644 --- a/nexus/src/db/model/mod.rs +++ b/nexus/src/db/model/mod.rs @@ -12,6 +12,7 @@ mod dataset_kind; mod digest; mod disk; mod disk_state; +mod external_ip; mod generation; mod global_image; mod identity_provider; @@ -63,6 +64,7 @@ pub use dataset_kind::*; pub use digest::*; pub use disk::*; pub use disk_state::*; +pub use external_ip::*; pub use generation::*; pub use global_image::*; pub use identity_provider::*; diff --git a/nexus/src/db/queries/external_ip.rs b/nexus/src/db/queries/external_ip.rs new file mode 100644 index 00000000000..a426858bc14 --- /dev/null +++ b/nexus/src/db/queries/external_ip.rs @@ -0,0 +1,627 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of queries for operating on external IP addresses from IP +//! Pools. 
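+//!
+//! The main query here is `NextExternalIp`, a CTE that inserts the next free
+//! (address, port-chunk) pair drawn from any live `ip_pool_range` and bumps
+//! that range's `rcgen` in the same statement.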
+ +use crate::db::model::IncompleteInstanceExternalIp; +use crate::db::model::InstanceExternalIp; +use crate::db::pool::DbConnection; +use crate::db::schema; +use chrono::DateTime; +use chrono::Utc; +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::Query; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; +use diesel::sql_types; +use diesel::Column; +use diesel::QueryResult; +use diesel::RunQueryDsl; +use uuid::Uuid; + +type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; +type IpPoolRangeFromClause = FromClause; +const IP_POOL_RANGE_FROM_CLAUSE: IpPoolRangeFromClause = + IpPoolRangeFromClause::new(); +type InstanceExternalIpFromClause = + FromClause; +const INSTANCE_EXTERNAL_IP_FROM_CLAUSE: InstanceExternalIpFromClause = + InstanceExternalIpFromClause::new(); + +// The number of ports available to an instance when doing source NAT. Note +// that for static NAT, this value isn't used, and all ports are available. +// +// NOTE: This must be a power of 2. We're expecting to provide the Tofino with a +// port mask, e.g., a 16-bit mask such as `0b01...`, where those dots are any 14 +// bits. This signifies the port range `[16384, 32768)`. Such a port mask only +// works when the port-ranges are limited to powers of 2, not arbitrary ranges. +// +// Also NOTE: This is not going to work if we modify this value across different +// versions of Nexus. Currently, we're considering a port range free simply by +// checking if the _first_ address in a range is free. However, we'll need to +// instead to check if a candidate port range has any overlap with an existing +// port range, which is more complicated. That's deferred until we actually have +// that situation (which may be as soon as allocating ephemeral IPs). +// +// TODO-correctness: We're currently providing the entire port range, even for +// source NAT. We'd like to chunk this up in to something more like quadrants, +// e.g., ranges `[0, 16384)`, `[16384, 32768)`, etc. +const NUM_SOURCE_NAT_PORTS: usize = 1 << 16; +const MAX_PORT: i32 = u16::MAX as _; + +/// Select the next available IP address and port range for an instance's +/// external connectivity. +/// +/// All guest instances by default are able to make outbound network +/// connections, for example to `ping 8.8.8.8`. That requires an external IP +/// address, selected from an IP Pool maintained by rack operators. This query +/// can be used to select a portion of the full port-range of one IP address, +/// reserving them for use by an instance. +/// +/// In general, the query: +/// +/// - Selects the next available IP address and port range from _any_ IP Pool +/// - Inserts that record into the `instance_external_ip` table +/// - Updates the rcgen and time modified of the parent `ip_pool_range` table +/// +/// In detail, the query is: +/// +/// ```sql +/// WITH external_ip AS ( +/// -- Add a new external IP address +/// INSERT INTO +/// instance_external_ip +/// ( +/// SELECT +/// AS id, +/// AS time_created, +/// AS time_modified, +/// NULL AS time_deleted, +/// AS instance_id, +/// ip_pool_id, +/// ip_pool_range_id, +/// candidate_ip AS ip, +/// candidate_first_port AS first_port, +/// candidate_last_port AS last_port +/// FROM +/// ( +/// -- Select all IP addresses by pool and range. 
+/// SELECT +/// ip_pool_id, +/// id AS ip_pool_range_id, +/// first_address + +/// generate_series(0, last_address - first_address) +/// AS candidate_ip +/// FROM +/// ip_pool_range +/// WHERE +/// time_deleted IS NULL +/// ) +/// CROSS JOIN +/// ( +/// -- Cartesian product with all first/last port values +/// SELECT +/// candidate_first_port, +/// candidate_first_port + +/// +/// AS candidate_last_port +/// FROM +/// generate_series(0, , ) +/// AS candidate_first_port +/// ) +/// LEFT OUTER JOIN +/// -- Join with existing IPs, selecting the first row from the +/// -- address and port sequence subqueryes that has no match. I.e., +/// -- is not yet reserved. +/// instance_external_ip +/// ON +/// (ip, first_port, time_deleted IS NULL) = +/// (candidate_ip, candidate_first_port, TRUE) +/// WHERE +/// ip IS NULL AND +/// first_port IS NULL AND +/// time_deleted IS NULL +/// LIMIT 1 +/// ) +/// RETURNING +/// id, +/// time_created, +/// time_modified, +/// time_deleted, +/// instance_id, +/// ip_pool_id, +/// ip_pool_range_id, +/// ip, +/// first_port, +/// last_port +/// ), +/// updated_pool AS ( +/// UPDATE SET +/// ip_pool_range +/// SET +/// time_modified = NOW(), +/// rcgen = rcgen + 1 +/// WHERE +/// id = (select ip_pool_id from external_ip) AND +/// time_deleted IS NULL +/// RETURNING id +/// ) +/// SELECT * FROM external_ip; +/// ``` +/// +/// Notes +/// ----- +/// +/// This query currently searches _all_ available IP Pools and _all_ contained +/// ranges. It is similar to the common "next item" queries, in that its +/// worst-case peformance is proportional to the total number of IP addresses in +/// all pools. Specifically, its runtime is proportional to the lowest +/// unallocated address in any IP pool. However, it's important to note that +/// Cockroach currently completely runs all subqueries and caches their results +/// in memory, meaning this query will need to be redesigned to limit the +/// number of results it checks. As an example, suppose there were a large +/// number of IP pools each containing an IPv6 /64 subnet. That's 2**64 items +/// _per_ pool. +/// +/// A good way to start limiting this is cap the address-search subquery. It's +/// not clear what that limit should be though. +#[derive(Debug, Clone)] +pub struct NextExternalIp { + // TODO-completeness: Add `ip_pool_id`, and restrict search to it. + ip: IncompleteInstanceExternalIp, + // Number of ports reserved per IP address + n_ports_per_chunk: i32, + // The offset from the first port to the last, inclusive. This is required + // because the ranges must not overlap and `generate_series` is inclusive of + // its endpoints. + last_port_offset: i32, + now: DateTime, +} + +impl NextExternalIp { + pub fn new(ip: IncompleteInstanceExternalIp) -> Self { + let now = Utc::now(); + let n_ports_per_chunk = i32::try_from(NUM_SOURCE_NAT_PORTS).unwrap(); + Self { + ip, + n_ports_per_chunk, + last_port_offset: n_ports_per_chunk - 1, + now, + } + } + + fn push_next_ip_and_port_range_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + use schema::instance_external_ip::dsl; + out.push_sql("SELECT "); + + // NOTE: These columns must be selected in the order in which they + // appear in the table, to avoid needing to push the explicit column + // names as well. 
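+        // That order is: id, time_created, time_modified, time_deleted,
+        // ip_pool_id, ip_pool_range_id, instance_id, ip, first_port, and
+        // last_port, matching the `instance_external_ip` table definition.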
+ out.push_bind_param::(&self.ip.id)?; + out.push_sql(" AS "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(", "); + + // Time-created + out.push_bind_param::>( + &self.ip.time_created, + )?; + out.push_sql(" AS "); + out.push_identifier(dsl::time_created::NAME)?; + out.push_sql(", "); + + // Time-modified + out.push_bind_param::>( + &self.ip.time_modified, + )?; + out.push_sql(" AS "); + out.push_identifier(dsl::time_modified::NAME)?; + out.push_sql(", "); + + // Time-modified + out.push_bind_param::, Option>>(&self.ip.time_deleted)?; + out.push_sql(" AS "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(", "); + + // Pool ID + out.push_identifier(dsl::ip_pool_id::NAME)?; + out.push_sql(", "); + + // Pool Range ID + out.push_identifier(dsl::ip_pool_range_id::NAME)?; + out.push_sql(", "); + + // Instance ID + out.push_bind_param::(&self.ip.instance_id)?; + out.push_sql(" AS "); + out.push_identifier(dsl::instance_id::NAME)?; + out.push_sql(", "); + + // Candidate IP from the subquery + out.push_sql("candidate_ip AS "); + out.push_identifier(dsl::ip::NAME)?; + + // Candidate first / last port from the subquery + out.push_sql(", candidate_first_port AS "); + out.push_identifier(dsl::first_port::NAME)?; + out.push_sql(", candidate_last_port AS "); + out.push_identifier(dsl::last_port::NAME)?; + out.push_sql(" FROM ("); + self.push_address_sequence_subquery(out.reborrow())?; + out.push_sql(") CROSS JOIN ("); + self.push_port_sequence_subquery(out.reborrow())?; + out.push_sql(") LEFT OUTER JOIN "); + INSTANCE_EXTERNAL_IP_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" ON ("); + out.push_identifier(dsl::ip::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::first_port::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql( + " IS NULL) = (candidate_ip, candidate_first_port, TRUE) WHERE ", + ); + out.push_identifier(dsl::ip::NAME)?; + out.push_sql(" IS NULL AND "); + out.push_identifier(dsl::first_port::NAME)?; + out.push_sql(" IS NULL AND "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL LIMIT 1) RETURNING *"); + + Ok(()) + } + + // Push a subquery that selects the sequence of IP addresses, from each range in + // each IP Pool, along with the pool/range IDs. + // + // ```sql + // SELECT + // ip_pool_id, + // id AS ip_pool_range_id, + // first_address + + // generate_series(0, last_address - first_address) + // AS candidate_ip + // FROM + // ip_pool_range + // WHERE + // time_deleted IS NULL + // ``` + fn push_address_sequence_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + use schema::ip_pool_range::dsl; + out.push_sql("SELECT "); + out.push_identifier(dsl::ip_pool_id::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" AS ip_pool_range_id, "); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(" + generate_series(0, "); + out.push_identifier(dsl::last_address::NAME)?; + out.push_sql(" - "); + out.push_identifier(dsl::first_address::NAME)?; + out.push_sql(") AS candidate_ip FROM "); + IP_POOL_RANGE_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL"); + Ok(()) + } + + // Push a subquery that selects the possible values for a first port, based on + // the defined spacing. 
+ // + // ```sql + // SELECT + // candidate_first_port, + // candidate_first_port + + // + // AS candidate_last_port + // FROM + // generate_series(0, , ) + // AS candidate_first_port + // ``` + fn push_port_sequence_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + out.push_sql("SELECT candidate_first_port, candidate_first_port + "); + out.push_bind_param::(&self.last_port_offset)?; + out.push_sql(" AS candidate_last_port FROM generate_series(0, "); + out.push_bind_param::(&MAX_PORT)?; + out.push_sql(", "); + out.push_bind_param::(&self.n_ports_per_chunk)?; + out.push_sql(") AS candidate_first_port"); + Ok(()) + } + + // Push a subquery to update the `ip_pool_range` table's rcgen, if we've + // successfully allocate an IP from that range. + fn push_update_ip_pool_range_subquery<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> QueryResult<()> { + use schema::ip_pool_range::dsl; + out.push_sql("UPDATE "); + IP_POOL_RANGE_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" SET "); + out.push_identifier(dsl::time_modified::NAME)?; + out.push_sql(" = "); + out.push_bind_param::>( + &self.now, + )?; + out.push_sql(", "); + out.push_identifier(dsl::rcgen::NAME)?; + out.push_sql(" = "); + out.push_identifier(dsl::rcgen::NAME)?; + out.push_sql(" + 1 WHERE "); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(" = (SELECT "); + out.push_identifier( + schema::instance_external_ip::ip_pool_range_id::NAME, + )?; + out.push_sql(" FROM external_ip) AND "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL RETURNING "); + out.push_identifier(dsl::id::NAME)?; + Ok(()) + } +} + +impl QueryId for NextExternalIp { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for NextExternalIp { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + // Push the first CTE, inserting the candidate record. + out.push_sql("WITH external_ip AS (INSERT INTO "); + INSTANCE_EXTERNAL_IP_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" ("); + + // Push the subquery that inserts the next available IP address and port + // range, across all pools / ranges. 
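+        // Together with the `updated_pool` CTE pushed below, the finished
+        // statement has the shape:
+        //   WITH external_ip AS (INSERT ... RETURNING *),
+        //        updated_pool AS (UPDATE ip_pool_range ... RETURNING id)
+        //   SELECT * FROM external_ip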
+ self.push_next_ip_and_port_range_subquery(out.reborrow())?; + out.push_sql("), updated_pool AS ("); + self.push_update_ip_pool_range_subquery(out.reborrow())?; + + // Select the contents of the first CTE + out.push_sql(") SELECT * FROM external_ip"); + + Ok(()) + } +} + +impl Query for NextExternalIp { + type SqlType = <>::SelectExpression as diesel::Expression>::SqlType; +} + +impl RunQueryDsl for NextExternalIp {} + +#[cfg(test)] +mod tests { + use crate::context::OpContext; + use crate::db::datastore::DataStore; + use crate::db::identity::Resource; + use crate::db::model::IpPool; + use crate::db::model::IpPoolRange; + use crate::external_api::shared::IpRange; + use async_bb8_diesel::AsyncRunQueryDsl; + use dropshot::test_util::LogContext; + use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::Error; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_test_utils::dev; + use omicron_test_utils::dev::db::CockroachInstance; + use std::net::Ipv4Addr; + use std::sync::Arc; + use uuid::Uuid; + + struct TestContext { + logctx: LogContext, + opctx: OpContext, + db: CockroachInstance, + db_datastore: Arc, + } + + impl TestContext { + async fn new(test_name: &str) -> Self { + let logctx = dev::test_setup_log(test_name); + let log = logctx.log.new(o!()); + let db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&cfg)); + let db_datastore = + Arc::new(crate::db::DataStore::new(Arc::clone(&pool))); + let opctx = + OpContext::for_tests(log.new(o!()), db_datastore.clone()); + Self { logctx, opctx, db, db_datastore } + } + + async fn create_ip_pool(&self, name: &str, range: IpRange) { + let pool = IpPool::new(&IdentityMetadataCreateParams { + name: String::from(name).parse().unwrap(), + description: format!("ip pool {}", name), + }); + diesel::insert_into(crate::db::schema::ip_pool::dsl::ip_pool) + .values(pool.clone()) + .execute_async( + self.db_datastore + .pool_authorized(&self.opctx) + .await + .unwrap(), + ) + .await + .expect("Failed to create IP Pool"); + + let pool_range = IpPoolRange::new(&range, pool.id()); + diesel::insert_into( + crate::db::schema::ip_pool_range::dsl::ip_pool_range, + ) + .values(pool_range) + .execute_async( + self.db_datastore.pool_authorized(&self.opctx).await.unwrap(), + ) + .await + .expect("Failed to create IP Pool range"); + } + + async fn success(mut self) { + self.db.cleanup().await.unwrap(); + self.logctx.cleanup_successful(); + } + } + + #[tokio::test] + async fn test_next_external_ip_allocation_and_exhaustion() { + let context = + TestContext::new("test_next_external_ip_allocation_and_exhaustion") + .await; + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 1), + )) + .unwrap(); + context.create_ip_pool("p0", range).await; + for first_port in + (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS) + { + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_external_ip(&context.opctx, instance_id) + .await + .expect("Failed to allocate instance external IP address"); + assert_eq!(ip.ip.ip(), range.first_address()); + assert_eq!(ip.first_port.0, first_port as u16); + assert_eq!( + ip.last_port.0, + (first_port + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) as u16 + ); + } + + // The next allocation should fail, due to IP exhaustion + let instance_id = Uuid::new_v4(); + let err = context + .db_datastore + 
.allocate_instance_external_ip(&context.opctx, instance_id) + .await + .expect_err( + "An error should be received when the IP pools are exhausted", + ); + assert_eq!( + err, + Error::InvalidRequest { + message: String::from( + "No external IP addresses available for new instance" + ), + } + ); + context.success().await; + } + + #[tokio::test] + async fn test_next_external_ip_out_of_order_allocation_ok() { + let context = TestContext::new( + "test_next_external_ip_out_of_order_allocation_ok", + ) + .await; + // Need a larger range, since we're currently limited to the whole port + // range for each external IP. + let range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 3), + )) + .unwrap(); + context.create_ip_pool("p0", range).await; + + // TODO-completess: Implementing Iterator for IpRange would be nice. + let addresses = [ + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 2), + Ipv4Addr::new(10, 0, 0, 3), + ]; + let ports = (0..super::MAX_PORT).step_by(super::NUM_SOURCE_NAT_PORTS); + let mut external_ips = itertools::iproduct!(addresses, ports); + + // Allocate two addresses + let mut ips = Vec::with_capacity(2); + for (expected_ip, expected_first_port) in external_ips.clone().take(2) { + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_external_ip(&context.opctx, instance_id) + .await + .expect("Failed to allocate instance external IP address"); + assert_eq!(ip.ip.ip(), expected_ip); + assert_eq!(ip.first_port.0, expected_first_port as u16); + let expected_last_port = (expected_first_port + + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) + as u16; + assert_eq!(ip.last_port.0, expected_last_port); + ips.push(ip); + } + + // Release the first + context + .db_datastore + .deallocate_instance_external_ip(&context.opctx, ips[0].id) + .await + .expect("Failed to release the first external IP address"); + + // Allocate a new one, ensure it's the same as the first one we + // released. + let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_external_ip(&context.opctx, instance_id) + .await + .expect("Failed to allocate instance external IP address"); + assert_eq!( + ip.ip, ips[0].ip, + "Expected to reallocate external IPs sequentially" + ); + assert_eq!( + ip.first_port, ips[0].first_port, + "Expected to reallocate external IPs sequentially" + ); + assert_eq!( + ip.last_port, ips[0].last_port, + "Expected to reallocate external IPs sequentially" + ); + + // Allocate one more, ensure it's the next chunk after the second one + // from the original loop. 
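+        // With one full-port-range chunk per address, that is the third
+        // address in the range, 10.0.0.3, with ports [0, 65535].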
+ let instance_id = Uuid::new_v4(); + let ip = context + .db_datastore + .allocate_instance_external_ip(&context.opctx, instance_id) + .await + .expect("Failed to allocate instance external IP address"); + let (expected_ip, expected_first_port) = external_ips.nth(2).unwrap(); + assert_eq!(ip.ip.ip(), std::net::IpAddr::from(expected_ip)); + assert_eq!(ip.first_port.0, expected_first_port as u16); + let expected_last_port = (expected_first_port + + (super::NUM_SOURCE_NAT_PORTS - 1) as i32) + as u16; + assert_eq!(ip.last_port.0, expected_last_port); + + context.success().await; + } +} diff --git a/nexus/src/db/queries/ip_pool.rs b/nexus/src/db/queries/ip_pool.rs index 539946b5c75..96f5d34f6a8 100644 --- a/nexus/src/db/queries/ip_pool.rs +++ b/nexus/src/db/queries/ip_pool.rs @@ -220,6 +220,8 @@ impl QueryFragment for FilterOverlappingIpRanges { )?; out.push_sql(", "); out.push_bind_param::(&self.range.ip_pool_id)?; + out.push_sql(", "); + out.push_bind_param::(&self.range.rcgen)?; out.push_sql(" WHERE NOT EXISTS("); push_candidate_contains_record_subquery::( diff --git a/nexus/src/db/queries/mod.rs b/nexus/src/db/queries/mod.rs index d23ecf583e1..0d2a7f86505 100644 --- a/nexus/src/db/queries/mod.rs +++ b/nexus/src/db/queries/mod.rs @@ -5,6 +5,7 @@ //! Specialized queries for inserting database records, usually to maintain //! complex invariants that are most accurately expressed in a single query. +pub mod external_ip; pub mod ip_pool; #[macro_use] mod next_item; diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index a5fe02bd7dc..ed968090ea4 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -156,6 +156,22 @@ table! { first_address -> Inet, last_address -> Inet, ip_pool_id -> Uuid, + rcgen -> Int8, + } +} + +table! { + instance_external_ip (id) { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + ip_pool_id -> Uuid, + ip_pool_range_id -> Uuid, + instance_id -> Uuid, + ip -> Inet, + first_port -> Int4, + last_port -> Int4, } } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 10e137f20e6..6da36b46396 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -20,6 +20,9 @@ use omicron_nexus::crucible_agent_client::types::State as RegionState; use omicron_nexus::external_api::params; use omicron_nexus::external_api::shared; use omicron_nexus::external_api::shared::IdentityType; +use omicron_nexus::external_api::shared::IpRange; +use omicron_nexus::external_api::views::IpPool; +use omicron_nexus::external_api::views::IpPoolRange; use omicron_nexus::external_api::views::{ Organization, Project, Silo, Vpc, VpcRouter, }; @@ -61,6 +64,44 @@ where .unwrap() } +/// Create an IP pool with a single range for testing. +/// +/// The IP range may be specified if it's important for testing the behavior +/// around specific subnets, or a large subnet (2 ** 16 addresses) will be +/// provided, if the `ip_range` argument is `None`. 
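+///
+/// Typical usage in an integration test is
+/// `create_ip_pool(&client, "p0", None).await;` before creating any
+/// instances.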
+pub async fn create_ip_pool( + client: &ClientTestContext, + pool_name: &str, + ip_range: Option, +) -> (IpPool, IpPoolRange) { + let ip_range = ip_range.unwrap_or_else(|| { + use std::net::Ipv4Addr; + IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 0), + Ipv4Addr::new(10, 0, 255, 255), + )) + .unwrap() + }); + let pool = object_create( + client, + "/ip-pools", + ¶ms::IpPoolCreate { + identity: IdentityMetadataCreateParams { + name: pool_name.parse().unwrap(), + description: String::from("an ip pool"), + }, + }, + ) + .await; + let range = object_create( + client, + format!("/ip-pools/{}/ranges/add", pool_name).as_str(), + &ip_range, + ) + .await; + (pool, range) +} + pub async fn create_silo( client: &ClientTestContext, silo_name: &str, diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index e4525d2895b..ea3a64c5f82 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -15,6 +15,7 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_instance; +use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::create_organization; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::DiskTest; @@ -62,6 +63,7 @@ fn get_disk_detach_url(instance_name: &str) -> String { } async fn create_org_and_project(client: &ClientTestContext) -> Uuid { + create_ip_pool(&client, "p0", None).await; create_organization(&client, ORG_NAME).await; let project = create_project(client, ORG_NAME, PROJECT_NAME).await; project.identity.id diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 755a650364c..e928c365c9a 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -294,8 +294,8 @@ lazy_static! 
{ }, }; pub static ref DEMO_IP_POOL_RANGE: IpRange = IpRange::V4(Ipv4Range::new( - std::net::Ipv4Addr::new(10, 0, 0, 1), - std::net::Ipv4Addr::new(10, 0, 0, 2), + std::net::Ipv4Addr::new(10, 0, 0, 0), + std::net::Ipv4Addr::new(10, 0, 0, 255), ).unwrap()); pub static ref DEMO_IP_POOL_RANGES_URL: String = format!("{}/ranges", *DEMO_IP_POOL_URL); pub static ref DEMO_IP_POOL_RANGES_ADD_URL: String = format!("{}/add", *DEMO_IP_POOL_RANGES_URL); diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 5ba02e0a6f7..d1054f7f605 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -10,6 +10,7 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_disk; +use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::DiskTest; use omicron_common::api::external::ByteCount; @@ -40,6 +41,7 @@ use nexus_test_utils::resource_helpers::{ use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +static POOL_NAME: &str = "p0"; static ORGANIZATION_NAME: &str = "test-org"; static PROJECT_NAME: &str = "springfield-squidport"; @@ -52,6 +54,7 @@ fn get_instances_url() -> String { } async fn create_org_and_project(client: &ClientTestContext) -> Uuid { + create_ip_pool(&client, "p0", None).await; create_organization(&client, ORGANIZATION_NAME).await; let project = create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; project.identity.id @@ -121,7 +124,8 @@ async fn test_instances_create_reboot_halt( let apictx = &cptestctx.server.apictx; let nexus = &apictx.nexus; - // Create a project that we'll use for testing. + // Create an IP pool and project that we'll use for testing. + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -424,7 +428,8 @@ async fn test_instances_delete_fails_when_running_succeeds_when_stopped( let apictx = &cptestctx.server.apictx; let nexus = &apictx.nexus; - // Create a project that we'll use for testing. + // Create an IP pool and project that we'll use for testing. + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -548,7 +553,8 @@ async fn test_instance_create_saga_removes_instance_database_record( // conflicted, should the second provision have succeeded. 
let client = &cptestctx.external_client; - // Create test organization and project + // Create test IP pool, organization and project + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -656,7 +662,8 @@ async fn test_instance_with_single_explicit_ip_address( ) { let client = &cptestctx.external_client; - // Create test organization and project + // Create test IP pool, organization and project + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -729,7 +736,8 @@ async fn test_instance_with_new_custom_network_interfaces( ) { let client = &cptestctx.external_client; - // Create test organization and project + // Create test IP pool, organization and project + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -884,7 +892,8 @@ async fn test_instance_create_delete_network_interface( let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - // Create test organization and project + // Create test IP pool, organization and project + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -1132,7 +1141,8 @@ async fn test_instance_update_network_interfaces( let client = &cptestctx.external_client; let nexus = &cptestctx.server.apictx.nexus; - // Create test organization and project + // Create test IP pool, organization and project + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", @@ -1608,11 +1618,13 @@ async fn test_instance_with_multiple_nics_unwinds_completely( async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; + const POOL_NAME: &str = "p0"; const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; const PROJECT_NAME: &str = "bit-barrel"; // Test pre-reqs DiskTest::new(&cptestctx).await; + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; @@ -1697,11 +1709,13 @@ async fn test_attach_eight_disks_to_instance( ) { let client = &cptestctx.external_client; + const POOL_NAME: &str = "p0"; const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; const PROJECT_NAME: &str = "bit-barrel"; // Test pre-reqs DiskTest::new(&cptestctx).await; + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; @@ -2038,11 +2052,13 @@ async fn test_disks_detached_when_instance_destroyed( ) { let client = &cptestctx.external_client; + const POOL_NAME: &str = "p0"; const ORGANIZATION_NAME: &str = "bobs-barrel-of-bytes"; const PROJECT_NAME: &str = "bit-barrel"; // Test pre-reqs DiskTest::new(&cptestctx).await; + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; create_project(client, ORGANIZATION_NAME, PROJECT_NAME).await; @@ -2282,6 +2298,7 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) { let nexus = 
&apictx.nexus; // Create a project that we'll use for testing. + create_ip_pool(&client, POOL_NAME, None).await; create_organization(&client, ORGANIZATION_NAME).await; let url_instances = format!( "/organizations/{}/projects/{}/instances", diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 884d5676e02..e6f1a6e863a 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -11,6 +11,9 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_instance; +use nexus_test_utils::resource_helpers::create_organization; +use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; @@ -23,6 +26,8 @@ use omicron_nexus::external_api::shared::Ipv4Range; use omicron_nexus::external_api::shared::Ipv6Range; use omicron_nexus::external_api::views::IpPool; use omicron_nexus::external_api::views::IpPoolRange; +use omicron_nexus::TestInterfaces; +use sled_agent_client::TestInterfaces as SledTestInterfaces; // Basic test verifying CRUD behavior on the IP Pool itself. #[nexus_test] @@ -548,6 +553,128 @@ async fn test_ip_pool_range_pagination(cptestctx: &ControlPlaneTestContext) { } } +#[nexus_test] +async fn test_ip_range_delete_with_allocated_external_ip_fails( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx; + let nexus = &apictx.nexus; + let ip_pools_url = "/ip-pools"; + let pool_name = "p0"; + let description = "an ip pool"; + let ip_pool_url = format!("{}/{}", ip_pools_url, pool_name); + let ip_pool_ranges_url = format!("{}/ranges", ip_pool_url); + let ip_pool_add_range_url = format!("{}/add", ip_pool_ranges_url); + let ip_pool_del_range_url = format!("{}/delete", ip_pool_ranges_url); + + // Add a pool and range. + let params = IpPoolCreate { + identity: IdentityMetadataCreateParams { + name: String::from(pool_name).parse().unwrap(), + description: String::from(description), + }, + }; + let created_pool: IpPool = + NexusRequest::objects_post(client, ip_pools_url, ¶ms) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(created_pool.identity.name, pool_name); + assert_eq!(created_pool.identity.description, description); + let range = IpRange::V4( + Ipv4Range::new( + std::net::Ipv4Addr::new(10, 0, 0, 1), + std::net::Ipv4Addr::new(10, 0, 0, 2), + ) + .unwrap(), + ); + let created_range: IpPoolRange = + NexusRequest::objects_post(client, &ip_pool_add_range_url, &range) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(range.first_address(), created_range.range.first_address()); + assert_eq!(range.last_address(), created_range.range.last_address()); + + // Create an org and project, and then an instance. The instance should have + // an IP address from this range (since it's the only one that exists), + // though we currently have no way to verify this as source NAT external IPs + // are not part of the public API. 
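+    // Instead, the allocation is observed indirectly: the range-delete
+    // request below must fail while the instance still holds an address from
+    // this range.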
+ const ORG_NAME: &str = "myorg"; + const PROJECT_NAME: &str = "myproj"; + const INSTANCE_NAME: &str = "myinst"; + create_organization(client, ORG_NAME).await; + create_project(client, ORG_NAME, PROJECT_NAME).await; + let instance = + create_instance(client, ORG_NAME, PROJECT_NAME, INSTANCE_NAME).await; + + // We should not be able to delete the range, since there's an external IP + // address in use out of it. + let err: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &ip_pool_del_range_url) + .body(Some(&range)) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + err.message, + "IP pool ranges cannot be deleted while \ + external IP addresses are allocated from them" + ); + + // Stop the instance, wait until it is in fact stopped. + let instance_url = format!( + "/organizations/{}/projects/{}/instances/{}", + ORG_NAME, PROJECT_NAME, INSTANCE_NAME, + ); + let instance_stop_url = format!("{}/stop", instance_url); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &instance_stop_url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to stop instance"); + + // Simulate the transition, wait until it is in fact stopped. + let sa = nexus.instance_sled_by_id(&instance.identity.id).await.unwrap(); + sa.instance_finish_transition(instance.identity.id).await; + + // Delete the instance + NexusRequest::object_delete(client, &instance_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Failed to delete instance"); + + // Now verify that we _can_ delete the IP range. + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &ip_pool_del_range_url) + .body(Some(&range)) + .expect_status(Some(StatusCode::NO_CONTENT)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect( + "Should be able to delete IP range once no instances use its addresses", + ); +} + fn assert_pools_eq(first: &IpPool, second: &IpPool) { assert_eq!(first.identity, second.identity); } diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 9293d1913c9..a270e1c14b0 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -14,6 +14,7 @@ use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::resource_helpers::create_instance_with_nics; +use nexus_test_utils::resource_helpers::create_ip_pool; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{create_organization, create_project}; use nexus_test_utils::ControlPlaneTestContext; @@ -80,6 +81,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { let project_name = "springfield-squidport"; // Create a project that we'll use for testing. 
+    create_ip_pool(&client, "p0", None).await;
     create_organization(&client, organization_name).await;
     create_project(&client, organization_name, project_name).await;
     let url_instances = format!(
diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs
index d8f63617fbe..d83f63c64ce 100644
--- a/nexus/tests/integration_tests/unauthorized.rs
+++ b/nexus/tests/integration_tests/unauthorized.rs
@@ -156,6 +156,16 @@ lazy_static! {
             url: "/silos",
             body: serde_json::to_value(&*DEMO_SILO_CREATE).unwrap(),
         },
+        // Create an IP pool
+        SetupReq {
+            url: &*DEMO_IP_POOLS_URL,
+            body: serde_json::to_value(&*DEMO_IP_POOL_CREATE).unwrap(),
+        },
+        // Create an IP Pool range
+        SetupReq {
+            url: &*DEMO_IP_POOL_RANGES_ADD_URL,
+            body: serde_json::to_value(&*DEMO_IP_POOL_RANGE).unwrap(),
+        },
         // Create an Organization
         SetupReq {
             url: "/organizations",
@@ -206,11 +216,6 @@ lazy_static! {
             url: &*SAML_IDENTITY_PROVIDERS_URL,
             body: serde_json::to_value(&*SAML_IDENTITY_PROVIDER).unwrap(),
         },
-        // Create an IP pool
-        SetupReq {
-            url: &*DEMO_IP_POOLS_URL,
-            body: serde_json::to_value(&*DEMO_IP_POOL_CREATE).unwrap(),
-        }
     ];
 }
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index fdb8eeee624..8dc890d68b8 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -704,6 +704,34 @@
           "request_id"
         ]
       },
+      "ExternalIp": {
+        "description": "An external IP address used for external connectivity for an instance.",
+        "type": "object",
+        "properties": {
+          "first_port": {
+            "description": "The first port used for instance NAT, inclusive.",
+            "type": "integer",
+            "format": "uint16",
+            "minimum": 0
+          },
+          "ip": {
+            "description": "The external address provided to the instance",
+            "type": "string",
+            "format": "ip"
+          },
+          "last_port": {
+            "description": "The last port used for instance NAT, also inclusive.",
+            "type": "integer",
+            "format": "uint16",
+            "minimum": 0
+          }
+        },
+        "required": [
+          "first_port",
+          "ip",
+          "last_port"
+        ]
+      },
       "Generation": {
         "description": "Generation numbers stored in the database, used for optimistic concurrency control",
         "type": "integer",
@@ -765,6 +793,9 @@
             "$ref": "#/components/schemas/DiskRequest"
          }
        },
+        "external_ip": {
+          "$ref": "#/components/schemas/ExternalIp"
+        },
         "nics": {
           "type": "array",
           "items": {
@@ -777,6 +808,7 @@
         },
         "required": [
           "disks",
+          "external_ip",
           "nics",
           "runtime"
         ]
       },
@@ -1037,6 +1069,9 @@
           "name": {
             "$ref": "#/components/schemas/Name"
           },
+          "primary": {
+            "type": "boolean"
+          },
           "slot": {
             "type": "integer",
             "format": "uint8",
@@ -1053,6 +1088,7 @@
           "ip",
           "mac",
           "name",
+          "primary",
           "slot",
           "subnet",
           "vni"
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs
index 4669ffeaa1d..79bc5aeeb7b 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -15,6 +15,7 @@ use crate::instance_manager::InstanceTicket;
 use crate::nexus::NexusClient;
 use crate::opte::OptePort;
 use crate::opte::OptePortAllocator;
+use crate::params::ExternalIp;
 use crate::params::NetworkInterface;
 use crate::params::{
     InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested,
@@ -209,6 +210,7 @@ struct InstanceInner {
     underlay_addr: Ipv6Addr,
     port_allocator: OptePortAllocator,
     requested_nics: Vec<NetworkInterface>,
+    external_ip: ExternalIp,
 
     // Disk related properties
     requested_disks: Vec<DiskRequest>,
@@ -472,6 +474,7 @@ impl Instance {
             underlay_addr,
             port_allocator,
             requested_nics: initial.nics,
+            external_ip: initial.external_ip,
             requested_disks: initial.disks,
             cloud_init_bytes: initial.cloud_init_bytes,
             state: InstanceStates::new(initial.runtime),
@@ -492,12 +495,15 @@ impl Instance {
         let mut ports = Vec::with_capacity(inner.requested_nics.len());
         for nic in inner.requested_nics.iter() {
             let vni = crate::opte::Vni::new(nic.vni).expect("Invalid VNI");
+            let external_ip =
+                if nic.primary { Some(inner.external_ip) } else { None };
             let port = inner.port_allocator.new_port(
                 nic.ip,
                 *nic.mac,
                 ipnetwork::IpNetwork::from(nic.subnet),
                 vni,
                 inner.underlay_addr,
+                external_ip,
             )?;
             info!(inner.log, "created OPTE port for guest"; "port_info" => ?port);
             ports.push(port);
@@ -758,12 +764,15 @@ mod test {
     use crate::illumos::dladm::Etherstub;
     use crate::mocks::MockNexusClient;
     use crate::opte::OptePortAllocator;
+    use crate::params::ExternalIp;
     use crate::params::InstanceStateRequested;
     use chrono::Utc;
     use omicron_common::api::external::{
         ByteCount, Generation, InstanceCpuCount, InstanceState,
     };
     use omicron_common::api::internal::nexus::InstanceRuntimeState;
+    use std::net::IpAddr;
+    use std::net::Ipv4Addr;
 
     static INST_UUID_STR: &str = "e398c5d5-5059-4e55-beac-3a1071083aaa";
     static PROPOLIS_UUID_STR: &str = "ed895b13-55d5-4e0b-88e9-3f4e74d0d936";
@@ -800,6 +809,11 @@ mod test {
                 time_updated: Utc::now(),
             },
             nics: vec![],
+            external_ip: ExternalIp {
+                ip: IpAddr::from(Ipv4Addr::new(10, 0, 0, 1)),
+                first_port: 0,
+                last_port: (1 << 14) - 1,
+            },
             disks: vec![],
             cloud_init_bytes: None,
         }
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index e3062ab6ece..5f0e9179470 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -224,12 +224,15 @@ mod test {
     use crate::illumos::{dladm::MockDladm, zone::MockZones};
     use crate::instance::MockInstance;
     use crate::mocks::MockNexusClient;
+    use crate::params::ExternalIp;
     use crate::params::InstanceStateRequested;
     use chrono::Utc;
     use omicron_common::api::external::{
         ByteCount, Generation, InstanceCpuCount, InstanceState,
     };
     use omicron_common::api::internal::nexus::InstanceRuntimeState;
+    use std::net::IpAddr;
+    use std::net::Ipv4Addr;
 
     static INST_UUID_STR: &str = "e398c5d5-5059-4e55-beac-3a1071083aaa";
@@ -261,6 +264,11 @@
                 time_updated: Utc::now(),
             },
             nics: vec![],
+            external_ip: ExternalIp {
+                ip: IpAddr::from(Ipv4Addr::new(10, 0, 0, 1)),
+                first_port: 0,
+                last_port: (1 << 14) - 1,
+            },
             disks: vec![],
             cloud_init_bytes: None,
         }
diff --git a/sled-agent/src/opte/mock_opte.rs b/sled-agent/src/opte/mock_opte.rs
index cdd475b0c19..f8719dd8a03 100644
--- a/sled-agent/src/opte/mock_opte.rs
+++ b/sled-agent/src/opte/mock_opte.rs
@@ -6,6 +6,7 @@
 //! building the sled agent on non-illumos systems.
 
 use crate::illumos::vnic::Vnic;
+use crate::params::ExternalIp;
 use ipnetwork::IpNetwork;
 use macaddr::MacAddr6;
 use slog::Logger;
@@ -63,6 +64,7 @@ impl OptePortAllocator {
         subnet: IpNetwork,
         vni: Vni,
         underlay_ip: Ipv6Addr,
+        external_ip: Option<ExternalIp>,
     ) -> Result<OptePort, Error> {
         // TODO-completeness: Remove IPv4 restrictions once OPTE supports virtual
         // IPv6 networks.
@@ -91,6 +93,7 @@ impl OptePortAllocator {
             mac,
             vni,
             underlay_ip,
+            external_ip,
             gateway,
             boundary_services,
             vnic: None,
@@ -159,6 +162,7 @@ pub struct OptePort {
     mac: MacAddr6,
     vni: Vni,
     underlay_ip: Ipv6Addr,
+    external_ip: Option<ExternalIp>,
     gateway: Gateway,
     boundary_services: BoundaryServices,
     vnic: Option<Vnic>,
diff --git a/sled-agent/src/opte/opte.rs b/sled-agent/src/opte/opte.rs
index a964215606f..630098f6c8e 100644
--- a/sled-agent/src/opte/opte.rs
+++ b/sled-agent/src/opte/opte.rs
@@ -11,6 +11,7 @@ use crate::illumos::dladm::Dladm;
 use crate::illumos::dladm::PhysicalLink;
 use crate::illumos::vnic::Vnic;
 use crate::illumos::zone::Zones;
+use crate::params::ExternalIp;
 use ipnetwork::IpNetwork;
 use macaddr::MacAddr6;
 use opte::api::IpCidr;
@@ -20,6 +21,7 @@ use opte::api::MacAddr;
 pub use opte::api::Vni;
 use opte::oxide_vpc::api::AddRouterEntryIpv4Req;
 use opte::oxide_vpc::api::RouterTarget;
+use opte::oxide_vpc::api::SNatCfg;
 use opte_ioctl::OpteHdl;
 use slog::Logger;
 use std::net::IpAddr;
@@ -89,6 +91,7 @@ impl OptePortAllocator {
         subnet: IpNetwork,
         vni: Vni,
         underlay_ip: Ipv6Addr,
+        external_ip: Option<ExternalIp>,
     ) -> Result<OptePort, Error> {
         // TODO-completeness: Remove IPv4 restrictions once OPTE supports virtual
         // IPv6 networks.
@@ -120,6 +123,32 @@
                 )),
             }?;
 
+        // Describe the source NAT for this instance.
+        let snat = match external_ip {
+            Some(ip) => {
+                let public_ip = match ip.ip {
+                    IpAddr::V4(ip) => ip.into(),
+                    IpAddr::V6(_) => {
+                        return Err(opte_ioctl::Error::InvalidArgument(
+                            String::from("IPv6 is not yet supported for external addresses")
+                        ).into());
+                    }
+                };
+                // OPTE's `SNatCfg` accepts a `std::ops::Range`, but the first/last
+                // port representation in the database and Nexus is inclusive of the
+                // last port. At the moment, that means we need to add 1 to the
+                // last port in the request data. However, if the last port is
+                // `u16::MAX`, that would overflow and panic. We'll use a saturating
+                // add for now, and live with missing the last port.
+                //
+                // TODO-correctness: Support the last port, see
+                // https://github.com/oxidecomputer/omicron/issues/1292
+                let ports = ip.first_port..ip.last_port.saturating_add(1);
+                Some(SNatCfg { public_ip, ports })
+            }
+            None => None,
+        };
+
         let hdl = OpteHdl::open(OpteHdl::DLD_CTL)?;
         hdl.create_xde(
             &name,
@@ -132,7 +161,7 @@
             boundary_services.vni,
             vni,
             underlay_ip,
-            /* snat = */ None,
+            snat,
             /* passthru = */ false,
         )?;
@@ -198,6 +227,7 @@ impl OptePortAllocator {
             mac,
             vni,
             underlay_ip,
+            external_ip,
             gateway,
             boundary_services,
             vnic,
@@ -265,6 +295,9 @@ pub struct OptePort {
     mac: MacAddr6,
     vni: Vni,
     underlay_ip: Ipv6Addr,
+    // The external IP information for this port, or None if it has no external
+    // connectivity. Only the primary interface has Some(_) here.
+    external_ip: Option<ExternalIp>,
     gateway: Gateway,
     boundary_services: BoundaryServices,
     // TODO-correctness: Remove this once we can put Viona directly on top of an
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 4752caad940..ae4cb40c6e2 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -22,9 +22,21 @@ pub struct NetworkInterface {
     pub mac: external::MacAddr,
     pub subnet: external::IpNet,
     pub vni: external::Vni,
+    pub primary: bool,
     pub slot: u8,
 }
 
+/// An external IP address used for external connectivity for an instance.
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema)]
+pub struct ExternalIp {
+    /// The external address provided to the instance
+    pub ip: IpAddr,
+    /// The first port used for instance NAT, inclusive.
+    pub first_port: u16,
+    /// The last port used for instance NAT, also inclusive.
+    pub last_port: u16,
+}
+
 /// Used to request a Disk state change
 #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)]
 #[serde(rename_all = "lowercase", tag = "state", content = "instance")]
@@ -63,6 +75,7 @@ pub struct DiskEnsureBody {
 pub struct InstanceHardware {
     pub runtime: InstanceRuntimeState,
     pub nics: Vec<NetworkInterface>,
+    pub external_ip: ExternalIp,
     pub disks: Vec<DiskRequest>,
     pub cloud_init_bytes: Option<String>,
 }
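
Note (not part of the diff above): the inclusive first_port/last_port convention used by `ExternalIp`, and its conversion to the half-open range that OPTE's source NAT configuration expects, can be illustrated with a small standalone sketch. The constant `NUM_SOURCE_NAT_PORTS` and the `snat_port_range` helper below are assumed names for illustration only, not identifiers from this change.

use std::net::{IpAddr, Ipv4Addr};
use std::ops::Range;

// Hypothetical constant: number of SNAT ports reserved per instance,
// matching the `1 << 14` figure used in the tests above.
const NUM_SOURCE_NAT_PORTS: u16 = 1 << 14;

// Mirrors `crate::params::ExternalIp` for illustration.
#[derive(Debug, Clone, Copy)]
struct ExternalIp {
    ip: IpAddr,
    first_port: u16, // inclusive
    last_port: u16,  // inclusive
}

impl ExternalIp {
    // Convert the inclusive [first_port, last_port] pair into the half-open
    // range OPTE's SNAT configuration expects. The saturating add mirrors the
    // comment in opte.rs: it avoids a panic when last_port == u16::MAX, at the
    // cost of dropping that final port.
    fn snat_port_range(&self) -> Range<u16> {
        self.first_port..self.last_port.saturating_add(1)
    }
}

fn main() {
    let eip = ExternalIp {
        ip: IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)),
        first_port: 0,
        last_port: NUM_SOURCE_NAT_PORTS - 1,
    };
    // 16384 ports: 0 through 16383, inclusive.
    assert_eq!(eip.snat_port_range(), 0..16384);
    println!("SNAT for {}: ports {:?}", eip.ip, eip.snat_port_range());
}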