Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
97 commits
Select commit Hold shift + click to select a range
b1f9749
First pass of resource utilization
smklein Oct 3, 2022
d14f8fc
less weird formatting
smklein Oct 3, 2022
0430d1a
Tweak CTE to avoid using 'OR'; prefer 'UNION' to avoid full table scans
smklein Oct 4, 2022
e8c937f
Merge with snapshots
smklein Oct 4, 2022
55c1e81
Add test
smklein Oct 4, 2022
98467b2
test snapshot usage accounting
smklein Oct 4, 2022
f838365
Emit disk usage info to clickhouse
smklein Oct 5, 2022
f1e4293
Add external API endpoint for querying metrics endpoints
smklein Oct 5, 2022
fda4c3c
fix json spec, minor tweaks
smklein Oct 5, 2022
b20fefd
Fix unauth tests
smklein Oct 5, 2022
5ff619e
Boost timeout to not miss utilization
smklein Oct 5, 2022
acb91ee
Merge branch 'main' into resource-usage
smklein Oct 6, 2022
5c33dba
Add CPUs provisioned
smklein Oct 6, 2022
4c48467
Add resource usage for non-default silo, add fleet to DB
smklein Oct 7, 2022
356d4fc
Silos know which fleets they belong to, fleet resource accounting
smklein Oct 7, 2022
ba874bb
Okay, fleet 'resource_usage' now created / tested, should be working
smklein Oct 7, 2022
e0f9919
Actually use CPU accounting saga node
smklein Oct 7, 2022
229480b
Add test for CPU usage
smklein Oct 7, 2022
d2c5d9d
RAM provisioning metrics are plumbed
smklein Oct 7, 2022
2d6f145
minor docs / label updates
smklein Oct 7, 2022
36f7f8f
Merge branch 'main' into resource-usage
smklein Oct 7, 2022
df4bd2f
physical_disk accounting updated to virtual_disk accounting
smklein Oct 7, 2022
2b30f2b
fix endpoint
smklein Oct 7, 2022
e0dd385
Make 'test_disk_metrics' less flaky
smklein Oct 8, 2022
613e820
Add resource_type
smklein Oct 8, 2022
0036869
Safer conversions, less unwrap
smklein Oct 8, 2022
f3e9d7b
Use authz::project instead of project_id
smklein Oct 8, 2022
4c237e1
Revert "Use authz::project instead of project_id"
smklein Oct 8, 2022
9e3e6ea
No project_id to volume_delete
smklein Oct 8, 2022
c0b7d85
Add test for idempotent fleet initialization
smklein Oct 8, 2022
83d8d9f
More transactions
smklein Oct 8, 2022
95bdaff
updated comment
smklein Oct 8, 2022
f05296e
Add 404 handling for resource usage
smklein Oct 8, 2022
9a758c2
comments
smklein Oct 8, 2022
8d153f6
http entrypoints naming
smklein Oct 8, 2022
91a1b9d
More endpoint updates
smklein Oct 8, 2022
c693630
rename 'ResourceUsage' to 'VirtualResourceProvisioning'
smklein Oct 9, 2022
88c1b0d
Merge branch 'main' into resource-usage (Compiling)
smklein Dec 7, 2022
eaf9fb3
usage -> Provision, update openapi
smklein Dec 7, 2022
acd4d77
link to query
smklein Dec 7, 2022
ec48ea0
VERY WIP - MOVE ACCOUNTING FOR DISKS/INSTANCES CLOSER TO CREATION/DEL…
smklein Dec 7, 2022
a414206
Merge branch 'main' into resource-usage
smklein Dec 9, 2022
2653911
fmt
smklein Dec 9, 2022
c8a2031
Distinguish between 'resource' and 'collection'. Add CTE to make upda…
smklein Dec 13, 2022
db41a2a
Rename virtual_resource_provisioning to collection vs resource
smklein Dec 13, 2022
1371404
Merge branch 'main' into resource-usage
smklein Dec 14, 2022
70facfb
Merge branch 'main' into resource-usage
smklein Dec 15, 2022
206572d
Remove fleet from DB
smklein Dec 15, 2022
82c6378
Still need to insert collection record for fleet
smklein Dec 15, 2022
e398fe1
Comments, distinguish between disk and snapshots
smklein Dec 15, 2022
29edafd
Just starting to make snapshot delete a saga
smklein Dec 16, 2022
081f649
Merge branch 'main' into resource-usage
smklein Dec 26, 2022
f382a73
Patch tests, pull in some saga-ification PRs
smklein Dec 26, 2022
5f5f15e
Transactions, deleting collections, validate collection empty on delete
smklein Dec 27, 2022
999585c
Remove fleet from views
smklein Dec 27, 2022
abf9923
Merge branch 'main' into resource-usage
smklein Dec 27, 2022
27b2c62
Merge branch 'main' into resource-usage
smklein Jan 6, 2023
109172d
Add resource accounting to saga idempotency/unwind tests
smklein Jan 6, 2023
2cada78
cleanup on project delete
smklein Jan 6, 2023
021744c
Merge branch 'main' into resource-usage
smklein Jan 6, 2023
fbc1fe4
cleanup tests
smklein Jan 6, 2023
dd326e8
testonly
smklein Jan 6, 2023
2e1c25a
Add more tests, oximeter test API
smklein Jan 9, 2023
84308a7
Merge branch 'main' into resource-usage
smklein Jan 9, 2023
4db6b9f
fix disk_delete test parameters
smklein Jan 9, 2023
14b3580
Merge branch 'main' into resource-usage
smklein Jan 11, 2023
254c2ec
Derive display
smklein Jan 11, 2023
76704e7
to ByteCount
smklein Jan 11, 2023
4e6ea23
silo not found when inserting returns 404
smklein Jan 11, 2023
e954c77
system_metric_lookup in app layer
smklein Jan 11, 2023
fc81ada
Remove ResourceType::VirtualProvision
smklein Jan 11, 2023
6837d17
Simplify nexus testing logic
smklein Jan 11, 2023
ae86c33
tweak some test timings to fight flakes when under load
smklein Jan 11, 2023
91fd7b8
De-duplicate Nexus metrics producer
smklein Jan 11, 2023
ee529d7
Merge branch 'main' into resource-usage
smklein Jan 13, 2023
292ee96
Local to txn
smklein Jan 13, 2023
2ab2a3f
fix tests
smklein Jan 13, 2023
5f534d6
Add a Nexus internal API endpoint for disk remove read only parent (#…
leftwo Jan 13, 2023
ade45d5
[wicket] add missing serde feature on hex dependency (#2163)
sunshowers Jan 13, 2023
eaef871
[wicket] move wizard into its own file (#2164)
sunshowers Jan 13, 2023
516acbc
[wicket] fix call to get_prev_component_id (#2167)
sunshowers Jan 14, 2023
338cbd0
[wicket] add an upload command (#2165)
sunshowers Jan 14, 2023
5935d13
Bump trybuild from 1.0.75 to 1.0.76 (#2172)
dependabot[bot] Jan 16, 2023
a02eca7
Bump clap from 4.0.32 to 4.1.1 (#2168)
dependabot[bot] Jan 16, 2023
b1f3879
Bump debug-ignore from 1.0.3 to 1.0.5 (#2173)
dependabot[bot] Jan 17, 2023
6476827
Bump dropshot from `ce5deee` to `120e168` (#2170)
dependabot[bot] Jan 17, 2023
a10f930
Bump progenitor from `634bf98` to `1ef131a` (#2174)
dependabot[bot] Jan 17, 2023
b3d3849
Bump indicatif from 0.17.2 to 0.17.3 (#2171)
dependabot[bot] Jan 17, 2023
653479b
Docs on ALLOW_FULL_TABLE_SCAN_SQL
smklein Jan 18, 2023
3db02c7
fix mis-merge
smklein Jan 18, 2023
f96cb95
Collect timestamps for Oximeter in CRDB to act as tie-breaker
smklein Jan 18, 2023
5f49bc3
Merge branch 'main' into resource-usage
smklein Jan 18, 2023
57c7389
Fix bad merge
smklein Jan 18, 2023
e18210e
mitigate races in metrics test
smklein Jan 18, 2023
a1feec2
Merge branch 'main' into resource-usage
smklein Jan 18, 2023
52af08a
Merge branch 'main' into resource-usage
smklein Jan 18, 2023
9f498f0
metrics test needs a wider time window, especially when under load
smklein Jan 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions common/src/sql/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,21 @@ CREATE INDEX ON omicron.public.service (
sled_id
);

/*
* A table describing resource usage which may be associated
* with a collection of objects, including:
* - Projects
* - Organizations
* - Silos
* - Fleet
*/
CREATE TABLE omicron.public.resource_usage (
/* Should match the UUID of the corresponding collection */
id UUID PRIMARY KEY,

/* Number of disk bytes accounted to the collection identified by "id" */
disk_bytes_used INT8 NOT NULL
);

/*
* ZPools of Storage, attached to Sleds.
* Typically these are backed by a single physical disk.
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod queries;
mod rack;
mod region;
mod region_snapshot;
mod resource_usage;
mod role_assignment;
mod role_builtin;
pub mod saga_types;
Expand Down Expand Up @@ -112,6 +113,7 @@ pub use project::*;
pub use rack::*;
pub use region::*;
pub use region_snapshot::*;
pub use resource_usage::*;
pub use role_assignment::*;
pub use role_builtin::*;
pub use service::*;
Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/queries/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
//! Subqueries used in CTEs.

pub mod region_allocation;
pub mod resource_usage_update;
28 changes: 28 additions & 0 deletions nexus/db-model/src/queries/resource_usage_update.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Describes the resource usage update CTE

use crate::schema::organization;
use crate::schema::resource_usage;

// Diesel table declaration named `parent_org`, exposing a single `id` column.
// NOTE(review): presumably this names a subquery of the resource usage update
// CTE rather than a real database table -- confirm against the query builder.
table! {
parent_org {
id -> Uuid,
}
}

// Diesel table declaration named `parent_silo`, exposing a single `id` column.
// NOTE(review): presumably this names a subquery of the resource usage update
// CTE rather than a real database table -- confirm against the query builder.
table! {
parent_silo {
id -> Uuid,
}
}

// Let diesel type-check queries that mention `organization` together with
// `parent_org`.
diesel::allow_tables_to_appear_in_same_query!(organization, parent_org,);

// Likewise for `resource_usage` together with both `parent_org` and
// `parent_silo`.
diesel::allow_tables_to_appear_in_same_query!(
resource_usage,
parent_org,
parent_silo,
);
5 changes: 5 additions & 0 deletions nexus/db-model/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,9 @@ impl Region {
// external, customer-supplied keys is a non-requirement.
true
}
/// Returns the product of this region's extent count, blocks per extent,
/// and block size in bytes.
pub fn size_used(&self) -> i64 {
    // Multiply in the same order as before: (extents * blocks) * bytes.
    let total_blocks = self.extent_count() * self.blocks_per_extent();
    let bytes_per_block = self.block_size().to_bytes() as i64;
    total_blocks * bytes_per_block
}
}
21 changes: 21 additions & 0 deletions nexus/db-model/src/resource_usage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use crate::schema::resource_usage;
use uuid::Uuid;

/// Describes resource_usage for a collection
///
/// Maps to rows of the `resource_usage` table (see the `diesel(table_name)`
/// attribute below).
#[derive(Selectable, Queryable, Insertable, Debug)]
#[diesel(table_name = resource_usage)]
pub struct ResourceUsage {
/// Identifier of this usage record.
pub id: Uuid,

/// Number of disk bytes recorded as used for this record.
pub disk_bytes_used: i64,
}

impl ResourceUsage {
    /// Builds a usage record for `id` with zero disk bytes used.
    pub fn new(id: Uuid) -> Self {
        let disk_bytes_used = 0;
        Self { id, disk_bytes_used }
    }
}
7 changes: 7 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,13 @@ table! {
}
}

// Diesel schema for the `resource_usage` table: a UUID `id` plus a 64-bit
// `disk_bytes_used` counter.
table! {
resource_usage {
id -> Uuid,
disk_bytes_used -> Int8,
}
}

table! {
zpool (id) {
id -> Uuid,
Expand Down
24 changes: 14 additions & 10 deletions nexus/src/app/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,19 @@ impl super::Nexus {
project_name: &Name,
disk_name: &Name,
) -> DeleteResult {
let (.., authz_disk) = LookupPath::new(opctx, &self.db_datastore)
.organization_name(organization_name)
.project_name(project_name)
.disk_name(disk_name)
.lookup_for(authz::Action::Delete)
.await?;
let (.., project, authz_disk) =
LookupPath::new(opctx, &self.db_datastore)
.organization_name(organization_name)
.project_name(project_name)
.disk_name(disk_name)
.lookup_for(authz::Action::Delete)
.await?;

let saga_params =
sagas::disk_delete::Params { disk_id: authz_disk.id() };
let saga_params = sagas::disk_delete::Params {
serialized_authn: authn::saga::Serialized::for_opctx(opctx),
project_id: project.id(),
disk_id: authz_disk.id(),
};
self.execute_saga::<sagas::disk_delete::SagaDiskDelete>(saga_params)
.await?;
Ok(())
Expand Down Expand Up @@ -498,7 +502,7 @@ impl super::Nexus {
// reference counting for volumes, and probably means this needs to
// instead be a saga.

let (.., authz_snapshot, db_snapshot) =
let (.., project, authz_snapshot, db_snapshot) =
LookupPath::new(opctx, &self.db_datastore)
.organization_name(organization_name)
.project_name(project_name)
Expand All @@ -511,7 +515,7 @@ impl super::Nexus {
.await?;

// Kick off volume deletion saga
self.volume_delete(db_snapshot.volume_id).await?;
self.volume_delete(opctx, project.id(), db_snapshot.volume_id).await?;

Ok(())
}
Expand Down
11 changes: 6 additions & 5 deletions nexus/src/app/project.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ impl super::Nexus {
.lookup_for(authz::Action::CreateChild)
.await?;

// TODO: We probably want to have "project creation", "resource usage
// creation", and "default VPC creation" co-located within a saga for
// atomicity.
//
// Until then, we just perform the operations sequentially.

// Create a project.
let db_project =
db::model::Project::new(authz_org.id(), new_project.clone());
Expand All @@ -43,11 +49,6 @@ impl super::Nexus {
.project_create(opctx, &authz_org, db_project)
.await?;

// TODO: We probably want to have "project creation" and "default VPC
// creation" co-located within a saga for atomicity.
//
// Until then, we just perform the operations sequentially.

// Create a default VPC associated with the project.
// TODO-correctness We need to be using the project_id we just created.
// project_create() should return authz::Project and we should use that
Expand Down
89 changes: 86 additions & 3 deletions nexus/src/app/sagas/disk_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,16 @@ lazy_static! {
sdc_create_disk_record,
sdc_create_disk_record_undo
);
static ref REGIONS_ALLOC: NexusAction =
new_action_noop_undo("disk-create.regions-alloc", sdc_alloc_regions,);
static ref REGIONS_ALLOC: NexusAction = ActionFunc::new_action(
"disk-create.allocate-regions",
sdc_alloc_regions,
sdc_alloc_regions_undo,
);
static ref REGIONS_ACCOUNT: NexusAction = ActionFunc::new_action(
"disk-create.account-regions",
sdc_account_regions,
sdc_account_regions_undo,
);
static ref REGIONS_ENSURE: NexusAction =
new_action_noop_undo("disk-create.regions-ensure", sdc_regions_ensure,);
static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action(
Expand Down Expand Up @@ -246,6 +254,76 @@ async fn sdc_alloc_regions(
Ok(datasets_and_regions)
}

/// Undo action for region allocation: hard-deletes every region recorded
/// under the saga's "datasets_and_regions" output.
async fn sdc_alloc_regions_undo(
    sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
    let osagactx = sagactx.user_data();

    let allocated = sagactx
        .lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
            "datasets_and_regions",
        )?;

    // Only the region half of each (dataset, region) pair is needed here.
    let mut region_ids: Vec<Uuid> = Vec::with_capacity(allocated.len());
    for (_dataset, region) in allocated {
        region_ids.push(region.id());
    }

    osagactx.datastore().regions_hard_delete(region_ids).await?;
    Ok(())
}

/// Sums `size_used()` over every region stored under the saga's
/// "datasets_and_regions" output.
///
/// Fails if that output cannot be looked up from the saga context.
fn get_space_used_by_allocated_regions(
    sagactx: &NexusActionContext,
) -> Result<i64, ActionError> {
    let datasets_and_regions = sagactx
        .lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
            "datasets_and_regions",
        )?;
    let total_bytes: i64 = datasets_and_regions
        .iter()
        .map(|(_, region)| region.size_used())
        .sum();
    Ok(total_bytes)
}

/// Saga action: reports the space used by the allocated regions to
/// `resource_usage_update_disk` for the project named in the saga params.
// TODO: Not yet idempotent
async fn sdc_account_regions(
    sagactx: NexusActionContext,
) -> Result<(), ActionError> {
    let osagactx = sagactx.user_data();
    let params = sagactx.saga_params::<Params>()?;
    let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn);

    // Positive delta: the regions were just allocated.
    let bytes_delta = get_space_used_by_allocated_regions(&sagactx)?;
    let datastore = osagactx.datastore();
    datastore
        .resource_usage_update_disk(&opctx, params.project_id, bytes_delta)
        .await
        .map_err(ActionError::action_failed)?;
    Ok(())
}

/// Undo action for region accounting: reports the negated space used by the
/// allocated regions to `resource_usage_update_disk` for the same project.
// TODO: Not yet idempotent
async fn sdc_account_regions_undo(
    sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
    let osagactx = sagactx.user_data();
    let params = sagactx.saga_params::<Params>()?;
    let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn);

    // Negative delta: give back what the forward action accounted for.
    let bytes_allocated = get_space_used_by_allocated_regions(&sagactx)?;
    let bytes_delta = -bytes_allocated;
    let datastore = osagactx.datastore();
    datastore
        .resource_usage_update_disk(&opctx, params.project_id, bytes_delta)
        .await
        .map_err(ActionError::action_failed)?;
    Ok(())
}

/// Call out to Crucible agent and perform region creation.
async fn ensure_region_in_dataset(
log: &Logger,
Expand Down Expand Up @@ -560,9 +638,14 @@ async fn sdc_create_volume_record_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;

let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn);
let volume_id = sagactx.lookup::<Uuid>("volume_id")?;
osagactx.nexus().volume_delete(volume_id).await?;
osagactx
.nexus()
.volume_delete(&opctx, params.project_id, volume_id)
.await?;
Ok(())
}

Expand Down
8 changes: 7 additions & 1 deletion nexus/src/app/sagas/disk_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use super::ActionRegistry;
use super::NexusActionContext;
use super::NexusSaga;
use crate::app::sagas::NexusAction;
use crate::authn;
use crate::context::OpContext;
use lazy_static::lazy_static;
use serde::Deserialize;
use serde::Serialize;
Expand All @@ -19,6 +21,8 @@ use uuid::Uuid;

/// Parameters for the disk delete saga.
#[derive(Debug, Deserialize, Serialize)]
pub struct Params {
/// Serialized authn context; saga actions rebuild an `OpContext` from it.
pub serialized_authn: authn::saga::Serialized,
/// ID of the project, forwarded to `volume_delete` when the disk's volume
/// is removed.
pub project_id: Uuid,
/// ID of the disk to delete.
pub disk_id: Uuid,
}

Expand Down Expand Up @@ -89,10 +93,12 @@ async fn sdd_delete_volume(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn);
let volume_id = sagactx.lookup::<Uuid>("volume_id")?;
osagactx
.nexus()
.volume_delete(volume_id)
.volume_delete(&opctx, params.project_id, volume_id)
.await
.map_err(ActionError::action_failed)?;
Ok(())
Expand Down
7 changes: 6 additions & 1 deletion nexus/src/app/sagas/snapshot_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,15 @@ async fn ssc_create_volume_record_undo(
) -> Result<(), anyhow::Error> {
let log = sagactx.user_data().log();
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;
let opctx = OpContext::for_saga_action(&sagactx, &params.serialized_authn);
let volume_id = sagactx.lookup::<Uuid>("volume_id")?;

info!(log, "deleting volume {}", volume_id);
osagactx.nexus().volume_delete(volume_id).await?;
osagactx
.nexus()
.volume_delete(&opctx, params.project_id, volume_id)
.await?;

Ok(())
}
Expand Down
22 changes: 22 additions & 0 deletions nexus/src/app/sagas/volume_delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use super::NexusActionContext;
use super::NexusSaga;
use super::MAX_CONCURRENT_REGION_REQUESTS;
use crate::app::sagas::NexusAction;
use crate::authn;
use crate::context::OpContext;
use crate::db;
use crate::db::datastore::CrucibleResources;
use crucible_agent_client::{types::RegionId, Client as CrucibleAgentClient};
Expand All @@ -47,6 +49,8 @@ use uuid::Uuid;

/// Parameters for the volume delete saga.
#[derive(Debug, Deserialize, Serialize)]
pub struct Params {
/// Serialized authn context; saga actions rebuild an `OpContext` from it.
pub serialized_authn: authn::saga::Serialized,
/// ID of the project passed to `resource_usage_update_disk` when the
/// volume's regions are deleted.
pub project_id: Uuid,
/// ID of the volume to delete.
pub volume_id: Uuid,
}

Expand Down Expand Up @@ -171,6 +175,7 @@ async fn svd_delete_crucible_regions(
sagactx: NexusActionContext,
) -> Result<(), ActionError> {
let osagactx = sagactx.user_data();
let params = sagactx.saga_params::<Params>()?;

let crucible_resources_to_delete =
sagactx.lookup::<CrucibleResources>("crucible_resources_to_delete")?;
Expand All @@ -191,6 +196,23 @@ async fn svd_delete_crucible_regions(
.map(|(_, r)| r.id())
.collect();

// TODO: This accounting is not yet idempotent
let space_used = crucible_resources_to_delete
.datasets_and_regions
.iter()
.fold(0, |acc, (_, r)| acc + r.size_used());
let opctx =
OpContext::for_saga_action(&sagactx, &params.serialized_authn);
osagactx
.datastore()
.resource_usage_update_disk(
&opctx,
params.project_id,
-space_used,
)
.await
.map_err(ActionError::action_failed)?;

osagactx
.datastore()
.regions_hard_delete(region_ids_to_delete)
Expand Down
Loading