Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
0c15c32
blerrghhh
hawkw Nov 4, 2025
8a6e44e
well here's something
hawkw Nov 5, 2025
7fc3d27
start throwing together some kind of DE stuff
hawkw Nov 5, 2025
e0732c1
add alert classes
hawkw Nov 5, 2025
b0e54dc
tidiness
hawkw Nov 5, 2025
60f861c
sketchy
hawkw Nov 11, 2025
89da975
stuff
hawkw Nov 12, 2025
7b81664
woag
hawkw Nov 12, 2025
ef114eb
lots of case DB stuff
hawkw Dec 8, 2025
3cf5b29
start associating cases with SPs
hawkw Nov 13, 2025
e21a99c
quick case pretty printer
hawkw Nov 13, 2025
583023e
more case db ops
hawkw Nov 13, 2025
bf29dac
tweak case display a bit more
hawkw Nov 13, 2025
ba891bb
put thing into db
hawkw Nov 14, 2025
21e7fa7
now with '<-- this sitrep' technology
hawkw Nov 14, 2025
48ed757
format tweaks etc
hawkw Nov 14, 2025
34d1509
try to make impact lists usable
hawkw Nov 14, 2025
652ae85
BLURG
hawkw Nov 14, 2025
caccd34
reticulating DE trait
hawkw Nov 17, 2025
9a03b81
reticulating
hawkw Nov 18, 2025
1ce5eaa
reticulating
hawkw Nov 18, 2025
fb2b9b8
actually delete stuff
hawkw Nov 19, 2025
46fa7ee
post rebase fixy uppy
hawkw Nov 19, 2025
6f45557
rename some stuff people found confusing
hawkw Nov 20, 2025
dbd9f67
impact list stuff
hawkw Nov 20, 2025
48fc26e
wip
hawkw Nov 21, 2025
a05a186
oops fix deletion query
hawkw Nov 21, 2025
94c223e
reticulating
hawkw Nov 21, 2025
23d35be
quick alert request executor
hawkw Nov 24, 2025
d2ce867
some test infrastructure
hawkw Nov 24, 2025
af0b88f
s/fm_execution/fm_rendezvouz
hawkw Nov 25, 2025
16fe6ec
reorg crates
hawkw Nov 25, 2025
b3619bb
whoops rm bonus comma
hawkw Nov 25, 2025
a148517
consistent test rng thingy
hawkw Nov 25, 2025
84727da
more test util wiggling
hawkw Nov 25, 2025
c1cbae1
well here's some turbo jank that vaguely works
hawkw Nov 26, 2025
87dfabb
also assert the case gets closed
hawkw Nov 26, 2025
1bf6de0
fixup unrealistic ereport parsing
hawkw Dec 5, 2025
667e46f
use shared ereport parsing logic in test infra
hawkw Dec 5, 2025
4d41fb1
reticulating a bunch of stuff
hawkw Dec 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ members = [
"nexus/db-schema",
"nexus/defaults",
"nexus/external-api",
"nexus/fm",
"nexus/internal-api",
"nexus/inventory",
"nexus/lockstep-api",
Expand Down
9 changes: 8 additions & 1 deletion dev-tools/omdb/src/bin/omdb/db/sitrep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ async fn cmd_db_sitrep_show(
}
};

let fm::Sitrep { metadata } = sitrep;
let fm::Sitrep { metadata, cases } = sitrep;
let fm::SitrepMetadata {
id,
creator_id,
Expand Down Expand Up @@ -345,5 +345,12 @@ async fn cmd_db_sitrep_show(
}
}

if !cases.is_empty() {
println!("\n{:-<80}\n", "== CASES");
for case in cases {
println!("{}", case.display_indented(4, Some(id)));
}
}

Ok(())
}
14 changes: 13 additions & 1 deletion ereport/types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct Ereport {
Serialize,
Deserialize,
JsonSchema,
Hash,
)]
#[repr(transparent)]
#[serde(from = "u64", into = "u64")]
Expand Down Expand Up @@ -102,7 +103,18 @@ impl TryFrom<i64> for Ena {
}

/// Unique identifier for an ereport.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(
Debug,
Clone,
Copy,
PartialEq,
Eq,
Serialize,
Deserialize,
PartialOrd,
Ord,
Hash,
)]
pub struct EreportId {
pub restart_id: EreporterRestartUuid,
pub ena: Ena,
Expand Down
11 changes: 11 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,11 @@ pub struct FmTasksConfig {
/// garbage collects unneeded fault management sitreps in the database.
#[serde_as(as = "DurationSeconds<u64>")]
pub sitrep_gc_period_secs: Duration,
/// period (in seconds) for periodic activations of the background task that
/// updates externally-visible database tables to match the current situation
/// report.
#[serde_as(as = "DurationSeconds<u64>")]
pub rendezvouz_period_secs: Duration,
}

impl Default for FmTasksConfig {
Expand All @@ -940,6 +945,9 @@ impl Default for FmTasksConfig {
// time the current sitrep changes, and activating it more
// frequently won't make things more responsive.
sitrep_gc_period_secs: Duration::from_secs(600),
// This, too, is activated whenever a new sitrep is loaded, so the
// periodic activation does not need to be very frequent.
rendezvouz_period_secs: Duration::from_secs(300),
}
}
}
Expand Down Expand Up @@ -1281,6 +1289,7 @@ mod test {
fm.sitrep_gc_period_secs = 49
probe_distributor.period_secs = 50
multicast_reconciler.period_secs = 60
fm.rendezvous_period_secs = 51
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1529,6 +1538,7 @@ mod test {
fm: FmTasksConfig {
sitrep_load_period_secs: Duration::from_secs(48),
sitrep_gc_period_secs: Duration::from_secs(49),
rendezvouz_period_secs: Duration::from_secs(51),
},
probe_distributor: ProbeDistributorConfig {
period_secs: Duration::from_secs(50),
Expand Down Expand Up @@ -1640,6 +1650,7 @@ mod test {
fm.sitrep_load_period_secs = 45
fm.sitrep_gc_period_secs = 46
probe_distributor.period_secs = 47
fm.rendezvous_period_secs = 48
multicast_reconciler.period_secs = 60

[default_region_allocation_strategy]
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub struct BackgroundTasks {
pub task_webhook_deliverator: Activator,
pub task_sp_ereport_ingester: Activator,
pub task_reconfigurator_config_loader: Activator,
pub task_fm_rendezvous: Activator,
pub task_fm_sitrep_loader: Activator,
pub task_fm_sitrep_gc: Activator,
pub task_probe_distributor: Activator,
Expand Down
37 changes: 37 additions & 0 deletions nexus/db-model/src/alert_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use super::impl_enum_type;
use nexus_types::external_api::views;
use omicron_common::api::external::Error;
use serde::de::{self, Deserialize, Deserializer};
use serde::ser::{Serialize, Serializer};
use std::fmt;
Expand All @@ -30,6 +31,8 @@ impl_enum_type!(
TestFooBaz => b"test.foo.baz"
TestQuuxBar => b"test.quux.bar"
TestQuuxBarBaz => b"test.quux.bar.baz"
PsuInserted => b"hw.insert.power.power_shelf.psu"
PsuRemoved => b"hw.remove.power.power_shelf.psu"
);

impl AlertClass {
Expand All @@ -44,6 +47,8 @@ impl AlertClass {
Self::TestFooBaz => "test.foo.baz",
Self::TestQuuxBar => "test.quux.bar",
Self::TestQuuxBarBaz => "test.quux.bar.baz",
Self::PsuInserted => "hw.insert.power.power_shelf.psu",
Self::PsuRemoved => "hw.remove.power.power_shelf.psu",
}
}

Expand Down Expand Up @@ -76,6 +81,12 @@ impl AlertClass {
| Self::TestQuuxBarBaz => {
"This is a test of the emergency alert system"
}
Self::PsuInserted => {
"A power supply unit (PSU) has been inserted into the power shelf"
}
Self::PsuRemoved => {
"A power supply unit (PSU) has been removed from the power shelf"
}
}
}

Expand All @@ -84,6 +95,32 @@ impl AlertClass {
<Self as strum::VariantArray>::VARIANTS;
}

impl From<nexus_types::fm::AlertClass> for AlertClass {
fn from(input: nexus_types::fm::AlertClass) -> Self {
use nexus_types::fm::AlertClass as In;
match input {
In::PsuRemoved => Self::PsuRemoved,
In::PsuInserted => Self::PsuInserted,
}
}
}

impl TryFrom<AlertClass> for nexus_types::fm::AlertClass {
type Error = Error;

fn try_from(input: AlertClass) -> Result<Self, Self::Error> {
use nexus_types::fm::AlertClass as Out;
match input {
AlertClass::PsuRemoved => Ok(Out::PsuRemoved),
AlertClass::PsuInserted => Ok(Out::PsuInserted),
class => Err(Error::invalid_value(
"alert_class",
format!("'{class}' is not a FM alert class"),
)),
}
}
}

impl fmt::Display for AlertClass {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.as_str())
Expand Down
7 changes: 7 additions & 0 deletions nexus/db-model/src/fm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ use chrono::{DateTime, Utc};
use nexus_db_schema::schema::{fm_sitrep, fm_sitrep_history};
use omicron_uuid_kinds::{CollectionKind, OmicronZoneKind, SitrepKind};

mod alert_request;
pub use alert_request::*;
mod case;
pub use case::*;
mod diagnosis_engine;
pub use diagnosis_engine::*;

#[derive(Queryable, Insertable, Clone, Debug, Selectable)]
#[diesel(table_name = fm_sitrep)]
pub struct SitrepMetadata {
Expand Down
55 changes: 55 additions & 0 deletions nexus/db-model/src/fm/alert_request.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Fault management alert requests.

use crate::AlertClass;
use crate::DbTypedUuid;
use nexus_db_schema::schema::fm_alert_request;
use nexus_types::fm;
use omicron_uuid_kinds::{
AlertKind, CaseKind, CaseUuid, SitrepKind, SitrepUuid,
};

/// Database model for a row of the `fm_alert_request` table.
///
/// Rows are built from an [`fm::AlertRequest`] together with the sitrep and
/// case IDs it is being recorded under; see [`AlertRequest::new`].
#[derive(Queryable, Insertable, Clone, Debug, Selectable)]
#[diesel(table_name = fm_alert_request)]
pub struct AlertRequest {
/// ID of this alert request (an `AlertKind` UUID).
pub id: DbTypedUuid<AlertKind>,
/// ID of the sitrep this row is recorded under (the `current_sitrep_id`
/// passed to [`AlertRequest::new`]).
pub sitrep_id: DbTypedUuid<SitrepKind>,
/// Sitrep ID copied from the request's own `requested_sitrep_id` field.
// NOTE(review): presumably the sitrep in which the alert was first
// requested — confirm against `fm::AlertRequest`.
pub requested_sitrep_id: DbTypedUuid<SitrepKind>,
/// ID of the case this alert request is recorded under.
pub case_id: DbTypedUuid<CaseKind>,
/// Alert class, stored in the `class` column.
#[diesel(column_name = "class")]
pub class: AlertClass,
/// JSON payload carried over unchanged from the `fm::AlertRequest`.
pub payload: serde_json::Value,
}

impl AlertRequest {
    /// Builds the database row for `req`, recorded under the sitrep
    /// `current_sitrep_id` and the case `case_id`.
    ///
    /// The request's ID, requested-sitrep ID, class and payload are carried
    /// over; the IDs and class are converted to their database types.
    pub fn new(
        current_sitrep_id: SitrepUuid,
        case_id: CaseUuid,
        req: fm::AlertRequest,
    ) -> Self {
        // Destructure exhaustively so that adding a field to
        // `fm::AlertRequest` is a compile error here rather than a silently
        // dropped value.
        let fm::AlertRequest {
            id: alert_id,
            requested_sitrep_id: requested_in,
            class: fm_class,
            payload,
        } = req;

        Self {
            id: alert_id.into(),
            sitrep_id: current_sitrep_id.into(),
            requested_sitrep_id: requested_in.into(),
            case_id: case_id.into(),
            class: fm_class.into(),
            payload,
        }
    }
}

impl TryFrom<AlertRequest> for fm::AlertRequest {
type Error = <fm::AlertClass as TryFrom<AlertClass>>::Error;
fn try_from(req: AlertRequest) -> Result<Self, Self::Error> {
Ok(fm::AlertRequest {
id: req.id.into(),
requested_sitrep_id: req.requested_sitrep_id.into(),
payload: req.payload,
class: req.class.try_into()?,
})
}
}
Loading
Loading