Skip to content

Commit 0fd1b74

Browse files
authored
[nexus] refactor SP ereport processing into nexus-types (#9480)
This commit moves the code that processes raw JSON representations of SP ereports from the `ereport_ingester` background task to `nexus_types::fm::ereport`. While a production system will only perform this processing when an ereport is received from a service processor via MGS in the `ereprot_ingester` task, I'd like to be able to also invoke the same logic with hard-coded example JSON ereports for test purposes. Currently, the test code for diagnosis engines in #9346 implemented its own version of this which was slightly different from the ereport_ingester version, so I've moved it to here so that we can use the same logic when processing test inputs. Note that this change was cherry-picked from #9346, where I had actually written tests that needed to use this logic. I figured it was a small and uncontroversial enough refactor (that makes no functional change whatsoever) to pull it out to merge now, in an attempt to keep that branch from getting even bigger and touching even more files.
1 parent 1b73ac9 commit 0fd1b74

File tree

2 files changed

+128
-100
lines changed

2 files changed

+128
-100
lines changed

nexus/src/app/background/tasks/ereport_ingester.rs

Lines changed: 7 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
use crate::app::background::BackgroundTask;
1313
use chrono::Utc;
1414
use ereport_types::Ena;
15-
use ereport_types::Ereport;
1615
use ereport_types::EreportId;
1716
use futures::future::BoxFuture;
1817
use internal_dns_types::names::ServiceName;
@@ -245,76 +244,19 @@ impl Ingester {
245244
} else {
246245
status.get_or_insert_default().requests += 1;
247246
}
248-
247+
let time_collected = Utc::now();
249248
let received = reports.items.len();
250249
let status = status.get_or_insert_default();
251250
status.ereports_received += received;
252251

253252
let db_ereports = reports.items.into_iter().map(|ereport| {
254-
const MISSING_VPD: &str =
255-
" (perhaps the SP doesn't know its own VPD?)";
256-
let part_number = get_sp_metadata_string(
257-
"baseboard_part_number",
258-
&ereport,
259-
&restart_id,
253+
EreportData::from_sp_ereport(
260254
&opctx.log,
261-
MISSING_VPD,
262-
);
263-
let serial_number = get_sp_metadata_string(
264-
"baseboard_serial_number",
265-
&ereport,
266-
&restart_id,
267-
&opctx.log,
268-
MISSING_VPD,
269-
);
270-
let ena = ereport.ena;
271-
let class = ereport
272-
.data
273-
// "k" (for "kind") is used as an abbreviation of
274-
// "class" to save 4 bytes of ereport.
275-
.get("k")
276-
.or_else(|| ereport.data.get("class"));
277-
let class = match (class, ereport.data.get("lost")) {
278-
(Some(serde_json::Value::String(class)), _) => {
279-
Some(class.to_string())
280-
}
281-
(Some(v), _) => {
282-
slog::warn!(
283-
&opctx.log,
284-
"malformed ereport: value for 'k'/'class' \
285-
should be a string, but found: {v:?}";
286-
"ena" => ?ena,
287-
"restart_id" => ?restart_id,
288-
);
289-
None
290-
}
291-
// This is a loss record! I know this!
292-
(None, Some(serde_json::Value::Null)) => {
293-
Some("ereport.data_loss.possible".to_string())
294-
}
295-
(None, Some(serde_json::Value::Number(_))) => {
296-
Some("ereport.data_loss.certain".to_string())
297-
}
298-
(None, _) => {
299-
slog::warn!(
300-
&opctx.log,
301-
"ereport missing 'k'/'class' key";
302-
"ena" => ?ena,
303-
"restart_id" => ?restart_id,
304-
);
305-
None
306-
}
307-
};
308-
309-
EreportData {
310-
id: EreportId { restart_id, ena },
311-
time_collected: Utc::now(),
312-
collector_id: self.nexus_id,
313-
part_number,
314-
serial_number,
315-
class,
316-
report: serde_json::Value::Object(ereport.data),
317-
}
255+
restart_id,
256+
ereport,
257+
time_collected,
258+
self.nexus_id,
259+
)
318260
});
319261
let created = match self
320262
.datastore
@@ -427,41 +369,6 @@ impl Ingester {
427369
}
428370
}
429371

430-
/// Attempt to extract a VPD metadata from an SP ereport, logging a warning if
431-
/// it's missing. We still want to keep such ereports, as the error condition
432-
/// could be that the SP couldn't determine the metadata field, but it's
433-
/// uncomfortable, so we ought to complain about it.
434-
fn get_sp_metadata_string(
435-
key: &str,
436-
ereport: &Ereport,
437-
restart_id: &EreporterRestartUuid,
438-
log: &slog::Logger,
439-
extra_context: &'static str,
440-
) -> Option<String> {
441-
match ereport.data.get(key) {
442-
Some(serde_json::Value::String(s)) => Some(s.clone()),
443-
Some(v) => {
444-
slog::warn!(
445-
&log,
446-
"malformed ereport: value for '{key}' should be a string, \
447-
but found: {v:?}";
448-
"ena" => ?ereport.ena,
449-
"restart_id" => ?restart_id,
450-
);
451-
None
452-
}
453-
None => {
454-
slog::warn!(
455-
&log,
456-
"ereport missing '{key}'{extra_context}";
457-
"ena" => ?ereport.ena,
458-
"restart_id" => ?restart_id,
459-
);
460-
None
461-
}
462-
}
463-
}
464-
465372
struct EreportQueryParams {
466373
committed_ena: Option<Ena>,
467374
start_ena: Option<Ena>,

nexus/types/src/fm/ereport.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
77
use crate::inventory::SpType;
88
use chrono::{DateTime, Utc};
9+
use omicron_uuid_kinds::EreporterRestartUuid;
910
use omicron_uuid_kinds::OmicronZoneUuid;
1011
use omicron_uuid_kinds::SledUuid;
1112
use serde::Deserialize;
@@ -35,6 +36,91 @@ pub struct EreportData {
3536
pub report: serde_json::Value,
3637
}
3738

39+
impl EreportData {
40+
/// Interpret a service processor ereport from a raw JSON blobule, plus the
41+
/// restart ID and collection metadata.
42+
///
43+
/// This conversion is lossy; if some information is not present in the raw
44+
/// ereport JSON, such as the SP's VPD identity, we log a warning, rather
45+
/// than returning an error, and leave those fields empty in the returned
46+
/// `EreportData`. This is because, if we receive an ereport that is
47+
/// incomplete, we would still like to preserve whatever information *is*
48+
/// present, rather than throwing the whole thing away. Thus, this function
49+
/// also takes a `slog::Logger` for logging warnings if some expected fields
50+
/// are not present or malformed.
51+
pub fn from_sp_ereport(
52+
log: &slog::Logger,
53+
restart_id: EreporterRestartUuid,
54+
ereport: ereport_types::Ereport,
55+
time_collected: DateTime<Utc>,
56+
collector_id: OmicronZoneUuid,
57+
) -> Self {
58+
const MISSING_VPD: &str = " (perhaps the SP doesn't know its own VPD?)";
59+
let part_number = get_sp_metadata_string(
60+
"baseboard_part_number",
61+
&ereport,
62+
&restart_id,
63+
&log,
64+
MISSING_VPD,
65+
);
66+
let serial_number = get_sp_metadata_string(
67+
"baseboard_serial_number",
68+
&ereport,
69+
&restart_id,
70+
&log,
71+
MISSING_VPD,
72+
);
73+
let ena = ereport.ena;
74+
let class = ereport
75+
.data
76+
// "k" (for "kind") is used as an abbreviation of
77+
// "class" to save 4 bytes of ereport.
78+
.get("k")
79+
.or_else(|| ereport.data.get("class"));
80+
let class = match (class, ereport.data.get("lost")) {
81+
(Some(serde_json::Value::String(class)), _) => {
82+
Some(class.to_string())
83+
}
84+
(Some(v), _) => {
85+
slog::warn!(
86+
&log,
87+
"malformed ereport: value for 'k'/'class' \
88+
should be a string, but found: {v:?}";
89+
"ena" => ?ena,
90+
"restart_id" => ?restart_id,
91+
);
92+
None
93+
}
94+
// This is a loss record! I know this!
95+
(None, Some(serde_json::Value::Null)) => {
96+
Some("ereport.data_loss.possible".to_string())
97+
}
98+
(None, Some(serde_json::Value::Number(_))) => {
99+
Some("ereport.data_loss.certain".to_string())
100+
}
101+
(None, _) => {
102+
slog::warn!(
103+
&log,
104+
"ereport missing 'k'/'class' key";
105+
"ena" => ?ena,
106+
"restart_id" => ?restart_id,
107+
);
108+
None
109+
}
110+
};
111+
112+
EreportData {
113+
id: EreportId { restart_id, ena },
114+
time_collected,
115+
collector_id,
116+
part_number,
117+
serial_number,
118+
class,
119+
report: serde_json::Value::Object(ereport.data),
120+
}
121+
}
122+
}
123+
38124
/// Describes the source of an ereport.
39125
#[derive(
40126
Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize,
@@ -63,3 +149,38 @@ impl fmt::Display for Reporter {
63149
}
64150
}
65151
}
152+
153+
/// Attempt to extract a VPD metadata from an SP ereport, logging a warning if
154+
/// it's missing. We still want to keep such ereports, as the error condition
155+
/// could be that the SP couldn't determine the metadata field, but it's
156+
/// uncomfortable, so we ought to complain about it.
157+
fn get_sp_metadata_string(
158+
key: &str,
159+
ereport: &ereport_types::Ereport,
160+
restart_id: &EreporterRestartUuid,
161+
log: &slog::Logger,
162+
extra_context: &'static str,
163+
) -> Option<String> {
164+
match ereport.data.get(key) {
165+
Some(serde_json::Value::String(s)) => Some(s.clone()),
166+
Some(v) => {
167+
slog::warn!(
168+
&log,
169+
"malformed ereport: value for '{key}' should be a string, \
170+
but found: {v:?}";
171+
"ena" => ?ereport.ena,
172+
"restart_id" => ?restart_id,
173+
);
174+
None
175+
}
176+
None => {
177+
slog::warn!(
178+
&log,
179+
"ereport missing '{key}'{extra_context}";
180+
"ena" => ?ereport.ena,
181+
"restart_id" => ?restart_id,
182+
);
183+
None
184+
}
185+
}
186+
}

0 commit comments

Comments
 (0)