diff --git a/Cargo.lock b/Cargo.lock index cd8cb6ce274..ca2c2d19105 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6871,6 +6871,22 @@ dependencies = [ "tufaceous-artifact", ] +[[package]] +name = "nexus-fm" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "iddqd", + "nexus-types", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "schemars 0.8.22", + "serde", + "serde_json", + "slog", +] + [[package]] name = "nexus-internal-api" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 0aa02a2e81c..a78cd83afab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,7 @@ members = [ "nexus/db-schema", "nexus/defaults", "nexus/external-api", + "nexus/fm", "nexus/internal-api", "nexus/inventory", "nexus/lockstep-api", diff --git a/dev-tools/omdb/src/bin/omdb/db/sitrep.rs b/dev-tools/omdb/src/bin/omdb/db/sitrep.rs index 439798d577b..17d0e1c96ce 100644 --- a/dev-tools/omdb/src/bin/omdb/db/sitrep.rs +++ b/dev-tools/omdb/src/bin/omdb/db/sitrep.rs @@ -238,7 +238,7 @@ async fn cmd_db_sitrep_show( } }; - let fm::Sitrep { metadata } = sitrep; + let fm::Sitrep { metadata, cases } = sitrep; let fm::SitrepMetadata { id, creator_id, diff --git a/ereport/types/src/lib.rs b/ereport/types/src/lib.rs index 9727684a6c8..d06f5f2fc3d 100644 --- a/ereport/types/src/lib.rs +++ b/ereport/types/src/lib.rs @@ -32,6 +32,7 @@ pub struct Ereport { Serialize, Deserialize, JsonSchema, + Hash, )] #[repr(transparent)] #[serde(from = "u64", into = "u64")] @@ -102,7 +103,18 @@ impl TryFrom for Ena { } /// Unique identifier for an ereport. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Serialize, + Deserialize, + PartialOrd, + Ord, + Hash, +)] pub struct EreportId { pub restart_id: EreporterRestartUuid, pub ena: Ena, diff --git a/nexus/db-model/src/alert_class.rs b/nexus/db-model/src/alert_class.rs index 5f0b2129707..39004961b9b 100644 --- a/nexus/db-model/src/alert_class.rs +++ b/nexus/db-model/src/alert_class.rs @@ -4,6 +4,7 @@ use super::impl_enum_type; use nexus_types::external_api::views; +use omicron_common::api::external::Error; use serde::de::{self, Deserialize, Deserializer}; use serde::ser::{Serialize, Serializer}; use std::fmt; @@ -30,6 +31,8 @@ impl_enum_type!( TestFooBaz => b"test.foo.baz" TestQuuxBar => b"test.quux.bar" TestQuuxBarBaz => b"test.quux.bar.baz" + PsuInserted => b"hw.insert.power.power_shelf.psu" + PsuRemoved => b"hw.remove.power.power_shelf.psu" ); impl AlertClass { @@ -44,6 +47,8 @@ impl AlertClass { Self::TestFooBaz => "test.foo.baz", Self::TestQuuxBar => "test.quux.bar", Self::TestQuuxBarBaz => "test.quux.bar.baz", + Self::PsuInserted => "hw.insert.power.power_shelf.psu", + Self::PsuRemoved => "hw.remove.power.power_shelf.psu", } } @@ -76,6 +81,12 @@ impl AlertClass { | Self::TestQuuxBarBaz => { "This is a test of the emergency alert system" } + Self::PsuInserted => { + "A power supply unit (PSU) has been inserted into the power shelf" + } + Self::PsuRemoved => { + "A power supply unit (PSU) has been removed from the power shelf" + } } } @@ -84,6 +95,32 @@ impl AlertClass { ::VARIANTS; } +impl From for AlertClass { + fn from(input: nexus_types::fm::AlertClass) -> Self { + use nexus_types::fm::AlertClass as In; + match input { + In::PsuRemoved => Self::PsuRemoved, + In::PsuInserted => Self::PsuInserted, + } + } +} + +impl TryFrom for nexus_types::fm::AlertClass { + type Error = Error; + + fn try_from(input: AlertClass) -> Result { + use nexus_types::fm::AlertClass as Out; + match input { + 
AlertClass::PsuRemoved => Ok(Out::PsuRemoved), + AlertClass::PsuInserted => Ok(Out::PsuInserted), + class => Err(Error::invalid_value( + "alert_class", + format!("'{class}' is not a FM alert class"), + )), + } + } +} + impl fmt::Display for AlertClass { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.as_str()) diff --git a/nexus/db-model/src/fm.rs b/nexus/db-model/src/fm.rs index d9d7ac3c2dc..353a2174aac 100644 --- a/nexus/db-model/src/fm.rs +++ b/nexus/db-model/src/fm.rs @@ -19,6 +19,13 @@ use chrono::{DateTime, Utc}; use nexus_db_schema::schema::{fm_sitrep, fm_sitrep_history}; use omicron_uuid_kinds::{CollectionKind, OmicronZoneKind, SitrepKind}; +mod alert_request; +pub use alert_request::*; +mod case; +pub use case::*; +mod diagnosis_engine; +pub use diagnosis_engine::*; + #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = fm_sitrep)] pub struct SitrepMetadata { diff --git a/nexus/db-model/src/fm/alert_request.rs b/nexus/db-model/src/fm/alert_request.rs new file mode 100644 index 00000000000..551085aa065 --- /dev/null +++ b/nexus/db-model/src/fm/alert_request.rs @@ -0,0 +1,55 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fault management alert requests. 
+ +use crate::AlertClass; +use crate::DbTypedUuid; +use nexus_db_schema::schema::fm_alert_request; +use nexus_types::fm; +use omicron_uuid_kinds::{ + AlertKind, CaseKind, CaseUuid, SitrepKind, SitrepUuid, +}; + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_alert_request)] +pub struct AlertRequest { + pub id: DbTypedUuid, + pub sitrep_id: DbTypedUuid, + pub requested_sitrep_id: DbTypedUuid, + pub case_id: DbTypedUuid, + #[diesel(column_name = "class")] + pub class: AlertClass, + pub payload: serde_json::Value, +} + +impl AlertRequest { + pub fn new( + current_sitrep_id: SitrepUuid, + case_id: CaseUuid, + req: fm::AlertRequest, + ) -> Self { + let fm::AlertRequest { id, requested_sitrep_id, payload, class } = req; + AlertRequest { + id: id.into(), + sitrep_id: current_sitrep_id.into(), + requested_sitrep_id: requested_sitrep_id.into(), + case_id: case_id.into(), + class: class.into(), + payload, + } + } +} + +impl TryFrom for fm::AlertRequest { + type Error = >::Error; + fn try_from(req: AlertRequest) -> Result { + Ok(fm::AlertRequest { + id: req.id.into(), + requested_sitrep_id: req.requested_sitrep_id.into(), + payload: req.payload, + class: req.class.try_into()?, + }) + } +} diff --git a/nexus/db-model/src/fm/case.rs b/nexus/db-model/src/fm/case.rs new file mode 100644 index 00000000000..60349aa8bce --- /dev/null +++ b/nexus/db-model/src/fm/case.rs @@ -0,0 +1,53 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Fault management cases. 
+ +use super::DiagnosisEngine; +use crate::DbTypedUuid; +use crate::SpMgsSlot; +use crate::SpType; +use crate::ereport; +use chrono::{DateTime, Utc}; +use nexus_db_schema::schema::{ + fm_case, fm_case_impacts_sp_slot, fm_ereport_in_case, +}; +use omicron_uuid_kinds::{CaseKind, EreporterRestartKind, SitrepKind}; + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_case)] +pub struct CaseMetadata { + pub id: DbTypedUuid, + pub sitrep_id: DbTypedUuid, + pub de: DiagnosisEngine, + + pub created_sitrep_id: DbTypedUuid, + pub time_created: DateTime, + + pub time_closed: Option>, + pub closed_sitrep_id: Option>, + + pub comment: String, +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_ereport_in_case)] +pub struct CaseEreport { + pub restart_id: DbTypedUuid, + pub ena: ereport::DbEna, + pub case_id: DbTypedUuid, + pub sitrep_id: DbTypedUuid, + pub assigned_sitrep_id: DbTypedUuid, +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_case_impacts_sp_slot)] +pub struct CaseImpactsSp { + pub sitrep_id: DbTypedUuid, + pub case_id: DbTypedUuid, + pub sp_type: SpType, + pub sp_slot: SpMgsSlot, + pub created_sitrep_id: DbTypedUuid, + pub comment: String, +} diff --git a/nexus/db-model/src/fm/diagnosis_engine.rs b/nexus/db-model/src/fm/diagnosis_engine.rs new file mode 100644 index 00000000000..7d4523fa74e --- /dev/null +++ b/nexus/db-model/src/fm/diagnosis_engine.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::impl_enum_type; +use nexus_types::fm; +use serde::{Deserialize, Serialize}; +use std::fmt; + +impl_enum_type!( + DiagnosisEngineEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + Serialize, + Deserialize, + AsExpression, + FromSqlRow, + )] + #[serde(rename_all = "snake_case")] + pub enum DiagnosisEngine; + + PowerShelf => b"power_shelf" + +); + +impl From for fm::DiagnosisEngine { + fn from(de: DiagnosisEngine) -> Self { + match de { + DiagnosisEngine::PowerShelf => fm::DiagnosisEngine::PowerShelf, + } + } +} + +impl From for DiagnosisEngine { + fn from(fm_de: fm::DiagnosisEngine) -> Self { + match fm_de { + fm::DiagnosisEngine::PowerShelf => DiagnosisEngine::PowerShelf, + } + } +} + +impl fmt::Display for DiagnosisEngine { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fm::DiagnosisEngine::from(*self).fmt(f) + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 91067f9ace9..f1e643bcb34 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -41,7 +41,7 @@ mod downstairs; pub mod ereport; mod ereporter_type; mod external_ip; -mod fm; +pub mod fm; mod generation; mod identity_provider; mod image; @@ -187,7 +187,7 @@ pub use downstairs::*; pub use ereport::Ereport; pub use ereporter_type::*; pub use external_ip::*; -pub use fm::*; +pub use fm::{SitrepMetadata, SitrepVersion}; pub use generation::*; pub use identity_provider::*; pub use image::*; diff --git a/nexus/db-queries/src/db/datastore/ereport.rs b/nexus/db-queries/src/db/datastore/ereport.rs index 2fa7489d5aa..5ea3dbbb5ba 100644 --- a/nexus/db-queries/src/db/datastore/ereport.rs +++ b/nexus/db-queries/src/db/datastore/ereport.rs @@ -98,6 +98,14 @@ impl DataStore { ) -> LookupResult { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; let conn = self.pool_connection_authorized(opctx).await?; + self.ereport_fetch_on_conn(&conn, id).await + } + + pub(crate) async fn ereport_fetch_on_conn( + &self, + conn: 
&async_bb8_diesel::Connection, + id: fm::EreportId, + ) -> LookupResult { let restart_id = id.restart_id.into_untyped_uuid(); let ena = DbEna::from(id.ena); @@ -106,7 +114,7 @@ impl DataStore { .filter(dsl::ena.eq(ena)) .filter(dsl::time_deleted.is_null()) .select(Ereport::as_select()) - .first_async(&*conn) + .first_async(conn) .await .optional() .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index 04d44c72e2a..7dbb8069bfb 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -12,9 +12,14 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db::datastore::RunnableQuery; +use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::model; +use crate::db::model::DbTypedUuid; use crate::db::model::SqlU32; +use crate::db::model::ereport::DbEna; +use crate::db::pagination::Paginator; use crate::db::pagination::paginated; +use crate::db::pagination::paginated_multicolumn; use crate::db::raw_query_builder::QueryBuilder; use crate::db::raw_query_builder::TypedSqlQuery; use async_bb8_diesel::AsyncRunQueryDsl; @@ -26,6 +31,9 @@ use dropshot::PaginationOrder; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_lookup::DbConnection; +use nexus_db_schema::schema::fm_alert_request::dsl as alert_req_dsl; +use nexus_db_schema::schema::fm_case::dsl as case_dsl; +use nexus_db_schema::schema::fm_ereport_in_case::dsl as case_ereport_dsl; use nexus_db_schema::schema::fm_sitrep::dsl as sitrep_dsl; use nexus_db_schema::schema::fm_sitrep_history::dsl as history_dsl; use nexus_types::fm; @@ -33,8 +41,10 @@ use nexus_types::fm::Sitrep; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; +use omicron_uuid_kinds::CaseKind; use omicron_uuid_kinds::GenericUuid; use 
omicron_uuid_kinds::SitrepUuid; +use std::sync::Arc; use uuid::Uuid; impl DataStore { @@ -139,10 +149,176 @@ impl DataStore { let metadata = self.fm_sitrep_metadata_read_on_conn(id, &conn).await?.into(); - // TODO(eliza): this is where we would read all the other sitrep data, - // if there was any. + let mut all_ereports = iddqd::IdOrdMap::>::new(); + let cases = { + let mut cases = iddqd::IdOrdMap::new(); + let mut paginator = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = self + .fm_sitrep_cases_list_on_conn( + id, + &p.current_pagparams(), + &conn, + ) + .await + .map_err(|e| { + e.internal_context("failed to list sitrep cases") + })?; + paginator = p.found_batch(&batch, &|case| case.id); + + for case in batch { + // TODO(eliza): consider using a `ParallelTaskSet` to fetch the + // cases in parallel here. + let (ereport_assignments, alerts_requested) = + self.fm_case_read_on_conn(&case, conn).await?; + + // Fetch ereports assigned to this case. + let mut ereports = iddqd::IdOrdMap::with_capacity( + ereport_assignments.len(), + ); + for model::fm::CaseEreport { + restart_id, + ena: DbEna(ena), + .. + } in ereport_assignments + { + let ereport_id = fm::EreportId { + restart_id: restart_id.into(), + ena, + }; + let ereport = match all_ereports.entry(&ereport_id) { + iddqd::id_ord_map::Entry::Occupied(entry) => { + entry.get().clone() + } + iddqd::id_ord_map::Entry::Vacant(entry) => { + let ereport: fm::Ereport = self.ereport_fetch_on_conn(conn, ereport_id) + .await + .map_err(|e| e.internal_context(format!( + "failed to fetch ereport {ereport_id} for case {}", + case.id, + )))? 
+ .into(); + entry.insert(Arc::new(ereport)).clone() + } + }; + ereports.insert_unique(ereport).unwrap(); + } + + cases + .insert_unique(fm::Case { + id: case.id.into(), + created_sitrep_id: case.created_sitrep_id.into(), + time_created: case.time_created.into(), + time_closed: case.time_closed.map(Into::into), + closed_sitrep_id: case + .closed_sitrep_id + .map(Into::into), + de: case.de.into(), + comment: case.comment, + ereports, + alerts_requested, + }) + .expect("case UUIDs should be unique"); + } + } + + cases + }; + + Ok(Sitrep { metadata, cases }) + } + + async fn fm_sitrep_cases_list_on_conn( + &self, + sitrep_id: SitrepUuid, + pagparams: &DataPageParams<'_, DbTypedUuid>, + conn: &async_bb8_diesel::Connection, + ) -> ListResultVec { + paginated(case_dsl::fm_case, case_dsl::id, &pagparams) + .filter(case_dsl::sitrep_id.eq(sitrep_id.into_untyped_uuid())) + .select(model::fm::CaseMetadata::as_select()) + .load_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + async fn fm_case_read_on_conn( + &self, + case: &model::fm::CaseMetadata, + conn: &async_bb8_diesel::Connection, + ) -> Result< + (Vec, iddqd::IdOrdMap), + Error, + > { + // Read ereports assigned to this case. 
+ let ereports = { + let mut ereports = Vec::new(); + let mut paginator = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + case_ereport_dsl::fm_ereport_in_case, + (case_ereport_dsl::restart_id, case_ereport_dsl::ena), + &p.current_pagparams(), + ) + .filter(case_ereport_dsl::case_id.eq(case.id)) + .filter(case_ereport_dsl::sitrep_id.eq(case.sitrep_id)) + .select(model::fm::CaseEreport::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context(format!( + "failed to list ereports assigned to case {}", + case.id + )) + })?; + + paginator = p.found_batch(&batch, &|ereport| { + (ereport.restart_id, ereport.ena) + }); + ereports.extend(batch); + } + ereports + }; + + // Read alerts requested for this case. + let alerts_requested = { + let mut alerts = iddqd::IdOrdMap::new(); + let mut paginator = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = paginated( + alert_req_dsl::fm_alert_request, + alert_req_dsl::id, + &p.current_pagparams(), + ) + .filter(alert_req_dsl::case_id.eq(case.id)) + .filter(alert_req_dsl::sitrep_id.eq(case.sitrep_id)) + .select(model::fm::AlertRequest::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context(format!( + "failed to list alerts requested for case {}", + case.id + )) + })?; + + paginator = p.found_batch(&batch, &|req| req.id); + for alert in batch { + alerts + .insert_unique(alert.try_into()?) 
+ .expect("alert UUIDs should be unique"); + } + } + + alerts + }; - Ok(Sitrep { metadata }) + Ok((ereports, alerts_requested)) } /// Insert the provided [`Sitrep`] into the database, and attempt to mark it @@ -755,6 +931,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep).await.unwrap(); @@ -801,6 +978,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); @@ -814,6 +992,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep2).await.expect( "inserting a sitrep whose parent is current should succeed", @@ -854,6 +1033,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); @@ -868,6 +1048,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(nonexistent_id), }, + cases: Default::default(), }; let result = datastore.fm_sitrep_insert(&opctx, &sitrep2).await; @@ -902,6 +1083,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep1).await.unwrap(); @@ -915,6 +1097,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; datastore.fm_sitrep_insert(&opctx, &sitrep2).await.unwrap(); @@ -929,6 +1112,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.id()), }, + cases: Default::default(), }; let result = datastore.fm_sitrep_insert(&opctx, &sitrep3).await; @@ -969,6 +1153,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep1) @@ -1009,6 +1194,7 @@ mod tests { time_created: Utc::now(), 
parent_sitrep_id: Some(sitrep1.metadata.id), }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep2) @@ -1042,7 +1228,7 @@ mod tests { ) -> Result, Error> { let mut listed_orphans = BTreeSet::new(); let mut paginator = Paginator::new( - crate::db::datastore::SQL_BATCH_SIZE, + crate::db::datastore::SQL_BATCH_SIZE, dropshot::PaginationOrder::Descending, ); while let Some(p) = paginator.next() { @@ -1072,6 +1258,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id, }, + cases: Default::default(), }; match datastore.fm_sitrep_insert(&opctx, &sitrep).await { Ok(_) => { diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 5b966b38be4..684d656f308 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -39,6 +39,7 @@ define_enums! { ClickhouseModeEnum => "clickhouse_mode", DatasetKindEnum => "dataset_kind", DbMetadataNexusStateEnum => "db_metadata_nexus_state", + DiagnosisEngineEnum => "diagnosis_engine", DiskTypeEnum => "disk_type", DnsGroupEnum => "dns_group", DownstairsClientStopRequestReasonEnum => "downstairs_client_stop_request_reason_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index be483c399ea..153241ba929 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2860,3 +2860,55 @@ table! { } allow_tables_to_appear_in_same_query!(fm_sitrep, fm_sitrep_history); + +table! { + fm_case (sitrep_id, id) { + id -> Uuid, + sitrep_id -> Uuid, + de -> crate::enums::DiagnosisEngineEnum, + + time_created -> Timestamptz, + created_sitrep_id -> Uuid, + + time_closed -> Nullable, + closed_sitrep_id -> Nullable, + + comment -> Text, + } +} + +table! { + fm_ereport_in_case (sitrep_id, restart_id, ena) { + restart_id -> Uuid, + ena -> Int8, + case_id -> Uuid, + sitrep_id -> Uuid, + assigned_sitrep_id -> Uuid, + } +} + +allow_tables_to_appear_in_same_query!(fm_sitrep, fm_case); + +table! 
{ + fm_case_impacts_sp_slot (sitrep_id, case_id, sp_type, sp_slot) { + sitrep_id -> Uuid, + case_id -> Uuid, + sp_type -> crate::enums::SpTypeEnum, + sp_slot -> Int4, + created_sitrep_id -> Uuid, + comment -> Text, + } +} + +table! { + fm_alert_request (sitrep_id, id) { + id -> Uuid, + sitrep_id -> Uuid, + requested_sitrep_id -> Uuid, + case_id -> Uuid, + class -> crate::enums::AlertClassEnum, + payload -> Jsonb, + } +} + +allow_tables_to_appear_in_same_query!(fm_sitrep, fm_alert_request); diff --git a/nexus/fm/Cargo.toml b/nexus/fm/Cargo.toml new file mode 100644 index 00000000000..71c731a49a6 --- /dev/null +++ b/nexus/fm/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "nexus-fm" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +chrono.workspace = true +iddqd.workspace = true +nexus-types.workspace = true +omicron-uuid-kinds.workspace = true +schemars.workspace = true +serde.workspace = true +serde_json.workspace = true +slog.workspace = true + +omicron-workspace-hack.workspace = true diff --git a/nexus/fm/src/alert.rs b/nexus/fm/src/alert.rs new file mode 100644 index 00000000000..8e0e067706e --- /dev/null +++ b/nexus/fm/src/alert.rs @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Alert messages. 
+ +use nexus_types::fm::AlertClass; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +pub mod power_shelf; + +pub trait Alert: Serialize + JsonSchema + std::fmt::Debug { + const CLASS: AlertClass; +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct VpdIdentity { + pub part_number: Option, + pub revision: Option, + pub serial_number: Option, +} diff --git a/nexus/fm/src/alert/power_shelf.rs b/nexus/fm/src/alert/power_shelf.rs new file mode 100644 index 00000000000..4b080d6b64a --- /dev/null +++ b/nexus/fm/src/alert/power_shelf.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Power shelf alerts. + +use super::{Alert, VpdIdentity}; +use nexus_types::fm::AlertClass; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "version", rename_all = "snake_case")] +pub enum PsuInserted { + V0 { + #[serde(flatten)] + psc_psu: PscPsu, + }, +} + +impl Alert for PsuInserted { + const CLASS: AlertClass = AlertClass::PsuInserted; +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "version", rename_all = "snake_case")] +pub enum PsuRemoved { + V0 { + #[serde(flatten)] + psc_psu: PscPsu, + }, +} + +impl Alert for PsuRemoved { + const CLASS: AlertClass = AlertClass::PsuRemoved; +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PscPsu { + pub psc_id: VpdIdentity, + pub psc_slot: u16, + pub psu_id: PsuIdentity, + pub psu_slot: Option, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PsuIdentity { + pub manufacturer: Option, + pub part_number: Option, + pub firmware_revision: Option, + pub serial_number: Option, +} diff --git a/nexus/fm/src/case.rs b/nexus/fm/src/case.rs new file mode 100644 index 00000000000..e69de29bb2d 
diff --git a/nexus/fm/src/de.rs b/nexus/fm/src/de.rs new file mode 100644 index 00000000000..0d1d2580785 --- /dev/null +++ b/nexus/fm/src/de.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diagnosis engines + +pub mod power_shelf; diff --git a/nexus/fm/src/de/power_shelf.rs b/nexus/fm/src/de/power_shelf.rs new file mode 100644 index 00000000000..bd27e633b6e --- /dev/null +++ b/nexus/fm/src/de/power_shelf.rs @@ -0,0 +1,157 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Power shelf diagnosis + +use crate::SitrepBuilder; +use crate::alert; +use nexus_types::fm::DiagnosisEngine; +use nexus_types::fm::Ereport; +use nexus_types::fm::ereport; +use nexus_types::inventory::SpType; +use serde::de::DeserializeOwned; +use serde_json::Value; +use std::sync::Arc; + +pub fn diagnose( + sitrep: &mut SitrepBuilder<'_>, + new_ereports: &[Arc], +) -> anyhow::Result<()> { + for ereport in new_ereports { + // Skip non-power shelf reports + let ereport::Reporter::Sp { sp_type: SpType::Power, slot } = + ereport.reporter + else { + continue; + }; + + // TODO: check for existing cases tracked for this power shelf and see + // if the ereport is related to them... + + match ereport.data.class.as_deref() { + // PSU inserted + Some("hw.insert.psu") => { + let psc_psu = extract_psc_psu(&ereport, slot, &sitrep.log); + let mut case = sitrep.open_case(DiagnosisEngine::PowerShelf)?; + case.add_ereport(ereport); + case.comment = + format!("PSC {slot} PSU {:?} inserted", psc_psu.psu_slot); + case.request_alert(&alert::power_shelf::PsuInserted::V0 { + psc_psu, + })?; + // Nothing else to do at this time. 
+ case.close(); + } + Some("hw.remove.psu") => { + let psc_psu = extract_psc_psu(&ereport, slot, &sitrep.log); + let mut case = sitrep.open_case(DiagnosisEngine::PowerShelf)?; + case.add_ereport(ereport); + case.comment = + format!("PSC {slot} PSU {:?} removed", psc_psu.psu_slot); + case.request_alert(&alert::power_shelf::PsuRemoved::V0 { + psc_psu, + })?; + + // Nothing else to do at this time. + case.close(); + } + Some(unknown) => { + slog::warn!( + &sitrep.log, + "ignoring unhandled PSC ereport class"; + "ereport_class" => %unknown, + "ereport" => %ereport.id, + ); + } + None => { + slog::warn!( + &sitrep.log, + "ignoring PSC ereport with no class"; + "ereport" => %ereport.id, + ); + } + } + } + + Ok(()) +} + +fn extract_psc_psu( + ereport: &Ereport, + psc_slot: u16, + log: &slog::Logger, +) -> alert::power_shelf::PscPsu { + let psc_id = extract_psc_id(ereport, log); + let psu_id = extract_psu_id(ereport, log); + let psu_slot = grab_json_value(ereport, "slot", &ereport.report, log); + alert::power_shelf::PscPsu { psc_id, psc_slot, psu_id, psu_slot } +} + +fn extract_psc_id(ereport: &Ereport, log: &slog::Logger) -> alert::VpdIdentity { + let serial_number = ereport.serial_number.clone(); + let revision = + grab_json_value(ereport, "baseboard_rev", &ereport.report, log); + let part_number = ereport.part_number.clone(); + alert::VpdIdentity { serial_number, revision, part_number } +} + +fn extract_psu_id( + ereport: &Ereport, + log: &slog::Logger, +) -> alert::power_shelf::PsuIdentity { + // These are the same field names that Hubris uses in the ereport. 
See: + // https://github.com/oxidecomputer/hubris/blob/ec18e4f11aaa14600c61f67335c32b250ef38269/drv/psc-seq-server/src/main.rs#L1107-L1117 + #[derive(serde::Deserialize, Default)] + struct Fruid { + mfr: Option, + mpn: Option, + serial: Option, + fw_rev: Option, + } + + let Fruid { mfr, mpn, serial, fw_rev } = + grab_json_value(ereport, "fruid", &ereport.report, log) + .unwrap_or_default(); + + alert::power_shelf::PsuIdentity { + serial_number: serial, + part_number: mpn, + firmware_revision: fw_rev, + manufacturer: mfr, + } +} + +fn grab_json_value( + ereport: &Ereport, + key: &str, + obj: &Value, + log: &slog::Logger, +) -> Option { + let v = match obj.get(key) { + Some(v) => v, + None => { + slog::warn!( + log, + "expected ereport to contain a '{key}' field"; + "ereport_id" => %ereport.id, + "ereport_class" => ?ereport.class, + ); + return None; + } + }; + match serde_json::from_value(v.clone()) { + Ok(v) => Some(v), + Err(e) => { + slog::warn!( + log, + "expected ereport '{key}' field to deserialize as a {}", + std::any::type_name::(); + "ereport_id" => %ereport.id, + "ereport_class" => ?ereport.class, + "error" => %e, + ); + None + } + } +} diff --git a/nexus/fm/src/lib.rs b/nexus/fm/src/lib.rs new file mode 100644 index 00000000000..ce9107b8d34 --- /dev/null +++ b/nexus/fm/src/lib.rs @@ -0,0 +1,228 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Fault management + +use nexus_types::fm; +use nexus_types::inventory; +use omicron_uuid_kinds::AlertUuid; +use omicron_uuid_kinds::CaseUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SitrepUuid; +use slog::Logger; +// use std::fmt::Write; +use anyhow::Context; +use chrono::Utc; +use std::sync::Arc; + +pub mod alert; +pub mod case; +pub mod de; + +#[derive(Debug)] +pub struct SitrepBuilder<'a> { + pub log: Logger, + pub inventory: &'a inventory::Collection, + pub parent_sitrep: Option<&'a fm::Sitrep>, + pub sitrep_id: SitrepUuid, + pub cases: iddqd::IdOrdMap, + comment: String, +} + +impl<'a> SitrepBuilder<'a> { + pub fn new( + log: &Logger, + inventory: &'a inventory::Collection, + parent_sitrep: Option<&'a fm::Sitrep>, + ) -> Self { + let sitrep_id = SitrepUuid::new_v4(); + let log = log.new(slog::o!( + "sitrep_id" => format!("{sitrep_id:?}"), + "parent_sitrep_id" => format!("{:?}", parent_sitrep.as_ref().map(|s| s.id())), + "inv_collection_id" => format!("{:?}", inventory.id), + )); + + // Copy forward any open cases from the parent sitrep. + // If a case was closed in the parent sitrep, skip it. 
+ let cases: iddqd::IdOrdMap<_> = parent_sitrep + .iter() + .flat_map(|s| s.open_cases()) + .map(|case| CaseBuilder::new(&log, sitrep_id, case.clone())) + .collect(); + + slog::info!( + &log, + "preparing sitrep {sitrep_id:?}"; + "existing_open_cases" => cases.len(), + ); + + SitrepBuilder { + log, + sitrep_id, + inventory, + parent_sitrep, + comment: String::new(), + cases, + } + } + + pub fn open_case( + &mut self, + de: fm::DiagnosisEngine, + ) -> anyhow::Result> { + let id = CaseUuid::new_v4(); + let sitrep_id = self.sitrep_id; + let case = match self.cases.entry(&id) { + iddqd::id_ord_map::Entry::Occupied(_) => { + panic!("generated a colliding UUID!") + } + iddqd::id_ord_map::Entry::Vacant(entry) => { + let case = fm::Case { + id, + created_sitrep_id: self.sitrep_id, + time_created: chrono::Utc::now(), + closed_sitrep_id: None, + time_closed: None, + de, + comment: String::new(), + ereports: Default::default(), + alerts_requested: Default::default(), + impacted_sp_slots: Default::default(), + }; + entry.insert(CaseBuilder::new(&self.log, sitrep_id, case)) + } + }; + + slog::info!( + self.log, + "opened case {id:?}"; + "case_id" => ?id, + "de" => %de + ); + + Ok(case) + } + + pub fn build(self, creator_id: OmicronZoneUuid) -> fm::Sitrep { + fm::Sitrep { + metadata: fm::SitrepMetadata { + id: self.sitrep_id, + parent_sitrep_id: self.parent_sitrep.map(|s| s.metadata.id), + inv_collection_id: self.inventory.id, + creator_id, + comment: self.comment, + time_created: chrono::Utc::now(), + }, + cases: self + .cases + .into_iter() + .map(|builder| fm::Case::from(builder)) + .collect(), + } + } +} + +#[derive(Debug)] +pub struct CaseBuilder { + pub log: slog::Logger, + pub case: fm::Case, + pub sitrep_id: SitrepUuid, +} + +impl CaseBuilder { + fn new(log: &slog::Logger, sitrep_id: SitrepUuid, case: fm::Case) -> Self { + let log = log.new(slog::o!( + "case_id" => format!("{:?}", case.id), + "de" => case.de.to_string(), + "created_sitrep_id" => format!("{:?}", 
case.created_sitrep_id), + )); + Self { log, case, sitrep_id } + } + + pub fn request_alert( + &mut self, + alert: &A, + ) -> anyhow::Result<()> { + let id = AlertUuid::new_v4(); + let class = A::CLASS; + let req = fm::AlertRequest { + id, + class, + requested_sitrep_id: self.sitrep_id, + payload: serde_json::to_value(&alert).with_context(|| { + format!( + "failed to serialize payload for {class:?} alert {alert:?}" + ) + })?, + }; + self.case.alerts_requested.insert_unique(req).map_err(|_| { + anyhow::anyhow!("an alert with ID {id:?} already exists") + })?; + + slog::info!( + &self.log, + "requested an alert"; + "alert_id" => ?id, + "alert_class" => ?class, + ); + + Ok(()) + } + + pub fn close(&mut self) { + self.case.time_closed = Some(Utc::now()); + self.case.closed_sitrep_id = Some(self.sitrep_id); + + slog::info!(&self.log, "case closed"); + } + + pub fn add_ereport(&mut self, report: &Arc) { + match self.case.ereports.insert_unique(report.clone()) { + Ok(_) => { + slog::info!( + self.log, + "assigned ereport {} to case", report.id(); + "ereport_id" => ?report.id(), + "ereport_class" => ?report.class, + ); + } + Err(_) => { + slog::warn!( + self.log, + "ereport {} already assigned to case", report.id(); + "ereport_id" => ?report.id(), + "ereport_class" => ?report.class, + ); + } + } + } +} + +impl From for fm::Case { + fn from(CaseBuilder { case, .. 
}: CaseBuilder) -> Self { + case + } +} + +impl core::ops::Deref for CaseBuilder { + type Target = fm::Case; + fn deref(&self) -> &Self::Target { + &self.case + } +} + +impl core::ops::DerefMut for CaseBuilder { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.case + } +} + +impl iddqd::IdOrdItem for CaseBuilder { + type Key<'a> = &'a CaseUuid; + fn key(&self) -> Self::Key<'_> { + &self.case.id + } + + iddqd::id_upcast!(); +} diff --git a/nexus/src/app/background/tasks/fm_sitrep_gc.rs b/nexus/src/app/background/tasks/fm_sitrep_gc.rs index 92214faef4b..7295e3c2459 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_gc.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_gc.rs @@ -152,6 +152,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: None, }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep1) @@ -174,6 +175,7 @@ mod tests { time_created: Utc::now(), parent_sitrep_id: Some(sitrep1.metadata.id), }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep2) @@ -264,7 +266,10 @@ mod tests { comment: format!("test sitrep v{i}; orphan {i}"), time_created: Utc::now(), parent_sitrep_id, + // TODO(eliza): we should populate cases and assert they get + // cleaned up... 
}, + cases: Default::default(), }; match datastore.fm_sitrep_insert(&opctx, &sitrep).await { Ok(_) => { diff --git a/nexus/src/app/background/tasks/fm_sitrep_load.rs b/nexus/src/app/background/tasks/fm_sitrep_load.rs index 0a2c52f95b1..723a96bf3b7 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_load.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_load.rs @@ -224,6 +224,7 @@ mod test { comment: "test sitrep 1".to_string(), time_created: Utc::now(), }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep1) @@ -288,6 +289,7 @@ mod test { comment: "test sitrep 2".to_string(), time_created: Utc::now(), }, + cases: Default::default(), }; datastore .fm_sitrep_insert(&opctx, &sitrep2) diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index 3f90379388c..8504c5fd8df 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -8,11 +8,19 @@ //! structure containing fault management state. pub mod ereport; -pub use ereport::Ereport; +pub use ereport::{Ereport, EreportId}; + +mod alert; +pub use alert::*; + +pub mod case; +pub use case::Case; use chrono::{DateTime, Utc}; -use omicron_uuid_kinds::{CollectionUuid, OmicronZoneUuid, SitrepUuid}; -use schemars::JsonSchema; +use iddqd::IdOrdMap; +use omicron_uuid_kinds::{ + CaseUuid, CollectionUuid, OmicronZoneUuid, SitrepUuid, +}; use serde::{Deserialize, Serialize}; /// A fault management situation report, or _sitrep_. @@ -30,12 +38,12 @@ use serde::{Deserialize, Serialize}; /// The sitrep, how it is represented in the database, and how the fault /// management subsystem creates and interacts with sitreps, is described in /// detail in [RFD 603](https://rfd.shared.oxide.computer/rfd/0603). -#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Sitrep { /// Metadata describing this sitrep, when it was created, its parent sitrep /// ID, and which Nexus produced it. 
pub metadata: SitrepMetadata, - // TODO(eliza): draw the rest of the sitrep + pub cases: IdOrdMap, } impl Sitrep { @@ -46,12 +54,26 @@ impl Sitrep { pub fn parent_id(&self) -> Option { self.metadata.parent_sitrep_id } + + pub fn open_cases(&self) -> impl Iterator + '_ { + self.cases.iter().filter(|c| c.is_open()) + } + + /// Iterate over all alerts requested by cases in this sitrep. + pub fn alerts_requested( + &self, + ) -> impl Iterator + '_ { + self.cases.iter().flat_map(|case| { + let case_id = case.id; + case.alerts_requested.iter().map(move |alert| (case_id, alert)) + }) + } } /// Metadata describing a sitrep. /// /// This corresponds to the records stored in the `fm_sitrep` database table. -#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct SitrepMetadata { /// The ID of this sitrep. pub id: SitrepUuid, @@ -91,9 +113,26 @@ pub struct SitrepMetadata { } /// An entry in the sitrep version history. -#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct SitrepVersion { pub id: SitrepUuid, pub version: u32, pub time_made_current: DateTime, } + +#[derive( + Copy, + Clone, + Debug, + PartialEq, + Eq, + Hash, + serde::Serialize, + serde::Deserialize, + strum::Display, +)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum DiagnosisEngine { + PowerShelf, +} diff --git a/nexus/types/src/fm/alert.rs b/nexus/types/src/fm/alert.rs new file mode 100644 index 00000000000..05ad85073ee --- /dev/null +++ b/nexus/types/src/fm/alert.rs @@ -0,0 +1,30 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use omicron_uuid_kinds::AlertUuid; +use omicron_uuid_kinds::SitrepUuid; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct AlertRequest { + pub id: AlertUuid, + pub class: AlertClass, + pub payload: serde_json::Value, + pub requested_sitrep_id: SitrepUuid, +} + +impl iddqd::IdOrdItem for AlertRequest { + type Key<'a> = &'a AlertUuid; + fn key(&self) -> Self::Key<'_> { + &self.id + } + + iddqd::id_upcast!(); +} + +#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum AlertClass { + PsuInserted, + PsuRemoved, +} diff --git a/nexus/types/src/fm/case.rs b/nexus/types/src/fm/case.rs new file mode 100644 index 00000000000..c1842426d4d --- /dev/null +++ b/nexus/types/src/fm/case.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::fm::AlertRequest; +use crate::fm::DiagnosisEngine; +use crate::fm::Ereport; +use crate::inventory::SpType; +use chrono::{DateTime, Utc}; +use iddqd::{IdOrdItem, IdOrdMap}; +use omicron_uuid_kinds::{ + CaseUuid, CollectionUuid, OmicronZoneUuid, SitrepUuid, +}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Case { + pub id: CaseUuid, + pub created_sitrep_id: SitrepUuid, + pub time_created: DateTime, + + pub closed_sitrep_id: Option, + pub time_closed: Option>, + + pub de: DiagnosisEngine, + + pub ereports: IdOrdMap>, + + pub alerts_requested: IdOrdMap, + + pub impacted_sp_slots: IdOrdMap, + + pub comment: String, +} + +impl Case { + pub fn is_open(&self) -> bool { + self.time_closed.is_none() + } +} + +impl IdOrdItem for Case { + type Key<'a> = &'a CaseUuid; + fn key(&self) -> Self::Key<'_> { + &self.id + } + + iddqd::id_upcast!(); +} + +#[derive(Clone, Debug, Eq, PartialEq, 
Deserialize, Serialize)] +pub struct ImpactedSpSlot { + pub sp_type: SpType, + pub slot: u8, + pub created_sitrep_id: SitrepUuid, + pub comment: String, +} + +impl IdOrdItem for ImpactedSpSlot { + type Key<'a> = (&'a SpType, &'a u8); + fn key(&self) -> Self::Key<'_> { + (&self.sp_type, &self.slot) + } + + iddqd::id_upcast!(); +} diff --git a/nexus/types/src/fm/ereport.rs b/nexus/types/src/fm/ereport.rs index 06012927bb6..17426a70179 100644 --- a/nexus/types/src/fm/ereport.rs +++ b/nexus/types/src/fm/ereport.rs @@ -22,6 +22,28 @@ pub struct Ereport { pub reporter: Reporter, } +impl Ereport { + pub fn id(&self) -> &EreportId { + &self.data.id + } +} + +impl core::ops::Deref for Ereport { + type Target = EreportData; + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl iddqd::IdOrdItem for Ereport { + type Key<'a> = &'a EreportId; + fn key(&self) -> Self::Key<'_> { + self.id() + } + + iddqd::id_upcast!(); +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct EreportData { #[serde(flatten)] diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 3d640255673..99f25b2eb78 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -6838,6 +6838,98 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_sitrep_version_by_id ON omicron.public.fm_sitrep_history (sitrep_id); +CREATE TYPE IF NOT EXISTS omicron.public.diagnosis_engine AS ENUM ( + 'power_shelf', +); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_case ( + -- Case UUID + id UUID NOT NULL, + -- UUID of the sitrep in which the case had this state. + sitrep_id UUID NOT NULL, + + de omicron.public.diagnosis_engine NOT NULL, + + time_created TIMESTAMPTZ NOT NULL, + -- UUID of the sitrep in which the case was created. + created_sitrep_id UUID NOT NULL, + + -- Time when the case was closed (if not null). + time_closed TIMESTAMPTZ, + -- UUID of the sitrep in which the case was closed. 
+ closed_sitrep_id UUID, + + comment TEXT NOT NULL, + + CONSTRAINT closed_case_validity CHECK ( + (closed_sitrep_id IS NULL AND time_closed IS NULL) OR + (closed_sitrep_id IS NOT NULL AND time_closed IS NOT NULL) + ), + + PRIMARY KEY (sitrep_id, id) +); + +CREATE INDEX IF NOT EXISTS + lookup_fm_cases_for_sitrep +ON omicron.public.fm_case (sitrep_id); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( + -- The ereport's identity. + restart_id UUID NOT NULL, + ena INT8 NOT NULL, + + -- UUID of the case the ereport is assigned to. + case_id UUID NOT NULL, + + -- UUID of the sitrep in which this assignment exists. + sitrep_id UUID NOT NULL, + -- UUID of the sitrep in which the ereport was initially assigned to this + -- case. + assigned_sitrep_id UUID NOT NULL, + + PRIMARY KEY (sitrep_id, restart_id, ena) +); + +CREATE INDEX IF NOT EXISTS + lookup_ereports_assigned_to_fm_case +ON omicron.public.fm_ereport_in_case (sitrep_id, case_id); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_case_impacts_sp_slot ( + sitrep_id UUID NOT NULL, + case_id UUID NOT NULL, + -- location of this device according to MGS + sp_type omicron.public.sp_type NOT NULL, + sp_slot INT4 NOT NULL, + + -- ID of the sitrep in which this SP was added to the case. + created_sitrep_id UUID NOT NULL, + comment TEXT NOT NULL, + + PRIMARY KEY (sitrep_id, case_id, sp_type, sp_slot) +); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_alert_request ( + -- Requested alert UUID + id UUID NOT NULL, + -- UUID of the sitrep in which the alert is requested. + sitrep_id UUID NOT NULL, + -- UUID of the sitrep in which the alert request was created. + requested_sitrep_id UUID NOT NULL, + -- UUID of the case to which this alert request belongs. + case_id UUID NOT NULL, + + -- The class of alert that was requested + alert_class omicron.public.alert_class NOT NULL, + -- Actual alert data. The structure of this depends on the alert class. 
+ payload JSONB NOT NULL, + + PRIMARY KEY (sitrep_id, id) +); + +CREATE INDEX IF NOT EXISTS + lookup_fm_alert_requests_for_case +ON omicron.public.fm_alert_request (sitrep_id, case_id); + /* * List of datasets available to be sliced up and passed to VMMs for instance * local storage. diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index beef2f61da1..f119a7edef8 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -45,6 +45,7 @@ impl_typed_uuid_kinds! { AntiAffinityGroup = {}, Blueprint = {}, BuiltInUser = {}, + Case = {}, Collection = {}, ConsoleSession = {}, Dataset = {},