diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs
index ad4413d247c..d08f91b7dbc 100644
--- a/nexus-sled-agent-shared/src/inventory.rs
+++ b/nexus-sled-agent-shared/src/inventory.rs
@@ -961,14 +961,17 @@ impl OmicronZoneType {
 ///
 /// # String representations of this type
 ///
-/// There are no fewer than five string representations for this type, all
+/// There are no fewer than six string representations for this type, all
 /// slightly different from each other.
 ///
 /// 1. [`Self::zone_prefix`]: Used to construct zone names.
 /// 2. [`Self::service_prefix`]: Used to construct SMF service names.
 /// 3. [`Self::name_prefix`]: Used to construct `Name` instances.
 /// 4. [`Self::report_str`]: Used for reporting and testing.
-/// 5. [`Self::artifact_name`]: Used to match TUF artifact names.
+/// 5. [`Self::artifact_id_name`]: Used to match TUF artifact IDs.
+/// 6. [`Self::artifact_in_install_dataset`]: Used to match zone image tarballs
+///    in the install dataset. (This method is equivalent to appending `.tar.gz`
+///    to the result of [`Self::zone_prefix`].)
 ///
 /// There is no `Display` impl to ensure that users explicitly choose the
 /// representation they want. (Please pay close attention to this! The
@@ -978,7 +981,7 @@ impl OmicronZoneType {
 /// ## Adding new representations
 ///
 /// If you have a new use case for a string representation, please reuse one of
-/// the four representations if at all possible. If you must add a new one,
+/// the six representations if at all possible. If you must add a new one,
 /// please add it here rather than doing something ad-hoc in the calling code
 /// so it's more legible.
 #[derive(
@@ -1024,6 +1027,30 @@ impl ZoneKind {
         }
     }
 
+    /// Return a string that identifies **zone image filenames** in the install
+    /// dataset.
+    ///
+    /// This method is exactly equivalent to `format!("{}.tar.gz",
+    /// self.zone_prefix())`, but returns `&'static str`s. A unit test ensures
+    /// they stay consistent.
+    pub fn artifact_in_install_dataset(self) -> &'static str {
+        match self {
+            // BoundaryNtp and InternalNtp both use "ntp".
+            ZoneKind::BoundaryNtp | ZoneKind::InternalNtp => "ntp.tar.gz",
+            ZoneKind::Clickhouse => "clickhouse.tar.gz",
+            ZoneKind::ClickhouseKeeper => "clickhouse_keeper.tar.gz",
+            ZoneKind::ClickhouseServer => "clickhouse_server.tar.gz",
+            // Note "cockroachdb" for historical reasons.
+            ZoneKind::CockroachDb => "cockroachdb.tar.gz",
+            ZoneKind::Crucible => "crucible.tar.gz",
+            ZoneKind::CruciblePantry => "crucible_pantry.tar.gz",
+            ZoneKind::ExternalDns => "external_dns.tar.gz",
+            ZoneKind::InternalDns => "internal_dns.tar.gz",
+            ZoneKind::Nexus => "nexus.tar.gz",
+            ZoneKind::Oximeter => "oximeter.tar.gz",
+        }
+    }
+
     /// Return a string that is used to construct **SMF service names**. This
     /// string is guaranteed to be stable over time.
     pub fn service_prefix(self) -> &'static str {
@@ -1090,7 +1117,12 @@ impl ZoneKind {
 
     /// Return a string used as an artifact name for control-plane zones.
    /// This is **not guaranteed** to be stable.
-    pub fn artifact_name(self) -> &'static str {
+    ///
+    /// These strings match the `ArtifactId::name`s Nexus constructs when
+    /// unpacking the composite control-plane artifact in a TUF repo. Currently,
+    /// these are chosen by reading the `pkg` value of the `oxide.json` object
+    /// inside each zone image tarball.
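+    ///
+    /// An illustrative sketch (values taken from the match arms below):
+    ///
+    /// ```ignore
+    /// assert_eq!(ZoneKind::BoundaryNtp.artifact_id_name(), "ntp");
+    /// assert_eq!(ZoneKind::Clickhouse.artifact_id_name(), "clickhouse");
+    /// ```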
+    pub fn artifact_id_name(self) -> &'static str {
         match self {
             ZoneKind::BoundaryNtp => "ntp",
             ZoneKind::Clickhouse => "clickhouse",
@@ -1118,7 +1150,7 @@ impl ZoneKind {
             .to_known()
             .map(|kind| matches!(kind, KnownArtifactKind::Zone))
             .unwrap_or(false)
-            && artifact_id.name == self.artifact_name()
+            && artifact_id.name == self.artifact_id_name()
     }
 }
 
@@ -1194,6 +1226,18 @@ mod tests {
             });
         }
     }
+
+    #[test]
+    fn test_zone_prefix_matches_artifact_in_install_dataset() {
+        for zone_kind in ZoneKind::iter() {
+            let zone_prefix = zone_kind.zone_prefix();
+            let expected_artifact = format!("{zone_prefix}.tar.gz");
+            assert_eq!(
+                expected_artifact,
+                zone_kind.artifact_in_install_dataset()
+            );
+        }
+    }
 }
 
 // Used for schemars to be able to be used with camino:
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index 1fc84a9ea6c..c3d8f30b9f1 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -4721,7 +4721,7 @@ pub(crate) mod test {
         ($kind: ident, $version: expr) => {
             TufArtifactMeta {
                 id: ArtifactId {
-                    name: ZoneKind::$kind.artifact_name().to_string(),
+                    name: ZoneKind::$kind.artifact_id_name().to_string(),
                     version: $version,
                     kind: ArtifactKind::from_known(KnownArtifactKind::Zone),
                 },
diff --git a/sled-agent/config-reconciler/src/reconciler_task/zones.rs b/sled-agent/config-reconciler/src/reconciler_task/zones.rs
index e0a6e370445..a46e3b2e547 100644
--- a/sled-agent/config-reconciler/src/reconciler_task/zones.rs
+++ b/sled-agent/config-reconciler/src/reconciler_task/zones.rs
@@ -22,11 +22,13 @@ use illumos_utils::zone::DeleteAddressError;
 use illumos_utils::zone::Zones;
 use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
 use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
+use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
 use nexus_sled_agent_shared::inventory::OmicronZoneType;
 use omicron_common::address::Ipv6Subnet;
 use omicron_uuid_kinds::OmicronZoneUuid;
 use sled_agent_types::time_sync::TimeSync;
 use sled_agent_types::zone_bundle::ZoneBundleCause;
+use sled_agent_types::zone_images::ResolverStatus;
 use sled_storage::config::MountConfig;
 use slog::Logger;
 use slog::info;
@@ -156,32 +158,43 @@ impl OmicronZones {
         zone_facilities: &U,
         log: &Logger,
     ) -> Result<(), NonZeroUsize> {
+        let resolver_status =
+            sled_agent_facilities.zone_image_resolver_status();
+
         // Filter desired zones down to just those that we need to stop. See
         // [`ZoneState`] for more discussion of why we're willing (or unwilling)
         // to stop zones in various current states.
-        let zones_to_shut_down = self.zones.iter().filter(|z| {
-            match desired_zones.get(&z.config.id) {
+        let mut zones_to_shut_down = Vec::new();
+        for mut z in self.zones.iter_mut() {
+            let zone_name = z.config.zone_name();
+
+            let should_shut_down = match desired_zones.get(&z.config.id) {
                 // We no longer want this zone to be running.
                 None => true,
                 // We do want this zone to be running; check the current
                 // state.
-                Some(desired_config) => match &z.state {
+                Some(desired_config) => match &mut z.state {
                     // Only shut down a running zone if the desired config
-                    // has changed from the config used to start it.
+                    // has changes that necessitate a restart.
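+                    // (Whether a restart is needed is decided by
+                    // `does_new_config_require_zone_restart`, below.)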
                     ZoneState::Running(_) => {
-                        if z.config == *desired_config {
-                            false
-                        } else {
+                        if does_new_config_require_zone_restart(
+                            &mut z.config,
+                            desired_config,
+                            &resolver_status,
+                            log,
+                        ) {
                             info!(
                                 log,
                                 "starting shutdown of running zone; config \
                                  has changed";
-                                "zone" => z.config.zone_name(),
+                                "zone" => zone_name,
                                 "old-config" => ?z.config,
                                 "new-config" => ?desired_config,
                             );
                             true
+                        } else {
+                            false
                         }
                     }
 
@@ -190,7 +203,7 @@ impl OmicronZones {
                         info!(
                             log,
                             "resuming shutdown of partially-shut-down zone";
-                            "zone" => z.config.zone_name(),
+                            "zone" => zone_name,
                             "prev_err" => InlineErrorChain::new(err),
                         );
                         true
@@ -200,17 +213,24 @@ impl OmicronZones {
                         info!(
                             log,
                             "starting shutdown of a failed-to-start zone";
-                            "zone" => z.config.zone_name(),
+                            "zone" => zone_name,
                             "prev_err" => InlineErrorChain::new(err),
                         );
                         true
                     }
                 },
+            };
+
+            if should_shut_down {
+                zones_to_shut_down.push(z.id());
             }
-        });
+        }
 
         // Map the zones to the futures that will try to shut them down.
-        let shutdown_futures = zones_to_shut_down.map(|zone| {
+        let shutdown_futures = zones_to_shut_down.iter().map(|zone_id| {
+            let zone = self.zones.get(zone_id).expect(
+                "zones_to_shut_down only has IDs present in self.zones",
+            );
             zone.try_shut_down(sled_agent_facilities, zone_facilities, log)
                 .map(|result| (zone.config.id, result))
         });
@@ -952,13 +972,117 @@ impl ZoneFacilities for RealZoneFacilities {
     }
 }
 
+// It's possible some new zone configs do not require us to restart the zone.
+// A trivial case is if the config didn't change. We currently support one
+// nontrivial kind of change:
+//
+// If the only change is to the `image_source`, and the change is from
+// `InstallDataset` to `Artifact { hash }` (or vice versa), and the hash of the
+// zone in the install dataset is exactly equal to the hash specified by
+// `Artifact { hash }`, then we know the zone has not had any meaningful
+// changes: it's running the exact bits with the exact config it would have if
+// we restarted it, so we don't need to.
+fn does_new_config_require_zone_restart(
+    existing_config: &mut OmicronZoneConfig,
+    new_config: &OmicronZoneConfig,
+    resolver_status: &ResolverStatus,
+    log: &Logger,
+) -> bool {
+    // Trivial case
+    if *existing_config == *new_config {
+        return false;
+    }
+
+    match (&existing_config.image_source, &new_config.image_source) {
+        (
+            OmicronZoneImageSource::InstallDataset,
+            OmicronZoneImageSource::Artifact { hash },
+        )
+        | (
+            OmicronZoneImageSource::Artifact { hash },
+            OmicronZoneImageSource::InstallDataset,
+        ) => {
+            let install_dataset_hash = match resolver_status
+                .zone_manifest
+                .zone_hash(existing_config.zone_type.kind())
+            {
+                Ok(hash) => hash,
+                Err(err) => {
+                    // If we can't get the hash, assume we have to bounce the
+                    // zone.
+                    warn!(
+                        log,
+                        "failed to determine install dataset hash for zone; \
+                         assuming zone must be restarted for new config";
+                        "zone_name" => new_config.zone_name(),
+                        InlineErrorChain::new(&err),
+                    );
+                    return true;
+                }
+            };
+
+            // If the hash matches, we don't have to restart as long as there
+            // are no other changes.
+            if install_dataset_hash == *hash
+                && config_differs_only_by_image_source(
+                    existing_config,
+                    new_config,
+                )
+            {
+                info!(
+                    log,
+                    "updating config for zone without restarting; \
+                     only change is zone image source (install dataset <-> \
+                     artifact; hash of zone image matches in both)";
+                    "zone_name" => new_config.zone_name(),
+                );
+                *existing_config = new_config.clone();
+                false
+            } else {
+                true
+            }
+        }
+        // Any other config change requires a restart.
+        //
+        // If we support other kinds of changes that don't require restart,
+        // we'll need to rework the structure of this function to support both
+        // kinds of changes happening simultaneously.
+        _ => true,
+    }
+}
+
+fn config_differs_only_by_image_source(
+    config1: &OmicronZoneConfig,
+    config2: &OmicronZoneConfig,
+) -> bool {
+    // Unpack both configs, ignoring the one field we want to ignore.
+    let OmicronZoneConfig {
+        id: id1,
+        filesystem_pool: filesystem_pool1,
+        zone_type: zone_type1,
+        image_source: _,
+    } = config1;
+    let OmicronZoneConfig {
+        id: id2,
+        filesystem_pool: filesystem_pool2,
+        zone_type: zone_type2,
+        image_source: _,
+    } = config2;
+
+    id1 == id2
+        && filesystem_pool1 == filesystem_pool2
+        && zone_type1 == zone_type2
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::dataset_serialization_task::DatasetEnsureError;
     use anyhow::anyhow;
     use assert_matches::assert_matches;
+    use camino::Utf8PathBuf;
     use camino_tempfile::Utf8TempDir;
+    use iddqd::IdOrdMap;
     use illumos_utils::dladm::Etherstub;
     use illumos_utils::dladm::EtherstubVnic;
     use illumos_utils::link::VnicAllocator;
@@ -968,17 +1092,27 @@ mod tests {
     use illumos_utils::zpool::ZpoolOrRamdisk;
     use nexus_sled_agent_shared::inventory::OmicronZoneDataset;
     use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
+    use nexus_sled_agent_shared::inventory::ZoneKind;
     use omicron_common::address::SLED_PREFIX;
     use omicron_common::disk::DatasetConfig;
     use omicron_common::disk::DatasetKind;
     use omicron_common::disk::DatasetName;
     use omicron_common::disk::SharedDatasetConfig;
+    use omicron_common::update::OmicronZoneManifest;
+    use omicron_common::update::OmicronZoneManifestSource;
     use omicron_test_utils::dev;
     use omicron_uuid_kinds::DatasetUuid;
     use omicron_uuid_kinds::ZpoolUuid;
+    use sled_agent_types::zone_images::ArtifactReadResult;
+    use sled_agent_types::zone_images::MupdateOverrideStatus;
+    use sled_agent_types::zone_images::ResolverStatus;
+    use sled_agent_types::zone_images::ZoneManifestArtifactResult;
+    use sled_agent_types::zone_images::ZoneManifestArtifactsResult;
+    use sled_agent_types::zone_images::ZoneManifestStatus;
     use std::collections::BTreeSet;
     use std::collections::VecDeque;
     use std::sync::Mutex;
+    use tufaceous_artifact::ArtifactHash;
 
     // Helper to construct a `RunningZone` even on non-illumos systems.
     struct FakeZoneBuilder {
@@ -1097,15 +1231,45 @@ mod tests {
     }
 
     #[derive(Debug)]
-    struct FakeSledAgentFacilities {
-        inner: Mutex<FakeSledAgentFacilitiesInner>,
-        underlay_vnic: EtherstubVnic,
-    }
-
-    #[derive(Debug, Default)]
     struct FakeSledAgentFacilitiesInner {
         start_responses: VecDeque<anyhow::Result<RunningZone>>,
         removed_ddm_prefixes: BTreeSet<Ipv6Subnet<SLED_PREFIX>>,
+        resolver_status: ResolverStatus,
+    }
+
+    impl Default for FakeSledAgentFacilitiesInner {
+        fn default() -> Self {
+            let boot_disk_path = Utf8PathBuf::from("/test/boot/disk");
+            Self {
+                start_responses: Default::default(),
+                removed_ddm_prefixes: Default::default(),
+                // successful status containing no artifacts
+                resolver_status: ResolverStatus {
+                    zone_manifest: ZoneManifestStatus {
+                        boot_disk_path: boot_disk_path.clone(),
+                        boot_disk_result: Ok(ZoneManifestArtifactsResult {
+                            manifest: OmicronZoneManifest {
+                                source: OmicronZoneManifestSource::SledAgent,
+                                zones: IdOrdMap::new(),
+                            },
+                            data: IdOrdMap::new(),
+                        }),
+                        non_boot_disk_metadata: IdOrdMap::new(),
+                    },
+                    mupdate_override: MupdateOverrideStatus {
+                        boot_disk_path,
+                        boot_disk_override: Ok(None),
+                        non_boot_disk_overrides: IdOrdMap::new(),
+                    },
+                },
+            }
+        }
+    }
+
+    #[derive(Debug)]
+    struct FakeSledAgentFacilities {
+        inner: Mutex<FakeSledAgentFacilitiesInner>,
+        underlay_vnic: EtherstubVnic,
     }
 
     impl Default for FakeSledAgentFacilities {
@@ -1122,6 +1286,11 @@ mod tests {
             let mut inner = self.inner.lock().unwrap();
             inner.start_responses.push_back(response);
         }
+
+        fn set_resolver_status_zone_manifest(&self, zm: ZoneManifestStatus) {
+            let mut inner = self.inner.lock().unwrap();
+            inner.resolver_status.zone_manifest = zm;
+        }
     }
 
     impl SledAgentFacilities for FakeSledAgentFacilities {
@@ -1143,6 +1312,10 @@ mod tests {
                 .expect("test should populate responses for start_omicron_zone")
         }
 
+        fn zone_image_resolver_status(&self) -> ResolverStatus {
+            self.inner.lock().unwrap().resolver_status.clone()
+        }
+
         fn metrics_untrack_zone_links(
             &self,
             _zone: &RunningZone,
@@ -1358,7 +1531,7 @@ mod tests {
                 &desired_zones,
                 &sled_agent_facilities,
                 &zone_facilities,
-                true,
+                true, // is_time_synchronized
                 &datasets,
                 &logctx.log,
             )
@@ -1799,4 +1972,234 @@ mod tests {
 
         logctx.cleanup_successful();
     }
+
+    #[tokio::test]
+    async fn dont_restart_zone_if_config_change_is_only_image_source() {
+        let logctx = dev::test_setup_log(
+            "dont_restart_zone_if_config_change_is_only_image_source",
+        );
+
+        // Construct two zones we want to start; one runs out of the install
+        // dataset, and one runs a specific artifact.
+        let install_dataset_zone_hash = ArtifactHash([1; 32]);
+        let artifact_zone_hash = ArtifactHash([2; 32]);
+        let mut install_dataset_zone =
+            make_zone_config(OmicronZoneUuid::new_v4());
+        let mut artifact_zone = make_zone_config(OmicronZoneUuid::new_v4());
+        artifact_zone.image_source =
+            OmicronZoneImageSource::Artifact { hash: artifact_zone_hash };
+
+        // Ensure our two zones are using different zone types (so we can
+        // populate our fake install dataset with two images).
+        install_dataset_zone.zone_type =
+            OmicronZoneType::Oximeter { address: "[::1]:0".parse().unwrap() };
+        artifact_zone.zone_type = OmicronZoneType::InternalNtp {
+            address: "[::1]:0".parse().unwrap(),
+        };
+
+        // Populate a zone image resolver where the install dataset contains
+        // both our zone types with hashes that match our fake zones.
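+        // (For orientation, the manifest installed below maps
+        // "oximeter.tar.gz" -> install_dataset_zone_hash and
+        // "ntp.tar.gz" -> artifact_zone_hash.)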
+        let sled_agent_facilities = FakeSledAgentFacilities::default();
+        sled_agent_facilities.set_resolver_status_zone_manifest(
+            ZoneManifestStatus {
+                boot_disk_path: "/test/boot/disk".into(),
+                boot_disk_result: Ok(ZoneManifestArtifactsResult {
+                    manifest: OmicronZoneManifest {
+                        source: OmicronZoneManifestSource::SledAgent,
+                        zones: IdOrdMap::new(),
+                    },
+                    data: [
+                        ZoneManifestArtifactResult {
+                            file_name: ZoneKind::Oximeter
+                                .artifact_in_install_dataset()
+                                .to_string(),
+                            path: "/".into(),
+                            expected_size: 0,
+                            expected_hash: install_dataset_zone_hash,
+                            status: ArtifactReadResult::Valid,
+                        },
+                        ZoneManifestArtifactResult {
+                            file_name: ZoneKind::InternalNtp
+                                .artifact_in_install_dataset()
+                                .to_string(),
+                            path: "/".into(),
+                            expected_size: 0,
+                            expected_hash: artifact_zone_hash,
+                            status: ArtifactReadResult::Valid,
+                        },
+                    ]
+                    .into_iter()
+                    .collect(),
+                }),
+                non_boot_disk_metadata: IdOrdMap::new(),
+            },
+        );
+
+        let desired_zones: IdMap<_> =
+            [install_dataset_zone.clone(), artifact_zone.clone()]
+                .into_iter()
+                .collect();
+        let datasets = make_datasets(desired_zones.iter());
+
+        let zone_facilities = FakeZoneFacilities::default();
+
+        let fake_zone_builder = FakeZoneBuilder::new();
+        let fake_zone1 = fake_zone_builder
+            .make_running_zone("test1", logctx.log.clone())
+            .await;
+        let fake_zone2 = fake_zone_builder
+            .make_running_zone("test2", logctx.log.clone())
+            .await;
+        sled_agent_facilities.push_start_response(Ok(fake_zone1));
+        sled_agent_facilities.push_start_response(Ok(fake_zone2));
+
+        // "start" both fake zones.
+        let mut zones =
+            OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip);
+        zones
+            .start_zones_if_needed_impl(
+                &desired_zones,
+                &sled_agent_facilities,
+                &zone_facilities,
+                true, // is_time_synchronized
+                &datasets,
+                &logctx.log,
+            )
+            .await;
+
+        assert_eq!(zones.zones.len(), 2);
+        assert!(
+            zones
+                .zones
+                .iter()
+                .all(|z| matches!(z.state, ZoneState::Running(_)))
+        );
+
+        // Change both zone configs, but only by swapping the zone image
+        // sources for ones with matching hashes.
+        install_dataset_zone.image_source = OmicronZoneImageSource::Artifact {
+            hash: install_dataset_zone_hash,
+        };
+        artifact_zone.image_source = OmicronZoneImageSource::InstallDataset;
+        let desired_zones: IdMap<_> =
+            [install_dataset_zone.clone(), artifact_zone.clone()]
+                .into_iter()
+                .collect();
+
+        // See if we try to shut down zones; we shouldn't shut down either.
+        // Their configs have only changed by the image source, and the image
+        // source hash matches in both cases.
+        zones
+            .shut_down_zones_if_needed_impl(
+                &desired_zones,
+                &sled_agent_facilities,
+                &zone_facilities,
+                &logctx.log,
+            )
+            .await
+            .expect("zone shutdown should succeed");
+        assert_eq!(zones.zones.len(), 2);
+        assert!(
+            zones
+                .zones
+                .iter()
+                .all(|z| matches!(z.state, ZoneState::Running(_)))
+        );
+
+        // The configs should have been updated to match the new request.
+        for z in &desired_zones {
+            assert_eq!(*z, zones.zones.get(&z.id).unwrap().config);
+        }
+
+        // Swap the image sources back. Change the install dataset to only have
+        // a matching hash for Oximeter; this should shut down our
+        // `artifact_zone` (running internal NTP).
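+        // (Expected outcome: the Oximeter zone keeps running and its config
+        // is updated in place, while the internal NTP zone is shut down
+        // because the manifest hash for "ntp.tar.gz" will no longer match
+        // `artifact_zone_hash`.)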
+        install_dataset_zone.image_source =
+            OmicronZoneImageSource::InstallDataset;
+        artifact_zone.image_source =
+            OmicronZoneImageSource::Artifact { hash: artifact_zone_hash };
+        let desired_zones: IdMap<_> =
+            [install_dataset_zone.clone(), artifact_zone.clone()]
+                .into_iter()
+                .collect();
+        sled_agent_facilities.set_resolver_status_zone_manifest(
+            ZoneManifestStatus {
+                boot_disk_path: "/test/boot/disk".into(),
+                boot_disk_result: Ok(ZoneManifestArtifactsResult {
+                    manifest: OmicronZoneManifest {
+                        source: OmicronZoneManifestSource::SledAgent,
+                        zones: IdOrdMap::new(),
+                    },
+                    data: [
+                        ZoneManifestArtifactResult {
+                            file_name: ZoneKind::Oximeter
+                                .artifact_in_install_dataset()
+                                .to_string(),
+                            path: "/".into(),
+                            expected_size: 0,
+                            expected_hash: install_dataset_zone_hash,
+                            status: ArtifactReadResult::Valid,
+                        },
+                        ZoneManifestArtifactResult {
+                            file_name: ZoneKind::InternalNtp
+                                .artifact_in_install_dataset()
+                                .to_string(),
+                            path: "/".into(),
+                            expected_size: 0,
+                            // Wrong hash! We should bounce the zone.
+                            expected_hash: install_dataset_zone_hash,
+                            status: ArtifactReadResult::Valid,
+                        },
+                    ]
+                    .into_iter()
+                    .collect(),
+                }),
+                non_boot_disk_metadata: IdOrdMap::new(),
+            },
+        );
+        zones
+            .shut_down_zones_if_needed_impl(
+                &desired_zones,
+                &sled_agent_facilities,
+                &zone_facilities,
+                &logctx.log,
+            )
+            .await
+            .expect("zone shutdown should succeed");
+        assert_eq!(zones.zones.len(), 1);
+        assert!(
+            zones
+                .zones
+                .iter()
+                .all(|z| matches!(z.state, ZoneState::Running(_)))
+        );
+        assert_eq!(
+            install_dataset_zone,
+            zones.zones.get(&install_dataset_zone.id).unwrap().config,
+        );
+
+        // Swap it one more time, but this time also change a different part of
+        // the config (`filesystem_pool` here, but any property would do). We
+        // _should_ shut it down; even though the hash matches, there are other
+        // changes.
+        install_dataset_zone.image_source = OmicronZoneImageSource::Artifact {
+            hash: install_dataset_zone_hash,
+        };
+        install_dataset_zone.filesystem_pool =
+            Some(ZpoolName::new_external(ZpoolUuid::new_v4()));
+        let desired_zones: IdMap<_> =
+            [install_dataset_zone.clone()].into_iter().collect();
+        zones
+            .shut_down_zones_if_needed_impl(
+                &desired_zones,
+                &sled_agent_facilities,
+                &zone_facilities,
+                &logctx.log,
+            )
+            .await
+            .expect("zone shutdown should succeed");
+        assert_eq!(zones.zones.len(), 0);
+
+        logctx.cleanup_successful();
+    }
 }
diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs
index 8445b51d21b..a7d2bbd518d 100644
--- a/sled-agent/config-reconciler/src/sled_agent_facilities.rs
+++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs
@@ -12,6 +12,7 @@ use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
 use omicron_common::address::Ipv6Subnet;
 use omicron_common::address::SLED_PREFIX;
 use sled_agent_types::zone_bundle::ZoneBundleCause;
+use sled_agent_types::zone_images::ResolverStatus;
 use std::future::Future;
 use tufaceous_artifact::ArtifactHash;
 
@@ -40,6 +41,9 @@ pub trait SledAgentFacilities: Send + Sync + 'static {
         zone_root_path: PathInPool,
     ) -> impl Future<Output = anyhow::Result<RunningZone>> + Send;
 
+    /// Get the status of the zone image resolver.
+    fn zone_image_resolver_status(&self) -> ResolverStatus;
+
     /// Stop tracking metrics for a zone's datalinks.
     fn metrics_untrack_zone_links(
         &self,
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index 646e28f3446..0846184ccde 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -69,6 +69,7 @@ use sled_agent_types::zone_bundle::{
     BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod,
     PriorityOrder, StorageLimit, ZoneBundleCause, ZoneBundleMetadata,
 };
+use sled_agent_types::zone_images::ResolverStatus;
 use sled_diagnostics::SledDiagnosticsCmdError;
 use sled_diagnostics::SledDiagnosticsCmdOutput;
 use sled_hardware::{
@@ -1352,6 +1353,10 @@ impl SledAgentFacilities for ReconcilerFacilities {
         Ok(zone)
     }
 
+    fn zone_image_resolver_status(&self) -> ResolverStatus {
+        self.service_manager.zone_image_resolver().status()
+    }
+
     fn metrics_untrack_zone_links(
         &self,
         zone: &RunningZone,
diff --git a/sled-agent/types/src/zone_images.rs b/sled-agent/types/src/zone_images.rs
index 387bb99afa8..a3547b37b49 100644
--- a/sled-agent/types/src/zone_images.rs
+++ b/sled-agent/types/src/zone_images.rs
@@ -11,6 +11,7 @@ use nexus_sled_agent_shared::inventory::MupdateOverrideInventory;
 use nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory;
 use nexus_sled_agent_shared::inventory::ZoneArtifactInventory;
 use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory;
+use nexus_sled_agent_shared::inventory::ZoneKind;
 use nexus_sled_agent_shared::inventory::ZoneManifestBootInventory;
 use nexus_sled_agent_shared::inventory::ZoneManifestInventory;
 use nexus_sled_agent_shared::inventory::ZoneManifestNonBootInventory;
@@ -83,6 +84,63 @@ impl ZoneManifestStatus {
             non_boot_status,
         }
     }
+
+    /// Return the validated artifact hash for a given [`ZoneKind`].
+    ///
+    /// Only considers [`Self::boot_disk_result`].
+    pub fn zone_hash(
+        &self,
+        kind: ZoneKind,
+    ) -> Result<ArtifactHash, ZoneManifestZoneHashError> {
+        let artifacts_result =
+            self.boot_disk_result.as_ref().map_err(|err| {
+                ZoneManifestZoneHashError::ReadBootDisk(err.clone())
+            })?;
+
+        let file_name = kind.artifact_in_install_dataset();
+        let artifact = &artifacts_result
+            .data
+            .get(file_name)
+            .ok_or(ZoneManifestZoneHashError::NoArtifactForZoneKind(kind))?;
+
+        match &artifact.status {
+            ArtifactReadResult::Valid => Ok(artifact.expected_hash),
+            ArtifactReadResult::Mismatch { actual_size, actual_hash } => {
+                Err(ZoneManifestZoneHashError::SizeHashMismatch {
+                    expected_size: artifact.expected_size,
+                    expected_hash: artifact.expected_hash,
+                    actual_size: *actual_size,
+                    actual_hash: *actual_hash,
+                })
+            }
+            ArtifactReadResult::Error(err) => {
+                Err(ZoneManifestZoneHashError::ReadArtifact(err.clone()))
+            }
+        }
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ZoneManifestZoneHashError {
+    #[error("error reading boot disk")]
+    ReadBootDisk(#[source] ZoneManifestReadError),
+    #[error("no artifact found for zone kind {0:?}")]
+    NoArtifactForZoneKind(ZoneKind),
+    #[error(
+        "size/hash mismatch: expected {} bytes/{}, got {} bytes/{}",
+        .expected_size,
+        .expected_hash,
+        .actual_size,
+        .actual_hash,
+    )]
+    SizeHashMismatch {
+        expected_size: u64,
+        expected_hash: ArtifactHash,
+        actual_size: u64,
+        actual_hash: ArtifactHash,
+    },
+    #[error("error reading artifact")]
+    ReadArtifact(#[source] ArcIoError),
+}
 
 /// The result of reading artifacts from an install dataset.