diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 01dc6bca472..aed8bfec2f5 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -223,6 +223,45 @@ CREATE INDEX on omicron.public.Region ( dataset_id ); +/* + * A snapshot of a region, within a dataset. + */ +CREATE TABLE omicron.public.region_snapshot ( + dataset_id UUID NOT NULL, + region_id UUID NOT NULL, + + /* Associated higher level virtual snapshot */ + snapshot_id UUID NOT NULL, + + /* + * Target string, for identification as part of + * volume construction request(s) + */ + snapshot_addr TEXT NOT NULL, + + /* How many volumes reference this? */ + volume_references INT8 NOT NULL, + + PRIMARY KEY (dataset_id, region_id, snapshot_id) +); + +/* Index for use during join with region table */ +CREATE INDEX on omicron.public.region_snapshot ( + dataset_id, region_id +); + +/* + * Index on volume_references and snapshot_addr for crucible + * resource accounting lookup + */ +CREATE INDEX on omicron.public.region_snapshot ( + volume_references +); + +CREATE INDEX on omicron.public.region_snapshot ( + snapshot_addr +); + /* * A volume within Crucible */ @@ -241,7 +280,19 @@ CREATE TABLE omicron.public.volume ( * consumed by some Upstairs code to perform the volume creation. The Rust * type of this column should be Crucible::VolumeConstructionRequest. */ - data TEXT NOT NULL + data TEXT NOT NULL, + + /* + * A JSON document describing what resources to clean up when deleting this + * volume. The Rust type of this column should be the CrucibleResources + * enum. + */ + resources_to_clean_up TEXT +); + +/* Quickly find deleted volumes */ +CREATE INDEX on omicron.public.volume ( + time_deleted ); /* diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 95c79b6c863..95311d79453 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -41,6 +41,7 @@ mod producer_endpoint; mod project; mod rack; mod region; +mod region_snapshot; mod role_assignment; mod role_builtin; pub mod saga_types; @@ -106,6 +107,7 @@ pub use producer_endpoint::*; pub use project::*; pub use rack::*; pub use region::*; +pub use region_snapshot::*; pub use role_assignment::*; pub use role_builtin::*; pub use service::*; diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs new file mode 100644 index 00000000000..9addeb83e33 --- /dev/null +++ b/nexus/db-model/src/region_snapshot.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::schema::region_snapshot; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Database representation of a Region's snapshot. +/// +/// A region snapshot represents a snapshot of a region, taken during the higher +/// level virtual disk snapshot operation. +#[derive( + Queryable, + Insertable, + Debug, + Clone, + Selectable, + Serialize, + Deserialize, + PartialEq, +)] +#[diesel(table_name = region_snapshot)] +pub struct RegionSnapshot { + // unique identifier of this region snapshot + pub dataset_id: Uuid, + pub region_id: Uuid, + pub snapshot_id: Uuid, + + // used for identifying volumes that reference this + pub snapshot_addr: String, + + // how many volumes reference this? 
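+    // (Incremented in `volume_create` for each volume construction request
+    // that references this snapshot's `snapshot_addr`, and decremented during
+    // volume deletion; the snapshot becomes eligible for cleanup at zero.)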
+ pub volume_references: i64, +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 73d91b11a51..54ec6fb3519 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -442,6 +442,16 @@ table! { } } +table! { + region_snapshot (dataset_id, region_id, snapshot_id) { + dataset_id -> Uuid, + region_id -> Uuid, + snapshot_id -> Uuid, + snapshot_addr -> Text, + volume_references -> Int8, + } +} + table! { volume (id) { id -> Uuid, @@ -451,7 +461,12 @@ table! { rcgen -> Int8, data -> Text, - /* TODO: some sort of refcount? */ + + /* + * During volume deletion, a serialized list of Crucible resources to + * clean up will be written here, along with setting time_deleted. + */ + resources_to_clean_up -> Nullable, } } @@ -623,6 +638,7 @@ allow_tables_to_appear_in_same_query!( project, rack, region, + region_snapshot, saga, saga_node_event, silo, @@ -631,6 +647,7 @@ allow_tables_to_appear_in_same_query!( service, sled, router_route, + volume, vpc, vpc_subnet, vpc_router, diff --git a/nexus/db-model/src/volume.rs b/nexus/db-model/src/volume.rs index b53815c10e3..3fbb4378416 100644 --- a/nexus/db-model/src/volume.rs +++ b/nexus/db-model/src/volume.rs @@ -24,11 +24,13 @@ use uuid::Uuid; pub struct Volume { #[diesel(embed)] identity: VolumeIdentity, - time_deleted: Option>, + pub time_deleted: Option>, rcgen: Generation, data: String, + + pub resources_to_clean_up: Option, } impl Volume { @@ -38,6 +40,7 @@ impl Volume { time_deleted: None, rcgen: Generation::new(), data, + resources_to_clean_up: None, } } diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 7c327546cd8..e26955f12fe 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -510,6 +510,9 @@ impl super::Nexus { .project_delete_snapshot(opctx, &authz_snapshot, &db_snapshot) .await?; + // Kick off volume deletion saga + self.volume_delete(db_snapshot.volume_id).await?; + Ok(()) } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1ad0a3f34de..782bd3fbece 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -39,6 +39,7 @@ mod silo; mod sled; pub mod test_interfaces; mod update; +mod volume; mod vpc; mod vpc_router; mod vpc_subnet; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 34c8ac445f7..c65f97d7331 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -4,7 +4,7 @@ use super::{ ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, - ACTION_GENERATE_ID, + ACTION_GENERATE_ID, MAX_CONCURRENT_REGION_REQUESTS, }; use crate::app::sagas::NexusAction; use crate::context::OpContext; @@ -51,16 +51,10 @@ lazy_static! 
{ sdc_create_disk_record, sdc_create_disk_record_undo ); - static ref REGIONS_ALLOC: NexusAction = ActionFunc::new_action( - "disk-create.regions-alloc", - sdc_alloc_regions, - sdc_alloc_regions_undo - ); - static ref REGIONS_ENSURE: NexusAction = ActionFunc::new_action( - "disk-create.regions-ensure", - sdc_regions_ensure, - sdc_regions_ensure_undo - ); + static ref REGIONS_ALLOC: NexusAction = + new_action_noop_undo("disk-create.regions-alloc", sdc_alloc_regions,); + static ref REGIONS_ENSURE: NexusAction = + new_action_noop_undo("disk-create.regions-ensure", sdc_regions_ensure,); static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action( "disk-create.create-volume-record", sdc_create_volume_record, @@ -231,6 +225,7 @@ async fn sdc_alloc_regions( let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let volume_id = sagactx.lookup::("volume_id")?; + // Ensure the disk is backed by appropriate regions. // // This allocates regions in the database, but the disk state is still @@ -251,16 +246,7 @@ async fn sdc_alloc_regions( Ok(datasets_and_regions) } -async fn sdc_alloc_regions_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - - let volume_id = sagactx.lookup::("volume_id")?; - osagactx.datastore().regions_hard_delete(volume_id).await?; - Ok(()) -} - +/// Call out to Crucible agent and perform region creation. async fn ensure_region_in_dataset( log: &Logger, dataset: &db::model::Dataset, @@ -317,10 +303,7 @@ async fn ensure_region_in_dataset( Ok(region.into_inner()) } -// Arbitrary limit on concurrency, for operations issued -// on multiple regions within a disk at the same time. -const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; - +/// Call out to Crucible agent and perform region creation. async fn sdc_regions_ensure( sagactx: NexusActionContext, ) -> Result { @@ -379,7 +362,7 @@ async fn sdc_regions_ensure( let block_size = datasets_and_regions[0].1.block_size; - // If requested, back disk by image + // If a disk source was requested, set the read-only parent of this disk. let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let log = osagactx.log(); @@ -497,7 +480,7 @@ async fn sdc_regions_ensure( ); } - // Store volume details in db + // Create volume construction request for this disk let mut rng = StdRng::from_entropy(); let volume_construction_request = VolumeConstructionRequest::Volume { id: disk_id, @@ -554,55 +537,6 @@ async fn sdc_regions_ensure( Ok(volume_data) } -pub(super) async fn delete_regions( - datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, -) -> Result<(), Error> { - let request_count = datasets_and_regions.len(); - futures::stream::iter(datasets_and_regions) - .map(|(dataset, region)| async move { - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - let id = RegionId(region.id().to_string()); - client.region_delete(&id).await.map_err(|e| match e { - crucible_agent_client::Error::ErrorResponse(rv) => { - match rv.status() { - http::StatusCode::SERVICE_UNAVAILABLE => { - Error::unavail(&rv.message) - } - status if status.is_client_error() => { - Error::invalid_request(&rv.message) - } - _ => Error::internal_error(&rv.message), - } - } - _ => Error::internal_error( - "unexpected failure during `region_delete`", - ), - }) - }) - // Execute the allocation requests concurrently. 
- .buffer_unordered(std::cmp::min( - request_count, - MAX_CONCURRENT_REGION_REQUESTS, - )) - .collect::>>() - .await - .into_iter() - .collect::, _>>()?; - Ok(()) -} - -async fn sdc_regions_ensure_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let datasets_and_regions = sagactx - .lookup::>( - "datasets_and_regions", - )?; - delete_regions(datasets_and_regions).await?; - Ok(()) -} - async fn sdc_create_volume_record( sagactx: NexusActionContext, ) -> Result { @@ -628,7 +562,7 @@ async fn sdc_create_volume_record_undo( let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; - osagactx.datastore().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } @@ -647,6 +581,7 @@ async fn sdc_finalize_disk_record( .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; + // TODO-security Review whether this can ever fail an authz check. We don't // want this to ever fail the authz check here -- if it did, we would have // wanted to catch that a lot sooner. It wouldn't make sense for it to fail @@ -665,6 +600,7 @@ async fn sdc_finalize_disk_record( ) .await .map_err(ActionError::action_failed)?; + Ok(()) } diff --git a/nexus/src/app/sagas/disk_delete.rs b/nexus/src/app/sagas/disk_delete.rs index 4ab26222b87..9cb8ac04b88 100644 --- a/nexus/src/app/sagas/disk_delete.rs +++ b/nexus/src/app/sagas/disk_delete.rs @@ -2,7 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::disk_create::delete_regions; use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; @@ -33,26 +32,9 @@ lazy_static! { // underlying regions. sdd_delete_disk_record ); - static ref DELETE_REGIONS: NexusAction = new_action_noop_undo( - "disk-delete.delete-regions", - // TODO(https://github.com/oxidecomputer/omicron/issues/612): - // We need a way to deal with this operation failing, aside from - // propagating the error to the user. - // - // What if the Sled goes offline? Nexus must ultimately be - // responsible for reconciling this scenario. - // - // The current behavior causes the disk deletion saga to - // fail, but still marks the disk as destroyed. 
- sdd_delete_regions - ); - static ref DELETE_REGION_RECORDS: NexusAction = new_action_noop_undo( - "disk-delete.delete-region-records", - sdd_delete_region_records - ); - static ref DELETE_VOLUME_RECORD: NexusAction = new_action_noop_undo( - "disk-delete.delete-volume-record", - sdd_delete_volume_record + static ref DELETE_VOLUME: NexusAction = new_action_noop_undo( + "disk-delete.delete-volume", + sdd_delete_volume ); } @@ -66,9 +48,7 @@ impl NexusSaga for SagaDiskDelete { fn register_actions(registry: &mut ActionRegistry) { registry.register(Arc::clone(&*DELETE_DISK_RECORD)); - registry.register(Arc::clone(&*DELETE_REGIONS)); - registry.register(Arc::clone(&*DELETE_REGION_RECORDS)); - registry.register(Arc::clone(&*DELETE_VOLUME_RECORD)); + registry.register(Arc::clone(&*DELETE_VOLUME)); } fn make_saga_dag( @@ -81,19 +61,9 @@ impl NexusSaga for SagaDiskDelete { DELETE_DISK_RECORD.as_ref(), )); builder.append(Node::action( - "no_result1", - "DeleteRegions", - DELETE_REGIONS.as_ref(), - )); - builder.append(Node::action( - "no_result2", - "DeleteRegionRecords", - DELETE_REGION_RECORDS.as_ref(), - )); - builder.append(Node::action( - "no_result3", - "DeleteVolumeRecord", - DELETE_VOLUME_RECORD.as_ref(), + "no_result", + "DeleteVolume", + DELETE_VOLUME.as_ref(), )); Ok(builder.build()?) } @@ -115,42 +85,13 @@ async fn sdd_delete_disk_record( Ok(volume_id) } -async fn sdd_delete_regions( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let volume_id = sagactx.lookup::("volume_id")?; - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(volume_id) - .await - .map_err(ActionError::action_failed)?; - delete_regions(datasets_and_regions) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - -async fn sdd_delete_region_records( +async fn sdd_delete_volume( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; osagactx - .datastore() - .regions_hard_delete(volume_id) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - -async fn sdd_delete_volume_record( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let volume_id = sagactx.lookup::("volume_id")?; - osagactx - .datastore() + .nexus() .volume_delete(volume_id) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 35dcae5b5bd..564f1b342ad 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -24,6 +24,7 @@ pub mod disk_delete; pub mod instance_create; pub mod instance_migrate; pub mod snapshot_create; +pub mod volume_delete; #[derive(Debug)] pub struct NexusSagaType; @@ -97,6 +98,9 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); registry } @@ -106,3 +110,7 @@ pub(super) async fn saga_generate_uuid( ) -> Result { Ok(Uuid::new_v4()) } + +// Arbitrary limit on concurrency, for operations issued on multiple regions +// within a disk at the same time. 
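+//
+// Moved out of the disk create saga so that both the disk create and volume
+// delete sagas can share it.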
+const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 3d9fe37f3a2..2d11ad91e01 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -123,28 +123,16 @@ pub struct Params { // snapshot create saga: actions -/// A no-op action used because if saga nodes fail their undo action isn't run! -async fn ssc_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - lazy_static! { static ref CREATE_SNAPSHOT_RECORD: NexusAction = ActionFunc::new_action( "snapshot-create.create-snapshot-record", ssc_create_snapshot_record, ssc_create_snapshot_record_undo, ); - static ref SEND_SNAPSHOT_REQUEST: NexusAction = ActionFunc::new_action( + static ref SEND_SNAPSHOT_REQUEST: NexusAction = new_action_noop_undo( "snapshot-create.send-snapshot-request", ssc_send_snapshot_request, - ssc_send_snapshot_request_undo, ); - static ref NOOP_FOR_START_RUNNING_SNAPSHOT: NexusAction = - ActionFunc::new_action( - "snapshot-create.noop-for-start-running-snapshot", - ssc_noop, - ssc_start_running_snapshot_undo, - ); static ref START_RUNNING_SNAPSHOT: NexusAction = new_action_noop_undo( "snapshot-create.start-running-snapshot", ssc_start_running_snapshot, @@ -171,7 +159,6 @@ impl NexusSaga for SagaSnapshotCreate { fn register_actions(registry: &mut ActionRegistry) { registry.register(Arc::clone(&*CREATE_SNAPSHOT_RECORD)); registry.register(Arc::clone(&*SEND_SNAPSHOT_REQUEST)); - registry.register(Arc::clone(&*NOOP_FOR_START_RUNNING_SNAPSHOT)); registry.register(Arc::clone(&*START_RUNNING_SNAPSHOT)); registry.register(Arc::clone(&*CREATE_VOLUME_RECORD)); registry.register(Arc::clone(&*FINALIZE_SNAPSHOT_RECORD)); @@ -208,27 +195,7 @@ impl NexusSaga for SagaSnapshotCreate { SEND_SNAPSHOT_REQUEST.as_ref(), )); - // The following saga action iterates over the datasets and regions for a - // disk and make requests for each tuple, and this violates the saga's - // mental model where actions should do one thing at a time and be - // idempotent + atomic. If only a few of the requests succeed, the saga will - // leave things in a partial state because the undo function of a node is - // not run when the action fails. - // - // Use a noop action and an undo, followed by an action + no undo, to work - // around this: - // - // - [noop, undo function] - // - [action function, noop] - // - // With this, if the action function fails, the undo function will run. 
- // Validate with crucible agent and start snapshot downstairs - builder.append(Node::action( - "noop_for_replace_sockets_map", - "NoopForStartRunningSnapshot", - NOOP_FOR_START_RUNNING_SNAPSHOT.as_ref(), - )); builder.append(Node::action( "replace_sockets_map", "StartRunningSnapshot", @@ -442,50 +409,6 @@ async fn ssc_send_snapshot_request( Ok(()) } -async fn ssc_send_snapshot_request_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); - - let snapshot_id = sagactx.lookup::("snapshot_id")?; - - let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .project_id(params.project_id) - .disk_name(¶ms.create_params.disk.clone().into()) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Delete any snapshots created by this saga - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(disk.volume_id) - .await - .map_err(ActionError::action_failed)?; - - for (dataset, region) in datasets_and_regions { - // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - - // Delete snapshot, it was created by this saga - info!(log, "deleting snapshot {} {} {}", url, region.id(), snapshot_id); - client - .region_delete_snapshot( - &RegionId(region.id().to_string()), - &snapshot_id.to_string(), - ) - .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; - } - - Ok(()) -} - async fn ssc_start_running_snapshot( sagactx: NexusActionContext, ) -> Result, ActionError> { @@ -562,60 +485,25 @@ async fn ssc_start_running_snapshot( dataset.address_with_port(crucible_running_snapshot.port_number) ); info!(log, "map {} to {}", region_addr, snapshot_addr); - map.insert(region_addr, snapshot_addr); - } - - Ok(map) -} - -async fn ssc_start_running_snapshot_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let log = sagactx.user_data().log(); - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = OpContext::for_saga_action(&sagactx, ¶ms.serialized_authn); - - let snapshot_id = sagactx.lookup::("snapshot_id")?; - - let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .project_id(params.project_id) - .disk_name(¶ms.create_params.disk.clone().into()) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Delete any running snapshots created by this saga - let datasets_and_regions = osagactx - .datastore() - .get_allocated_regions(disk.volume_id) - .await - .map_err(ActionError::action_failed)?; - - for (dataset, region) in datasets_and_regions { - // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); - let client = CrucibleAgentClient::new(&url); - - // Delete running snapshot - info!( - log, - "deleting running snapshot {} {} {}", - url, - region.id(), - snapshot_id - ); - client - .region_delete_running_snapshot( - &RegionId(region.id().to_string()), - &snapshot_id.to_string(), - ) + map.insert(region_addr, snapshot_addr.clone()); + + // Once snapshot has been validated, and running snapshot has been + // started, add an entry in the region_snapshot table to correspond to + // these Crucible resources. 
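+        // `region_snapshot_create` uses ON CONFLICT DO NOTHING, so re-running
+        // this saga node will not insert duplicate rows.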
+ osagactx + .datastore() + .region_snapshot_create(db::model::RegionSnapshot { + dataset_id: dataset.id(), + region_id: region.id(), + snapshot_id, + snapshot_addr, + volume_references: 0, // to be filled later + }) .await - .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; } - Ok(()) + Ok(map) } async fn ssc_create_volume_record( @@ -679,6 +567,7 @@ async fn ssc_create_volume_record( info!(log, "snapshot volume construction request {}", volume_data); let volume = db::model::Volume::new(volume_id, volume_data); + // Insert volume record into the DB let volume_created = osagactx .datastore() .volume_create(volume) @@ -696,8 +585,9 @@ async fn ssc_create_volume_record_undo( let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let volume_id = sagactx.lookup::("volume_id")?; + info!(log, "deleting volume {}", volume_id); - osagactx.datastore().volume_delete(volume_id).await?; + osagactx.nexus().volume_delete(volume_id).await?; Ok(()) } diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs new file mode 100644 index 00000000000..74efcc5eb3b --- /dev/null +++ b/nexus/src/app/sagas/volume_delete.rs @@ -0,0 +1,461 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Nexus is responsible for telling Crucible Agent(s) when to clean up +//! resources - those Agents do not have any idea of what volumes are +//! constructed, currently active, etc. Plus, volumes can (and will) change +//! during their lifetime. Operations like growing a disk, removing a read-only +//! parent after a scrub has completed, or re-encrypting a disk will all change +//! the volume that backs a disk. +//! +//! Nexus has to account for all the Crucible resources it is using, and count +//! how many volumes are using those resources. Only when that count drops to +//! zero is it valid to clean up the appropriate Crucible resource. +//! +//! Complicating things is the fact that ZFS datasets cannot be deleted if there +//! are snapshots of that dataset. Nexus' resource accounting must take this +//! dependency into account. Note that ZFS snapshots can layer, but any snapshot +//! can be deleted without the requirement of (for example) deleting the +//! snapshots in a certain order. +//! +//! One problem to solve is doing this idempotently. Volumes reference Crucible +//! resources, and when they are inserted or deleted the accounting needs to +//! change. Saga nodes must be idempotent in order to work correctly. + +use super::ActionRegistry; +use super::NexusActionContext; +use super::NexusSaga; +use super::MAX_CONCURRENT_REGION_REQUESTS; +use crate::app::sagas::NexusAction; +use crate::db; +use crate::db::datastore::CrucibleResources; +use crucible_agent_client::{types::RegionId, Client as CrucibleAgentClient}; +use futures::StreamExt; +use lazy_static::lazy_static; +use nexus_types::identity::Asset; +use omicron_common::api::external::Error; +use serde::Deserialize; +use serde::Serialize; +use std::sync::Arc; +use steno::new_action_noop_undo; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// volume delete saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub volume_id: Uuid, +} + +// volume delete saga: actions + +lazy_static! 
{ + // TODO(https://github.com/oxidecomputer/omicron/issues/612): + // + // We need a way to deal with this operation failing, aside from + // propagating the error to the user. + // + // What if the Sled goes offline? Nexus must ultimately be + // responsible for reconciling this scenario. + + static ref DECREASE_CRUCIBLE_RESOURCE_COUNT: NexusAction = new_action_noop_undo( + "volume-delete.decrease-resource-count", + svd_decrease_crucible_resource_count, + ); + + static ref DELETE_CRUCIBLE_REGIONS: NexusAction = new_action_noop_undo( + "volume-delete.delete-crucible-regions", + svd_delete_crucible_regions, + ); + + static ref DELETE_CRUCIBLE_SNAPSHOTS: NexusAction = new_action_noop_undo( + "volume-delete.delete-crucible-snapshots", + svd_delete_crucible_snapshots, + ); + + static ref DELETE_FREED_CRUCIBLE_REGIONS: NexusAction = new_action_noop_undo( + "volume-delete.delete-freed-crucible-regions", + svd_delete_freed_crucible_regions, + ); + + static ref HARD_DELETE_VOLUME_RECORD: NexusAction = new_action_noop_undo( + "volume-delete.hard-delete-volume-record", + svd_hard_delete_volume_record, + ); +} + +// volume delete saga: definition + +#[derive(Debug)] +pub struct SagaVolumeDelete; +impl NexusSaga for SagaVolumeDelete { + const NAME: &'static str = "volume-delete"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + registry.register(Arc::clone(&*DECREASE_CRUCIBLE_RESOURCE_COUNT)); + registry.register(Arc::clone(&*DELETE_CRUCIBLE_REGIONS)); + registry.register(Arc::clone(&*DELETE_CRUCIBLE_SNAPSHOTS)); + registry.register(Arc::clone(&*DELETE_FREED_CRUCIBLE_REGIONS)); + registry.register(Arc::clone(&*HARD_DELETE_VOLUME_RECORD)); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "crucible_resources_to_delete", + "DecreaseCrucibleResources", + DECREASE_CRUCIBLE_RESOURCE_COUNT.as_ref(), + )); + + builder.append_parallel(vec![ + // clean up top level regions for volume + Node::action( + "no_result_1", + "DeleteCrucibleRegions", + DELETE_CRUCIBLE_REGIONS.as_ref(), + ), + // clean up snapshots no longer referenced by any volume + Node::action( + "no_result_2", + "DeleteCrucibleSnapshots", + DELETE_CRUCIBLE_SNAPSHOTS.as_ref(), + ), + ]); + + // clean up regions that were freed by deleting snapshots + builder.append(Node::action( + "no_result_3", + "DeleteFreedCrucibleRegions", + DELETE_FREED_CRUCIBLE_REGIONS.as_ref(), + )); + + builder.append(Node::action( + "final_no_result", + "HardDeleteVolumeRecord", + HARD_DELETE_VOLUME_RECORD.as_ref(), + )); + + Ok(builder.build()?) + } +} + +// volume delete saga: action implementations + +/// Decrease Crucible resource accounting for this volume, and return Crucible +/// resources to delete. +async fn svd_decrease_crucible_resource_count( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let crucible_resources = osagactx + .datastore() + .decrease_crucible_resource_count_and_soft_delete_volume( + params.volume_id, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(crucible_resources) +} + +/// Clean up regions associated with this volume. 
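+///
+/// This deletes the read/write regions listed in the `CrucibleResources`
+/// produced by the previous saga node, then hard-deletes their DB records.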
+async fn svd_delete_crucible_regions( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let crucible_resources_to_delete = + sagactx.lookup::("crucible_resources_to_delete")?; + + // Send DELETE calls to the corresponding Crucible agents + match crucible_resources_to_delete { + CrucibleResources::V1(crucible_resources_to_delete) => { + delete_crucible_regions( + crucible_resources_to_delete.datasets_and_regions.clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove DB records + let region_ids_to_delete = crucible_resources_to_delete + .datasets_and_regions + .iter() + .map(|(_, r)| r.id()) + .collect(); + + osagactx + .datastore() + .regions_hard_delete(region_ids_to_delete) + .await + .map_err(ActionError::action_failed)?; + } + } + + Ok(()) +} + +/// Clean up snapshots freed up for deletion by deleting this volume. +/// +/// This Volume may have referenced read-only downstairs (and their snapshots), +/// and deleting it will remove the references - this may free up those +/// resources for deletion, which this Saga node does. +async fn svd_delete_crucible_snapshots( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + let crucible_resources_to_delete = + sagactx.lookup::("crucible_resources_to_delete")?; + + // Send DELETE calls to the corresponding Crucible agents + match crucible_resources_to_delete { + CrucibleResources::V1(crucible_resources_to_delete) => { + delete_crucible_snapshots( + crucible_resources_to_delete.datasets_and_snapshots.clone(), + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove DB records + for (_, region_snapshot) in + &crucible_resources_to_delete.datasets_and_snapshots + { + osagactx + .datastore() + .region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(ActionError::action_failed)?; + } + } + } + + Ok(()) +} + +/// Deleting region snapshots in a previous saga node may have freed up regions +/// that were deleted in the DB but couldn't be deleted by the Crucible Agent +/// because a snapshot existed. Look for those here, and delete them. These will +/// be a different volume id (i.e. for a previously deleted disk) than the one +/// in this saga's params struct. +/// +/// Note: each delete of a snapshot could trigger another delete of a region, if +/// that region's use has gone to zero. A snapshot delete will never trigger +/// another snapshot delete. +async fn svd_delete_freed_crucible_regions( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // Find regions freed up for deletion by a previous saga node deleting the + // region snapshots. 
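+    // `find_deleted_volume_regions` returns (Dataset, Region, Volume) tuples
+    // for soft-deleted volumes whose regions no longer have any region
+    // snapshot references.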
+ let freed_datasets_regions_and_volumes = osagactx + .datastore() + .find_deleted_volume_regions() + .await + .map_err(ActionError::action_failed)?; + + // Send DELETE calls to the corresponding Crucible agents + delete_crucible_regions( + freed_datasets_regions_and_volumes + .iter() + .map(|(d, r, _)| (d.clone(), r.clone())) + .collect(), + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove region DB records + osagactx + .datastore() + .regions_hard_delete( + freed_datasets_regions_and_volumes + .iter() + .map(|(_, r, _)| r.id()) + .collect(), + ) + .await + .map_err(ActionError::action_failed)?; + + // Remove volume DB records + for (_, _, volume) in &freed_datasets_regions_and_volumes { + osagactx + .datastore() + .volume_hard_delete(volume.id()) + .await + .map_err(ActionError::action_failed)?; + } + + Ok(()) +} + +/// Hard delete the volume record +async fn svd_hard_delete_volume_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + // Do not hard delete the volume record if there are lingering regions + // associated with them. This occurs when a region snapshot hasn't been + // deleted, which means we can't delete the region. Later on, deleting the + // region snapshot will free up the region(s) to be deleted (this occurs in + // svd_delete_freed_crucible_regions). + let allocated_regions = osagactx + .datastore() + .get_allocated_regions(params.volume_id) + .await + .map_err(ActionError::action_failed)?; + + if !allocated_regions.is_empty() { + return Ok(()); + } + + osagactx + .datastore() + .volume_hard_delete(params.volume_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +// helper functions + +// Given a list of datasets and regions, send DELETE calls to the datasets +// corresponding Crucible Agent for each region. +pub(super) async fn delete_crucible_regions( + datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>, +) -> Result<(), Error> { + let request_count = datasets_and_regions.len(); + if request_count == 0 { + return Ok(()); + } + + futures::stream::iter(datasets_and_regions) + .map(|(dataset, region)| async move { + let url = format!("http://{}", dataset.address()); + let client = CrucibleAgentClient::new(&url); + let id = RegionId(region.id().to_string()); + client.region_delete(&id).await.map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `delete_crucible_regions`", + ), + }) + }) + // Execute the allocation requests concurrently. + .buffer_unordered(std::cmp::min( + request_count, + MAX_CONCURRENT_REGION_REQUESTS, + )) + .collect::>>() + .await + .into_iter() + .collect::, _>>()?; + + Ok(()) +} + +// Given a list of datasets and region snapshots, send DELETE calls to the +// datasets corresponding Crucible Agent for each running read-only downstairs +// and snapshot. 
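+//
+// For each entry the running (read-only) snapshot is deleted first, then the
+// snapshot itself.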
+pub(super) async fn delete_crucible_snapshots( + datasets_and_snapshots: Vec<( + db::model::Dataset, + db::model::RegionSnapshot, + )>, +) -> Result<(), Error> { + let request_count = datasets_and_snapshots.len(); + if request_count == 0 { + return Ok(()); + } + + futures::stream::iter(datasets_and_snapshots) + .map(|(dataset, region_snapshot)| async move { + let url = format!("http://{}", dataset.address()); + let client = CrucibleAgentClient::new(&url); + + // delete running snapshot + client + .region_delete_running_snapshot( + &RegionId(region_snapshot.region_id.to_string()), + ®ion_snapshot.snapshot_id.to_string(), + ) + .await + .map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `region_delete_running_snapshot`", + ), + })?; + + // delete snapshot + client + .region_delete_snapshot( + &RegionId(region_snapshot.region_id.to_string()), + ®ion_snapshot.snapshot_id.to_string(), + ) + .await + .map_err(|e| match e { + crucible_agent_client::Error::ErrorResponse(rv) => { + match rv.status() { + http::StatusCode::SERVICE_UNAVAILABLE => { + Error::unavail(&rv.message) + } + status if status.is_client_error() => { + Error::invalid_request(&rv.message) + } + _ => Error::internal_error(&rv.message), + } + } + _ => Error::internal_error( + "unexpected failure during `region_delete_snapshot`", + ), + })?; + + Ok(()) + }) + // Execute the allocation requests concurrently. + .buffer_unordered(std::cmp::min( + request_count, + MAX_CONCURRENT_REGION_REQUESTS, + )) + .collect::>>() + .await + .into_iter() + .collect::, _>>()?; + + Ok(()) +} diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs new file mode 100644 index 00000000000..e8bfe362920 --- /dev/null +++ b/nexus/src/app/volume.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Volumes + +use crate::app::sagas; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use std::sync::Arc; +use uuid::Uuid; + +impl super::Nexus { + /// Kick off a saga to delete a volume (and clean up any Crucible resources + /// as a result). Note that this does not unconditionally delete the volume + /// record: if the allocated Crucible regions associated with this volume + /// still have references, we cannot delete it, so it will be soft-deleted. + /// Only when all the associated resources have been cleaned up does Nexus + /// hard delete the volume record. + /// + /// Importantly, this should not be a sub-saga - whoever is calling this + /// should not block on cleaning up Crucible Resources, because the deletion + /// of a "disk" or "snapshot" could free up a *lot* of Crucible resources + /// and the user's query shouldn't wait on those DELETE calls. 
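+    ///
+    /// (Today the saga still runs through the normal saga executor; running
+    /// it in the background is the TODO noted below.)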
+ pub async fn volume_delete( + self: &Arc, + volume_id: Uuid, + ) -> DeleteResult { + let saga_params = sagas::volume_delete::Params { volume_id }; + + // TODO execute this in the background instead, not using the usual SEC + let saga_outputs = self + .execute_saga::(saga_params) + .await?; + + let volume_deleted = + saga_outputs.lookup_node_output::<()>("final_no_result").map_err( + |e| Error::InternalError { internal_message: e.to_string() }, + )?; + + Ok(volume_deleted) + } +} diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index 4a7a4f055e9..93bb030ee02 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -55,6 +55,7 @@ mod oximeter; mod project; mod rack; mod region; +mod region_snapshot; mod role; mod saga; mod service; @@ -69,6 +70,8 @@ mod volume; mod vpc; mod zpool; +pub use volume::CrucibleResources; + // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. const REGION_REDUNDANCY_THRESHOLD: usize = 3; diff --git a/nexus/src/db/datastore/region.rs b/nexus/src/db/datastore/region.rs index acf14c8a4b0..49e5605e898 100644 --- a/nexus/src/db/datastore/region.rs +++ b/nexus/src/db/datastore/region.rs @@ -327,10 +327,17 @@ impl DataStore { }) } - /// Deletes all regions backing a disk. + /// Deletes a set of regions. /// /// Also updates the storage usage on their corresponding datasets. - pub async fn regions_hard_delete(&self, volume_id: Uuid) -> DeleteResult { + pub async fn regions_hard_delete( + &self, + region_ids: Vec, + ) -> DeleteResult { + if region_ids.is_empty() { + return Ok(()); + } + #[derive(Debug, thiserror::Error)] enum RegionDeleteError { #[error("Numeric error: {0}")] @@ -345,7 +352,7 @@ impl DataStore { // Remove the regions, collecting datasets they're from. let datasets = diesel::delete(region_dsl::region) - .filter(region_dsl::volume_id.eq(volume_id)) + .filter(region_dsl::id.eq_any(region_ids)) .returning(region_dsl::dataset_id) .get_results::(conn)?; diff --git a/nexus/src/db/datastore/region_snapshot.rs b/nexus/src/db/datastore/region_snapshot.rs new file mode 100644 index 00000000000..dab3a90bcb7 --- /dev/null +++ b/nexus/src/db/datastore/region_snapshot.rs @@ -0,0 +1,51 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] methods on [`RegionSnapshot`]s. 
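+//!
+//! `region_snapshot_create` is written to be idempotent so that saga nodes
+//! can retry it safely.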
+ +use super::DataStore; +use crate::db; +use crate::db::error::public_error_from_diesel_pool; +use crate::db::error::ErrorHandler; +use crate::db::model::RegionSnapshot; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DeleteResult; +use uuid::Uuid; + +impl DataStore { + pub async fn region_snapshot_create( + &self, + region_snapshot: RegionSnapshot, + ) -> CreateResult<()> { + use db::schema::region_snapshot::dsl; + + diesel::insert_into(dsl::region_snapshot) + .values(region_snapshot.clone()) + .on_conflict_do_nothing() + .execute_async(self.pool()) + .await + .map(|_| ()) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + pub async fn region_snapshot_remove( + &self, + dataset_id: Uuid, + region_id: Uuid, + snapshot_id: Uuid, + ) -> DeleteResult { + use db::schema::region_snapshot::dsl; + + diesel::delete(dsl::region_snapshot) + .filter(dsl::dataset_id.eq(dataset_id)) + .filter(dsl::region_id.eq(region_id)) + .filter(dsl::snapshot_id.eq(snapshot_id)) + .execute_async(self.pool()) + .await + .map(|_rows_deleted| ()) + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } +} diff --git a/nexus/src/db/datastore/snapshot.rs b/nexus/src/db/datastore/snapshot.rs index 23c3f3d7453..ebe12b1665c 100644 --- a/nexus/src/db/datastore/snapshot.rs +++ b/nexus/src/db/datastore/snapshot.rs @@ -11,7 +11,6 @@ use crate::db; use crate::db::datastore::RunnableQuery; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; use crate::db::lookup::LookupPath; use crate::db::model::Generation; use crate::db::model::Name; @@ -19,7 +18,6 @@ use crate::db::model::Snapshot; use crate::db::model::SnapshotState; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -144,44 +142,23 @@ impl DataStore { // A snapshot can be deleted in any state. It's never attached to an // instance, and any disk launched from it will copy and modify the volume - // construction request it's based on. The associated volume can be also - // be deleted - this will not affect any active crucible connections - // because no actual resources are cleaned up here. - // - // TODO-correctness this will leak on-disk snapshots and currently - // running read-only downstairs, which will need to be cleaned up once - // all volumes that reference them are gone. + // construction request it's based on. let snapshot_id = authz_snapshot.id(); let gen = db_snapshot.gen; - let volume_id = db_snapshot.volume_id; - let volume_delete_query = self.volume_delete_query(volume_id); - - self.pool_authorized(&opctx) - .await? - .transaction_async(|conn| async move { - use db::schema::snapshot::dsl; - - diesel::update(dsl::snapshot) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::gen.eq(gen)) - .filter(dsl::id.eq(snapshot_id)) - .set(dsl::time_deleted.eq(now)) - .check_if_exists::(snapshot_id) - .execute_async(&conn) - .await?; - - volume_delete_query.execute_async(&conn).await?; + use db::schema::snapshot::dsl; - Ok(()) - }) + diesel::update(dsl::snapshot) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::gen.eq(gen)) + .filter(dsl::id.eq(snapshot_id)) + .set(dsl::time_deleted.eq(now)) + .check_if_exists::(snapshot_id) + .execute_async(self.pool_authorized(&opctx).await?) 
.await - .map_err(|e| match e { - TransactionError::CustomError(e) => e, - TransactionError::Pool(e) => { - public_error_from_diesel_pool(e, ErrorHandler::Server) - } + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) })?; Ok(snapshot_id) diff --git a/nexus/src/db/datastore/volume.rs b/nexus/src/db/datastore/volume.rs index 4909c500ca2..8be451592c0 100644 --- a/nexus/src/db/datastore/volume.rs +++ b/nexus/src/db/datastore/volume.rs @@ -8,59 +8,149 @@ use super::DataStore; use crate::db; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; +use crate::db::error::TransactionError; use crate::db::identity::Asset; +use crate::db::model::Dataset; +use crate::db::model::Region; +use crate::db::model::RegionSnapshot; use crate::db::model::Volume; +use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::OptionalExtension as DieselOptionalExtension; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; impl DataStore { pub async fn volume_create(&self, volume: Volume) -> CreateResult { use db::schema::volume::dsl; - diesel::insert_into(dsl::volume) - .values(volume.clone()) - .on_conflict(dsl::id) - .do_nothing() - .returning(Volume::as_returning()) - .get_result_async(self.pool()) + #[derive(Debug, thiserror::Error)] + enum VolumeCreationError { + #[error("Error from Volume creation: {0}")] + Public(Error), + + #[error("Serde error during Volume creation: {0}")] + SerdeError(#[from] serde_json::Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction(move |conn| { + let maybe_volume: Option = dsl::volume + .filter(dsl::id.eq(volume.id())) + .select(Volume::as_select()) + .first(conn) + .optional() + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + )) + })?; + + // If the volume existed already, return it and do not increase + // usage counts. + if let Some(volume) = maybe_volume { + return Ok(volume); + } + + // TODO do we need on_conflict do_nothing here? if the transaction + // model is read-committed, the SELECT above could return nothing, + // and the INSERT here could still result in a conflict. + // + // See also https://github.com/oxidecomputer/omicron/issues/1168 + let volume: Volume = diesel::insert_into(dsl::volume) + .values(volume.clone()) + .on_conflict(dsl::id) + .do_nothing() + .returning(Volume::as_returning()) + .get_result(conn) + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Conflict( + ResourceType::Volume, + volume.id().to_string().as_str(), + ), + ), + )) + })?; + + // Increase the usage count for Crucible resources according to the + // contents of the volume. + + // Grab all the targets that the volume construction request references. 
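+                // Only read-only targets count here: for a region snapshot,
+                // `snapshot_addr` is the target string that volume
+                // construction requests refer to.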
+ let crucible_targets = { + let vcr: VolumeConstructionRequest = serde_json::from_str( + &volume.data(), + ) + .map_err(|e: serde_json::Error| { + TxnError::CustomError(VolumeCreationError::SerdeError( + e, + )) + })?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume( + &vcr, + &mut crucible_targets, + ); + crucible_targets + }; + + // Increase the number of uses for each referenced region snapshot. + use db::schema::region_snapshot::dsl as rs_dsl; + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(rs_dsl::region_snapshot) + .filter( + rs_dsl::snapshot_addr.eq(read_only_target.clone()), + ) + .set( + rs_dsl::volume_references + .eq(rs_dsl::volume_references + 1), + ) + .execute(conn) + .map_err(|e| { + TxnError::CustomError(VolumeCreationError::Public( + public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + )) + })?; + } + + Ok(volume) + }) .await - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::Conflict( - ResourceType::Volume, - volume.id().to_string().as_str(), - ), - ) + .map_err(|e| match e { + TxnError::CustomError(VolumeCreationError::Public(e)) => e, + + _ => { + Error::internal_error(&format!("Transaction error: {}", e)) + } }) } - pub fn volume_delete_query( - &self, - volume_id: Uuid, - ) -> impl diesel::query_builder::QueryFragment - + RunQueryDsl - + diesel::query_builder::QueryId - where - T: Connection, - { + pub async fn volume_hard_delete(&self, volume_id: Uuid) -> DeleteResult { use db::schema::volume::dsl; - let now = Utc::now(); - diesel::update(dsl::volume) + diesel::delete(dsl::volume) .filter(dsl::id.eq(volume_id)) - .set(dsl::time_deleted.eq(now)) - } - - pub async fn volume_delete(&self, volume_id: Uuid) -> DeleteResult { - self.volume_delete_query(volume_id) .execute_async(self.pool()) .await .map_err(|e| { @@ -86,4 +176,337 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + + /// Find regions for deleted volumes that do not have associated region + /// snapshots. + pub async fn find_deleted_volume_regions( + &self, + ) -> ListResultVec<(Dataset, Region, Volume)> { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl as region_dsl; + use db::schema::region_snapshot::dsl; + use db::schema::volume::dsl as volume_dsl; + + // Find all regions and datasets + region_dsl::region + .inner_join( + volume_dsl::volume.on(region_dsl::volume_id.eq(volume_dsl::id)), + ) + .inner_join( + dataset_dsl::dataset + .on(region_dsl::dataset_id.eq(dataset_dsl::id)), + ) + // where there either are no region snapshots, or the region + // snapshot volume references have gone to zero + .left_join( + dsl::region_snapshot.on(dsl::region_id + .eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id))), + ) + .filter( + dsl::volume_references + .eq(0) + .or(dsl::volume_references.is_null()), + ) + // where the volume has already been soft-deleted + .filter(volume_dsl::time_deleted.is_not_null()) + // and return them (along with the volume so it can be hard deleted) + .select(( + Dataset::as_select(), + Region::as_select(), + Volume::as_select(), + )) + .load_async(self.pool()) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + /// Decrease the usage count for Crucible resources according to the + /// contents of the volume. Call this when deleting a volume (but before the + /// volume record has been hard deleted). 
+ /// + /// Returns a list of Crucible resources to clean up, and soft-deletes the + /// volume. Note this function must be idempotent, it is called from a saga + /// node. + pub async fn decrease_crucible_resource_count_and_soft_delete_volume( + &self, + volume_id: Uuid, + ) -> Result { + #[derive(Debug, thiserror::Error)] + enum DecreaseCrucibleResourcesError { + #[error("Error during decrease Crucible resources: {0}")] + DieselError(#[from] diesel::result::Error), + + #[error("Serde error during decrease Crucible resources: {0}")] + SerdeError(#[from] serde_json::Error), + } + type TxnError = TransactionError; + + // In a transaction: + // + // 1. decrease the number of references for each region snapshot that + // this Volume references + // 2. soft-delete the volume + // 3. record the resources to clean up + // + // Step 3 is important because this function is called from a saga node. + // If saga execution crashes after steps 1 and 2, but before serializing + // the resources to be cleaned up as part of the saga node context, then + // that list of resources will be lost. + // + // We also have to guard against the case where this function is called + // multiple times, and that is done by soft-deleting the volume during + // the transaction, and returning the previously serialized list of + // resources to clean up if a soft-delete has already occurred. + // + // TODO it would be nice to make this transaction_async, but I couldn't + // get the async optional extension to work. + self.pool() + .transaction(move |conn| { + // Grab the volume in question. If the volume record was already + // hard-deleted, assume clean-up has occurred and return an empty + // CrucibleResources. If the volume record was soft-deleted, then + // return the serialized CrucibleResources. + let volume = { + use db::schema::volume::dsl; + + let volume = dsl::volume + .filter(dsl::id.eq(volume_id)) + .select(Volume::as_select()) + .get_result(conn) + .optional()?; + + let volume = if let Some(v) = volume { + v + } else { + // the volume was hard-deleted, return an empty + // CrucibleResources + return Ok(CrucibleResources::V1( + CrucibleResourcesV1::default(), + )); + }; + + if volume.time_deleted.is_none() { + // a volume record exists, and was not deleted - this is the + // first time through this transaction for a certain volume + // id. Get the volume for use in the transaction. + volume + } else { + // this volume was soft deleted - this is a repeat time + // through this transaction. + + if let Some(resources_to_clean_up) = + volume.resources_to_clean_up + { + // return the serialized CrucibleResources + return serde_json::from_str( + &resources_to_clean_up, + ) + .map_err(|e| { + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError( + e, + ), + ) + }); + } else { + // If no CrucibleResources struct was serialized, that's + // definitely a bug of some sort - the soft-delete below + // sets time_deleted at the same time as + // resources_to_clean_up! But, instead of a panic here, + // just return an empty CrucibleResources. + return Ok(CrucibleResources::V1( + CrucibleResourcesV1::default(), + )); + } + } + }; + + // Grab all the targets that the volume construction request references. 
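+                // These are the same read-only targets that were counted in
+                // `volume_create`; each one's `volume_references` is
+                // decremented below.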
+ let crucible_targets = { + let vcr: VolumeConstructionRequest = + serde_json::from_str(&volume.data()).map_err(|e| { + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError(e), + ) + })?; + + let mut crucible_targets = CrucibleTargets::default(); + resources_associated_with_volume( + &vcr, + &mut crucible_targets, + ); + crucible_targets + }; + + // Decrease the number of uses for each referenced region snapshot. + use db::schema::region_snapshot::dsl; + + for read_only_target in &crucible_targets.read_only_targets { + diesel::update(dsl::region_snapshot) + .filter(dsl::snapshot_addr.eq(read_only_target.clone())) + .set( + dsl::volume_references + .eq(dsl::volume_references - 1), + ) + .execute(conn)?; + } + + // Return what results can be cleaned up + let result = CrucibleResources::V1(CrucibleResourcesV1 { + // The only use of a read-write region will be at the top level of a + // Volume. These are not shared, but if any snapshots are taken this + // will prevent deletion of the region. Filter out any regions that + // have associated snapshots. + datasets_and_regions: { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::region::dsl as region_dsl; + + // Return all regions for this volume_id, where there either are + // no region_snapshots, or region_snapshots.volume_references = + // 0. + region_dsl::region + .filter(region_dsl::volume_id.eq(volume_id)) + .inner_join( + dataset_dsl::dataset + .on(region_dsl::dataset_id + .eq(dataset_dsl::id)), + ) + .left_join( + dsl::region_snapshot.on(dsl::region_id + .eq(region_dsl::id) + .and(dsl::dataset_id.eq(dataset_dsl::id))), + ) + .filter( + dsl::volume_references + .eq(0) + .or(dsl::volume_references.is_null()), + ) + .select((Dataset::as_select(), Region::as_select())) + .get_results::<(Dataset, Region)>(conn)? + }, + + // A volume (for a disk or snapshot) may reference another nested + // volume as a read-only parent, and this may be arbitrarily deep. + // After decrementing volume_references above, get all region + // snapshot records where the volume_references has gone to 0. + // Consumers of this struct will be responsible for deleting the + // read-only downstairs running for the snapshot and the snapshot + // itself. + datasets_and_snapshots: { + use db::schema::dataset::dsl as dataset_dsl; + + dsl::region_snapshot + .filter(dsl::volume_references.eq(0)) + .inner_join( + dataset_dsl::dataset + .on(dsl::dataset_id.eq(dataset_dsl::id)), + ) + .select(( + Dataset::as_select(), + RegionSnapshot::as_select(), + )) + .get_results::<(Dataset, RegionSnapshot)>(conn)? + }, + }); + + // Soft delete this volume, and serialize the resources that are to + // be cleaned up. 
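+                // Storing the serialized `CrucibleResources` next to
+                // `time_deleted` is what makes a replay of this transaction
+                // return the stored value instead of decrementing the
+                // reference counts again. The stored JSON looks roughly like
+                // (illustrative only):
+                //   {"V1":{"datasets_and_regions":[...],"datasets_and_snapshots":[...]}}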
+ use db::schema::volume::dsl as volume_dsl; + + let now = Utc::now(); + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(volume_id)) + .set(( + volume_dsl::time_deleted.eq(now), + volume_dsl::resources_to_clean_up.eq( + serde_json::to_string(&result).map_err(|e| { + TxnError::CustomError( + DecreaseCrucibleResourcesError::SerdeError( + e, + ), + ) + })?, + ), + )) + .execute(conn)?; + + Ok(result) + }) + .await + .map_err(|e| match e { + TxnError::CustomError( + DecreaseCrucibleResourcesError::DieselError(e), + ) => public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + + _ => { + Error::internal_error(&format!("Transaction error: {}", e)) + } + }) + } +} + +#[derive(Default)] +struct CrucibleTargets { + read_only_targets: Vec, +} + +// Serialize this enum into the `resources_to_clean_up` column to handle +// different versions over time. +#[derive(Debug, Serialize, Deserialize)] +pub enum CrucibleResources { + V1(CrucibleResourcesV1), +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct CrucibleResourcesV1 { + pub datasets_and_regions: Vec<(Dataset, Region)>, + pub datasets_and_snapshots: Vec<(Dataset, RegionSnapshot)>, +} + +/// Return the targets from a VolumeConstructionRequest. +/// +/// The targets of a volume construction request map to resources. +fn resources_associated_with_volume( + vcr: &VolumeConstructionRequest, + crucible_targets: &mut CrucibleTargets, +) { + match vcr { + VolumeConstructionRequest::Volume { + id: _, + block_size: _, + sub_volumes, + read_only_parent, + } => { + for sub_volume in sub_volumes { + resources_associated_with_volume(sub_volume, crucible_targets); + } + + if let Some(read_only_parent) = read_only_parent { + resources_associated_with_volume( + read_only_parent, + crucible_targets, + ); + } + } + + VolumeConstructionRequest::Url { id: _, block_size: _, url: _ } => { + // no action required + } + + VolumeConstructionRequest::Region { block_size: _, opts, gen: _ } => { + for target in &opts.target { + if opts.read_only { + crucible_targets.read_only_targets.push(target.clone()); + } + } + } + + VolumeConstructionRequest::File { id: _, block_size: _, path: _ } => { + // no action required + } + } } diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 6fe6f6a9e28..3abb881080b 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -492,4 +492,21 @@ impl DiskTest { } } } + + /// Returns true if all Crucible resources were cleaned up, false otherwise. 
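+    ///
+    /// This checks every simulated Crucible dataset on every zpool created by
+    /// this `DiskTest` for leftover resources.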
+ pub async fn crucible_resources_deleted(&self) -> bool { + for zpool in &self.zpools { + for dataset in &zpool.datasets { + let crucible = self + .sled_agent + .get_crucible_dataset(zpool.id, dataset.id) + .await; + if !crucible.is_empty().await { + return false; + } + } + } + + true + } } diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index ff778156da5..6d062e034aa 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -31,6 +31,7 @@ mod unauthorized; mod unauthorized_coverage; mod updates; mod users_builtin; +mod volume_management; mod vpc_firewall; mod vpc_routers; mod vpc_subnets; diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 4f8e80a8f49..a5e3de17d3f 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -704,3 +704,25 @@ async fn test_create_snapshot_record_idempotent( .await .unwrap(); } + +#[nexus_test] +async fn test_region_snapshot_create_idempotent( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx.nexus; + let datastore = nexus.datastore(); + + let region_snapshot = db::model::RegionSnapshot { + dataset_id: Uuid::new_v4(), + region_id: Uuid::new_v4(), + snapshot_id: Uuid::new_v4(), + + snapshot_addr: "[::]:12345".to_string(), + + volume_references: 1, + }; + + datastore.region_snapshot_create(region_snapshot.clone()).await.unwrap(); + + datastore.region_snapshot_create(region_snapshot).await.unwrap(); +} diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs new file mode 100644 index 00000000000..9eab3493c1e --- /dev/null +++ b/nexus/tests/integration_tests/volume_management.rs @@ -0,0 +1,1224 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests that Nexus properly manages and cleans up Crucible resources +//! 
associated with Volumes + +use dropshot::test_util::ClientTestContext; +use http::method::Method; +use http::StatusCode; +use nexus_test_utils::http_testing::AuthnMode; +use nexus_test_utils::http_testing::NexusRequest; +use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_ip_pool; +use nexus_test_utils::resource_helpers::create_organization; +use nexus_test_utils::resource_helpers::create_project; +use nexus_test_utils::resource_helpers::object_create; +use nexus_test_utils::resource_helpers::DiskTest; +use nexus_test_utils::ControlPlaneTestContext; +use nexus_test_utils_macros::nexus_test; +use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Disk; +use omicron_common::api::external::IdentityMetadataCreateParams; +use omicron_common::api::external::Name; +use omicron_nexus::external_api::params; +use omicron_nexus::external_api::views; +use rand::prelude::SliceRandom; +use rand::{rngs::StdRng, SeedableRng}; +use sled_agent_client::types::VolumeConstructionRequest; +use uuid::Uuid; + +use httptest::{matchers::*, responders::*, Expectation, ServerBuilder}; + +const ORG_NAME: &str = "test-org"; +const PROJECT_NAME: &str = "springfield-squidport-disks"; + +fn get_project_url() -> String { + format!("/organizations/{}/projects/{}", ORG_NAME, PROJECT_NAME) +} + +fn get_disks_url() -> String { + format!("{}/disks", get_project_url()) +} + +async fn create_org_and_project(client: &ClientTestContext) -> Uuid { + create_organization(&client, ORG_NAME).await; + let project = create_project(client, ORG_NAME, PROJECT_NAME).await; + project.identity.id +} + +#[nexus_test] +async fn test_snapshot_then_delete_disk(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus does not delete a region if there's a snapshot of that + // region: + // + // 1. Create a disk + // 2. Create a snapshot of that disk (creating running snapshots) + // 3. Delete the disk + // 4. Delete the snapshot + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) 
+ .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // The Crucible regions and snapshot still remains + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // The Crucible snapshot still remains + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the snapshot + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_delete_snapshot_then_disk(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus cleans up resources properly: + // + // 1. Create a disk + // 2. Create a snapshot of that disk (creating running snapshots) + // 3. Delete the snapshot + // 4. 
Delete the disk + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // The Crucible regions and snapshot still remain + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the snapshot + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), "a-snapshot"); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // The Crucible regions still remain + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_snapshots(cptestctx: &ControlPlaneTestContext) { + // Test that Nexus cleans up resources properly: + // + // 1. Create a disk + // 2. Create multiple snapshots of that disk (creating running snapshots) + // 3. 
Delete the disk + // 4. Delete the snapshots + + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot requests + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + for i in 0..4 { + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: format!("a-snapshot-{}", i).parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + } + + // The Crucible regions and snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Delete the snapshots + for i in 0..4 { + // The Crucible snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + + let snapshot_url = + format!("{}/snapshots/a-snapshot-{}", get_project_url(), i); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_snapshot_prevents_other_disk( + cptestctx: &ControlPlaneTestContext, +) { + // Test that region remains if there is a snapshot, 
preventing further + // allocation. + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let server = ServerBuilder::new().run().unwrap(); + server.expect( + Expectation::matching(request::method_path("HEAD", "/image.raw")) + .times(1..) + .respond_with( + status_code(200).append_header( + "Content-Length", + format!("{}", 4096 * 1000), + ), + ), + ); + + let image_create_params = params::GlobalImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::Url { + url: server.url("/image.raw").to_string(), + }, + distribution: params::Distribution { + name: "alpine".parse().unwrap(), + version: "edge".into(), + }, + block_size: params::BlockSize::try_from(512).unwrap(), + }; + + let global_image: views::GlobalImage = + NexusRequest::objects_post(client, "/images", &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a disk from this image + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + &params::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "a-snapshot".parse().unwrap(), + description: "a snapshot!".to_string(), + }, + disk: base_disk_name.clone(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); + + // The Crucible regions and snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the disk + let disk_url = format!("{}/{}", disks_url, base_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // The Crucible snapshots still remain + assert!(!disk_test.crucible_resources_deleted().await); + + // Attempt disk allocation, which will fail - the presence of the snapshot + // means the region wasn't deleted.
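+ // (Added note: the working assumption is that the 10 GiB request below + // cannot be satisfied while the test zpools still carry the regions backing + // the snapshot, which is why a SERVICE_UNAVAILABLE status is expected.)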
+ let disk_size = ByteCount::try_from(10u64 * 1024 * 1024 * 1024).unwrap(); + let next_disk_name: Name = "next-disk".parse().unwrap(); + let next_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: next_disk_name.clone(), + description: String::from("will fail"), + }, + disk_source: params::DiskSource::GlobalImage { + image_id: global_image.identity.id, + }, + size: disk_size, + }; + + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&next_disk)) + .expect_status(Some(StatusCode::SERVICE_UNAVAILABLE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap(); + + // Delete the snapshot + let snapshot_url = format!("{}/snapshots/a-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // All resources were deleted + assert!(disk_test.crucible_resources_deleted().await); + + // Disk allocation will work now + let _next_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&next_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Delete it + let disk_url = format!("{}/{}", disks_url, next_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_disks_multiple_snapshots_order_1( + cptestctx: &ControlPlaneTestContext, +) { + // Test multiple disks with multiple snapshots + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let first_disk_name: Name = "first-disk".parse().unwrap(); + let first_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: first_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let first_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&first_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let first_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "first-snapshot".parse().unwrap(), + description: "first snapshot!".to_string(), + }, + disk: first_disk_name.clone(), + }, + ) + .await; + + assert_eq!(first_snapshot.disk_id, first_disk.identity.id); + assert_eq!(first_snapshot.size, first_disk.size); + + // Create another blank disk + let second_disk_name: Name = "second-disk".parse().unwrap(); + let second_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: second_disk_name.clone(), + description: 
String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let second_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&second_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let second_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "second-snapshot".parse().unwrap(), + description: "second snapshot!".to_string(), + }, + disk: second_disk_name.clone(), + }, + ) + .await; + + assert_eq!(second_snapshot.disk_id, second_disk.identity.id); + assert_eq!(second_snapshot.size, second_disk.size); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the first disk + let disk_url = format!("{}/{}", disks_url, first_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the second snapshot + let snapshot_url = + format!("{}/snapshots/second-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the second disk + let disk_url = format!("{}/{}", disks_url, second_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the first snapshot + let snapshot_url = + format!("{}/snapshots/first-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_disks_multiple_snapshots_order_2( + cptestctx: &ControlPlaneTestContext, +) { + // Test multiple disks with multiple snapshots, varying the delete order + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let first_disk_name: Name = "first-disk".parse().unwrap(); + let first_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: first_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let first_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&first_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + 
+ let first_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "first-snapshot".parse().unwrap(), + description: "first snapshot!".to_string(), + }, + disk: first_disk_name.clone(), + }, + ) + .await; + + assert_eq!(first_snapshot.disk_id, first_disk.identity.id); + assert_eq!(first_snapshot.size, first_disk.size); + + // Create another blank disk + let second_disk_name: Name = "second-disk".parse().unwrap(); + let second_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: second_disk_name.clone(), + description: String::from("disk 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let second_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&second_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let second_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "second-snapshot".parse().unwrap(), + description: "second snapshot!".to_string(), + }, + disk: second_disk_name.clone(), + }, + ) + .await; + + assert_eq!(second_snapshot.disk_id, second_disk.identity.id); + assert_eq!(second_snapshot.size, second_disk.size); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the first disk + let disk_url = format!("{}/{}", disks_url, first_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the second disk + let disk_url = format!("{}/{}", disks_url, second_disk_name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the second snapshot + let snapshot_url = + format!("{}/snapshots/second-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + assert!(!disk_test.crucible_resources_deleted().await); + + // Delete the first snapshot + let snapshot_url = + format!("{}/snapshots/first-snapshot", get_project_url()); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +async fn prepare_for_test_multiple_layers_of_snapshots( + client: &ClientTestContext, +) { + let disks_url = get_disks_url(); + + // Create a blank disk + let disk_size = ByteCount::try_from(2u64 * 1024 * 1024 * 1024).unwrap(); + let layer_1_disk_name: Name = "layer-1-disk".parse().unwrap(); + let layer_1_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_1_disk_name.clone(), + description: String::from("layer 1"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: disk_size, + }; + + let layer_1_disk: Disk = NexusRequest::new( + 
RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_1_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request + let snapshots_url = format!( + "/organizations/{}/projects/{}/snapshots", + ORG_NAME, PROJECT_NAME + ); + + let layer_1_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-1-snapshot".parse().unwrap(), + description: "layer 1 snapshot!".to_string(), + }, + disk: layer_1_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_1_snapshot.disk_id, layer_1_disk.identity.id); + assert_eq!(layer_1_snapshot.size, layer_1_disk.size); + + // Create a layer 2 disk out of the layer 1 snapshot + let layer_2_disk_name: Name = "layer-2-disk".parse().unwrap(); + let layer_2_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_2_disk_name.clone(), + description: String::from("layer 2"), + }, + disk_source: params::DiskSource::Snapshot { + snapshot_id: layer_1_snapshot.identity.id, + }, + size: disk_size, + }; + + let layer_2_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_2_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the second disk + let layer_2_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-2-snapshot".parse().unwrap(), + description: "layer 2 snapshot!".to_string(), + }, + disk: layer_2_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_2_snapshot.disk_id, layer_2_disk.identity.id); + assert_eq!(layer_2_snapshot.size, layer_2_disk.size); + + // Create a layer 3 disk out of the layer 2 snapshot + let layer_3_disk_name: Name = "layer-3-disk".parse().unwrap(); + let layer_3_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: layer_3_disk_name.clone(), + description: String::from("layer 3"), + }, + disk_source: params::DiskSource::Snapshot { + snapshot_id: layer_2_snapshot.identity.id, + }, + size: disk_size, + }; + + let layer_3_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&layer_3_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Issue snapshot request for the third disk + let layer_3_snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: "layer-3-snapshot".parse().unwrap(), + description: "layer 3 snapshot!".to_string(), + }, + disk: layer_3_disk_name.clone(), + }, + ) + .await; + + assert_eq!(layer_3_snapshot.disk_id, layer_3_disk.identity.id); + assert_eq!(layer_3_snapshot.size, layer_3_disk.size); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_delete_all_disks_first( + cptestctx: &ControlPlaneTestContext, +) { + // Test layering read-only snapshots through multiple disks: + // delete all disks, then delete all snapshots + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + 
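+ // The helper called below builds a three-layer chain (layer-1-disk -> + // layer-1-snapshot -> layer-2-disk -> layer-2-snapshot -> layer-3-disk -> + // layer-3-snapshot), each disk using the previous snapshot as its + // read-only parent.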
create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + // Delete the disks + for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + assert!(!disk_test.crucible_resources_deleted().await); + + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + // Delete the snapshots + for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + assert!(!disk_test.crucible_resources_deleted().await); + + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_delete_all_snapshots_first( + cptestctx: &ControlPlaneTestContext, +) { + // Test layering read-only snapshots through multiple disks: + // delete all snapshots, then delete all disks + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + // Delete the snapshots + for name in ["layer-1-snapshot", "layer-2-snapshot", "layer-3-snapshot"] { + assert!(!disk_test.crucible_resources_deleted().await); + + let snapshot_url = format!("{}/snapshots/{}", get_project_url(), name); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + + // Delete the disks + for name in ["layer-1-disk", "layer-2-disk", "layer-3-disk"] { + assert!(!disk_test.crucible_resources_deleted().await); + + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +#[nexus_test] +async fn test_multiple_layers_of_snapshots_random_delete_order( + cptestctx: &ControlPlaneTestContext, +) { + // Test layering read-only snapshots through multiple disks: + // delete snapshots and disks in a random order + let client = &cptestctx.external_client; + let disk_test = DiskTest::new(&cptestctx).await; + create_ip_pool(&client, "p0", None, None).await; + create_org_and_project(client).await; + + prepare_for_test_multiple_layers_of_snapshots(&client).await; + + #[derive(Debug)] + enum DeleteObject { + Disk(String), + Snapshot(String), + } + + let objects = { + let mut objects = vec![ + DeleteObject::Disk("layer-1-disk".to_string()), + DeleteObject::Disk("layer-2-disk".to_string()), + DeleteObject::Disk("layer-3-disk".to_string()), + DeleteObject::Snapshot("layer-1-snapshot".to_string()), + DeleteObject::Snapshot("layer-2-snapshot".to_string()), + DeleteObject::Snapshot("layer-3-snapshot".to_string()), + ]; + + let mut rng = StdRng::from_entropy(); + objects.shuffle(&mut rng); + + objects + }; + + // If this test fails, this should print out the order that caused the + // failure. 
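+ // (Added note: StdRng::from_entropy() is unseeded, so a failing order will + // not repeat across runs; substituting a fixed seed, e.g. the illustrative + // StdRng::seed_from_u64(0), would make the shuffle deterministic.)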
+ dbg!(&objects); + + for object in &objects { + assert!(!disk_test.crucible_resources_deleted().await); + + match object { + DeleteObject::Disk(name) => { + let disk_url = format!("{}/{}", get_disks_url(), name); + NexusRequest::object_delete(client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete disk"); + } + + DeleteObject::Snapshot(name) => { + let snapshot_url = + format!("{}/snapshots/{}", get_project_url(), name); + NexusRequest::object_delete(client, &snapshot_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete snapshot"); + } + } + } + + // Assert everything was cleaned up + assert!(disk_test.crucible_resources_deleted().await); +} + +// volume_delete saga node idempotency tests + +#[nexus_test] +async fn test_volume_hard_delete_idempotent( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx.nexus; + let datastore = nexus.datastore(); + + let volume_id = Uuid::new_v4(); + datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::File { + id: volume_id, + block_size: 512, + path: "/lol".to_string(), + }) + .unwrap(), + )) + .await + .unwrap(); + + datastore.volume_hard_delete(volume_id).await.unwrap(); + datastore.volume_hard_delete(volume_id).await.unwrap(); +} diff --git a/sled-agent/src/sim/http_entrypoints_storage.rs b/sled-agent/src/sim/http_entrypoints_storage.rs index 40813a80a17..fcdd19be70a 100644 --- a/sled-agent/src/sim/http_entrypoints_storage.rs +++ b/sled-agent/src/sim/http_entrypoints_storage.rs @@ -89,6 +89,7 @@ async fn region_get( ) -> Result, HttpError> { let id = path.into_inner().id; let crucible = rc.context(); + match crucible.get(id).await { Some(region) => Ok(HttpResponseOk(region)), None => { @@ -108,15 +109,11 @@ async fn region_delete( let id = path.into_inner().id; let crucible = rc.context(); - // Can't delete a ZFS dataset if there are snapshots - if !crucible.snapshots_for_region(&id).await.is_empty() { - return Err(HttpError::for_bad_request( - None, - "must delete snapshots first!".to_string(), - )); - } - - match crucible.delete(id).await { + match crucible + .delete(id) + .await + .map_err(|e| HttpError::for_bad_request(None, e.to_string()))? 
+ { Some(_) => Ok(HttpResponseDeleted()), None => { Err(HttpError::for_not_found(None, "Region not found".to_string())) diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index cbac12cc067..58c880eb20d 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -97,11 +97,25 @@ impl CrucibleDataInner { self.regions.get_mut(&id) } - fn delete(&mut self, id: RegionId) -> Option<Region> { + fn delete(&mut self, id: RegionId) -> Result<Option<Region>> { + // Can't delete a ZFS dataset if there are snapshots + if !self.snapshots_for_region(&id).is_empty() { + bail!( + "must delete snapshots {:?} first!", + self.snapshots_for_region(&id) + .into_iter() + .map(|s| s.name) + .collect::<Vec<String>>(), + ); + } + let id = Uuid::from_str(&id.0).unwrap(); - let mut region = self.regions.get_mut(&id)?; - region.state = State::Destroyed; - Some(region.clone()) + if let Some(mut region) = self.regions.get_mut(&id) { + region.state = State::Destroyed; + Ok(Some(region.clone())) + } else { + Ok(None) + } } fn create_snapshot( @@ -143,7 +157,8 @@ impl CrucibleDataInner { } fn delete_snapshot(&mut self, id: &RegionId, name: &str) -> Result<()> { - if !self.running_snapshots_for_id(id).is_empty() { + let running_snapshots_for_id = self.running_snapshots_for_id(id); + if running_snapshots_for_id.contains_key(name) { bail!("downstairs running for region {} snapshot {}", id.0, name,); } @@ -204,6 +219,22 @@ impl CrucibleDataInner { Ok(()) } + + /// Return true if there are no undeleted Crucible resources + pub fn is_empty(&self) -> bool { + let non_destroyed_regions = self + .regions + .values() + .filter(|r| r.state != State::Destroyed) + .count(); + + let snapshots = self.snapshots.values().flatten().count(); + + let running_snapshots = + self.running_snapshots.values().flat_map(|hm| hm.values()).count(); + + non_destroyed_regions == 0 && snapshots == 0 && running_snapshots == 0 + } } /// Represents a running Crucible Agent. Contains regions. @@ -232,7 +263,7 @@ impl CrucibleData { self.inner.lock().await.get(id) } - pub async fn delete(&self, id: RegionId) -> Option<Region> { + pub async fn delete(&self, id: RegionId) -> Result<Option<Region>> { + self.inner.lock().await.delete(id) } @@ -287,6 +318,10 @@ impl CrucibleData { ) -> Result<()> { self.inner.lock().await.delete_running_snapshot(id, name) } + + pub async fn is_empty(&self) -> bool { + self.inner.lock().await.is_empty() + } } /// A simulated Crucible Dataset.