Commit c13f1df

Cleanup Crucible resources with volume delete saga (#1706)
Add a volume delete saga, and call it whenever a volume needs to be deleted. Crucible resources now have accounting for their use, and when the number of references reaches zero, cleanup occurs.

As part of this accounting, add a table that stores created region snapshots, and make a record whenever a Crucible snapshot is taken of a region. Region snapshots are uniquely identified by (dataset_id, region_id, snapshot_id), and additionally contain a snapshot address for identification as part of a volume construction request.

All DELETE calls to the Crucible agent(s) are now performed only by the volume delete saga.

Add tests that verify Nexus properly accounts for snapshots: it should not delete a disk's corresponding region when the disk is deleted while a snapshot of it still exists, or while multiple disks reference a running snapshot.

A small change to the simulated Crucible agent is also included in this commit: logic was moved out of the HTTP endpoint functions into the Crucible struct. Logic doesn't belong in HTTP endpoint functions :)

Closes #1632
1 parent 24750d7 commit c13f1df
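
The accounting rule this commit introduces can be summarized in a small self-contained sketch (not code from the commit — the real logic lives in the datastore and the new volume delete saga, and integer tuples stand in for the UUID triples):

use std::collections::HashMap;

/// Stand-in for the region_snapshot composite primary key
/// (dataset_id, region_id, snapshot_id).
type SnapshotKey = (u64, u64, u64);

/// Decrement volume_references for every snapshot the deleted volume used,
/// and return the snapshots whose count reached zero: those are the only
/// ones the volume delete saga may tell the Crucible agents to delete.
fn release_volume_references(
    refcounts: &mut HashMap<SnapshotKey, i64>,
    used_by_volume: &[SnapshotKey],
) -> Vec<SnapshotKey> {
    let mut to_clean_up = Vec::new();
    for key in used_by_volume {
        if let Some(count) = refcounts.get_mut(key) {
            *count -= 1;
            if *count == 0 {
                to_clean_up.push(*key);
            }
        }
    }
    to_clean_up
}

fn main() {
    let snapshot = (1, 2, 3);
    // Two volumes reference the same running snapshot.
    let mut refcounts = HashMap::from([(snapshot, 2_i64)]);
    // Deleting the first volume must not trigger cleanup...
    assert!(release_volume_references(&mut refcounts, &[snapshot]).is_empty());
    // ...deleting the second one does.
    assert_eq!(release_volume_references(&mut refcounts, &[snapshot]), vec![snapshot]);
}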

24 files changed: +2507 additions, -359 deletions

common/src/sql/dbinit.sql

Lines changed: 52 additions & 1 deletion
@@ -229,6 +229,45 @@ CREATE INDEX on omicron.public.Region (
     dataset_id
 );

+/*
+ * A snapshot of a region, within a dataset.
+ */
+CREATE TABLE omicron.public.region_snapshot (
+    dataset_id UUID NOT NULL,
+    region_id UUID NOT NULL,
+
+    /* Associated higher level virtual snapshot */
+    snapshot_id UUID NOT NULL,
+
+    /*
+     * Target string, for identification as part of
+     * volume construction request(s)
+     */
+    snapshot_addr TEXT NOT NULL,
+
+    /* How many volumes reference this? */
+    volume_references INT8 NOT NULL,
+
+    PRIMARY KEY (dataset_id, region_id, snapshot_id)
+);
+
+/* Index for use during join with region table */
+CREATE INDEX on omicron.public.region_snapshot (
+    dataset_id, region_id
+);
+
+/*
+ * Index on volume_references and snapshot_addr for crucible
+ * resource accounting lookup
+ */
+CREATE INDEX on omicron.public.region_snapshot (
+    volume_references
+);
+
+CREATE INDEX on omicron.public.region_snapshot (
+    snapshot_addr
+);
+
 /*
  * A volume within Crucible
  */
@@ -247,7 +286,19 @@ CREATE TABLE omicron.public.volume (
      * consumed by some Upstairs code to perform the volume creation. The Rust
      * type of this column should be Crucible::VolumeConstructionRequest.
      */
-    data TEXT NOT NULL
+    data TEXT NOT NULL,
+
+    /*
+     * A JSON document describing what resources to clean up when deleting this
+     * volume. The Rust type of this column should be the CrucibleResources
+     * enum.
+     */
+    resources_to_clean_up TEXT
+);
+
+/* Quickly find deleted volumes */
+CREATE INDEX on omicron.public.volume (
+    time_deleted
 );

 /*
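
The new resources_to_clean_up column stores a serialized CrucibleResources value. A rough, self-contained sketch of that round trip (the variant layout here is an assumption for illustration only; the real enum is defined in Nexus and not shown on this page):

use serde::{Deserialize, Serialize};

// Assumed shape, for illustration only; the real CrucibleResources enum
// lives in Nexus.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
enum CrucibleResources {
    V1 { datasets_and_regions: Vec<(String, String)> },
}

fn main() -> Result<(), serde_json::Error> {
    let resources = CrucibleResources::V1 {
        datasets_and_regions: vec![("dataset-uuid".into(), "region-uuid".into())],
    };
    // This JSON string is what would be written into
    // volume.resources_to_clean_up alongside time_deleted.
    let json = serde_json::to_string(&resources)?;
    let roundtrip: CrucibleResources = serde_json::from_str(&json)?;
    assert_eq!(roundtrip, resources);
    Ok(())
}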

nexus/db-model/src/lib.rs

Lines changed: 2 additions & 0 deletions
@@ -45,6 +45,7 @@ mod project;
 pub mod queries;
 mod rack;
 mod region;
+mod region_snapshot;
 mod role_assignment;
 mod role_builtin;
 pub mod saga_types;
@@ -110,6 +111,7 @@ pub use producer_endpoint::*;
 pub use project::*;
 pub use rack::*;
 pub use region::*;
+pub use region_snapshot::*;
 pub use role_assignment::*;
 pub use role_builtin::*;
 pub use service::*;

nexus/db-model/src/region_snapshot.rs

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use crate::schema::region_snapshot;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+/// Database representation of a Region's snapshot.
+///
+/// A region snapshot represents a snapshot of a region, taken during the higher
+/// level virtual disk snapshot operation.
+#[derive(
+    Queryable,
+    Insertable,
+    Debug,
+    Clone,
+    Selectable,
+    Serialize,
+    Deserialize,
+    PartialEq,
+)]
+#[diesel(table_name = region_snapshot)]
+pub struct RegionSnapshot {
+    // unique identifier of this region snapshot
+    pub dataset_id: Uuid,
+    pub region_id: Uuid,
+    pub snapshot_id: Uuid,
+
+    // used for identifying volumes that reference this
+    pub snapshot_addr: String,
+
+    // how many volumes reference this?
+    pub volume_references: i64,
+}
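
A hedged usage sketch of this model — recording that a Crucible snapshot of a region was taken — re-declaring the table and struct locally so the sketch stands alone, and using a synchronous Diesel connection for brevity (Nexus itself goes through its async datastore layer; assumes Diesel 2 with the postgres and uuid features; starting volume_references at zero for a fresh snapshot is this sketch's assumption):

use diesel::pg::PgConnection;
use diesel::prelude::*;
use uuid::Uuid;

// Table definition copied from the schema.rs hunk in this commit.
diesel::table! {
    region_snapshot (dataset_id, region_id, snapshot_id) {
        dataset_id -> Uuid,
        region_id -> Uuid,
        snapshot_id -> Uuid,
        snapshot_addr -> Text,
        volume_references -> Int8,
    }
}

#[derive(Insertable)]
#[diesel(table_name = region_snapshot)]
struct RegionSnapshot {
    dataset_id: Uuid,
    region_id: Uuid,
    snapshot_id: Uuid,
    snapshot_addr: String,
    volume_references: i64,
}

// Insert one accounting row per (dataset, region) that backs the snapshot.
fn record_region_snapshot(
    conn: &mut PgConnection,
    rs: &RegionSnapshot,
) -> QueryResult<usize> {
    diesel::insert_into(region_snapshot::table).values(rs).execute(conn)
}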

nexus/db-model/src/schema.rs

Lines changed: 18 additions & 1 deletion
@@ -441,6 +441,16 @@ table! {
     }
 }

+table! {
+    region_snapshot (dataset_id, region_id, snapshot_id) {
+        dataset_id -> Uuid,
+        region_id -> Uuid,
+        snapshot_id -> Uuid,
+        snapshot_addr -> Text,
+        volume_references -> Int8,
+    }
+}
+
 table! {
     volume (id) {
         id -> Uuid,
@@ -450,7 +460,12 @@ table! {
         rcgen -> Int8,

         data -> Text,
-        /* TODO: some sort of refcount? */
+
+        /*
+         * During volume deletion, a serialized list of Crucible resources to
+         * clean up will be written here, along with setting time_deleted.
+         */
+        resources_to_clean_up -> Nullable<Text>,
     }
 }
@@ -622,6 +637,7 @@ allow_tables_to_appear_in_same_query!(
     project,
     rack,
     region,
+    region_snapshot,
     saga,
     saga_node_event,
     silo,
@@ -630,6 +646,7 @@
     service,
     sled,
     router_route,
+    volume,
     vpc,
     vpc_subnet,
     vpc_router,
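
Adding region_snapshot to allow_tables_to_appear_in_same_query! is what lets it be joined against region — the join that the (dataset_id, region_id) index in dbinit.sql is there to serve. A rough sketch of such a query, with both table! definitions trimmed to the columns it touches (the real region table has more columns; assumes Diesel 2 with the uuid feature):

use diesel::pg::PgConnection;
use diesel::prelude::*;
use uuid::Uuid;

diesel::table! {
    region (id) {
        id -> Uuid,
        dataset_id -> Uuid,
        volume_id -> Uuid,
    }
}

diesel::table! {
    region_snapshot (dataset_id, region_id, snapshot_id) {
        dataset_id -> Uuid,
        region_id -> Uuid,
        snapshot_id -> Uuid,
    }
}

diesel::allow_tables_to_appear_in_same_query!(region, region_snapshot);

// Which of a volume's regions still have snapshots? Any region returned
// here must not be deleted along with the volume.
fn regions_with_snapshots(
    conn: &mut PgConnection,
    volume_id: Uuid,
) -> QueryResult<Vec<Uuid>> {
    region::table
        .inner_join(
            region_snapshot::table.on(region_snapshot::dataset_id
                .eq(region::dataset_id)
                .and(region_snapshot::region_id.eq(region::id))),
        )
        .filter(region::volume_id.eq(volume_id))
        .select(region::id)
        .distinct()
        .load(conn)
}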

nexus/db-model/src/volume.rs

Lines changed: 4 additions & 1 deletion
@@ -24,11 +24,13 @@ use uuid::Uuid;
 pub struct Volume {
     #[diesel(embed)]
     identity: VolumeIdentity,
-    time_deleted: Option<DateTime<Utc>>,
+    pub time_deleted: Option<DateTime<Utc>>,

     rcgen: Generation,

     data: String,
+
+    pub resources_to_clean_up: Option<String>,
 }

 impl Volume {
@@ -38,6 +40,7 @@
             time_deleted: None,
             rcgen: Generation::new(),
             data,
+            resources_to_clean_up: None,
         }
     }
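
With time_deleted and resources_to_clean_up both writable, volume deletion can be a soft delete. A hedged sketch of that write — not the commit's actual datastore code, which does the CrucibleResources accounting as well (assumes Diesel 2 with the postgres, uuid, and chrono features):

use chrono::Utc;
use diesel::pg::PgConnection;
use diesel::prelude::*;
use uuid::Uuid;

diesel::table! {
    volume (id) {
        id -> Uuid,
        time_deleted -> Nullable<Timestamptz>,
        data -> Text,
        resources_to_clean_up -> Nullable<Text>,
    }
}

// Mark the volume deleted and record what to clean up, in one UPDATE.
// Filtering on time_deleted IS NULL makes a second delete a no-op.
fn soft_delete_volume(
    conn: &mut PgConnection,
    volume_id: Uuid,
    resources_json: String,
) -> QueryResult<usize> {
    diesel::update(
        volume::table
            .filter(volume::id.eq(volume_id))
            .filter(volume::time_deleted.is_null()),
    )
    .set((
        volume::time_deleted.eq(Utc::now()),
        volume::resources_to_clean_up.eq(resources_json),
    ))
    .execute(conn)
}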

nexus/src/app/disk.rs

Lines changed: 3 additions & 0 deletions
@@ -510,6 +510,9 @@ impl super::Nexus {
             .project_delete_snapshot(opctx, &authz_snapshot, &db_snapshot)
             .await?;

+        // Kick off volume deletion saga
+        self.volume_delete(db_snapshot.volume_id).await?;
+
         Ok(())
     }
 }

nexus/src/app/mod.rs

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ mod silo;
 mod sled;
 pub mod test_interfaces;
 mod update;
+mod volume;
 mod vpc;
 mod vpc_router;
 mod vpc_subnet;

nexus/src/app/sagas/disk_create.rs

Lines changed: 13 additions & 77 deletions
@@ -4,7 +4,7 @@

 use super::{
     ActionRegistry, NexusActionContext, NexusSaga, SagaInitError,
-    ACTION_GENERATE_ID,
+    ACTION_GENERATE_ID, MAX_CONCURRENT_REGION_REQUESTS,
 };
 use crate::app::sagas::NexusAction;
 use crate::context::OpContext;
@@ -51,16 +51,10 @@ lazy_static! {
         sdc_create_disk_record,
         sdc_create_disk_record_undo
     );
-    static ref REGIONS_ALLOC: NexusAction = ActionFunc::new_action(
-        "disk-create.regions-alloc",
-        sdc_alloc_regions,
-        sdc_alloc_regions_undo
-    );
-    static ref REGIONS_ENSURE: NexusAction = ActionFunc::new_action(
-        "disk-create.regions-ensure",
-        sdc_regions_ensure,
-        sdc_regions_ensure_undo
-    );
+    static ref REGIONS_ALLOC: NexusAction =
+        new_action_noop_undo("disk-create.regions-alloc", sdc_alloc_regions,);
+    static ref REGIONS_ENSURE: NexusAction =
+        new_action_noop_undo("disk-create.regions-ensure", sdc_regions_ensure,);
     static ref CREATE_VOLUME_RECORD: NexusAction = ActionFunc::new_action(
         "disk-create.create-volume-record",
         sdc_create_volume_record,
@@ -231,6 +225,7 @@
     let osagactx = sagactx.user_data();
     let params = sagactx.saga_params::<Params>()?;
    let volume_id = sagactx.lookup::<Uuid>("volume_id")?;
+
     // Ensure the disk is backed by appropriate regions.
     //
     // This allocates regions in the database, but the disk state is still
@@ -251,16 +246,7 @@
     Ok(datasets_and_regions)
 }

-async fn sdc_alloc_regions_undo(
-    sagactx: NexusActionContext,
-) -> Result<(), anyhow::Error> {
-    let osagactx = sagactx.user_data();
-
-    let volume_id = sagactx.lookup::<Uuid>("volume_id")?;
-    osagactx.datastore().regions_hard_delete(volume_id).await?;
-    Ok(())
-}
-
+/// Call out to Crucible agent and perform region creation.
 async fn ensure_region_in_dataset(
     log: &Logger,
     dataset: &db::model::Dataset,
@@ -317,10 +303,7 @@
     Ok(region.into_inner())
 }

-// Arbitrary limit on concurrency, for operations issued
-// on multiple regions within a disk at the same time.
-const MAX_CONCURRENT_REGION_REQUESTS: usize = 3;
-
+/// Call out to Crucible agent and perform region creation.
 async fn sdc_regions_ensure(
     sagactx: NexusActionContext,
 ) -> Result<String, ActionError> {
@@ -379,7 +362,7 @@

     let block_size = datasets_and_regions[0].1.block_size;

-    // If requested, back disk by image
+    // If a disk source was requested, set the read-only parent of this disk.
     let osagactx = sagactx.user_data();
     let params = sagactx.saga_params::<Params>()?;
     let log = osagactx.log();
@@ -497,7 +480,7 @@
         );
     }

-    // Store volume details in db
+    // Create volume construction request for this disk
     let mut rng = StdRng::from_entropy();
     let volume_construction_request = VolumeConstructionRequest::Volume {
         id: disk_id,
@@ -554,55 +537,6 @@
     Ok(volume_data)
 }

-pub(super) async fn delete_regions(
-    datasets_and_regions: Vec<(db::model::Dataset, db::model::Region)>,
-) -> Result<(), Error> {
-    let request_count = datasets_and_regions.len();
-    futures::stream::iter(datasets_and_regions)
-        .map(|(dataset, region)| async move {
-            let url = format!("http://{}", dataset.address());
-            let client = CrucibleAgentClient::new(&url);
-            let id = RegionId(region.id().to_string());
-            client.region_delete(&id).await.map_err(|e| match e {
-                crucible_agent_client::Error::ErrorResponse(rv) => {
-                    match rv.status() {
-                        http::StatusCode::SERVICE_UNAVAILABLE => {
-                            Error::unavail(&rv.message)
-                        }
-                        status if status.is_client_error() => {
-                            Error::invalid_request(&rv.message)
-                        }
-                        _ => Error::internal_error(&rv.message),
-                    }
-                }
-                _ => Error::internal_error(
-                    "unexpected failure during `region_delete`",
-                ),
-            })
-        })
-        // Execute the allocation requests concurrently.
-        .buffer_unordered(std::cmp::min(
-            request_count,
-            MAX_CONCURRENT_REGION_REQUESTS,
-        ))
-        .collect::<Vec<Result<_, _>>>()
-        .await
-        .into_iter()
-        .collect::<Result<Vec<_>, _>>()?;
-    Ok(())
-}
-
-async fn sdc_regions_ensure_undo(
-    sagactx: NexusActionContext,
-) -> Result<(), anyhow::Error> {
-    let datasets_and_regions = sagactx
-        .lookup::<Vec<(db::model::Dataset, db::model::Region)>>(
-            "datasets_and_regions",
-        )?;
-    delete_regions(datasets_and_regions).await?;
-    Ok(())
-}
-
 async fn sdc_create_volume_record(
     sagactx: NexusActionContext,
 ) -> Result<db::model::Volume, ActionError> {
@@ -628,7 +562,7 @@
     let osagactx = sagactx.user_data();

     let volume_id = sagactx.lookup::<Uuid>("volume_id")?;
-    osagactx.datastore().volume_delete(volume_id).await?;
+    osagactx.nexus().volume_delete(volume_id).await?;
     Ok(())
 }

@@ -647,6 +581,7 @@
         .lookup_for(authz::Action::Modify)
         .await
         .map_err(ActionError::action_failed)?;
+
     // TODO-security Review whether this can ever fail an authz check. We don't
     // want this to ever fail the authz check here -- if it did, we would have
     // wanted to catch that a lot sooner. It wouldn't make sense for it to fail
@@ -665,6 +600,7 @@
     )
     .await
     .map_err(ActionError::action_failed)?;
+
     Ok(())
 }
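
The removed delete_regions helper (region DELETE calls now belong to the volume delete saga) used futures' buffer_unordered to cap how many Crucible agent requests are in flight at once. A self-contained sketch of that throttling pattern, with a contrived stand-in for the agent call (assumes the futures and tokio crates):

use futures::stream::{self, StreamExt};

// Same arbitrary cap the saga code uses; now shared via the sagas module.
const MAX_CONCURRENT_REGION_REQUESTS: usize = 3;

/// Issue one request per region, at most MAX_CONCURRENT_REGION_REQUESTS in
/// flight at a time, failing the whole batch if any single request fails.
async fn delete_all(region_ids: Vec<u32>) -> Result<(), String> {
    let request_count = region_ids.len();
    stream::iter(region_ids)
        .map(|id| async move {
            // Stand-in for a Crucible agent `region_delete` call.
            if id == 0 {
                Err(format!("delete failed for region {id}"))
            } else {
                Ok(id)
            }
        })
        .buffer_unordered(std::cmp::min(
            request_count,
            MAX_CONCURRENT_REGION_REQUESTS,
        ))
        .collect::<Vec<Result<u32, String>>>()
        .await
        .into_iter()
        .collect::<Result<Vec<u32>, String>>()?;
    Ok(())
}

#[tokio::main]
async fn main() {
    assert!(delete_all(vec![1, 2, 3, 4]).await.is_ok());
    assert!(delete_all(vec![1, 0, 3]).await.is_err());
}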
