Skip to content

Commit 58e8c67

Browse files
authored
Fix ensuring snapshot creation and deletion (#3947)
If `project_ensure_snapshot` is passed a snapshot where the project and name matches a record that exists already, but the ID is different, then .on_conflict((dsl::project_id, dsl::name)) .filter_target(dsl::time_deleted.is_null()) .do_update() .set(dsl::time_modified.eq(dsl::time_modified)), will incorrectly modify the existing record, leading callers of that function to erroneously believe they were the one to create that snapshot record. Change `project_ensure_snapshot` to check if an existing snapshot record has a matching project and name, and return ObjectAlreadyExists if so. It's not appropriate to delete a snapshot in any state: if it's in state Creating, then a `snapshot_create` saga is working on it and concurrently running `snapshot_delete` will cause a conflict. Add an argument to `project_delete_snapshot` that describes the states that a snapshot can be deleted in, depending on on the call site.
1 parent eee942f commit 58e8c67

File tree

4 files changed

+226
-31
lines changed

4 files changed

+226
-31
lines changed

nexus/db-queries/src/db/datastore/snapshot.rs

Lines changed: 148 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,14 @@ use crate::db::model::Snapshot;
1919
use crate::db::model::SnapshotState;
2020
use crate::db::pagination::paginated;
2121
use crate::db::update_and_check::UpdateAndCheck;
22+
use crate::db::TransactionError;
23+
use async_bb8_diesel::AsyncConnection;
2224
use async_bb8_diesel::AsyncRunQueryDsl;
25+
use async_bb8_diesel::ConnectionError;
2326
use chrono::Utc;
2427
use diesel::prelude::*;
28+
use diesel::result::Error as DieselError;
29+
use nexus_types::identity::Resource;
2530
use omicron_common::api::external::http_pagination::PaginatedBy;
2631
use omicron_common::api::external::CreateResult;
2732
use omicron_common::api::external::Error;
@@ -43,28 +48,127 @@ impl DataStore {
4348
let gen = snapshot.gen;
4449
opctx.authorize(authz::Action::CreateChild, authz_project).await?;
4550

46-
use db::schema::snapshot::dsl;
51+
#[derive(Debug, thiserror::Error)]
52+
pub enum CustomError {
53+
#[error("Resource already exists")]
54+
ResourceAlreadyExists,
55+
56+
#[error("saw AsyncInsertError")]
57+
InsertError(AsyncInsertError),
58+
}
59+
60+
type TxnError = TransactionError<CustomError>;
61+
62+
let snapshot_name = snapshot.name().to_string();
4763
let project_id = snapshot.project_id;
48-
let snapshot: Snapshot = Project::insert_resource(
49-
project_id,
50-
diesel::insert_into(dsl::snapshot)
51-
.values(snapshot)
52-
.on_conflict((dsl::project_id, dsl::name))
53-
.filter_target(dsl::time_deleted.is_null())
54-
.do_update()
55-
.set(dsl::time_modified.eq(dsl::time_modified)),
56-
)
57-
.insert_and_get_result_async(self.pool_authorized(opctx).await?)
58-
.await
59-
.map_err(|e| match e {
60-
AsyncInsertError::CollectionNotFound => Error::ObjectNotFound {
61-
type_name: ResourceType::Project,
62-
lookup_type: LookupType::ById(project_id),
63-
},
64-
AsyncInsertError::DatabaseError(e) => {
65-
public_error_from_diesel_pool(e, ErrorHandler::Server)
66-
}
67-
})?;
64+
65+
let snapshot: Snapshot = self
66+
.pool_authorized(opctx)
67+
.await?
68+
.transaction_async(|conn| async move {
69+
use db::schema::snapshot::dsl;
70+
71+
// If an undeleted snapshot exists in the database with the
72+
// same name and project but a different id to the snapshot
73+
// this function was passed as an argument, then return an
74+
// error here.
75+
//
76+
// As written below,
77+
//
78+
// .on_conflict((dsl::project_id, dsl::name))
79+
// .filter_target(dsl::time_deleted.is_null())
80+
// .do_update()
81+
// .set(dsl::time_modified.eq(dsl::time_modified))
82+
//
83+
// will set any existing record's `time_modified` if the
84+
// project id and name match, even if the snapshot ID does
85+
// not match. diesel supports adding a filter below like so
86+
// (marked with >>):
87+
//
88+
// .on_conflict((dsl::project_id, dsl::name))
89+
// .filter_target(dsl::time_deleted.is_null())
90+
// .do_update()
91+
// .set(dsl::time_modified.eq(dsl::time_modified))
92+
// >> .filter(dsl::id.eq(snapshot.id()))
93+
//
94+
// which will restrict the `insert_into`'s set so that it
95+
// only applies if the snapshot ID matches. But,
96+
// AsyncInsertError does not have a ObjectAlreadyExists
97+
// variant, so this will be returned as CollectionNotFound
98+
// due to the `insert_into` failing.
99+
//
100+
// If this function is passed a snapshot with an ID that
101+
// does not match, but a project and name that does, return
102+
// ObjectAlreadyExists here.
103+
104+
let existing_snapshot_id: Option<Uuid> = match dsl::snapshot
105+
.filter(dsl::time_deleted.is_null())
106+
.filter(dsl::name.eq(snapshot.name().to_string()))
107+
.filter(dsl::project_id.eq(snapshot.project_id))
108+
.select(dsl::id)
109+
.limit(1)
110+
.first_async(&conn)
111+
.await
112+
{
113+
Ok(v) => Ok(Some(v)),
114+
Err(ConnectionError::Query(DieselError::NotFound)) => {
115+
Ok(None)
116+
}
117+
Err(e) => Err(e),
118+
}?;
119+
120+
if let Some(existing_snapshot_id) = existing_snapshot_id {
121+
if existing_snapshot_id != snapshot.id() {
122+
return Err(TransactionError::CustomError(
123+
CustomError::ResourceAlreadyExists,
124+
));
125+
}
126+
}
127+
128+
Project::insert_resource(
129+
project_id,
130+
diesel::insert_into(dsl::snapshot)
131+
.values(snapshot)
132+
.on_conflict((dsl::project_id, dsl::name))
133+
.filter_target(dsl::time_deleted.is_null())
134+
.do_update()
135+
.set(dsl::time_modified.eq(dsl::time_modified)),
136+
)
137+
.insert_and_get_result_async(&conn)
138+
.await
139+
.map_err(|e| {
140+
TransactionError::CustomError(CustomError::InsertError(e))
141+
})
142+
})
143+
.await
144+
.map_err(|e: TxnError| match e {
145+
TxnError::CustomError(e) => match e {
146+
CustomError::ResourceAlreadyExists => {
147+
Error::ObjectAlreadyExists {
148+
type_name: ResourceType::Snapshot,
149+
object_name: snapshot_name,
150+
}
151+
}
152+
CustomError::InsertError(e) => match e {
153+
AsyncInsertError::CollectionNotFound => {
154+
Error::ObjectNotFound {
155+
type_name: ResourceType::Project,
156+
lookup_type: LookupType::ById(project_id),
157+
}
158+
}
159+
AsyncInsertError::DatabaseError(e) => {
160+
public_error_from_diesel_pool(
161+
e,
162+
ErrorHandler::Server,
163+
)
164+
}
165+
},
166+
},
167+
168+
TxnError::Pool(e) => {
169+
public_error_from_diesel_pool(e, ErrorHandler::Server)
170+
}
171+
})?;
68172

69173
bail_unless!(
70174
snapshot.state == SnapshotState::Creating,
@@ -141,32 +245,50 @@ impl DataStore {
141245
opctx: &OpContext,
142246
authz_snapshot: &authz::Snapshot,
143247
db_snapshot: &Snapshot,
248+
ok_to_delete_states: Vec<SnapshotState>,
144249
) -> Result<Uuid, Error> {
145250
opctx.authorize(authz::Action::Delete, authz_snapshot).await?;
146251

147252
let now = Utc::now();
148253

149-
// A snapshot can be deleted in any state. It's never attached to an
150-
// instance, and any disk launched from it will copy and modify the volume
151-
// construction request it's based on.
254+
// A snapshot can be deleted in states Ready and Faulted. It's never
255+
// attached to an instance, and any disk launched from it will copy and
256+
// modify the volume construction request it's based on. However, if its
257+
// state is Creating then the snapshot_create saga is currently running
258+
// and this delete action would disrupt that. If its in state Destroyed,
259+
// then it was already deleted.
152260

153261
let snapshot_id = authz_snapshot.id();
154262
let gen = db_snapshot.gen;
155263

156264
use db::schema::snapshot::dsl;
157265

158-
diesel::update(dsl::snapshot)
266+
let updated_rows = diesel::update(dsl::snapshot)
159267
.filter(dsl::time_deleted.is_null())
160268
.filter(dsl::gen.eq(gen))
161269
.filter(dsl::id.eq(snapshot_id))
162-
.set(dsl::time_deleted.eq(now))
270+
.filter(dsl::state.eq_any(ok_to_delete_states))
271+
.set((
272+
dsl::time_deleted.eq(now),
273+
dsl::state.eq(SnapshotState::Destroyed),
274+
))
163275
.check_if_exists::<Snapshot>(snapshot_id)
164276
.execute_async(self.pool_authorized(&opctx).await?)
165277
.await
166278
.map_err(|e| {
167279
public_error_from_diesel_pool(e, ErrorHandler::Server)
168280
})?;
169281

282+
if updated_rows == 0 {
283+
// Either:
284+
//
285+
// - the snapshot was already deleted
286+
// - the generation number changed
287+
// - the state of the snapshot isn't one of `ok_to_delete_states`
288+
289+
return Err(Error::invalid_request("snapshot cannot be deleted"));
290+
}
291+
170292
Ok(snapshot_id)
171293
}
172294
}

nexus/src/app/sagas/snapshot_create.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,16 @@ async fn ssc_create_snapshot_record_undo(
594594

595595
osagactx
596596
.datastore()
597-
.project_delete_snapshot(&opctx, &authz_snapshot, &db_snapshot)
597+
.project_delete_snapshot(
598+
&opctx,
599+
&authz_snapshot,
600+
&db_snapshot,
601+
vec![
602+
db::model::SnapshotState::Creating,
603+
db::model::SnapshotState::Ready,
604+
db::model::SnapshotState::Faulted,
605+
],
606+
)
598607
.await?;
599608

600609
Ok(())

nexus/src/app/sagas/snapshot_delete.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ async fn ssd_delete_snapshot_record(
116116
&opctx,
117117
&params.authz_snapshot,
118118
&params.snapshot,
119+
vec![
120+
db::model::SnapshotState::Ready,
121+
db::model::SnapshotState::Faulted,
122+
db::model::SnapshotState::Destroyed,
123+
],
119124
)
120125
.await
121126
.map_err(ActionError::action_failed)?;

nexus/tests/integration_tests/snapshots.rs

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,7 @@ async fn test_create_snapshot_record_idempotent(
953953
let datastore = nexus.datastore();
954954

955955
let project_id = create_org_and_project(&client).await;
956+
let disk_id = Uuid::new_v4();
956957

957958
let snapshot = db::model::Snapshot {
958959
identity: db::model::SnapshotIdentity {
@@ -968,7 +969,7 @@ async fn test_create_snapshot_record_idempotent(
968969
},
969970

970971
project_id,
971-
disk_id: Uuid::new_v4(),
972+
disk_id,
972973
volume_id: Uuid::new_v4(),
973974
destination_volume_id: Uuid::new_v4(),
974975

@@ -995,12 +996,51 @@ async fn test_create_snapshot_record_idempotent(
995996
.unwrap();
996997

997998
let snapshot_created_2 = datastore
998-
.project_ensure_snapshot(&opctx, &authz_project, snapshot)
999+
.project_ensure_snapshot(&opctx, &authz_project, snapshot.clone())
9991000
.await
10001001
.unwrap();
10011002

10021003
assert_eq!(snapshot_created_1.id(), snapshot_created_2.id());
10031004

1005+
// Test that attempting to ensure a snapshot with the same project + name
1006+
// but a different ID will not work: if a user tries to fire off multiple
1007+
// snapshot creations for the same project + name, they will get different
1008+
// IDs, so this should be rejected, ensuring only one snapshot create saga
1009+
// would apply for the user's multiple requests.
1010+
1011+
let dupe_snapshot = db::model::Snapshot {
1012+
identity: db::model::SnapshotIdentity {
1013+
id: Uuid::new_v4(),
1014+
name: external::Name::try_from("snapshot".to_string())
1015+
.unwrap()
1016+
.into(),
1017+
description: "snapshot".into(),
1018+
1019+
time_created: Utc::now(),
1020+
time_modified: Utc::now(),
1021+
time_deleted: None,
1022+
},
1023+
1024+
project_id,
1025+
disk_id,
1026+
volume_id: Uuid::new_v4(),
1027+
destination_volume_id: Uuid::new_v4(),
1028+
1029+
gen: db::model::Generation::new(),
1030+
state: db::model::SnapshotState::Creating,
1031+
block_size: db::model::BlockSize::Traditional,
1032+
size: external::ByteCount::try_from(1024u32).unwrap().into(),
1033+
};
1034+
1035+
let dupe_snapshot_created_err = datastore
1036+
.project_ensure_snapshot(&opctx, &authz_project, dupe_snapshot)
1037+
.await;
1038+
1039+
assert!(matches!(
1040+
dupe_snapshot_created_err.unwrap_err(),
1041+
external::Error::ObjectAlreadyExists { .. },
1042+
));
1043+
10041044
// Test project_delete_snapshot is idempotent
10051045

10061046
let (.., authz_snapshot, db_snapshot) = LookupPath::new(&opctx, &datastore)
@@ -1010,12 +1050,31 @@ async fn test_create_snapshot_record_idempotent(
10101050
.unwrap();
10111051

10121052
datastore
1013-
.project_delete_snapshot(&opctx, &authz_snapshot, &db_snapshot)
1053+
.project_delete_snapshot(
1054+
&opctx,
1055+
&authz_snapshot,
1056+
&db_snapshot,
1057+
vec![
1058+
// This must match what is in the snapshot_delete saga!
1059+
db::model::SnapshotState::Ready,
1060+
db::model::SnapshotState::Faulted,
1061+
db::model::SnapshotState::Destroyed,
1062+
],
1063+
)
10141064
.await
10151065
.unwrap();
10161066

10171067
datastore
1018-
.project_delete_snapshot(&opctx, &authz_snapshot, &db_snapshot)
1068+
.project_delete_snapshot(
1069+
&opctx,
1070+
&authz_snapshot,
1071+
&db_snapshot,
1072+
vec![
1073+
db::model::SnapshotState::Ready,
1074+
db::model::SnapshotState::Faulted,
1075+
db::model::SnapshotState::Destroyed,
1076+
],
1077+
)
10191078
.await
10201079
.unwrap();
10211080
}

0 commit comments

Comments
 (0)