Commit 6d54db4

Use an opaque type to describe monitor updates in Persist
In the next commit, we'll be originating monitor updates both from the ChainMonitor and from the ChannelManager, making simple sequential update IDs impossible.

Further, the existing async monitor update API was somewhat hard to work with - instead of being able to generate monitor_updated callbacks whenever a persistence process finishes, you had to ensure you only did so once all previous updates had also been persisted.

Here we eat the complexity for the user by moving to an opaque type for monitor updates, tracking which updates are in-flight for the user and only generating monitor-persisted events once all pending updates have been committed.
Parent: 87908cf

6 files changed: +169 -101 lines
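Before the per-file diffs, here is the overall shape of the new API, condensed from the lightning/src/chain/chainmonitor.rs changes below. This is a sketch for orientation, not a compilable excerpt; see the full diff for the real definitions.

// Sketch only -- condensed from the chainmonitor.rs diff in this commit.

/// An opaque identifier describing a specific `Persist` method call.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct MonitorUpdateId { /* opaque contents */ }

pub trait Persist<ChannelSigner: Sign> {
	// Every persistence call now carries the opaque id identifying it.
	fn persist_new_channel(&self, id: OutPoint, data: &ChannelMonitor<ChannelSigner>,
		update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate,
		data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId)
		-> Result<(), ChannelMonitorUpdateErr>;
}

// On the ChainMonitor, asynchronous persisters report completion per update id:
//     pub fn channel_monitor_updated(&self, funding_txo: OutPoint,
//         completed_update_id: MonitorUpdateId)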


fuzz/src/chanmon_consistency.rs

+9 -9

@@ -855,25 +855,25 @@ pub fn do_test<Out: test_logger::Output>(data: &[u8], out: Out) {
 
 			0x08 => {
 				if let Some((id, _)) = monitor_a.latest_monitors.lock().unwrap().get(&chan_1_funding) {
-					monitor_a.chain_monitor.channel_monitor_updated(chan_1_funding, *id);
+					monitor_a.chain_monitor.force_channel_monitor_updated(chan_1_funding, *id);
 					nodes[0].process_monitor_events();
 				}
 			},
 			0x09 => {
 				if let Some((id, _)) = monitor_b.latest_monitors.lock().unwrap().get(&chan_1_funding) {
-					monitor_b.chain_monitor.channel_monitor_updated(chan_1_funding, *id);
+					monitor_b.chain_monitor.force_channel_monitor_updated(chan_1_funding, *id);
 					nodes[1].process_monitor_events();
 				}
 			},
 			0x0a => {
 				if let Some((id, _)) = monitor_b.latest_monitors.lock().unwrap().get(&chan_2_funding) {
-					monitor_b.chain_monitor.channel_monitor_updated(chan_2_funding, *id);
+					monitor_b.chain_monitor.force_channel_monitor_updated(chan_2_funding, *id);
 					nodes[1].process_monitor_events();
 				}
 			},
 			0x0b => {
 				if let Some((id, _)) = monitor_c.latest_monitors.lock().unwrap().get(&chan_2_funding) {
-					monitor_c.chain_monitor.channel_monitor_updated(chan_2_funding, *id);
+					monitor_c.chain_monitor.force_channel_monitor_updated(chan_2_funding, *id);
 					nodes[2].process_monitor_events();
 				}
 			},
@@ -1075,25 +1075,25 @@ pub fn do_test<Out: test_logger::Output>(data: &[u8], out: Out) {
 	// Test that no channel is in a stuck state where neither party can send funds even
 	// after we resolve all pending events.
 	// First make sure there are no pending monitor updates, resetting the error state
-	// and calling channel_monitor_updated for each monitor.
+	// and calling force_channel_monitor_updated for each monitor.
 	*monitor_a.persister.update_ret.lock().unwrap() = Ok(());
 	*monitor_b.persister.update_ret.lock().unwrap() = Ok(());
 	*monitor_c.persister.update_ret.lock().unwrap() = Ok(());
 
 	if let Some((id, _)) = monitor_a.latest_monitors.lock().unwrap().get(&chan_1_funding) {
-		monitor_a.chain_monitor.channel_monitor_updated(chan_1_funding, *id);
+		monitor_a.chain_monitor.force_channel_monitor_updated(chan_1_funding, *id);
 		nodes[0].process_monitor_events();
 	}
 	if let Some((id, _)) = monitor_b.latest_monitors.lock().unwrap().get(&chan_1_funding) {
-		monitor_b.chain_monitor.channel_monitor_updated(chan_1_funding, *id);
+		monitor_b.chain_monitor.force_channel_monitor_updated(chan_1_funding, *id);
 		nodes[1].process_monitor_events();
 	}
 	if let Some((id, _)) = monitor_b.latest_monitors.lock().unwrap().get(&chan_2_funding) {
-		monitor_b.chain_monitor.channel_monitor_updated(chan_2_funding, *id);
+		monitor_b.chain_monitor.force_channel_monitor_updated(chan_2_funding, *id);
 		nodes[1].process_monitor_events();
 	}
 	if let Some((id, _)) = monitor_c.latest_monitors.lock().unwrap().get(&chan_2_funding) {
-		monitor_c.chain_monitor.channel_monitor_updated(chan_2_funding, *id);
+		monitor_c.chain_monitor.force_channel_monitor_updated(chan_2_funding, *id);
 		nodes[2].process_monitor_events();
 	}
fuzz/src/utils/test_persister.rs

+3 -2

@@ -1,5 +1,6 @@
 use lightning::chain;
 use lightning::chain::{chainmonitor, channelmonitor};
+use lightning::chain::chainmonitor::MonitorUpdateId;
 use lightning::chain::transaction::OutPoint;
 use lightning::util::enforcing_trait_impls::EnforcingSigner;
 
@@ -9,11 +10,11 @@ pub struct TestPersister {
 	pub update_ret: Mutex<Result<(), chain::ChannelMonitorUpdateErr>>,
 }
 impl chainmonitor::Persist<EnforcingSigner> for TestPersister {
-	fn persist_new_channel(&self, _funding_txo: OutPoint, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>) -> Result<(), chain::ChannelMonitorUpdateErr> {
+	fn persist_new_channel(&self, _funding_txo: OutPoint, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>, _update_id: MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
 		self.update_ret.lock().unwrap().clone()
 	}
 
-	fn update_persisted_channel(&self, _funding_txo: OutPoint, _update: &channelmonitor::ChannelMonitorUpdate, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>) -> Result<(), chain::ChannelMonitorUpdateErr> {
+	fn update_persisted_channel(&self, _funding_txo: OutPoint, _update: &channelmonitor::ChannelMonitorUpdate, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>, _update_id: MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
 		self.update_ret.lock().unwrap().clone()
 	}
 }
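The `_update_id` parameter is what an asynchronous persister would actually make use of. Below is a minimal sketch of such an implementation; the names `AsyncPersister` and `pending` are hypothetical and not part of LDK, and only the `Persist` signatures and `channel_monitor_updated` come from this commit.

use std::sync::Mutex;

use lightning::chain;
use lightning::chain::{chainmonitor, channelmonitor};
use lightning::chain::chainmonitor::MonitorUpdateId;
use lightning::chain::transaction::OutPoint;
use lightning::util::enforcing_trait_impls::EnforcingSigner;

struct AsyncPersister {
	// Updates handed off to a background task (e.g. a remote backup upload)
	// that have not yet been reported back to the ChainMonitor.
	pending: Mutex<Vec<(OutPoint, MonitorUpdateId)>>,
}

impl chainmonitor::Persist<EnforcingSigner> for AsyncPersister {
	fn persist_new_channel(&self, funding_txo: OutPoint, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>, update_id: MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
		// Per the new Persist docs: write the monitor durably to local disk
		// first, then let the slow remote copy continue in the background and
		// tell LDK the update is still in flight.
		self.pending.lock().unwrap().push((funding_txo, update_id));
		Err(chain::ChannelMonitorUpdateErr::TemporaryFailure)
	}

	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &channelmonitor::ChannelMonitorUpdate, _data: &channelmonitor::ChannelMonitor<EnforcingSigner>, update_id: MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
		self.pending.lock().unwrap().push((funding_txo, update_id));
		Err(chain::ChannelMonitorUpdateErr::TemporaryFailure)
	}
}

Once a queued write has completed everywhere, some other task calls `chain_monitor.channel_monitor_updated(funding_txo, update_id)` for that specific id; with this commit that call is required once per in-flight update rather than once with the highest sequential id.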

lightning-persister/src/lib.rs

+8 -4

@@ -159,13 +159,13 @@ impl FilesystemPersister {
 }
 
 impl<ChannelSigner: Sign> chainmonitor::Persist<ChannelSigner> for FilesystemPersister {
-	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), chain::ChannelMonitorUpdateErr> {
+	fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>, _update_id: chainmonitor::MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| chain::ChannelMonitorUpdateErr::PermanentFailure)
 	}
 
-	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &ChannelMonitorUpdate, monitor: &ChannelMonitor<ChannelSigner>) -> Result<(), chain::ChannelMonitorUpdateErr> {
+	fn update_persisted_channel(&self, funding_txo: OutPoint, _update: &ChannelMonitorUpdate, monitor: &ChannelMonitor<ChannelSigner>, _update_id: chainmonitor::MonitorUpdateId) -> Result<(), chain::ChannelMonitorUpdateErr> {
 		let filename = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
 		util::write_to_file(self.path_to_monitor_data(), filename, monitor)
 			.map_err(|_| chain::ChannelMonitorUpdateErr::PermanentFailure)
@@ -296,6 +296,8 @@ mod tests {
 		nodes[1].node.force_close_channel(&chan.2).unwrap();
 		check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed);
 		let mut added_monitors = nodes[1].chain_monitor.added_monitors.lock().unwrap();
+		let update_map = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap();
+		let update_id = update_map.get(&added_monitors[0].0.to_channel_id()).unwrap();
 
 		// Set the persister's directory to read-only, which should result in
 		// returning a permanent failure when we then attempt to persist a
@@ -309,7 +311,7 @@
 			txid: Txid::from_hex("8984484a580b825b9972d7adb15050b3ab624ccd731946b3eeddb92f4e7ef6be").unwrap(),
 			index: 0
 		};
-		match persister.persist_new_channel(test_txo, &added_monitors[0].1) {
+		match persister.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
 			Err(ChannelMonitorUpdateErr::PermanentFailure) => {},
 			_ => panic!("unexpected result from persisting new channel")
 		}
@@ -333,6 +335,8 @@
 		nodes[1].node.force_close_channel(&chan.2).unwrap();
 		check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed);
 		let mut added_monitors = nodes[1].chain_monitor.added_monitors.lock().unwrap();
+		let update_map = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap();
+		let update_id = update_map.get(&added_monitors[0].0.to_channel_id()).unwrap();
 
 		// Create the persister with an invalid directory name and test that the
 		// channel fails to open because the directories fail to be created. There
@@ -344,7 +348,7 @@
 			txid: Txid::from_hex("8984484a580b825b9972d7adb15050b3ab624ccd731946b3eeddb92f4e7ef6be").unwrap(),
 			index: 0
 		};
-		match persister.persist_new_channel(test_txo, &added_monitors[0].1) {
+		match persister.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
 			Err(ChannelMonitorUpdateErr::PermanentFailure) => {},
 			_ => panic!("unexpected result from persisting new channel")
 		}

lightning/src/chain/chainmonitor.rs

+93 -35

@@ -41,36 +41,54 @@ use prelude::*;
 use sync::{RwLock, RwLockReadGuard, Mutex};
 use core::ops::Deref;
 
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub(crate) enum MonitorUpdate {
+	MonitorUpdateId(u64),
+}
+
+/// An opaque identifier describing a specific [`Persist`] method call.
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub struct MonitorUpdateId {
+	pub(crate) contents: MonitorUpdate,
+}
+
 /// `Persist` defines behavior for persisting channel monitors: this could mean
 /// writing once to disk, and/or uploading to one or more backup services.
 ///
-/// Note that for every new monitor, you **must** persist the new `ChannelMonitor`
-/// to disk/backups. And, on every update, you **must** persist either the
-/// `ChannelMonitorUpdate` or the updated monitor itself. Otherwise, there is risk
-/// of situations such as revoking a transaction, then crashing before this
-/// revocation can be persisted, then unintentionally broadcasting a revoked
-/// transaction and losing money. This is a risk because previous channel states
-/// are toxic, so it's important that whatever channel state is persisted is
-/// kept up-to-date.
+/// Each method can return three possible values:
+///  * If persistence (including any relevant `fsync()` calls) happens immediately, the
+///    implementation should return `Ok(())`, indicating normal channel operation should continue.
+///  * If persistence happens asynchronously, implementations should first ensure the
+///    [`ChannelMonitor`] or [`ChannelMonitorUpdate`] are written durably to disk, and then return
+///    `Err(ChannelMonitorUpdateErr::TemporaryFailure)` while the update continues in the
+///    background. Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be
+///    called with the corresponding [`MonitorUpdateId`].
+///
+///    Note that unlike the direct [`chain::Watch`] interface,
+///    [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
+///
+///  * If persistence fails for some reason, implementations should return
+///    `Err(ChannelMonitorUpdateErr::PermanentFailure)`, in which case the channel will likely be
+///    closed without broadcasting the latest state. See
+///    [`ChannelMonitorUpdateErr::PermanentFailure`] for more details.
 pub trait Persist<ChannelSigner: Sign> {
-	/// Persist a new channel's data. The data can be stored any way you want, but
-	/// the identifier provided by Rust-Lightning is the channel's outpoint (and
-	/// it is up to you to maintain a correct mapping between the outpoint and the
-	/// stored channel data). Note that you **must** persist every new monitor to
-	/// disk. See the `Persist` trait documentation for more details.
+	/// Persist a new channel's data. The data can be stored any way you want, but the identifier
+	/// provided by LDK is the channel's outpoint (and it is up to you to maintain a correct
+	/// mapping between the outpoint and the stored channel data). Note that you **must** persist
+	/// every new monitor to disk.
 	///
 	/// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`
 	/// and [`ChannelMonitorUpdateErr`] for requirements when returning errors.
 	///
 	/// [`Writeable::write`]: crate::util::ser::Writeable::write
-	fn persist_new_channel(&self, id: OutPoint, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn persist_new_channel(&self, id: OutPoint, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 
-	/// Update one channel's data. The provided `ChannelMonitor` has already
-	/// applied the given update.
+	/// Update one channel's data. The provided [`ChannelMonitor`] has already applied the given
+	/// update.
 	///
-	/// Note that on every update, you **must** persist either the
-	/// `ChannelMonitorUpdate` or the updated monitor itself to disk/backups. See
-	/// the `Persist` trait documentation for more details.
+	/// Note that on every update, you **must** persist either the [`ChannelMonitorUpdate`] or the
+	/// updated monitor itself to disk/backups. See the `Persist` trait documentation for more
+	/// details.
 	///
 	/// If an implementer chooses to persist the updates only, they need to make
 	/// sure that all the updates are applied to the `ChannelMonitors` *before*
@@ -89,11 +107,18 @@ pub trait Persist<ChannelSigner: Sign> {
 	/// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
 	///
 	/// [`Writeable::write`]: crate::util::ser::Writeable::write
-	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 struct MonitorHolder<ChannelSigner: Sign> {
 	monitor: ChannelMonitor<ChannelSigner>,
+	/// The full set of pending monitor updates for this Channel.
+	///
+	/// Note that this lock must be held during updates to prevent a race where we call
+	/// update_persisted_channel, the user returns a TemporaryFailure, and then calls
+	/// channel_monitor_updated immediately, racing our insertion of the pending update into the
+	/// contained Vec.
+	pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
 }
 
 /// A read-only reference to a current ChannelMonitor.
@@ -262,23 +287,43 @@ where C::Target: chain::Filter,
 	/// Indicates the persistence of a [`ChannelMonitor`] has completed after
 	/// [`ChannelMonitorUpdateErr::TemporaryFailure`] was returned from an update operation.
 	///
-	/// All ChannelMonitor updates up to and including highest_applied_update_id must have been
-	/// fully committed in every copy of the given channels' ChannelMonitors.
-	///
-	/// Note that there is no effect to calling with a highest_applied_update_id other than the
-	/// current latest ChannelMonitorUpdate and one call to this function after multiple
-	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
-	/// exists largely only to prevent races between this and concurrent update_monitor calls.
-	///
 	/// Thus, the anticipated use is, at a high level:
 	///  1) This [`ChainMonitor`] calls [`Persist::update_persisted_channel`] which stores the
 	///     update to disk and begins updating any remote (e.g. watchtower/backup) copies,
 	///     returning [`ChannelMonitorUpdateErr::TemporaryFailure`],
 	///  2) once all remote copies are updated, you call this function with the update_id that
-	///     completed, and once it is the latest the Channel will be re-enabled.
-	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: u64) {
+	///     completed, and once all pending updates have completed the Channel will be re-enabled.
+	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, completed_update_id: MonitorUpdateId) {
+		let monitors = self.monitors.read().unwrap();
+		let monitor_data = if let Some(mon) = monitors.get(&funding_txo) { mon } else { return; };
+		let mut pending_monitor_updates = monitor_data.pending_monitor_updates.lock().unwrap();
+		pending_monitor_updates.retain(|update_id| *update_id != completed_update_id);
+
+		match completed_update_id {
+			MonitorUpdateId { .. } => {
+				let monitor_update_pending_updates = pending_monitor_updates.iter().filter(|update_id|
+					if let MonitorUpdate::MonitorUpdateId(_) = update_id.contents { true } else { false }).count();
+				if monitor_update_pending_updates != 0 {
+					// If there are still monitor updates pending, we cannot yet construct an
+					// UpdateCompleted event.
+					return;
+				}
+				self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
+					funding_txo,
+					monitor_update_id: monitor_data.monitor.get_latest_update_id(),
+				}));
+			}
+		}
+	}
+
+	/// This wrapper avoids having to update some of our tests for now as they assume the direct
+	/// chain::Watch API wherein we mark a monitor fully-updated by just calling
+	/// channel_monitor_updated once with the higest ID.
+	#[cfg(any(test, feature = "fuzztarget"))]
+	pub fn force_channel_monitor_updated(&self, funding_txo: OutPoint, monitor_update_id: u64) {
 		self.user_provided_events.lock().unwrap().push(MonitorEvent::UpdateCompleted(MonitorUpdated {
-			funding_txo, monitor_update_id: highest_applied_update_id
+			funding_txo,
+			monitor_update_id,
 		}));
 	}
 
@@ -392,12 +437,18 @@ where C::Target: chain::Filter,
 				return Err(ChannelMonitorUpdateErr::PermanentFailure)},
 			hash_map::Entry::Vacant(e) => e,
 		};
-		let update_res = self.persister.persist_new_channel(funding_outpoint, &monitor);
+		let update_id = MonitorUpdateId {
+			contents: MonitorUpdate::MonitorUpdateId(monitor.get_latest_update_id()),
+		};
+		let mut pending_monitor_updates = Vec::new();
+		let update_res = self.persister.persist_new_channel(funding_outpoint, &monitor, update_id);
 		if update_res.is_err() {
 			log_error!(self.logger, "Failed to persist new channel data: {:?}", update_res);
 		}
 		if update_res == Err(ChannelMonitorUpdateErr::PermanentFailure) {
 			return update_res;
+		} else if update_res.is_err() {
+			pending_monitor_updates.push(update_id);
 		}
 		{
 			let funding_txo = monitor.get_funding_txo();
@@ -407,7 +458,7 @@ where C::Target: chain::Filter,
 				monitor.load_outputs_to_watch(chain_source);
 			}
 		}
-		entry.insert(MonitorHolder { monitor });
+		entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(pending_monitor_updates) });
 		update_res
 	}
 
@@ -437,8 +488,15 @@ where C::Target: chain::Filter,
 		}
 		// Even if updating the monitor returns an error, the monitor's state will
 		// still be changed. So, persist the updated monitor despite the error.
-		let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor);
-		if let Err(ref e) = persist_res {
+		let update_id = MonitorUpdateId {
+			contents: MonitorUpdate::MonitorUpdateId(update.update_id),
+		};
+		let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
+		let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor, update_id);
+		if let Err(e) = persist_res {
+			if e == ChannelMonitorUpdateErr::TemporaryFailure {
+				pending_monitor_updates.push(update_id);
+			}
			log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
 		}
 		if update_res.is_err() {

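Internally, every update that returns TemporaryFailure is now recorded in the channel's pending_monitor_updates list; each channel_monitor_updated call removes the completed id, and an UpdateCompleted monitor event is only queued once that list is empty. From the caller's side the completion flow, with a hypothetical finished_writes queue that is not part of LDK, looks roughly like:

// Hypothetical completion loop; `finished_writes` is an illustrative queue of
// (OutPoint, MonitorUpdateId) pairs whose background persistence has durably
// completed everywhere. Each pair must be reported individually.
for (funding_txo, update_id) in finished_writes.drain(..) {
	chain_monitor.channel_monitor_updated(funding_txo, update_id);
}

Reporting only the highest update id is no longer sufficient; the test/fuzz-only force_channel_monitor_updated shim above exists precisely to preserve the old one-shot behaviour for existing tests and the chanmon_consistency fuzzer.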