Commit 3eb9e41

Holding cell: if we fail to free an HTLC, fail it backwards
Plus add a test.
1 parent 779ff67 commit 3eb9e41

File tree: 3 files changed, +421 -30 lines

lightning/src/ln/channel.rs

Lines changed: 37 additions & 26 deletions
@@ -2117,7 +2117,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 
 	/// Used to fulfill holding_cell_htlcs when we get a remote ack (or implicitly get it by them
 	/// fulfilling or failing the last pending HTLC)
-	fn free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, ChannelError> where L::Target: Logger {
+	fn free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<(Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, Vec<(HTLCSource, PaymentHash)>), ChannelError> where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
 		if self.holding_cell_htlc_updates.len() != 0 || self.holding_cell_update_fee.is_some() {
 			log_trace!(logger, "Freeing holding cell with {} HTLC updates{}", self.holding_cell_htlc_updates.len(), if self.holding_cell_update_fee.is_some() { " and a fee update" } else { "" });
@@ -2132,6 +2132,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 			let mut update_add_htlcs = Vec::with_capacity(htlc_updates.len());
 			let mut update_fulfill_htlcs = Vec::with_capacity(htlc_updates.len());
 			let mut update_fail_htlcs = Vec::with_capacity(htlc_updates.len());
+			let mut htlcs_to_fail = Vec::new();
 			let mut err = None;
 			for htlc_update in htlc_updates.drain(..) {
 				// Note that this *can* fail, though it should be due to rather-rare conditions on
@@ -2150,6 +2151,13 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 									match e {
 										ChannelError::Ignore(ref msg) => {
 											log_info!(logger, "Failed to send HTLC with payment_hash {} due to {}", log_bytes!(payment_hash.0), msg);
+											// If we fail to send here, then this HTLC should
+											// be failed backwards. Failing to send here
+											// indicates that this HTLC may keep being put back
+											// into the holding cell without ever being
+											// successfully forwarded/failed/fulfilled, causing
+											// our counterparty to eventually close on us.
+											htlcs_to_fail.push((source.clone(), *payment_hash));
 										},
 										_ => {
 											log_info!(logger, "Failed to send HTLC with payment_hash {} resulting in a channel closure during holding_cell freeing", log_bytes!(payment_hash.0));
@@ -2188,24 +2196,27 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 						},
 					}
 					if err.is_some() {
-						self.holding_cell_htlc_updates.push(htlc_update);
 						if let Some(ChannelError::Ignore(_)) = err {
 							// If we failed to add the HTLC, but got an Ignore error, we should
 							// still send the new commitment_signed, so reset the err to None.
+							// If we failed to fail or fulfill an HTLC, but got an Ignore error,
+							// it's OK to drop the error because these errors are caused by
+							// the ChannelManager generating duplicate claim/fail events during
+							// block rescan.
 							err = None;
+						} else {
+							self.holding_cell_htlc_updates.push(htlc_update);
 						}
 					}
 				}
 			}
-			//TODO: Need to examine the type of err - if it's a fee issue or similar we may want to
-			//fail it back the route, if it's a temporary issue we can ignore it...
 			match err {
				None => {
					if update_add_htlcs.is_empty() && update_fulfill_htlcs.is_empty() && update_fail_htlcs.is_empty() && self.holding_cell_update_fee.is_none() {
-						// This should never actually happen and indicates we got some Errs back
-						// from update_fulfill_htlc/update_fail_htlc, but we handle it anyway in
-						// case there is some strange way to hit duplicate HTLC removes.
-						return Ok(None);
+						// Hitting this case indicates that we got some Errs back from update_fulfill_htlc
+						// or update_fail_htlc.
+						log_warn!(logger, "Attempted to fulfill or fail an HTLC that was already removed");
+						return Ok((None, htlcs_to_fail));
					}
					let update_fee = if let Some(feerate) = self.holding_cell_update_fee {
						self.pending_update_fee = self.holding_cell_update_fee.take();
@@ -2223,19 +2234,19 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 					self.latest_monitor_update_id = monitor_update.update_id;
 					monitor_update.updates.append(&mut additional_update.updates);
 
-					Ok(Some((msgs::CommitmentUpdate {
+					Ok((Some((msgs::CommitmentUpdate {
 						update_add_htlcs,
 						update_fulfill_htlcs,
 						update_fail_htlcs,
 						update_fail_malformed_htlcs: Vec::new(),
 						update_fee: update_fee,
 						commitment_signed,
-					}, monitor_update)))
+					}, monitor_update)), htlcs_to_fail))
 				},
 				Some(e) => Err(e)
 			}
 		} else {
-			Ok(None)
+			Ok((None, Vec::new()))
 		}
 	}
 
@@ -2244,7 +2255,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 	/// waiting on this revoke_and_ack. The generation of this new commitment_signed may also fail,
 	/// generating an appropriate error *after* the channel state has been updated based on the
 	/// revoke_and_ack message.
-	pub fn revoke_and_ack<F: Deref, L: Deref>(&mut self, msg: &msgs::RevokeAndACK, fee_estimator: &F, logger: &L) -> Result<(Option<msgs::CommitmentUpdate>, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Option<msgs::ClosingSigned>, ChannelMonitorUpdate), ChannelError>
+	pub fn revoke_and_ack<F: Deref, L: Deref>(&mut self, msg: &msgs::RevokeAndACK, fee_estimator: &F, logger: &L) -> Result<(Option<msgs::CommitmentUpdate>, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Option<msgs::ClosingSigned>, ChannelMonitorUpdate, Vec<(HTLCSource, PaymentHash)>), ChannelError>
 		where F::Target: FeeEstimator,
 			L::Target: Logger,
 	{
@@ -2419,11 +2430,11 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 			}
 			self.monitor_pending_forwards.append(&mut to_forward_infos);
 			self.monitor_pending_failures.append(&mut revoked_htlcs);
-			return Ok((None, Vec::new(), Vec::new(), None, monitor_update))
+			return Ok((None, Vec::new(), Vec::new(), None, monitor_update, Vec::new()))
 		}
 
 		match self.free_holding_cell_htlcs(logger)? {
-			Some((mut commitment_update, mut additional_update)) => {
+			(Some((mut commitment_update, mut additional_update)), htlcs_to_fail) => {
 				commitment_update.update_fail_htlcs.reserve(update_fail_htlcs.len());
 				for fail_msg in update_fail_htlcs.drain(..) {
 					commitment_update.update_fail_htlcs.push(fail_msg);
@@ -2438,9 +2449,9 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 				self.latest_monitor_update_id = monitor_update.update_id;
 				monitor_update.updates.append(&mut additional_update.updates);
 
-				Ok((Some(commitment_update), to_forward_infos, revoked_htlcs, None, monitor_update))
+				Ok((Some(commitment_update), to_forward_infos, revoked_htlcs, None, monitor_update, htlcs_to_fail))
 			},
-			None => {
+			(None, htlcs_to_fail) => {
 				if require_commitment {
 					let (commitment_signed, mut additional_update) = self.send_commitment_no_status_check(logger)?;
 
@@ -2456,9 +2467,9 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 						update_fail_malformed_htlcs,
 						update_fee: None,
 						commitment_signed
-					}), to_forward_infos, revoked_htlcs, None, monitor_update))
+					}), to_forward_infos, revoked_htlcs, None, monitor_update, htlcs_to_fail))
 				} else {
-					Ok((None, to_forward_infos, revoked_htlcs, self.maybe_propose_first_closing_signed(fee_estimator), monitor_update))
+					Ok((None, to_forward_infos, revoked_htlcs, self.maybe_propose_first_closing_signed(fee_estimator), monitor_update, htlcs_to_fail))
 				}
 			}
 		}
@@ -2727,7 +2738,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 
 	/// May panic if some calls other than message-handling calls (which will all Err immediately)
 	/// have been called between remove_uncommitted_htlcs_and_mark_paused and this call.
-	pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, RAACommitmentOrder, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
+	pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, Vec<(HTLCSource, PaymentHash)>, RAACommitmentOrder, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) == 0 {
 			// While BOLT 2 doesn't indicate explicitly we should error this channel here, it
 			// almost certainly indicates we are going to end up out-of-sync in some way, so we
@@ -2778,15 +2789,15 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 					return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet".to_owned()));
 				}
 				// Short circuit the whole handler as there is nothing we can resend them
-				return Ok((None, None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+				return Ok((None, None, None, None, Vec::new(), RAACommitmentOrder::CommitmentFirst, shutdown_msg));
 			}
 
 			// We have OurFundingLocked set!
 			let next_per_commitment_point = self.local_keys.get_per_commitment_point(self.cur_local_commitment_transaction_number, &self.secp_ctx);
 			return Ok((Some(msgs::FundingLocked {
 				channel_id: self.channel_id(),
 				next_per_commitment_point: next_per_commitment_point,
-			}), None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+			}), None, None, None, Vec::new(), RAACommitmentOrder::CommitmentFirst, shutdown_msg));
 		}
 
 		let required_revoke = if msg.next_remote_commitment_number + 1 == INITIAL_COMMITMENT_NUMBER - self.cur_local_commitment_transaction_number {
@@ -2834,11 +2845,11 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 				match self.free_holding_cell_htlcs(logger) {
 					Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
 					Err(ChannelError::Ignore(_)) | Err(ChannelError::CloseDelayBroadcast(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
-					Ok(Some((commitment_update, monitor_update))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), shutdown_msg)),
-					Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg)),
+					Ok((Some((commitment_update, monitor_update)), htlcs_to_fail)) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), htlcs_to_fail, self.resend_order.clone(), shutdown_msg)),
+					Ok((None, htlcs_to_fail)) => return Ok((resend_funding_locked, required_revoke, None, None, htlcs_to_fail, self.resend_order.clone(), shutdown_msg)),
 				}
 			} else {
-				return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
+				return Ok((resend_funding_locked, required_revoke, None, None, Vec::new(), self.resend_order.clone(), shutdown_msg));
 			}
 		} else if msg.next_local_commitment_number == our_next_remote_commitment_number - 1 {
 			if required_revoke.is_some() {
@@ -2849,10 +2860,10 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 
 			if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
 				self.monitor_pending_commitment_signed = true;
-				return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), shutdown_msg));
+				return Ok((resend_funding_locked, None, None, None, Vec::new(), self.resend_order.clone(), shutdown_msg));
 			}
 
-			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, self.resend_order.clone(), shutdown_msg));
+			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, Vec::new(), self.resend_order.clone(), shutdown_msg));
 		} else {
 			return Err(ChannelError::Close("Peer attempted to reestablish channel with a very old remote commitment transaction".to_owned()));
 		}
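The behavioural core of the channel.rs change is visible in the hunks above: an AddHTLC that hits an Ignore-class error while the holding cell is being freed is no longer pushed back into the cell; it is collected into htlcs_to_fail and returned to the caller, which fails it backwards. A minimal, self-contained sketch of that pattern, using simplified stand-in types rather than LDK's real Channel and msgs structs, might look like this:

// Illustrative stand-ins only; the real types live in lightning::ln::channel and lightning::ln::msgs.
use std::mem;

#[derive(Clone, Debug)]
struct HtlcSource(u64);

#[derive(Clone, Copy, Debug)]
struct PaymentHash([u8; 32]);

struct PendingAdd {
    source: HtlcSource,
    payment_hash: PaymentHash,
}

#[derive(Debug)]
enum ChanError {
    // Per-HTLC, non-fatal failure: the channel stays open.
    Ignore(&'static str),
    // Fatal failure: the channel must be closed.
    Close(&'static str),
}

struct HoldingCell {
    pending_adds: Vec<PendingAdd>,
}

impl HoldingCell {
    // Stand-in for Channel::send_htlc; in this sketch it always hits a transient error.
    fn send_htlc(&mut self, _add: &PendingAdd) -> Result<(), ChanError> {
        Err(ChanError::Ignore("cannot add this HTLC right now"))
    }

    // Drain the holding cell. Adds that hit an Ignore-class error are *returned* so the
    // caller can fail them backwards, instead of being re-queued into the cell forever.
    fn free(&mut self) -> Result<Vec<(HtlcSource, PaymentHash)>, ChanError> {
        let mut htlcs_to_fail = Vec::new();
        let adds = mem::take(&mut self.pending_adds);
        for add in adds {
            match self.send_htlc(&add) {
                Ok(()) => { /* queue an update_add_htlc for the commitment update */ }
                Err(ChanError::Ignore(_)) => htlcs_to_fail.push((add.source, add.payment_hash)),
                Err(e) => return Err(e), // channel-closing errors still propagate
            }
        }
        Ok(htlcs_to_fail)
    }
}

fn main() {
    let mut cell = HoldingCell {
        pending_adds: vec![PendingAdd { source: HtlcSource(1), payment_hash: PaymentHash([0; 32]) }],
    };
    let failed = cell.free().expect("no channel-closing error in this sketch");
    // In ChannelManager these would now be failed backwards or surfaced as PaymentFailed.
    assert_eq!(failed.len(), 1);
}

The design choice mirrored here is that only Ignore-class (per-HTLC, non-fatal) errors are converted into backwards failures; channel-closing errors still propagate exactly as before.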

lightning/src/ln/channelmanager.rs

Lines changed: 48 additions & 4 deletions
@@ -1822,6 +1822,48 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 		} else { false }
 	}
 
+	// Fail a list of HTLCs that were just freed from the holding cell. The HTLCs need to be
+	// failed backwards or, if they were one of our outgoing HTLCs, then their failure needs to
+	// be surfaced to the user.
+	fn fail_holding_cell_htlcs(&self, mut htlcs_to_fail: Vec<(HTLCSource, PaymentHash)>, channel_id: [u8; 32]) -> Result<(), MsgHandleErrInternal> {
+		for (htlc_src, payment_hash) in htlcs_to_fail.drain(..) {
+			match htlc_src {
+				HTLCSource::PreviousHopData(HTLCPreviousHopData { .. }) => {
+					let onion_failure_data =
+						match self.channel_state.lock().unwrap().by_id.entry(channel_id) {
+							hash_map::Entry::Occupied(chan_entry) => {
+								if let Ok(upd) = self.get_channel_update(&chan_entry.get()) {
+									let mut res = Vec::with_capacity(8 + 128);
+									res.extend_from_slice(&byte_utils::be16_to_array(upd.contents.flags));
+									res.extend_from_slice(&upd.encode_with_len()[..]);
+									res
+								} else {
+									return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel", channel_id));
+								}
+							},
+							hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel", channel_id))
+						};
+					let channel_state = self.channel_state.lock().unwrap();
+					self.fail_htlc_backwards_internal(channel_state,
+						htlc_src, &payment_hash, HTLCFailReason::Reason { failure_code: 0x1000|7, data: onion_failure_data});
+				},
+				HTLCSource::OutboundRoute { .. } => {
+					self.pending_events.lock().unwrap().push(
+						events::Event::PaymentFailed {
+							payment_hash,
+							rejected_by_dest: false,
+							#[cfg(test)]
+							error_code: None,
+							#[cfg(test)]
+							error_data: None,
+						}
+					)
+				},
+			};
+		}
+		Ok(())
+	}
+
 	/// Fails an HTLC backwards to the sender of it to us.
 	/// Note that while we take a channel_state lock as input, we do *not* assume consistency here.
 	/// There are several callsites that do stupid things like loop over a list of payment_hashes
@@ -2670,7 +2712,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 	}
 
 	fn internal_revoke_and_ack(&self, their_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
-		let (pending_forwards, mut pending_failures, short_channel_id) = {
+		let (pending_forwards, mut pending_failures, short_channel_id, htlcs_to_fail) = {
 			let mut channel_state_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_state_lock;
 			match channel_state.by_id.entry(msg.channel_id) {
@@ -2679,7 +2721,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 						return Err(MsgHandleErrInternal::send_err_msg_no_close("Got a message for a channel from the wrong node!".to_owned(), msg.channel_id));
 					}
 					let was_frozen_for_monitor = chan.get().is_awaiting_monitor_update();
-					let (commitment_update, pending_forwards, pending_failures, closing_signed, monitor_update) =
+					let (commitment_update, pending_forwards, pending_failures, closing_signed, monitor_update, htlcs_to_fail) =
 						try_chan_entry!(self, chan.get_mut().revoke_and_ack(&msg, &self.fee_estimator, &self.logger), channel_state, chan);
 					if let Err(e) = self.monitor.update_monitor(chan.get().get_funding_txo().unwrap(), monitor_update) {
 						if was_frozen_for_monitor {
@@ -2701,14 +2743,15 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 							msg,
 						});
 					}
-					(pending_forwards, pending_failures, chan.get().get_short_channel_id().expect("RAA should only work on a short-id-available channel"))
+					(pending_forwards, pending_failures, chan.get().get_short_channel_id().expect("RAA should only work on a short-id-available channel"), htlcs_to_fail)
 				},
 				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id))
 			}
 		};
 		for failure in pending_failures.drain(..) {
 			self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
 		}
+		if let Err(e) = self.fail_holding_cell_htlcs(htlcs_to_fail, msg.channel_id) { return Err(e) };
 		self.forward_htlcs(&mut [(short_channel_id, pending_forwards)]);
 
 		Ok(())
@@ -2792,7 +2835,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 				if chan.get().get_their_node_id() != *their_node_id {
 					return Err(MsgHandleErrInternal::send_err_msg_no_close("Got a message for a channel from the wrong node!".to_owned(), msg.channel_id));
 				}
-				let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, mut order, shutdown) =
+				let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, htlcs_to_fail, mut order, shutdown) =
 					try_chan_entry!(self, chan.get_mut().channel_reestablish(msg, &self.logger), channel_state, chan);
 				if let Some(monitor_update) = monitor_update_opt {
 					if let Err(e) = self.monitor.update_monitor(chan.get().get_funding_txo().unwrap(), monitor_update) {
@@ -2809,6 +2852,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 						//TODO: Resend the funding_locked if needed once we get the monitor running again
 					}
 				}
+				if let Err(e) = self.fail_holding_cell_htlcs(htlcs_to_fail, msg.channel_id) { return Err(e) };
 				if let Some(msg) = funding_locked {
 					channel_state.pending_msg_events.push(events::MessageSendEvent::SendFundingLocked {
 						node_id: their_node_id.clone(),
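For forwarded HTLCs (HTLCSource::PreviousHopData), the new fail_holding_cell_htlcs fails them backwards with failure_code 0x1000|7 (temporary_channel_failure), attaching failure data built from the channel update's flags as a big-endian u16 followed by the length-prefixed channel_update encoding; our own outbound HTLCs (HTLCSource::OutboundRoute) instead surface a PaymentFailed event with rejected_by_dest set to false. A rough sketch of that failure-data layout, with a hypothetical helper standing in for LDK's byte_utils and serialization calls:

// Hypothetical helper mirroring the onion_failure_data construction in the diff above.
// flags stands in for upd.contents.flags, and encoded_update_with_len for the output of
// upd.encode_with_len(); neither LDK call is reproduced here.
fn temporary_channel_failure_data(flags: u16, encoded_update_with_len: &[u8]) -> Vec<u8> {
    let mut res = Vec::with_capacity(2 + encoded_update_with_len.len());
    res.extend_from_slice(&flags.to_be_bytes());    // big-endian u16, like byte_utils::be16_to_array
    res.extend_from_slice(encoded_update_with_len); // length-prefixed channel_update bytes
    res
}

fn main() {
    // Toy bytes only: a real channel_update is a signed gossip message.
    let data = temporary_channel_failure_data(0x0001, &[0x00, 0x02, 0xde, 0xad]);
    assert_eq!(data, vec![0x00, 0x01, 0x00, 0x02, 0xde, 0xad]);
    // This vector becomes HTLCFailReason::Reason { failure_code: 0x1000 | 7, data }
    // when the freed HTLC is failed backwards.
    assert_eq!(0x1000u16 | 7, 0x1007);
}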
