Skip to content

Commit 99eef23

Browse files
authored
Merge pull request #640 from valentinewallace/test-holding-cell-edge-case
Fail back HTLCs that fail to be freed from the holding cell
2 parents d735a24 + 523cab8 commit 99eef23

File tree

3 files changed

+507
-109
lines changed

3 files changed

+507
-109
lines changed

lightning/src/ln/channel.rs

Lines changed: 103 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -2118,7 +2118,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
21182118

21192119
/// Used to fulfill holding_cell_htlcs when we get a remote ack (or implicitly get it by them
21202120
/// fulfilling or failing the last pending HTLC)
2121-
fn free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, ChannelError> where L::Target: Logger {
2121+
fn free_holding_cell_htlcs<L: Deref>(&mut self, logger: &L) -> Result<(Option<(msgs::CommitmentUpdate, ChannelMonitorUpdate)>, Vec<(HTLCSource, PaymentHash)>), ChannelError> where L::Target: Logger {
21222122
assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
21232123
if self.holding_cell_htlc_updates.len() != 0 || self.holding_cell_update_fee.is_some() {
21242124
log_trace!(logger, "Freeing holding cell with {} HTLC updates{}", self.holding_cell_htlc_updates.len(), if self.holding_cell_update_fee.is_some() { " and a fee update" } else { "" });
@@ -2133,110 +2133,94 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
21332133
let mut update_add_htlcs = Vec::with_capacity(htlc_updates.len());
21342134
let mut update_fulfill_htlcs = Vec::with_capacity(htlc_updates.len());
21352135
let mut update_fail_htlcs = Vec::with_capacity(htlc_updates.len());
2136-
let mut err = None;
2136+
let mut htlcs_to_fail = Vec::new();
21372137
for htlc_update in htlc_updates.drain(..) {
21382138
// Note that this *can* fail, though it should be due to rather-rare conditions on
21392139
// fee races with adding too many outputs which push our total payments just over
21402140
// the limit. In case it's less rare than I anticipate, we may want to revisit
21412141
// handling this case better and maybe fulfilling some of the HTLCs while attempting
21422142
// to rebalance channels.
2143-
if err.is_some() { // We're back to AwaitingRemoteRevoke (or are about to fail the channel)
2144-
self.holding_cell_htlc_updates.push(htlc_update);
2145-
} else {
2146-
match &htlc_update {
2147-
&HTLCUpdateAwaitingACK::AddHTLC {amount_msat, cltv_expiry, ref payment_hash, ref source, ref onion_routing_packet, ..} => {
2148-
match self.send_htlc(amount_msat, *payment_hash, cltv_expiry, source.clone(), onion_routing_packet.clone()) {
2149-
Ok(update_add_msg_option) => update_add_htlcs.push(update_add_msg_option.unwrap()),
2150-
Err(e) => {
2151-
match e {
2152-
ChannelError::Ignore(ref msg) => {
2153-
log_info!(logger, "Failed to send HTLC with payment_hash {} due to {}", log_bytes!(payment_hash.0), msg);
2154-
},
2155-
_ => {
2156-
log_info!(logger, "Failed to send HTLC with payment_hash {} resulting in a channel closure during holding_cell freeing", log_bytes!(payment_hash.0));
2157-
},
2158-
}
2159-
err = Some(e);
2143+
match &htlc_update {
2144+
&HTLCUpdateAwaitingACK::AddHTLC {amount_msat, cltv_expiry, ref payment_hash, ref source, ref onion_routing_packet, ..} => {
2145+
match self.send_htlc(amount_msat, *payment_hash, cltv_expiry, source.clone(), onion_routing_packet.clone()) {
2146+
Ok(update_add_msg_option) => update_add_htlcs.push(update_add_msg_option.unwrap()),
2147+
Err(e) => {
2148+
match e {
2149+
ChannelError::Ignore(ref msg) => {
2150+
log_info!(logger, "Failed to send HTLC with payment_hash {} due to {}", log_bytes!(payment_hash.0), msg);
2151+
// If we fail to send here, then this HTLC should
2152+
// be failed backwards. Failing to send here
2153+
// indicates that this HTLC may keep being put back
2154+
// into the holding cell without ever being
2155+
// successfully forwarded/failed/fulfilled, causing
2156+
// our counterparty to eventually close on us.
2157+
htlcs_to_fail.push((source.clone(), *payment_hash));
2158+
},
2159+
_ => {
2160+
panic!("Got a non-IgnoreError action trying to send holding cell HTLC");
2161+
},
21602162
}
21612163
}
2162-
},
2163-
&HTLCUpdateAwaitingACK::ClaimHTLC { ref payment_preimage, htlc_id, .. } => {
2164-
match self.get_update_fulfill_htlc(htlc_id, *payment_preimage, logger) {
2165-
Ok((update_fulfill_msg_option, additional_monitor_update_opt)) => {
2166-
update_fulfill_htlcs.push(update_fulfill_msg_option.unwrap());
2167-
if let Some(mut additional_monitor_update) = additional_monitor_update_opt {
2168-
monitor_update.updates.append(&mut additional_monitor_update.updates);
2169-
}
2170-
},
2171-
Err(e) => {
2172-
if let ChannelError::Ignore(_) = e {}
2173-
else {
2174-
panic!("Got a non-IgnoreError action trying to fulfill holding cell HTLC");
2175-
}
2164+
}
2165+
},
2166+
&HTLCUpdateAwaitingACK::ClaimHTLC { ref payment_preimage, htlc_id, .. } => {
2167+
match self.get_update_fulfill_htlc(htlc_id, *payment_preimage, logger) {
2168+
Ok((update_fulfill_msg_option, additional_monitor_update_opt)) => {
2169+
update_fulfill_htlcs.push(update_fulfill_msg_option.unwrap());
2170+
if let Some(mut additional_monitor_update) = additional_monitor_update_opt {
2171+
monitor_update.updates.append(&mut additional_monitor_update.updates);
2172+
}
2173+
},
2174+
Err(e) => {
2175+
if let ChannelError::Ignore(_) = e {}
2176+
else {
2177+
panic!("Got a non-IgnoreError action trying to fulfill holding cell HTLC");
21762178
}
21772179
}
2178-
},
2179-
&HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => {
2180-
match self.get_update_fail_htlc(htlc_id, err_packet.clone()) {
2181-
Ok(update_fail_msg_option) => update_fail_htlcs.push(update_fail_msg_option.unwrap()),
2182-
Err(e) => {
2183-
if let ChannelError::Ignore(_) = e {}
2184-
else {
2185-
panic!("Got a non-IgnoreError action trying to fail holding cell HTLC");
2186-
}
2180+
}
2181+
},
2182+
&HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => {
2183+
match self.get_update_fail_htlc(htlc_id, err_packet.clone()) {
2184+
Ok(update_fail_msg_option) => update_fail_htlcs.push(update_fail_msg_option.unwrap()),
2185+
Err(e) => {
2186+
if let ChannelError::Ignore(_) = e {}
2187+
else {
2188+
panic!("Got a non-IgnoreError action trying to fail holding cell HTLC");
21872189
}
21882190
}
2189-
},
2190-
}
2191-
if err.is_some() {
2192-
self.holding_cell_htlc_updates.push(htlc_update);
2193-
if let Some(ChannelError::Ignore(_)) = err {
2194-
// If we failed to add the HTLC, but got an Ignore error, we should
2195-
// still send the new commitment_signed, so reset the err to None.
2196-
err = None;
21972191
}
2198-
}
2192+
},
21992193
}
22002194
}
2201-
//TODO: Need to examine the type of err - if it's a fee issue or similar we may want to
2202-
//fail it back the route, if it's a temporary issue we can ignore it...
2203-
match err {
2204-
None => {
2205-
if update_add_htlcs.is_empty() && update_fulfill_htlcs.is_empty() && update_fail_htlcs.is_empty() && self.holding_cell_update_fee.is_none() {
2206-
// This should never actually happen and indicates we got some Errs back
2207-
// from update_fulfill_htlc/update_fail_htlc, but we handle it anyway in
2208-
// case there is some strange way to hit duplicate HTLC removes.
2209-
return Ok(None);
2210-
}
2211-
let update_fee = if let Some(feerate) = self.holding_cell_update_fee {
2212-
self.pending_update_fee = self.holding_cell_update_fee.take();
2213-
Some(msgs::UpdateFee {
2214-
channel_id: self.channel_id,
2215-
feerate_per_kw: feerate as u32,
2216-
})
2217-
} else {
2218-
None
2219-
};
2195+
if update_add_htlcs.is_empty() && update_fulfill_htlcs.is_empty() && update_fail_htlcs.is_empty() && self.holding_cell_update_fee.is_none() {
2196+
return Ok((None, htlcs_to_fail));
2197+
}
2198+
let update_fee = if let Some(feerate) = self.holding_cell_update_fee {
2199+
self.pending_update_fee = self.holding_cell_update_fee.take();
2200+
Some(msgs::UpdateFee {
2201+
channel_id: self.channel_id,
2202+
feerate_per_kw: feerate as u32,
2203+
})
2204+
} else {
2205+
None
2206+
};
22202207

2221-
let (commitment_signed, mut additional_update) = self.send_commitment_no_status_check(logger)?;
2222-
// send_commitment_no_status_check and get_update_fulfill_htlc may bump latest_monitor_id
2223-
// but we want them to be strictly increasing by one, so reset it here.
2224-
self.latest_monitor_update_id = monitor_update.update_id;
2225-
monitor_update.updates.append(&mut additional_update.updates);
2208+
let (commitment_signed, mut additional_update) = self.send_commitment_no_status_check(logger)?;
2209+
// send_commitment_no_status_check and get_update_fulfill_htlc may bump latest_monitor_id
2210+
// but we want them to be strictly increasing by one, so reset it here.
2211+
self.latest_monitor_update_id = monitor_update.update_id;
2212+
monitor_update.updates.append(&mut additional_update.updates);
22262213

2227-
Ok(Some((msgs::CommitmentUpdate {
2228-
update_add_htlcs,
2229-
update_fulfill_htlcs,
2230-
update_fail_htlcs,
2231-
update_fail_malformed_htlcs: Vec::new(),
2232-
update_fee: update_fee,
2233-
commitment_signed,
2234-
}, monitor_update)))
2235-
},
2236-
Some(e) => Err(e)
2237-
}
2214+
Ok((Some((msgs::CommitmentUpdate {
2215+
update_add_htlcs,
2216+
update_fulfill_htlcs,
2217+
update_fail_htlcs,
2218+
update_fail_malformed_htlcs: Vec::new(),
2219+
update_fee: update_fee,
2220+
commitment_signed,
2221+
}, monitor_update)), htlcs_to_fail))
22382222
} else {
2239-
Ok(None)
2223+
Ok((None, Vec::new()))
22402224
}
22412225
}
22422226

@@ -2245,7 +2229,7 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
22452229
/// waiting on this revoke_and_ack. The generation of this new commitment_signed may also fail,
22462230
/// generating an appropriate error *after* the channel state has been updated based on the
22472231
/// revoke_and_ack message.
2248-
pub fn revoke_and_ack<F: Deref, L: Deref>(&mut self, msg: &msgs::RevokeAndACK, fee_estimator: &F, logger: &L) -> Result<(Option<msgs::CommitmentUpdate>, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Option<msgs::ClosingSigned>, ChannelMonitorUpdate), ChannelError>
2232+
pub fn revoke_and_ack<F: Deref, L: Deref>(&mut self, msg: &msgs::RevokeAndACK, fee_estimator: &F, logger: &L) -> Result<(Option<msgs::CommitmentUpdate>, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Option<msgs::ClosingSigned>, ChannelMonitorUpdate, Vec<(HTLCSource, PaymentHash)>), ChannelError>
22492233
where F::Target: FeeEstimator,
22502234
L::Target: Logger,
22512235
{
@@ -2420,11 +2404,11 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
24202404
}
24212405
self.monitor_pending_forwards.append(&mut to_forward_infos);
24222406
self.monitor_pending_failures.append(&mut revoked_htlcs);
2423-
return Ok((None, Vec::new(), Vec::new(), None, monitor_update))
2407+
return Ok((None, Vec::new(), Vec::new(), None, monitor_update, Vec::new()))
24242408
}
24252409

24262410
match self.free_holding_cell_htlcs(logger)? {
2427-
Some((mut commitment_update, mut additional_update)) => {
2411+
(Some((mut commitment_update, mut additional_update)), htlcs_to_fail) => {
24282412
commitment_update.update_fail_htlcs.reserve(update_fail_htlcs.len());
24292413
for fail_msg in update_fail_htlcs.drain(..) {
24302414
commitment_update.update_fail_htlcs.push(fail_msg);
@@ -2439,9 +2423,9 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
24392423
self.latest_monitor_update_id = monitor_update.update_id;
24402424
monitor_update.updates.append(&mut additional_update.updates);
24412425

2442-
Ok((Some(commitment_update), to_forward_infos, revoked_htlcs, None, monitor_update))
2426+
Ok((Some(commitment_update), to_forward_infos, revoked_htlcs, None, monitor_update, htlcs_to_fail))
24432427
},
2444-
None => {
2428+
(None, htlcs_to_fail) => {
24452429
if require_commitment {
24462430
let (commitment_signed, mut additional_update) = self.send_commitment_no_status_check(logger)?;
24472431

@@ -2457,9 +2441,9 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
24572441
update_fail_malformed_htlcs,
24582442
update_fee: None,
24592443
commitment_signed
2460-
}), to_forward_infos, revoked_htlcs, None, monitor_update))
2444+
}), to_forward_infos, revoked_htlcs, None, monitor_update, htlcs_to_fail))
24612445
} else {
2462-
Ok((None, to_forward_infos, revoked_htlcs, self.maybe_propose_first_closing_signed(fee_estimator), monitor_update))
2446+
Ok((None, to_forward_infos, revoked_htlcs, self.maybe_propose_first_closing_signed(fee_estimator), monitor_update, htlcs_to_fail))
24632447
}
24642448
}
24652449
}
@@ -2561,6 +2545,11 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
25612545

25622546
self.holding_cell_htlc_updates.retain(|htlc_update| {
25632547
match htlc_update {
2548+
// Note that currently on channel reestablish we assert that there are
2549+
// no holding cell HTLC update_adds, so if in the future we stop
2550+
// dropping added HTLCs here and failing them backwards, then there will
2551+
// need to be corresponding changes made in the Channel's re-establish
2552+
// logic.
25642553
&HTLCUpdateAwaitingACK::AddHTLC { ref payment_hash, ref source, .. } => {
25652554
outbound_drops.push((source.clone(), payment_hash.clone()));
25662555
false
@@ -2828,15 +2817,33 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
28282817
}
28292818

28302819
if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
2820+
// Note that if in the future we no longer drop holding cell update_adds on peer
2821+
// disconnect, this logic will need to be updated.
2822+
for htlc_update in self.holding_cell_htlc_updates.iter() {
2823+
if let &HTLCUpdateAwaitingACK::AddHTLC { .. } = htlc_update {
2824+
debug_assert!(false, "There shouldn't be any add-HTLCs in the holding cell now because they should have been dropped on peer disconnect. Panic here because said HTLCs won't be handled correctly.");
2825+
}
2826+
}
2827+
28312828
// We're up-to-date and not waiting on a remote revoke (if we are our
28322829
// channel_reestablish should result in them sending a revoke_and_ack), but we may
28332830
// have received some updates while we were disconnected. Free the holding cell
28342831
// now!
28352832
match self.free_holding_cell_htlcs(logger) {
28362833
Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
28372834
Err(ChannelError::Ignore(_)) | Err(ChannelError::CloseDelayBroadcast(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
2838-
Ok(Some((commitment_update, monitor_update))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), shutdown_msg)),
2839-
Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg)),
2835+
Ok((Some((commitment_update, monitor_update)), htlcs_to_fail)) => {
2836+
// If in the future we no longer drop holding cell update_adds on peer
2837+
// disconnect, we may be handed some HTLCs to fail backwards here.
2838+
assert!(htlcs_to_fail.is_empty());
2839+
return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), shutdown_msg));
2840+
},
2841+
Ok((None, htlcs_to_fail)) => {
2842+
// If in the future we no longer drop holding cell update_adds on peer
2843+
// disconnect, we may be handed some HTLCs to fail backwards here.
2844+
assert!(htlcs_to_fail.is_empty());
2845+
return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
2846+
},
28402847
}
28412848
} else {
28422849
return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));

0 commit comments

Comments
 (0)