Skip to content

Commit be123f7

Browse files
committed
Give peers one timer tick to finish handshake before disconnecting
This ensures we don't let a hung connection stick around forever if the peer never completes the initial handshake. This also resolves a race where, on receiving a second connection from a peer, we may reset their_node_id to None to prevent sending messages even though the `channel_encryptor` `is_ready_for_encryption()`. Sending pings only checks the `channel_encryptor` status, not `their_node_id` resulting in an `unwrap` on `None` in `enqueue_message`.
1 parent ed4a39f commit be123f7

File tree

1 file changed

+56
-14
lines changed

1 file changed

+56
-14
lines changed

lightning/src/ln/peer_handler.rs

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
860860
let features = InitFeatures::known();
861861
let resp = msgs::Init { features };
862862
self.enqueue_message(peer, &resp);
863+
peer.awaiting_pong_timer_tick_intervals = 0;
863864
},
864865
NextNoiseStep::ActThree => {
865866
let their_node_id = try_potential_handleerror!(peer.channel_encryptor.process_act_three(&peer.pending_read_buffer[..]));
@@ -870,6 +871,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
870871
let features = InitFeatures::known();
871872
let resp = msgs::Init { features };
872873
self.enqueue_message(peer, &resp);
874+
peer.awaiting_pong_timer_tick_intervals = 0;
873875
},
874876
NextNoiseStep::NoiseComplete => {
875877
if peer.pending_read_is_header {
@@ -1530,12 +1532,29 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
15301532
let peer_count = peers.len();
15311533

15321534
peers.retain(|descriptor, peer| {
1533-
if !peer.channel_encryptor.is_ready_for_encryption() {
1534-
// The peer needs to complete its handshake before we can exchange messages
1535+
let mut do_disconnect_peer = false;
1536+
if !peer.channel_encryptor.is_ready_for_encryption() || peer.their_node_id.is_none() {
1537+
// The peer needs to complete its handshake before we can exchange messages. We
1538+
// give peers one timer tick to complete handshake, reusing
1539+
// `awaiting_pong_timer_tick_intervals` to track number of timer ticks taken
1540+
// for handshake completion.
1541+
if peer.awaiting_pong_timer_tick_intervals != 0 {
1542+
do_disconnect_peer = true;
1543+
} else {
1544+
peer.awaiting_pong_timer_tick_intervals = 1;
1545+
return true;
1546+
}
1547+
}
1548+
1549+
if peer.awaiting_pong_timer_tick_intervals == -1 {
1550+
// Magic value set in `maybe_send_extra_ping`.
1551+
peer.awaiting_pong_timer_tick_intervals = 1;
1552+
peer.received_message_since_timer_tick = false;
15351553
return true;
15361554
}
15371555

1538-
if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
1556+
if do_disconnect_peer
1557+
|| (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
15391558
|| peer.awaiting_pong_timer_tick_intervals as u64 >
15401559
MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peer_count as u64
15411560
{
@@ -1546,21 +1565,11 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
15461565
node_id_to_descriptor.remove(&node_id);
15471566
self.message_handler.chan_handler.peer_disconnected(&node_id, false);
15481567
}
1549-
None => {
1550-
// This can't actually happen as we should have hit
1551-
// is_ready_for_encryption() previously on this same peer.
1552-
unreachable!();
1553-
},
1568+
None => {},
15541569
}
15551570
return false;
15561571
}
1557-
15581572
peer.received_message_since_timer_tick = false;
1559-
if peer.awaiting_pong_timer_tick_intervals == -1 {
1560-
// Magic value set in `maybe_send_extra_ping`.
1561-
peer.awaiting_pong_timer_tick_intervals = 1;
1562-
return true;
1563-
}
15641573

15651574
if peer.awaiting_pong_timer_tick_intervals > 0 {
15661575
peer.awaiting_pong_timer_tick_intervals += 1;
@@ -1758,4 +1767,37 @@ mod tests {
17581767
assert_eq!(cfgs[1].routing_handler.chan_upds_recvd.load(Ordering::Acquire), 100);
17591768
assert_eq!(cfgs[1].routing_handler.chan_anns_recvd.load(Ordering::Acquire), 50);
17601769
}
1770+
1771+
#[test]
1772+
fn test_handshake_timeout() {
1773+
// Tests that we time out a peer still waiting on handshake completion after a full timer
1774+
// tick.
1775+
let cfgs = create_peermgr_cfgs(2);
1776+
cfgs[0].routing_handler.request_full_sync.store(true, Ordering::Release);
1777+
cfgs[1].routing_handler.request_full_sync.store(true, Ordering::Release);
1778+
let peers = create_network(2, &cfgs);
1779+
1780+
let secp_ctx = Secp256k1::new();
1781+
let a_id = PublicKey::from_secret_key(&secp_ctx, &peers[0].our_node_secret);
1782+
let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
1783+
let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
1784+
let initial_data = peers[1].new_outbound_connection(a_id, fd_b.clone()).unwrap();
1785+
peers[0].new_inbound_connection(fd_a.clone()).unwrap();
1786+
1787+
// If we get a single timer tick before completion, that's fine
1788+
assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
1789+
peers[0].timer_tick_occurred();
1790+
assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
1791+
1792+
assert_eq!(peers[0].read_event(&mut fd_a, &initial_data).unwrap(), false);
1793+
peers[0].process_events();
1794+
assert_eq!(peers[1].read_event(&mut fd_b, &fd_a.outbound_data.lock().unwrap().split_off(0)).unwrap(), false);
1795+
peers[1].process_events();
1796+
1797+
// ...but if we get a second timer tick, we should disconnect the peer
1798+
peers[0].timer_tick_occurred();
1799+
assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 0);
1800+
1801+
assert!(peers[0].read_event(&mut fd_a, &fd_b.outbound_data.lock().unwrap().split_off(0)).is_err());
1802+
}
17611803
}

0 commit comments

Comments
 (0)