Skip to content

Commit 60d37b2

Browse files
committed
Fix long-standing race in net-tokio reading after a disconnect event
If rust-lightning tells us to disconnect a socket after we read some bytes from the socket, but before we actually give those bytes to rust-lightning, we may end up calling rust-lightning with a Descriptor that isn't registered anymore. Sadly, there really isn't a good way to solve this, and it should be a pretty quick event, so we just busy-wait.
1 parent 5ada940 commit 60d37b2

File tree

1 file changed

+35
-19
lines changed

1 file changed

+35
-19
lines changed

lightning-net-tokio/src/lib.rs

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ use lightning::ln::peer_handler;
7070
use lightning::ln::peer_handler::SocketDescriptor as LnSocketTrait;
7171
use lightning::ln::msgs::ChannelMessageHandler;
7272

73-
use std::task;
73+
use std::{task, thread};
7474
use std::net::SocketAddr;
7575
use std::sync::{Arc, Mutex, MutexGuard};
7676
use std::sync::atomic::{AtomicU64, Ordering};
@@ -101,6 +101,11 @@ struct Connection {
101101
// socket. To wake it up (without otherwise changing its state, we can push a value into this
102102
// Sender.
103103
read_waker: mpsc::Sender<()>,
104+
// When we are told by rust-lightning to disconnect, we can't return to rust-lightning until we
105+
// are sure we won't call any more read/write PeerManager functions with the same connection.
106+
// This is set to true if we're in such a condition (with disconnect checked before with the
107+
// top-level mutex held) and false when we can return.
108+
block_disconnect_socket: bool,
104109
read_paused: bool,
105110
rl_requested_disconnect: bool,
106111
id: u64,
@@ -143,35 +148,43 @@ impl Connection {
143148
} }
144149
}
145150

151+
macro_rules! prepare_read_write_call {
152+
() => { {
153+
let mut us_lock = us.lock().unwrap();
154+
if us_lock.rl_requested_disconnect {
155+
shutdown_socket!("disconnect_socket() call from RL", Disconnect::CloseConnection);
156+
}
157+
us_lock.block_disconnect_socket = true;
158+
} }
159+
}
160+
146161
let read_paused = us.lock().unwrap().read_paused;
147162
tokio::select! {
148163
v = write_avail_receiver.recv() => {
149164
assert!(v.is_some()); // We can't have dropped the sending end, its in the us Arc!
150-
if us.lock().unwrap().rl_requested_disconnect {
151-
shutdown_socket!("disconnect_socket() call from RL", Disconnect::CloseConnection);
152-
}
165+
prepare_read_write_call!();
153166
if let Err(e) = peer_manager.write_buffer_space_avail(&mut our_descriptor) {
154167
shutdown_socket!(e, Disconnect::CloseConnection);
155168
}
169+
us.lock().unwrap().block_disconnect_socket = false;
156170
},
157171
_ = read_wake_receiver.recv() => {},
158172
read = reader.read(&mut buf), if !read_paused => match read {
159173
Ok(0) => shutdown_socket!("Connection closed", Disconnect::PeerDisconnected),
160174
Ok(len) => {
161-
if us.lock().unwrap().rl_requested_disconnect {
162-
shutdown_socket!("disconnect_socket() call from RL", Disconnect::CloseConnection);
163-
}
175+
prepare_read_write_call!();
164176
let read_res = peer_manager.read_event(&mut our_descriptor, &buf[0..len]);
177+
let mut us_lock = us.lock().unwrap();
165178
match read_res {
166179
Ok(pause_read) => {
167-
let mut us_lock = us.lock().unwrap();
168180
if pause_read {
169181
us_lock.read_paused = true;
170182
}
171183
Self::event_trigger(&mut us_lock);
172184
},
173185
Err(e) => shutdown_socket!(e, Disconnect::CloseConnection),
174186
}
187+
us_lock.block_disconnect_socket = false;
175188
},
176189
Err(e) => shutdown_socket!(e, Disconnect::PeerDisconnected),
177190
},
@@ -203,8 +216,8 @@ impl Connection {
203216

204217
(reader, write_receiver, read_receiver,
205218
Arc::new(Mutex::new(Self {
206-
writer: Some(writer), event_notify, write_avail, read_waker,
207-
read_paused: false, rl_requested_disconnect: false,
219+
writer: Some(writer), event_notify, write_avail, read_waker, read_paused: false,
220+
block_disconnect_socket: false, rl_requested_disconnect: false,
208221
id: ID_COUNTER.fetch_add(1, Ordering::AcqRel)
209222
})))
210223
}
@@ -411,15 +424,18 @@ impl peer_handler::SocketDescriptor for SocketDescriptor {
411424
}
412425

413426
fn disconnect_socket(&mut self) {
414-
let mut us = self.conn.lock().unwrap();
415-
us.rl_requested_disconnect = true;
416-
us.read_paused = true;
417-
// Wake up the sending thread, assuming it is still alive
418-
let _ = us.write_avail.try_send(());
419-
// TODO: There's a race where we don't meet the requirements of disconnect_socket if the
420-
// read task is about to call a PeerManager function (eg read_event or write_event).
421-
// Ideally we need to release the us lock and block until we have confirmation from the
422-
// read task that it has broken out of its main loop.
427+
{
428+
let mut us = self.conn.lock().unwrap();
429+
us.rl_requested_disconnect = true;
430+
us.read_paused = true;
431+
// Wake up the sending thread, assuming it is still alive
432+
let _ = us.write_avail.try_send(());
433+
// Happy-path return:
434+
if !us.block_disconnect_socket { return; }
435+
}
436+
while self.conn.lock().unwrap().block_disconnect_socket {
437+
thread::yield_now();
438+
}
423439
}
424440
}
425441
impl Clone for SocketDescriptor {

0 commit comments

Comments
 (0)