Skip to content

Commit ce43f53

Browse files
committed
Fix long-standing race in net-tokio reading after a disconnect event
If rust-lightning tells us to disconnect a socket after we read some bytes from the socket, but before we actually give those bytes to rust-lightning, we may end up calling rust-lightning with a Descriptor that isn't registered anymore. Sadly, there really isn't a good way to solve this, and it should be a pretty quick event, so we just busy-wait.
1 parent 0406d58 commit ce43f53

File tree

1 file changed

+35
-15
lines changed

1 file changed

+35
-15
lines changed

lightning-net-tokio/src/lib.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use lightning::ln::peer_handler;
5252
use lightning::ln::peer_handler::SocketDescriptor as LnSocketTrait;
5353
use lightning::ln::msgs::ChannelMessageHandler;
5454

55-
use std::task;
55+
use std::{task, thread};
5656
use std::net::SocketAddr;
5757
use std::sync::{Arc, Mutex};
5858
use std::sync::atomic::{AtomicU64, Ordering};
@@ -84,6 +84,11 @@ struct Connection {
8484
// send into any read_blocker to wake the reading future back up and set read_paused back to
8585
// false.
8686
read_blocker: Option<oneshot::Sender<()>>,
87+
// When we are told by rust-lightning to disconnect, we can't return to rust-lightning until we
88+
// are sure we won't call any more read/write PeerManager functions with the same connection.
89+
// This is set to true if we're in such a condition (with disconnect checked before with the
90+
// top-level mutex held) and false when we can return.
91+
disconnect_block: bool,
8792
read_paused: bool,
8893
// If we get disconnected via SocketDescriptor::disconnect_socket(), we don't call
8994
// disconnect_event(), but if we get an Err return value out of PeerManager, in general, we do.
@@ -105,20 +110,29 @@ impl Connection {
105110
} }
106111
}
107112

113+
macro_rules! prepare_read_write_call {
114+
() => { {
115+
let mut us_lock = us.lock().unwrap();
116+
if us_lock.disconnect {
117+
shutdown_socket!("disconnect_socket() call from RL");
118+
}
119+
us_lock.disconnect_block = true;
120+
} }
121+
}
122+
108123
// Whenever we want to block on reading or waiting for reading to resume, we have to
109124
// at least select with the write_avail_receiver, which is used by the
110125
// SocketDescriptor to wake us up if we need to shut down the socket or if we need
111126
// to generate a write_buffer_space_avail call.
112127
macro_rules! select_write_ev {
113128
($v: expr) => { {
114129
assert!($v.is_some()); // We can't have dropped the sending end, its in the us Arc!
115-
if us.lock().unwrap().disconnect {
116-
shutdown_socket!("disconnect_socket() call from RL");
117-
}
130+
prepare_read_write_call!();
118131
if let Err(e) = peer_manager.write_buffer_space_avail(&mut SocketDescriptor::new(us.clone())) {
119132
us.lock().unwrap().need_disconnect_event = false;
120133
shutdown_socket!(e);
121134
}
135+
us.lock().unwrap().disconnect_block = false;
122136
} }
123137
}
124138

@@ -151,6 +165,7 @@ impl Connection {
151165
v = write_avail_receiver.recv() => select_write_ev!(v),
152166
}
153167
}
168+
prepare_read_write_call!();
154169
match peer_manager.read_event(&mut SocketDescriptor::new(Arc::clone(&us)), &buf[0..len]) {
155170
Ok(pause_read) => {
156171
if pause_read {
@@ -172,6 +187,7 @@ impl Connection {
172187
shutdown_socket!(e)
173188
},
174189
}
190+
us.lock().unwrap().disconnect_block = false;
175191
},
176192
Err(e) => {
177193
println!("Connection closed: {}", e);
@@ -180,6 +196,7 @@ impl Connection {
180196
},
181197
}
182198
}
199+
us.lock().unwrap().disconnect_block = false;
183200
let writer_option = us.lock().unwrap().writer.take();
184201
if let Some(mut writer) = writer_option {
185202
// If the socket is already closed, shutdown() will fail, so just ignore it.
@@ -209,7 +226,7 @@ impl Connection {
209226

210227
(reader, receiver,
211228
Arc::new(Mutex::new(Self {
212-
writer: Some(writer), event_notify, write_avail,
229+
writer: Some(writer), event_notify, write_avail, disconnect_block: false,
213230
read_blocker: None, read_paused: false, need_disconnect_event: true, disconnect: false,
214231
id: ID_COUNTER.fetch_add(1, Ordering::AcqRel)
215232
})))
@@ -380,16 +397,19 @@ impl peer_handler::SocketDescriptor for SocketDescriptor {
380397
}
381398

382399
fn disconnect_socket(&mut self) {
383-
let mut us = self.conn.lock().unwrap();
384-
us.need_disconnect_event = false;
385-
us.disconnect = true;
386-
us.read_paused = true;
387-
// Wake up the sending thread, assuming its still alive
388-
let _ = us.write_avail.try_send(());
389-
// TODO: There's a race where we don't meet the requirements of disconnect_socket if the
390-
// read task is about to call a PeerManager function (eg read_event or write_event).
391-
// Ideally we need to release the us lock and block until we have confirmation from the
392-
// read task that it has broken out of its main loop.
400+
{
401+
let mut us = self.conn.lock().unwrap();
402+
us.need_disconnect_event = false;
403+
us.disconnect = true;
404+
us.read_paused = true;
405+
// Wake up the sending thread, assuming its still alive
406+
let _ = us.write_avail.try_send(());
407+
// Happy-path return:
408+
if !us.disconnect_block { return; }
409+
}
410+
while self.conn.lock().unwrap().disconnect_block {
411+
thread::yield_now();
412+
}
393413
}
394414
}
395415
impl Clone for SocketDescriptor {

0 commit comments

Comments
 (0)