Commit 0104080

Expand lockorder testing to look at mutexes, not specific instances
Our existing lockorder inversion checks look at specific instances of mutexes rather than the mutex as a general concept. This change instead keys the checks on the instruction pointer at which a mutex was constructed, treating all mutexes created at the same location as equivalent. This allows us to detect lockorder inversions which occur across tests, though it does substantially reduce parallelism during test runs.
1 parent 0627c0c commit 0104080
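
For intuition, here is a hypothetical sketch (not part of the diff below) of the kind of cross-test inversion this change can catch. Both tests build their locks through the same helper, so with the backtrace feature each Mutex::new call site maps to a single shared LockMetadata entry, and the opposing lock orders are related even though the Mutex instances themselves differ. The helper and test names are illustrative only, not code from this commit.

// Hypothetical illustration only -- not taken from this commit.
// `build_locks` is an assumed helper; the point is that both tests
// construct their mutexes at the same source locations.
fn build_locks() -> (Mutex<()>, Mutex<()>) {
	let a = Mutex::new(()); // construction site shared by both tests
	let b = Mutex::new(()); // likewise
	(a, b)
}

#[test]
fn forward_order() {
	let (a, b) = build_locks();
	let _a = a.lock().unwrap();
	let _b = b.lock().unwrap(); // records "a's site locked before b's site"
}

#[test]
fn reverse_order() {
	let (a, b) = build_locks();
	let _b = b.lock().unwrap();
	let _a = a.lock().unwrap(); // inverts that order, so one of the two tests is expected to hit the lockorder panic
}

Previously, each test constructed distinct Mutex instances with their own LockMetadata, so an inversion split across two tests like this went unnoticed.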

File tree

1 file changed: +175 −99 lines


lightning/src/debug_sync.rs

Lines changed: 175 additions & 99 deletions
@@ -5,6 +5,7 @@ use core::time::Duration;
 use std::collections::HashSet;
 use std::cell::RefCell;
 
+#[cfg(not(feature = "backtrace"))]
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 use std::sync::Mutex as StdMutex;
@@ -15,7 +16,12 @@ use std::sync::RwLockWriteGuard as StdRwLockWriteGuard;
 use std::sync::Condvar as StdCondvar;
 
 #[cfg(feature = "backtrace")]
-use backtrace::Backtrace;
+use {prelude::HashMap, backtrace::Backtrace, std::sync::Once};
+
+#[cfg(not(feature = "backtrace"))]
+struct Backtrace{}
+#[cfg(not(feature = "backtrace"))]
+impl Backtrace { fn new() -> Backtrace { Backtrace {} } }
 
 pub type LockResult<Guard> = Result<Guard, ()>;
 
@@ -46,14 +52,19 @@ thread_local! {
 	/// We track the set of locks currently held by a reference to their `LockMetadata`
 	static LOCKS_HELD: RefCell<HashSet<Arc<LockMetadata>>> = RefCell::new(HashSet::new());
 }
+#[cfg(not(feature = "backtrace"))]
 static LOCK_IDX: AtomicUsize = AtomicUsize::new(0);
 
+#[cfg(feature = "backtrace")]
+static mut LOCKS: Option<StdMutex<HashMap<u64, Arc<LockMetadata>>>> = None;
+#[cfg(feature = "backtrace")]
+static LOCKS_INIT: Once = Once::new();
+
 /// Metadata about a single lock, by id, the set of things locked-before it, and the backtrace of
 /// when the Mutex itself was constructed.
 struct LockMetadata {
 	lock_idx: u64,
-	locked_before: StdMutex<HashSet<Arc<LockMetadata>>>,
-	#[cfg(feature = "backtrace")]
+	locked_before: StdMutex<HashSet<LockDep>>,
 	lock_construction_bt: Backtrace,
 }
 impl PartialEq for LockMetadata {
@@ -64,14 +75,62 @@ impl std::hash::Hash for LockMetadata {
 	fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.lock_idx); }
 }
 
+struct LockDep {
+	lock: Arc<LockMetadata>,
+	lockdep_trace: Option<Backtrace>,
+}
+impl LockDep {
+	/// Note that `Backtrace::new()` is rather expensive so we rely on the caller to fill in the
+	/// `lockdep_backtrace` field after ensuring we need it.
+	fn new_without_bt(lock: &Arc<LockMetadata>) -> Self {
+		Self { lock: Arc::clone(lock), lockdep_trace: None }
+	}
+}
+impl PartialEq for LockDep {
+	fn eq(&self, o: &LockDep) -> bool { self.lock.lock_idx == o.lock.lock_idx }
+}
+impl Eq for LockDep {}
+impl std::hash::Hash for LockDep {
+	fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.lock.lock_idx); }
+}
+
 impl LockMetadata {
-	fn new() -> LockMetadata {
-		LockMetadata {
-			locked_before: StdMutex::new(HashSet::new()),
-			lock_idx: LOCK_IDX.fetch_add(1, Ordering::Relaxed) as u64,
-			#[cfg(feature = "backtrace")]
-			lock_construction_bt: Backtrace::new(),
+	fn new() -> Arc<LockMetadata> {
+		let lock_idx;
+		let backtrace = Backtrace::new();
+
+		#[cfg(not(feature = "backtrace"))]
+		{ lock_idx = LOCK_IDX.fetch_add(1, Ordering::Relaxed) as u64; }
+
+		#[cfg(feature = "backtrace")]
+		{
+			let mut ip = None;
+			// Find the first frame which is after `debug_sync` (or which is in our tests) and use
+			// that as the mutex construction site. Note that the first few frames may be in
+			// `backtrace`, so we have to ignore those.
+			let sync_mutex_constr_regex = regex::Regex::new(r"lightning.*debug_sync.*new").unwrap();
+			let mut found_debug_sync = false;
+			for frame in backtrace.frames() {
+				let symbol_name = frame.symbols().last().unwrap().name().unwrap().as_str().unwrap();
+				if !sync_mutex_constr_regex.is_match(symbol_name) {
+					if found_debug_sync {
+						ip = Some(frame.ip() as usize as u64);
+						break;
+					}
+				} else { found_debug_sync = true; }
+			}
+			lock_idx = ip.unwrap();
+			LOCKS_INIT.call_once(|| { unsafe { LOCKS = Some(StdMutex::new(HashMap::new())); } });
+			if let Some(metadata) = unsafe { LOCKS.as_ref() }.unwrap().lock().unwrap().get(&lock_idx) {
+				return Arc::clone(&metadata);
+			}
 		}
+
+		Arc::new(LockMetadata {
+			locked_before: StdMutex::new(HashSet::new()),
+			lock_idx,
+			lock_construction_bt: backtrace,
+		})
 	}
 
 	// Returns whether we were a recursive lock (only relevant for read)
@@ -89,18 +148,25 @@ impl LockMetadata {
 			}
 			for locked in held.borrow().iter() {
 				if !read && *locked == *this {
-					panic!("Tried to lock a lock while it was held!");
+					// With `feature = "backtrace"` set, we may be looking at different instances
+					// of the same lock.
+					debug_assert!(cfg!(feature = "backtrace"), "Tried to lock a lock while it was held!");
 				}
 				for locked_dep in locked.locked_before.lock().unwrap().iter() {
-					if *locked_dep == *this {
+					if locked_dep.lock == *this && locked_dep.lock != *locked {
 						#[cfg(feature = "backtrace")]
-						panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\n{:?}", locked.lock_construction_bt);
+						panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock constructed at:\n{:?}\n\nLock dep created at:\n{:?}\n\n", locked.lock_construction_bt, locked_dep.lockdep_trace);
 						#[cfg(not(feature = "backtrace"))]
 						panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
 					}
 				}
 				// Insert any already-held locks in our locked-before set.
-				this.locked_before.lock().unwrap().insert(Arc::clone(locked));
+				let mut locked_before = this.locked_before.lock().unwrap();
+				let mut lockdep = LockDep::new_without_bt(locked);
+				if !locked_before.contains(&lockdep) {
+					lockdep.lockdep_trace = Some(Backtrace::new());
+					locked_before.insert(lockdep);
+				}
 			}
 			held.borrow_mut().insert(Arc::clone(this));
 			inserted = true;
@@ -116,10 +182,15 @@ impl LockMetadata {
 			// Since a try-lock will simply fail if the lock is held already, we do not
 			// consider try-locks to ever generate lockorder inversions. However, if a try-lock
 			// succeeds, we do consider it to have created lockorder dependencies.
+			held.borrow_mut().insert(Arc::clone(this));
+			let mut locked_before = this.locked_before.lock().unwrap();
 			for locked in held.borrow().iter() {
-				this.locked_before.lock().unwrap().insert(Arc::clone(locked));
+				let mut lockdep = LockDep::new_without_bt(locked);
+				if !locked_before.contains(&lockdep) {
+					lockdep.lockdep_trace = Some(Backtrace::new());
+					locked_before.insert(lockdep);
+				}
 			}
-			held.borrow_mut().insert(Arc::clone(this));
 		});
 	}
 }
@@ -170,7 +241,7 @@ impl<T: Sized> DerefMut for MutexGuard<'_, T> {
 
 impl<T> Mutex<T> {
 	pub fn new(inner: T) -> Mutex<T> {
-		Mutex { inner: StdMutex::new(inner), deps: Arc::new(LockMetadata::new()) }
+		Mutex { inner: StdMutex::new(inner), deps: LockMetadata::new() }
 	}
 
 	pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
@@ -249,7 +320,7 @@ impl<T: Sized> DerefMut for RwLockWriteGuard<'_, T> {
 
 impl<T> RwLock<T> {
	pub fn new(inner: T) -> RwLock<T> {
-		RwLock { inner: StdRwLock::new(inner), deps: Arc::new(LockMetadata::new()) }
+		RwLock { inner: StdRwLock::new(inner), deps: LockMetadata::new() }
 	}
 
 	pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
@@ -271,96 +342,101 @@ impl<T> RwLock<T> {
 	}
 }
 
-#[test]
-#[should_panic]
-fn recursive_lock_fail() {
-	let mutex = Mutex::new(());
-	let _a = mutex.lock().unwrap();
-	let _b = mutex.lock().unwrap();
-}
-
-#[test]
-fn recursive_read() {
-	let lock = RwLock::new(());
-	let _a = lock.read().unwrap();
-	let _b = lock.read().unwrap();
-}
+pub type FairRwLock<T> = RwLock<T>;
 
-#[test]
-#[should_panic]
-fn lockorder_fail() {
-	let a = Mutex::new(());
-	let b = Mutex::new(());
-	{
-		let _a = a.lock().unwrap();
-		let _b = b.lock().unwrap();
-	}
-	{
-		let _b = b.lock().unwrap();
-		let _a = a.lock().unwrap();
+mod tests {
+	use super::{RwLock, Mutex};
+
+	#[test]
+	#[should_panic]
+	#[cfg(not(feature = "backtrace"))]
+	fn recursive_lock_fail() {
+		let mutex = Mutex::new(());
+		let _a = mutex.lock().unwrap();
+		let _b = mutex.lock().unwrap();
+	}
+
+	#[test]
+	fn recursive_read() {
+		let lock = RwLock::new(());
+		let _a = lock.read().unwrap();
+		let _b = lock.read().unwrap();
+	}
+
+	#[test]
+	#[should_panic]
+	fn lockorder_fail() {
+		let a = Mutex::new(());
+		let b = Mutex::new(());
+		{
+			let _a = a.lock().unwrap();
+			let _b = b.lock().unwrap();
+		}
+		{
+			let _b = b.lock().unwrap();
+			let _a = a.lock().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn write_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.write().unwrap();
-		let _b = b.write().unwrap();
-	}
-	{
-		let _b = b.write().unwrap();
-		let _a = a.write().unwrap();
+	#[test]
+	#[should_panic]
+	fn write_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.write().unwrap();
+			let _b = b.write().unwrap();
+		}
+		{
+			let _b = b.write().unwrap();
+			let _a = a.write().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn read_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.read().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.read().unwrap();
+	#[test]
+	#[should_panic]
+	fn read_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.read().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.read().unwrap();
+		}
 	}
-}
 
-#[test]
-fn read_recurisve_no_lockorder() {
-	// Like the above, but note that no lockorder is implied when we recursively read-lock a
-	// RwLock, causing this to pass just fine.
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	let _outer = a.read().unwrap();
-	{
-		let _a = a.read().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.read().unwrap();
+	#[test]
+	fn read_recurisve_no_lockorder() {
+		// Like the above, but note that no lockorder is implied when we recursively read-lock a
+		// RwLock, causing this to pass just fine.
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		let _outer = a.read().unwrap();
+		{
+			let _a = a.read().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.read().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn read_write_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.write().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.write().unwrap();
+	#[test]
+	#[should_panic]
+	fn read_write_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.write().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.write().unwrap();
+		}
 	}
 }
-
-pub type FairRwLock<T> = RwLock<T>;
0 commit comments

Comments
 (0)