
Commit 4d0f955

rhysh authored and gopherbot committed
runtime: blame unlocker for mutex delay
Correct how the mutex contention profile reports on runtime-internal mutex
values, to match sync.Mutex's semantics.

Decide at the start of unlock2 whether we'd like to collect a contention
sample. If so:

 - Opt in to a slightly slower unlock path which avoids accidentally
   accepting blame for delay caused by other Ms.
 - Release the lock before doing an O(N) traversal of the stack of waiting
   Ms, to calculate the total delay to those Ms that our critical section
   caused.
 - Report that, with the current callstack, in the mutex profile.

Fixes #66999

Change-Id: I561ed8dc120669bd045d514cb0d1c6c99c2add04
Reviewed-on: https://go-review.googlesource.com/c/go/+/667615
LUCI-TryBot-Result: Go LUCI <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent bce8486 commit 4d0f955
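
For context, the contention recorded by this change is reported through the same mutex profile that covers sync.Mutex, gated by the same runtime.SetMutexProfileFraction knob. Below is a minimal sketch of surfacing that profile; the workload is a placeholder, and only standard runtime and runtime/pprof APIs are used.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
)

func main() {
	// Report roughly 1 in 5 mutex contention events; 0 restores the default
	// of no mutex profiling.
	runtime.SetMutexProfileFraction(5)
	defer runtime.SetMutexProfileFraction(0)

	// Placeholder workload: generate some contention on a sync.Mutex.
	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				mu.Lock()
				mu.Unlock()
			}
		}()
	}
	wg.Wait()

	// Write the accumulated mutex contention profile in its text form.
	pprof.Lookup("mutex").WriteTo(os.Stdout, 1)
}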

File tree

2 files changed: +165 -129 lines


src/runtime/lock_spinbit.go

Lines changed: 107 additions & 12 deletions
@@ -86,7 +86,8 @@ func key8(p *uintptr) *uint8 {
 // forming a singly-linked list with the mutex's key field pointing to the head
 // of the list.
 type mWaitList struct {
-	next muintptr // next m waiting for lock
+	next       muintptr // next m waiting for lock
+	startTicks int64    // when this m started waiting for the current lock holder, in cputicks
 }

 // lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
@@ -170,16 +171,15 @@ func lock2(l *mutex) {
 	}
 	semacreate(gp.m)

-	timer := &lockTimer{lock: l}
-	timer.begin()
+	var startTime int64
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for mutexActiveSpinCount attempts.
 	spin := 0
 	if ncpu > 1 {
 		spin = mutexActiveSpinCount
 	}

-	var weSpin, atTail bool
+	var weSpin, atTail, haveTimers bool
 	v := atomic.Loaduintptr(&l.key)
 tryAcquire:
 	for i := 0; ; i++ {
@@ -192,13 +192,13 @@ tryAcquire:
 					next = next &^ mutexSleeping
 				}
 				if atomic.Casuintptr(&l.key, v, next) {
-					timer.end()
+					gp.m.mLockProfile.end(startTime)
 					return
 				}
 			} else {
 				prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
 				if prev8&mutexLocked == 0 {
-					timer.end()
+					gp.m.mLockProfile.end(startTime)
 					return
 				}
 			}
@@ -228,6 +228,13 @@ tryAcquire:
 			throw("runtime·lock: sleeping while lock is available")
 		}

+		// Collect times for mutex profile (seen in unlock2 only via mWaitList),
+		// and for "/sync/mutex/wait/total:seconds" metric (to match).
+		if !haveTimers {
+			gp.m.mWaitList.startTicks = cputicks()
+			startTime = gp.m.mLockProfile.start()
+			haveTimers = true
+		}
 		// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
 		gp.m.mWaitList.next = mutexWaitListHead(v)

@@ -260,16 +267,54 @@ func unlock(l *mutex) {
 func unlock2(l *mutex) {
 	gp := getg()

-	prev8 := atomic.Xchg8(key8(&l.key), 0)
+	var prev8 uint8
+	var haveStackLock bool
+	var endTicks int64
+	if !mutexSampleContention() {
+		// Not collecting a sample for the contention profile, do the quick release
+		prev8 = atomic.Xchg8(key8(&l.key), 0)
+	} else {
+		// If there's contention, we'll sample it. Don't allow another
+		// lock2/unlock2 pair to finish before us and take our blame. Prevent
+		// that by trading for the stack lock with a CAS.
+		v := atomic.Loaduintptr(&l.key)
+		for {
+			if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
+				// No contention, or (stack lock unavailable) no way to calculate it
+				prev8 = atomic.Xchg8(key8(&l.key), 0)
+				endTicks = 0
+				break
+			}
+
+			// There's contention, the stack lock appeared to be available, and
+			// we'd like to collect a sample for the contention profile.
+			if endTicks == 0 {
+				// Read the time before releasing the lock. The profile will be
+				// strictly smaller than what other threads would see by timing
+				// their lock calls.
+				endTicks = cputicks()
+			}
+			next := (v | mutexStackLocked) &^ (mutexLocked | mutexSleeping)
+			if atomic.Casuintptr(&l.key, v, next) {
+				haveStackLock = true
+				prev8 = uint8(v)
+				// The fast path of lock2 may have cleared mutexSleeping.
+				// Restore it so we're sure to call unlock2Wake below.
+				prev8 |= mutexSleeping
+				break
+			}
+			v = atomic.Loaduintptr(&l.key)
+		}
+	}
 	if prev8&mutexLocked == 0 {
 		throw("unlock of unlocked lock")
 	}

 	if prev8&mutexSleeping != 0 {
-		unlock2Wake(l)
+		unlock2Wake(l, haveStackLock, endTicks)
 	}

-	gp.m.mLockProfile.recordUnlock(l)
+	gp.m.mLockProfile.store()
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")
@@ -279,15 +324,35 @@ func unlock2(l *mutex) {
 	}
 }

+// mutexSampleContention returns whether the current mutex operation should
+// report any contention it discovers.
+func mutexSampleContention() bool {
+	if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
+		return false
+	} else {
+		// TODO: have SetMutexProfileFraction do the clamping
+		rate32 := uint32(rate)
+		if int64(rate32) != rate {
+			rate32 = ^uint32(0)
+		}
+		return cheaprandn(rate32) == 0
+	}
+}
+
 // unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
 //
 //go:nowritebarrier
-func unlock2Wake(l *mutex) {
+func unlock2Wake(l *mutex, haveStackLock bool, endTicks int64) {
 	v := atomic.Loaduintptr(&l.key)

 	// On occasion, seek out and wake the M at the bottom of the stack so it
 	// doesn't starve.
 	antiStarve := cheaprandn(mutexTailWakePeriod) == 0
+
+	if haveStackLock {
+		goto useStackLock
+	}
+
 	if !(antiStarve || // avoiding starvation may require a wake
 		v&mutexSpinning == 0 || // no spinners means we must wake
 		mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
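The mutexSampleContention helper added above makes a plain 1-in-rate decision per unlock, so with a profile fraction of rate, roughly 1/rate of contended unlocks are sampled. A rough stand-alone illustration of that decision, using math/rand/v2 in place of the runtime-internal cheaprandn (the names here are illustrative, not part of the change):

package main

import (
	"fmt"
	"math/rand/v2"
)

// sample reports whether one contention event should be recorded when the
// profile rate means "report about 1 in rate events"; rate <= 0 disables it.
func sample(rate int64) bool {
	if rate <= 0 {
		return false
	}
	return rand.Int64N(rate) == 0
}

func main() {
	const rate, trials = 100, 1_000_000
	hits := 0
	for i := 0; i < trials; i++ {
		if sample(rate) {
			hits++
		}
	}
	fmt.Printf("sampled %d of %d events (expect about %d)\n", hits, trials, trials/rate)
}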
@@ -324,6 +389,30 @@ func unlock2Wake(l *mutex) {
 	// We own the mutexStackLocked flag. New Ms may push themselves onto the
 	// stack concurrently, but we're now the only thread that can remove or
 	// modify the Ms that are sleeping in the list.
+useStackLock:
+
+	if endTicks != 0 {
+		// Find the M at the bottom of the stack of waiters, which has been
+		// asleep for the longest. Take the average of its wait time and the
+		// head M's wait time for the mutex contention profile, matching the
+		// estimate we do in semrelease1 (for sync.Mutex contention).
+		//
+		// We don't keep track of the tail node (we don't need it often), so do
+		// an O(N) walk on the list of sleeping Ms to find it.
+		head := mutexWaitListHead(v).ptr()
+		for node, n := head, 0; ; {
+			n++
+			next := node.mWaitList.next.ptr()
+			if next == nil {
+				cycles := endTicks - (head.mWaitList.startTicks+node.mWaitList.startTicks)/2
+				node.mWaitList.startTicks = endTicks
+				head.mWaitList.startTicks = endTicks
+				getg().m.mLockProfile.recordUnlock(cycles * int64(n))
+				break
+			}
+			node = next
+		}
+	}

 	var committed *m // If we choose an M within the stack, we've made a promise to wake it
 	for {
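The blame calculation in the hunk above charges the sampled unlocker for all n sleeping Ms, approximating each one's delay as the average of the oldest and newest waiters' wait times. A small stand-alone sketch of that arithmetic with made-up tick values (estimateBlame is an illustrative name, not a runtime function):

package main

import "fmt"

// estimateBlame mirrors the arithmetic in the hunk above: charge the unlocker
// for n waiters, each assumed to have waited roughly the average of the
// oldest and newest waiters' delays. All values are in CPU ticks.
func estimateBlame(endTicks, oldestStart, newestStart, n int64) int64 {
	perWaiter := endTicks - (oldestStart+newestStart)/2
	return perWaiter * n
}

func main() {
	// Made-up numbers: the oldest waiter went to sleep at tick 1_000, the
	// newest at tick 7_000, and the lock is released at tick 10_000 with
	// 3 Ms asleep: (10_000 - 4_000) * 3 = 18_000 ticks of blame.
	fmt.Println(estimateBlame(10_000, 1_000, 7_000, 3))
}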
@@ -349,8 +438,14 @@ func unlock2Wake(l *mutex) {
 				prev, wakem = wakem, next
 			}
 			if wakem != mp {
-				prev.mWaitList.next = wakem.mWaitList.next
 				committed = wakem
+				prev.mWaitList.next = wakem.mWaitList.next
+				// An M sets its own startTicks when it first goes to sleep.
+				// When an unlock operation is sampled for the mutex
+				// contention profile, it takes blame for the entire list of
+				// waiting Ms but only updates the startTicks value at the
+				// tail. Copy any updates to the next-oldest M.
+				prev.mWaitList.startTicks = wakem.mWaitList.startTicks
 			}
 		}
 	}
@@ -365,7 +460,7 @@ func unlock2Wake(l *mutex) {
 			// Claimed an M. Wake it.
 			semawakeup(wakem)
 		}
-		break
+		return
 	}

 	v = atomic.Loaduintptr(&l.key)
