@@ -86,7 +86,8 @@ func key8(p *uintptr) *uint8 {
 // forming a singly-linked list with the mutex's key field pointing to the head
 // of the list.
 type mWaitList struct {
-	next muintptr // next m waiting for lock
+	next       muintptr // next m waiting for lock
+	startTicks int64    // when this m started waiting for the current lock holder, in cputicks
 }
 
 // lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
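Rough illustration of what this struct change enables (not the runtime's code; the waiter and waitList names below are hypothetical, and nanoseconds stand in for cputicks): each waiting thread embeds its own list link plus the time it began waiting, so the list costs no allocation and the unlocking thread can later read every waiter's start time.

	package main

	import (
		"fmt"
		"time"
	)

	// waiter is a stand-in for the runtime's m: it carries its own list link
	// and remembers when it started waiting.
	type waiter struct {
		next       *waiter
		startTicks int64
	}

	type waitList struct {
		head *waiter
	}

	// push stamps the waiter's start time and makes it the new head, the way
	// lock2 sets gp.m.mWaitList.startTicks before going to sleep.
	func (l *waitList) push(w *waiter) {
		w.startTicks = time.Now().UnixNano()
		w.next = l.head
		l.head = w
	}

	func main() {
		var l waitList
		a, b := &waiter{}, &waiter{}
		l.push(a)
		l.push(b)
		fmt.Println(l.head == b, l.head.next == a, b.startTicks >= a.startTicks)
	}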
@@ -170,16 +171,15 @@ func lock2(l *mutex) {
 	}
 	semacreate(gp.m)
 
-	timer := &lockTimer{lock: l}
-	timer.begin()
+	var startTime int64
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for mutexActiveSpinCount attempts.
 	spin := 0
 	if ncpu > 1 {
 		spin = mutexActiveSpinCount
 	}
 
-	var weSpin, atTail bool
+	var weSpin, atTail, haveTimers bool
 	v := atomic.Loaduintptr(&l.key)
 tryAcquire:
 	for i := 0; ; i++ {
@@ -192,13 +192,13 @@ tryAcquire:
 					next = next &^ mutexSleeping
 				}
 				if atomic.Casuintptr(&l.key, v, next) {
-					timer.end()
+					gp.m.mLockProfile.end(startTime)
 					return
 				}
 			} else {
 				prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
 				if prev8&mutexLocked == 0 {
-					timer.end()
+					gp.m.mLockProfile.end(startTime)
 					return
 				}
 			}
@@ -228,6 +228,13 @@ tryAcquire:
 			throw("runtime·lock: sleeping while lock is available")
 		}
 
+		// Collect times for mutex profile (seen in unlock2 only via mWaitList),
+		// and for "/sync/mutex/wait/total:seconds" metric (to match).
+		if !haveTimers {
+			gp.m.mWaitList.startTicks = cputicks()
+			startTime = gp.m.mLockProfile.start()
+			haveTimers = true
+		}
 		// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
 		gp.m.mWaitList.next = mutexWaitListHead(v)
@@ -260,16 +267,54 @@ func unlock(l *mutex) {
 func unlock2(l *mutex) {
 	gp := getg()
 
-	prev8 := atomic.Xchg8(key8(&l.key), 0)
+	var prev8 uint8
+	var haveStackLock bool
+	var endTicks int64
+	if !mutexSampleContention() {
+		// Not collecting a sample for the contention profile, do the quick release
+		prev8 = atomic.Xchg8(key8(&l.key), 0)
+	} else {
+		// If there's contention, we'll sample it. Don't allow another
+		// lock2/unlock2 pair to finish before us and take our blame. Prevent
+		// that by trading for the stack lock with a CAS.
+		v := atomic.Loaduintptr(&l.key)
+		for {
+			if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
+				// No contention, or (stack lock unavailable) no way to calculate it
+				prev8 = atomic.Xchg8(key8(&l.key), 0)
+				endTicks = 0
+				break
+			}
+
+			// There's contention, the stack lock appeared to be available, and
+			// we'd like to collect a sample for the contention profile.
+			if endTicks == 0 {
+				// Read the time before releasing the lock. The profile will be
+				// strictly smaller than what other threads would see by timing
+				// their lock calls.
+				endTicks = cputicks()
+			}
+			next := (v | mutexStackLocked) &^ (mutexLocked | mutexSleeping)
+			if atomic.Casuintptr(&l.key, v, next) {
+				haveStackLock = true
+				prev8 = uint8(v)
+				// The fast path of lock2 may have cleared mutexSleeping.
+				// Restore it so we're sure to call unlock2Wake below.
+				prev8 |= mutexSleeping
+				break
+			}
+			v = atomic.Loaduintptr(&l.key)
+		}
+	}
 	if prev8&mutexLocked == 0 {
 		throw("unlock of unlocked lock")
 	}
 
 	if prev8&mutexSleeping != 0 {
-		unlock2Wake(l)
+		unlock2Wake(l, haveStackLock, endTicks)
 	}
 
-	gp.m.mLockProfile.recordUnlock(l)
+	gp.m.mLockProfile.store()
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")
@@ -279,15 +324,35 @@ func unlock2(l *mutex) {
 	}
 }
 
+// mutexSampleContention returns whether the current mutex operation should
+// report any contention it discovers.
+func mutexSampleContention() bool {
+	if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
+		return false
+	} else {
+		// TODO: have SetMutexProfileFraction do the clamping
+		rate32 := uint32(rate)
+		if int64(rate32) != rate {
+			rate32 = ^uint32(0)
+		}
+		return cheaprandn(rate32) == 0
+	}
+}
+
 // unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
 //
 //go:nowritebarrier
-func unlock2Wake(l *mutex) {
+func unlock2Wake(l *mutex, haveStackLock bool, endTicks int64) {
 	v := atomic.Loaduintptr(&l.key)
 
 	// On occasion, seek out and wake the M at the bottom of the stack so it
 	// doesn't starve.
 	antiStarve := cheaprandn(mutexTailWakePeriod) == 0
+
+	if haveStackLock {
+		goto useStackLock
+	}
+
 	if !(antiStarve || // avoiding starvation may require a wake
 		v&mutexSpinning == 0 || // no spinners means we must wake
 		mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
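A standalone sketch of the 1-in-rate sampling decision in mutexSampleContention, with math/rand/v2 standing in for the runtime's cheaprandn: rates too large for a uint32 are clamped so the random draw stays in range, and on average one operation in every `rate` reports its contention.

	package main

	import (
		"fmt"
		"math/rand/v2"
	)

	// sampleContention mirrors the clamping and 1-in-rate draw above.
	func sampleContention(rate int64) bool {
		if rate <= 0 {
			return false
		}
		rate32 := uint32(rate)
		if int64(rate32) != rate { // rate doesn't fit in 32 bits: clamp
			rate32 = ^uint32(0)
		}
		return rand.Uint32N(rate32) == 0 // roughly a 1-in-rate chance
	}

	func main() {
		hits := 0
		for i := 0; i < 1_000_000; i++ {
			if sampleContention(100) {
				hits++
			}
		}
		fmt.Println("sampled about 1 in 100:", hits) // expect roughly 10000
	}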
@@ -324,6 +389,30 @@ func unlock2Wake(l *mutex) {
 	// We own the mutexStackLocked flag. New Ms may push themselves onto the
 	// stack concurrently, but we're now the only thread that can remove or
 	// modify the Ms that are sleeping in the list.
+useStackLock:
+
+	if endTicks != 0 {
+		// Find the M at the bottom of the stack of waiters, which has been
+		// asleep for the longest. Take the average of its wait time and the
+		// head M's wait time for the mutex contention profile, matching the
+		// estimate we do in semrelease1 (for sync.Mutex contention).
+		//
+		// We don't keep track of the tail node (we don't need it often), so do
+		// an O(N) walk on the list of sleeping Ms to find it.
+		head := mutexWaitListHead(v).ptr()
+		for node, n := head, 0; ; {
+			n++
+			next := node.mWaitList.next.ptr()
+			if next == nil {
+				cycles := endTicks - (head.mWaitList.startTicks+node.mWaitList.startTicks)/2
+				node.mWaitList.startTicks = endTicks
+				head.mWaitList.startTicks = endTicks
+				getg().m.mLockProfile.recordUnlock(cycles * int64(n))
+				break
+			}
+			node = next
+		}
+	}
 
 	var committed *m // If we choose an M within the stack, we've made a promise to wake it
 	for {
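A worked example of the estimate recorded above, with made-up tick values: the unlocker charges itself the average of the newest and oldest individual wait times, multiplied by the number of sleeping Ms, approximating the total delay it caused (the same estimate semrelease1 uses for sync.Mutex).

	package main

	import "fmt"

	func main() {
		endTicks := int64(1000)
		headStart := int64(700) // newest sleeper: has waited 300 ticks
		tailStart := int64(100) // oldest sleeper: has waited 900 ticks
		n := int64(3)           // number of Ms found on the list

		perWaiter := endTicks - (headStart+tailStart)/2 // average wait: 600 ticks
		total := perWaiter * n                          // blame recorded: 1800 ticks
		fmt.Println(perWaiter, total)
	}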
@@ -349,8 +438,14 @@ func unlock2Wake(l *mutex) {
 					prev, wakem = wakem, next
 				}
 				if wakem != mp {
-					prev.mWaitList.next = wakem.mWaitList.next
 					committed = wakem
+					prev.mWaitList.next = wakem.mWaitList.next
+					// An M sets its own startTicks when it first goes to sleep.
+					// When an unlock operation is sampled for the mutex
+					// contention profile, it takes blame for the entire list of
+					// waiting Ms but only updates the startTicks value at the
+					// tail. Copy any updates to the next-oldest M.
+					prev.mWaitList.startTicks = wakem.mWaitList.startTicks
 				}
 			}
 		}
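A small sketch (hypothetical node type) of the timestamp propagation above: a sampled unlock refreshes only the tail's startTicks after taking blame, so when that tail is unlinked and woken, its refreshed timestamp has to move to the node that becomes the new tail; otherwise, by my reading of the comment above, a later sample could charge the same waiting time again.

	package main

	import "fmt"

	type node struct {
		next       *node
		startTicks int64
	}

	// unlinkNext removes prev.next from the list and carries its timestamp
	// over to prev, which becomes the new tail.
	func unlinkNext(prev *node) {
		victim := prev.next
		prev.next = victim.next
		prev.startTicks = victim.startTicks
	}

	func main() {
		tail := &node{startTicks: 500} // already credited up to tick 500
		mid := &node{next: tail, startTicks: 100}
		head := &node{next: mid, startTicks: 200}

		unlinkNext(mid) // wake the tail; mid is now the tail
		fmt.Println(head.next == mid, mid.startTicks) // true 500
	}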
@@ -365,7 +460,7 @@ func unlock2Wake(l *mutex) {
 				// Claimed an M. Wake it.
 				semawakeup(wakem)
 			}
-			break
+			return
 		}
 
 		v = atomic.Loaduintptr(&l.key)