Skip to content

Commit 1c00354

Browse files
rscgopherbot
authored andcommitted
runtime: change mutex profile to count every blocked goroutine
The pprof mutex profile was meant to match the Google C++ (now Abseil) mutex profiler, originally designed and implemented by Mike Burrows. When we worked on the Go version, pjw and I missed that C++ counts the time each thread is blocked, even if multiple threads are blocked on a mutex. That is, if 100 threads are blocked on the same mutex for the same 10ms, that still counts as 1000ms of contention in C++. In Go, to date, /debug/pprof/mutex has counted that as only 10ms of contention. If 100 goroutines are blocked on one mutex and only 1 goroutine is blocked on another mutex, we probably do want to see the first mutex as being more contended, so the Abseil approach is the more useful one. This CL adopts "contention scales with number of goroutines blocked", to better match Abseil [1]. However, it still makes sure to attribute the time to the unlock that caused the backup, not subsequent innocent unlocks that were affected by the congestion. In this way it still gives more accurate profiles than Abseil does. [1] https://github.com/abseil/abseil-cpp/blob/lts_2023_01_25/absl/synchronization/mutex.cc#L2390 Fixes #61015. Change-Id: I7eb9e706867ffa8c0abb5b26a1b448f6eba49331 Reviewed-on: https://go-review.googlesource.com/c/go/+/506415 Run-TryBot: Russ Cox <[email protected]> Auto-Submit: Russ Cox <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Michael Knyszek <[email protected]>
1 parent 14a3ffc commit 1c00354

File tree

5 files changed

+115
-26
lines changed

5 files changed

+115
-26
lines changed

src/runtime/export_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,7 @@ func (t *SemTable) Enqueue(addr *uint32) {
13301330
//
13311331
// Returns true if there actually was a waiter to be dequeued.
13321332
func (t *SemTable) Dequeue(addr *uint32) bool {
1333-
s, _ := t.semTable.rootFor(addr).dequeue(addr)
1333+
s, _, _ := t.semTable.rootFor(addr).dequeue(addr)
13341334
if s != nil {
13351335
releaseSudog(s)
13361336
return true

src/runtime/mprof.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,6 @@ func mutexevent(cycles int64, skip int) {
553553
cycles = 0
554554
}
555555
rate := int64(atomic.Load64(&mutexprofilerate))
556-
// TODO(pjw): measure impact of always calling fastrand vs using something
557-
// like malloc.go:nextSample()
558556
if rate > 0 && int64(fastrand())%rate == 0 {
559557
saveblockevent(cycles, rate, skip+1, mutexProfile)
560558
}

src/runtime/pprof/pprof_test.go

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,7 @@ func containsStack(got [][]string, want []string) bool {
10231023
// awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump
10241024
// shows a goroutine in the given state with a stack frame in
10251025
// runtime/pprof.<fName>.
1026-
func awaitBlockedGoroutine(t *testing.T, state, fName string) {
1026+
func awaitBlockedGoroutine(t *testing.T, state, fName string, count int) {
10271027
re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName)
10281028
r := regexp.MustCompile(re)
10291029

@@ -1047,7 +1047,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
10471047
buf = make([]byte, 2*len(buf))
10481048
continue
10491049
}
1050-
if r.Match(buf[:n]) {
1050+
if len(r.FindAll(buf[:n], -1)) >= count {
10511051
return
10521052
}
10531053
}
@@ -1056,7 +1056,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
10561056
func blockChanRecv(t *testing.T) {
10571057
c := make(chan bool)
10581058
go func() {
1059-
awaitBlockedGoroutine(t, "chan receive", "blockChanRecv")
1059+
awaitBlockedGoroutine(t, "chan receive", "blockChanRecv", 1)
10601060
c <- true
10611061
}()
10621062
<-c
@@ -1065,7 +1065,7 @@ func blockChanRecv(t *testing.T) {
10651065
func blockChanSend(t *testing.T) {
10661066
c := make(chan bool)
10671067
go func() {
1068-
awaitBlockedGoroutine(t, "chan send", "blockChanSend")
1068+
awaitBlockedGoroutine(t, "chan send", "blockChanSend", 1)
10691069
<-c
10701070
}()
10711071
c <- true
@@ -1074,7 +1074,7 @@ func blockChanSend(t *testing.T) {
10741074
func blockChanClose(t *testing.T) {
10751075
c := make(chan bool)
10761076
go func() {
1077-
awaitBlockedGoroutine(t, "chan receive", "blockChanClose")
1077+
awaitBlockedGoroutine(t, "chan receive", "blockChanClose", 1)
10781078
close(c)
10791079
}()
10801080
<-c
@@ -1086,7 +1086,7 @@ func blockSelectRecvAsync(t *testing.T) {
10861086
c2 := make(chan bool, 1)
10871087
go func() {
10881088
for i := 0; i < numTries; i++ {
1089-
awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync")
1089+
awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync", 1)
10901090
c <- true
10911091
}
10921092
}()
@@ -1102,7 +1102,7 @@ func blockSelectSendSync(t *testing.T) {
11021102
c := make(chan bool)
11031103
c2 := make(chan bool)
11041104
go func() {
1105-
awaitBlockedGoroutine(t, "select", "blockSelectSendSync")
1105+
awaitBlockedGoroutine(t, "select", "blockSelectSendSync", 1)
11061106
<-c
11071107
}()
11081108
select {
@@ -1115,7 +1115,7 @@ func blockMutex(t *testing.T) {
11151115
var mu sync.Mutex
11161116
mu.Lock()
11171117
go func() {
1118-
awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex")
1118+
awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", 1)
11191119
mu.Unlock()
11201120
}()
11211121
// Note: Unlock releases mu before recording the mutex event,
@@ -1125,12 +1125,36 @@ func blockMutex(t *testing.T) {
11251125
mu.Lock()
11261126
}
11271127

1128+
func blockMutexN(t *testing.T, n int, d time.Duration) {
1129+
var wg sync.WaitGroup
1130+
var mu sync.Mutex
1131+
mu.Lock()
1132+
go func() {
1133+
awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", n)
1134+
time.Sleep(d)
1135+
mu.Unlock()
1136+
}()
1137+
// Note: Unlock releases mu before recording the mutex event,
1138+
// so it's theoretically possible for this to proceed and
1139+
// capture the profile before the event is recorded. As long
1140+
// as this is blocked before the unlock happens, it's okay.
1141+
for i := 0; i < n; i++ {
1142+
wg.Add(1)
1143+
go func() {
1144+
defer wg.Done()
1145+
mu.Lock()
1146+
mu.Unlock()
1147+
}()
1148+
}
1149+
wg.Wait()
1150+
}
1151+
11281152
func blockCond(t *testing.T) {
11291153
var mu sync.Mutex
11301154
c := sync.NewCond(&mu)
11311155
mu.Lock()
11321156
go func() {
1133-
awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond")
1157+
awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond", 1)
11341158
mu.Lock()
11351159
c.Signal()
11361160
mu.Unlock()
@@ -1217,7 +1241,11 @@ func TestMutexProfile(t *testing.T) {
12171241
t.Fatalf("need MutexProfileRate 0, got %d", old)
12181242
}
12191243

1220-
blockMutex(t)
1244+
const (
1245+
N = 100
1246+
D = 100 * time.Millisecond
1247+
)
1248+
blockMutexN(t, N, D)
12211249

12221250
t.Run("debug=1", func(t *testing.T) {
12231251
var w strings.Builder
@@ -1230,15 +1258,11 @@ func TestMutexProfile(t *testing.T) {
12301258
}
12311259
prof = strings.Trim(prof, "\n")
12321260
lines := strings.Split(prof, "\n")
1233-
if len(lines) != 6 {
1234-
t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
1235-
}
12361261
if len(lines) < 6 {
1237-
return
1262+
t.Fatalf("expected >=6 lines, got %d %q\n%s", len(lines), prof, prof)
12381263
}
12391264
// checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
12401265
r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+`
1241-
//r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
12421266
if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
12431267
t.Errorf("%q didn't match %q", lines[3], r2)
12441268
}
@@ -1263,12 +1287,30 @@ func TestMutexProfile(t *testing.T) {
12631287

12641288
stks := stacks(p)
12651289
for _, want := range [][]string{
1266-
{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"},
1290+
{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexN.func1"},
12671291
} {
12681292
if !containsStack(stks, want) {
12691293
t.Errorf("No matching stack entry for %+v", want)
12701294
}
12711295
}
1296+
1297+
i := 0
1298+
for ; i < len(p.SampleType); i++ {
1299+
if p.SampleType[i].Unit == "nanoseconds" {
1300+
break
1301+
}
1302+
}
1303+
if i >= len(p.SampleType) {
1304+
t.Fatalf("profile did not contain nanoseconds sample")
1305+
}
1306+
total := int64(0)
1307+
for _, s := range p.Sample {
1308+
total += s.Value[i]
1309+
}
1310+
d := time.Duration(total)
1311+
if d < N*D*9/10 || d > N*D*2 { // want N*D but allow [0.9,2.0]*that.
1312+
t.Fatalf("profile samples total %v, want %v", d, N*D)
1313+
}
12721314
})
12731315
}
12741316

src/runtime/runtime2.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ type gobuf struct {
342342
bp uintptr // for framepointer-enabled architectures
343343
}
344344

345-
// sudog represents a g in a wait list, such as for sending/receiving
345+
// sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving
346346
// on a channel.
347347
//
348348
// sudog is necessary because the g ↔ synchronization object relation
@@ -382,6 +382,13 @@ type sudog struct {
382382
// because c was closed.
383383
success bool
384384

385+
// waiters is a count of semaRoot waiting list other than head of list,
386+
// clamped to a uint16 to fit in unused space.
387+
// Only meaningful at the head of the list.
388+
// (If we wanted to be overly clever, we could store a high 16 bits
389+
// in the second entry in the list.)
390+
waiters uint16
391+
385392
parent *sudog // semaRoot binary tree
386393
waitlink *sudog // g.waiting list or semaRoot
387394
waittail *sudog // semaRoot

src/runtime/sema.go

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,15 +191,36 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) {
191191
unlock(&root.lock)
192192
return
193193
}
194-
s, t0 := root.dequeue(addr)
194+
s, t0, tailtime := root.dequeue(addr)
195195
if s != nil {
196196
root.nwait.Add(-1)
197197
}
198198
unlock(&root.lock)
199199
if s != nil { // May be slow or even yield, so unlock first
200200
acquiretime := s.acquiretime
201201
if acquiretime != 0 {
202-
mutexevent(t0-acquiretime, 3+skipframes)
202+
// Charge contention that this (delayed) unlock caused.
203+
// If there are N more goroutines waiting beyond the
204+
// one that's waking up, charge their delay as well, so that
205+
// contention holding up many goroutines shows up as
206+
// more costly than contention holding up a single goroutine.
207+
// It would take O(N) time to calculate how long each goroutine
208+
// has been waiting, so instead we charge avg(head-wait, tail-wait)*N.
209+
// head-wait is the longest wait and tail-wait is the shortest.
210+
// (When we do a lifo insertion, we preserve this property by
211+
// copying the old head's acquiretime into the inserted new head.
212+
// In that case the overall average may be slightly high, but that's fine:
213+
// the average of the ends is only an approximation to the actual
214+
// average anyway.)
215+
// The root.dequeue above changed the head and tail acquiretime
216+
// to the current time, so the next unlock will not re-count this contention.
217+
dt0 := t0 - acquiretime
218+
dt := dt0
219+
if s.waiters != 0 {
220+
dtail := t0 - tailtime
221+
dt += (dtail + dt0) / 2 * int64(s.waiters)
222+
}
223+
mutexevent(dt, 3+skipframes)
203224
}
204225
if s.ticket != 0 {
205226
throw("corrupted semaphore ticket")
@@ -248,6 +269,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
248269
s.elem = unsafe.Pointer(addr)
249270
s.next = nil
250271
s.prev = nil
272+
s.waiters = 0
251273

252274
var last *sudog
253275
pt := &root.treap
@@ -258,7 +280,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
258280
// Substitute s in t's place in treap.
259281
*pt = s
260282
s.ticket = t.ticket
261-
s.acquiretime = t.acquiretime
283+
s.acquiretime = t.acquiretime // preserve head acquiretime as oldest time
262284
s.parent = t.parent
263285
s.prev = t.prev
264286
s.next = t.next
@@ -274,6 +296,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
274296
if s.waittail == nil {
275297
s.waittail = t
276298
}
299+
s.waiters = t.waiters
300+
if s.waiters+1 != 0 {
301+
s.waiters++
302+
}
277303
t.parent = nil
278304
t.prev = nil
279305
t.next = nil
@@ -287,6 +313,9 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
287313
}
288314
t.waittail = s
289315
s.waitlink = nil
316+
if t.waiters+1 != 0 {
317+
t.waiters++
318+
}
290319
}
291320
return
292321
}
@@ -330,7 +359,10 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) {
330359
// in semaRoot blocked on addr.
331360
// If the sudog was being profiled, dequeue returns the time
332361
// at which it was woken up as now. Otherwise now is 0.
333-
func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
362+
// If there are additional entries in the wait list, dequeue
363+
// returns tailtime set to the last entry's acquiretime.
364+
// Otherwise tailtime is found.acquiretime.
365+
func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) {
334366
ps := &root.treap
335367
s := *ps
336368
for ; s != nil; s = *ps {
@@ -343,7 +375,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now int64) {
343375
ps = &s.next
344376
}
345377
}
346-
return nil, 0
378+
return nil, 0, 0
347379

348380
Found:
349381
now = int64(0)
@@ -368,7 +400,16 @@ Found:
368400
} else {
369401
t.waittail = nil
370402
}
403+
t.waiters = s.waiters
404+
if t.waiters > 1 {
405+
t.waiters--
406+
}
407+
// Set head and tail acquire time to 'now',
408+
// because the caller will take care of charging
409+
// the delays before now for all entries in the list.
371410
t.acquiretime = now
411+
tailtime = s.waittail.acquiretime
412+
s.waittail.acquiretime = now
372413
s.waitlink = nil
373414
s.waittail = nil
374415
} else {
@@ -390,13 +431,14 @@ Found:
390431
} else {
391432
root.treap = nil
392433
}
434+
tailtime = s.acquiretime
393435
}
394436
s.parent = nil
395437
s.elem = nil
396438
s.next = nil
397439
s.prev = nil
398440
s.ticket = 0
399-
return s, now
441+
return s, now, tailtime
400442
}
401443

402444
// rotateLeft rotates the tree rooted at node x.

0 commit comments

Comments
 (0)