Skip to content

Commit e8776e1

Browse files
rhysh authored and gopherbot committed
runtime: benchmark mutex handoffs
The speed of handing off a mutex to a waiting thread is sensitive to the configuration of the spinning section of lock2. Measure that latency directly, to complement our existing benchmarks of mutex throughput.

For #68578

Change-Id: I7637684bcff62eb05cc008491f095f653d13af4b
Reviewed-on: https://go-review.googlesource.com/c/go/+/602176
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent aac7106 commit e8776e1

File tree

1 file changed

+110
-0
lines changed

1 file changed

+110
-0
lines changed

src/runtime/runtime_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ package runtime_test
77
import (
88
"flag"
99
"fmt"
10+
"internal/cpu"
11+
"internal/runtime/atomic"
1012
"io"
1113
. "runtime"
1214
"runtime/debug"
@@ -561,3 +563,111 @@ func BenchmarkOSYield(b *testing.B) {
561563
OSYield()
562564
}
563565
}
566+
567+
// BenchmarkMutexHandoff reports how long it takes to hand a runtime mutex from
// the goroutine that releases it to a goroutine waiting in Lock.
//
// Sub-benchmarks:
//   - Solo: Lock/Unlock pairs issued from a single goroutine.
//   - FastPingPong: two-goroutine handoff with an empty delay function.
//   - SlowPingPong: two-goroutine handoff where the holder waits for the mutex
//     to report contention, plus a fixed extra pause, before unlocking.
func BenchmarkMutexHandoff(b *testing.B) {
568+
// testcase builds a two-goroutine handoff benchmark. The delay function is
// called by the coordinator while it holds the mutex, immediately before the
// timed Unlock. The returned benchmark is skipped when GOMAXPROCS is below 2.
testcase := func(delay func(l *Mutex)) func(b *testing.B) {
569+
return func(b *testing.B) {
570+
if workers := 2; GOMAXPROCS(0) < workers {
571+
b.Skipf("requires GOMAXPROCS >= %d", workers)
572+
}
573+
574+
// Measure latency of mutex handoff between threads.
575+
//
576+
// Hand off a runtime.mutex between two threads, one running a
577+
// "coordinator" goroutine and the other running a "worker"
578+
// goroutine. We don't override the runtime's typical
579+
// goroutine/thread mapping behavior.
580+
//
581+
// Measure the latency, starting when the coordinator enters a call
582+
// to runtime.unlock and ending when the worker's call to
583+
// runtime.lock returns. The benchmark can specify a "delay"
584+
// function to simulate the length of the mutex-holder's critical
585+
// section, including to arrange for the worker's thread to be in
586+
// either the "spinning" or "sleeping" portions of the runtime.lock2
587+
// implementation. Measurement starts after any such "delay".
588+
//
589+
// The two threads' goroutines communicate their current position to
590+
// each other in a non-blocking way via the "turn" state.
591+
592+
// lock and turn are each surrounded by cpu.CacheLinePadSize bytes of
// padding, presumably so that they do not share a cache line with each
// other or with neighboring data (confirm against internal/cpu).
var state struct {
593+
_ [cpu.CacheLinePadSize]byte
594+
lock Mutex
595+
_ [cpu.CacheLinePadSize]byte
596+
turn atomic.Int64
597+
_ [cpu.CacheLinePadSize]byte
598+
}
599+
600+
// delta collects both goroutines' partial sums: the coordinator adds the
// negated start timestamps and the worker adds the stop timestamps, so
// the final value is the total of (stop - start) over all iterations.
var delta atomic.Int64
601+
var wg sync.WaitGroup
602+
603+
// coordinator:
604+
// - acquire the mutex
605+
// - set the turn to 2 mod 4, instructing the worker to begin its Lock call
606+
// - wait until the mutex is contended
607+
// - wait a bit more so the worker can commit to its sleep
608+
// - release the mutex and wait for it to be our turn (0 mod 4) again
// The two "wait" steps are whatever the delay function does; with an empty
// delay function they are skipped.
609+
wg.Add(1)
610+
go func() {
611+
defer wg.Done()
612+
// t accumulates the negated start timestamps for this goroutine.
var t int64
613+
for range b.N {
614+
Lock(&state.lock)
615+
state.turn.Add(2)
616+
delay(&state.lock)
617+
t -= Nanotime() // start the timer
618+
Unlock(&state.lock)
619+
// Spin until the worker's Add(2) clears the 0x2 bit of turn.
for state.turn.Load()&0x2 != 0 {
620+
}
621+
}
622+
// Make turn odd, which the worker treats as the signal to exit.
state.turn.Add(1)
623+
delta.Add(t)
624+
}()
625+
626+
// worker:
627+
// - wait until it's our turn (2 mod 4)
628+
// - acquire and release the mutex
629+
// - switch the turn counter back to the coordinator (0 mod 4)
630+
wg.Add(1)
631+
go func() {
632+
defer wg.Done()
633+
// t accumulates the stop timestamps for this goroutine.
var t int64
634+
for {
635+
switch state.turn.Load() & 0x3 {
636+
// 0 mod 4: the coordinator's turn; nothing to do, poll again.
case 0:
637+
// Odd value: the coordinator has finished; publish our sum and exit.
case 1, 3:
638+
delta.Add(t)
639+
return
640+
// 2 mod 4: our turn; the Lock call below is the one being timed.
case 2:
641+
Lock(&state.lock)
642+
t += Nanotime() // stop the timer
643+
Unlock(&state.lock)
644+
state.turn.Add(2)
645+
}
646+
}
647+
}()
648+
649+
wg.Wait()
650+
// Report the mean handoff latency per iteration under the "ns/op" unit.
b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
651+
}
652+
}
653+
654+
// Solo: Lock and Unlock from this goroutine only; no second goroutine
// takes part.
b.Run("Solo", func(b *testing.B) {
655+
var lock Mutex
656+
for range b.N {
657+
Lock(&lock)
658+
Unlock(&lock)
659+
}
660+
})
661+
662+
// FastPingPong: the delay function is empty, so the coordinator unlocks
// right after signaling the worker.
b.Run("FastPingPong", testcase(func(l *Mutex) {}))
663+
b.Run("SlowPingPong", testcase(func(l *Mutex) {
664+
// Wait for the worker to stop spinning and prepare to sleep
665+
for !MutexContended(l) {
666+
}
667+
// Wait a bit longer so the OS can finish committing the worker to its
668+
// sleep. Balance consistency against getting enough iterations.
669+
// extraNs is 10e3 nanoseconds, i.e. 10 microseconds of busy-waiting.
const extraNs = 10e3
670+
for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
671+
}
672+
}))
673+
}

0 commit comments

Comments
 (0)