@@ -7,6 +7,8 @@ package runtime_test
7
7
import (
8
8
"flag"
9
9
"fmt"
10
+ "internal/cpu"
11
+ "internal/runtime/atomic"
10
12
"io"
11
13
. "runtime"
12
14
"runtime/debug"
@@ -561,3 +563,111 @@ func BenchmarkOSYield(b *testing.B) {
561
563
OSYield ()
562
564
}
563
565
}
566
+
567
// BenchmarkMutexHandoff measures the latency of handing a runtime.mutex from
// one thread to another, in both the fast (worker still spinning) and slow
// (worker committed to sleep) paths of the runtime's lock implementation.
func BenchmarkMutexHandoff(b *testing.B) {
	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
		return func(b *testing.B) {
			// The handoff requires two threads running simultaneously.
			if workers := 2; GOMAXPROCS(0) < workers {
				b.Skipf("requires GOMAXPROCS >= %d", workers)
			}

			// Measure latency of mutex handoff between threads.
			//
			// Hand off a runtime.mutex between two threads, one running a
			// "coordinator" goroutine and the other running a "worker"
			// goroutine. We don't override the runtime's typical
			// goroutine/thread mapping behavior.
			//
			// Measure the latency, starting when the coordinator enters a call
			// to runtime.unlock and ending when the worker's call to
			// runtime.lock returns. The benchmark can specify a "delay"
			// function to simulate the length of the mutex-holder's critical
			// section, including to arrange for the worker's thread to be in
			// either the "spinning" or "sleeping" portions of the runtime.lock2
			// implementation. Measurement starts after any such "delay".
			//
			// The two threads' goroutines communicate their current position to
			// each other in a non-blocking way via the "turn" state.

			// Pad the mutex and turn counter onto separate cache lines so the
			// busy-polling on "turn" doesn't create false sharing with the
			// mutex word being measured.
			var state struct {
				_    [cpu.CacheLinePadSize]byte
				lock Mutex
				_    [cpu.CacheLinePadSize]byte
				turn atomic.Int64
				_    [cpu.CacheLinePadSize]byte
			}

			// delta accumulates (worker's lock-acquired time) minus
			// (coordinator's pre-unlock time), summed over all iterations.
			var delta atomic.Int64
			var wg sync.WaitGroup

			// coordinator:
			//  - acquire the mutex
			//  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
			//  - wait until the mutex is contended
			//  - wait a bit more so the worker can commit to its sleep
			//  - release the mutex and wait for it to be our turn (0 mod 4) again
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for range b.N {
					Lock(&state.lock)
					state.turn.Add(2)
					delay(&state.lock)
					t -= Nanotime() // start the timer
					Unlock(&state.lock)
					// Spin until the worker flips the turn back (bit 1 clear).
					for state.turn.Load()&0x2 != 0 {
					}
				}
				// Make the turn odd to tell the worker to exit its loop.
				state.turn.Add(1)
				delta.Add(t)
			}()

			// worker:
			//  - wait until it's our turn (2 mod 4)
			//  - acquire and release the mutex
			//  - switch the turn counter back to the coordinator (0 mod 4)
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for {
					switch state.turn.Load() & 0x3 {
					case 0:
						// Coordinator's turn; keep polling.
					case 1, 3:
						// Odd turn: coordinator is done. Publish our half of
						// the timing sum and exit.
						delta.Add(t)
						return
					case 2:
						Lock(&state.lock)
						t += Nanotime() // stop the timer
						Unlock(&state.lock)
						state.turn.Add(2)
					}
				}
			}()

			wg.Wait()
			// Report the average handoff latency directly, overriding the
			// default ns/op (which would include the polling overhead).
			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
		}
	}

	// Baseline: uncontended lock/unlock on a single thread.
	b.Run("Solo", func(b *testing.B) {
		var lock Mutex
		for range b.N {
			Lock(&lock)
			Unlock(&lock)
		}
	})

	// No delay: the worker is likely still in lock2's spin phase at handoff.
	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
	// Long critical section: the worker's thread goes to sleep, so the
	// handoff includes waking it via the OS.
	b.Run("SlowPingPong", testcase(func(l *Mutex) {
		// Wait for the worker to stop spinning and prepare to sleep
		for !MutexContended(l) {
		}
		// Wait a bit longer so the OS can finish committing the worker to its
		// sleep. Balance consistency against getting enough iterations.
		const extraNs = 10e3
		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
		}
	}))
}
0 commit comments