Skip to content

Commit 1be701a

Browse files
rhyshgopherbot
authored and committed
Revert "runtime: split mutex profile clocks"
This reverts commit 8ab131f (CL 586796). Reason for revert: This is part of a patch series that changed the handling of contended lock2/unlock2 calls, reducing the maximum throughput of contended runtime.mutex values, and causing a performance regression on applications where that is (or became) the bottleneck. Updates #66999. Updates #67585. Change-Id: I54711691e86e072081482102019d168292b5150a. Reviewed-on: https://go-review.googlesource.com/c/go/+/589095. Reviewed-by: Michael Pratt <[email protected]>. Auto-Submit: Rhys Hiltner <[email protected]>. LUCI-TryBot-Result: Go LUCI <[email protected]>. Reviewed-by: Than McIntosh <[email protected]>.
1 parent e6504ce commit 1be701a

File tree

3 files changed

+49
-66
lines changed

3 files changed

+49
-66
lines changed

src/runtime/lock_futex.go

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,7 @@ func lock2(l *mutex) {
8080
gp.stackguard0, gp.throwsplit = stackPreempt, true
8181
}
8282

83-
var startNanos int64
84-
const sampleRate = gTrackingPeriod
85-
sample := cheaprandn(sampleRate) == 0
86-
if sample {
87-
startNanos = nanotime()
88-
}
89-
gp.m.mWaitList.acquireTicks = cputicks()
90-
83+
gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
9184
// On uniprocessors, no point spinning.
9285
// On multiprocessors, spin for ACTIVE_SPIN attempts.
9386
spin := 0
@@ -119,19 +112,14 @@ Loop:
119112

120113
if v == old || atomic.Casuintptr(&l.key, old, v) {
121114
gp.m.mWaitList.clearLinks()
122-
gp.m.mWaitList.acquireTicks = 0
115+
gp.m.mWaitList.acquireTimes = timePair{}
123116
break
124117
}
125118
v = atomic.Loaduintptr(&l.key)
126119
}
127120
if gp == gp.m.curg {
128121
gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
129122
}
130-
131-
if sample {
132-
endNanos := nanotime()
133-
gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
134-
}
135123
return
136124
}
137125
i = 0
@@ -173,8 +161,7 @@ func unlock(l *mutex) {
173161
}
174162

175163
func unlock2(l *mutex) {
176-
var claimed bool
177-
var cycles int64
164+
now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
178165
for {
179166
v := atomic.Loaduintptr(&l.key)
180167
if v == mutex_locked {
@@ -184,11 +171,10 @@ func unlock2(l *mutex) {
184171
} else if v&mutex_locked == 0 {
185172
throw("unlock of unlocked lock")
186173
} else {
187-
if !claimed {
188-
claimed = true
189-
nowTicks := cputicks()
174+
if now != (timePair{}) {
190175
head := muintptr(v &^ (mutex_sleeping | mutex_locked))
191-
cycles = claimMutexWaitTime(nowTicks, head)
176+
dt = claimMutexWaitTime(now, head)
177+
now = timePair{}
192178
}
193179

194180
// Other M's are waiting for the lock.
@@ -200,7 +186,7 @@ func unlock2(l *mutex) {
200186
}
201187

202188
gp := getg()
203-
gp.m.mLockProfile.recordUnlock(cycles)
189+
gp.m.mLockProfile.recordUnlock(dt)
204190
gp.m.locks--
205191
if gp.m.locks < 0 {
206192
throw("runtime·unlock: lock count")

src/runtime/lock_sema.go

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,7 @@ func lock2(l *mutex) {
6060
gp.stackguard0, gp.throwsplit = stackPreempt, true
6161
}
6262

63-
var startNanos int64
64-
const sampleRate = gTrackingPeriod
65-
sample := cheaprandn(sampleRate) == 0
66-
if sample {
67-
startNanos = nanotime()
68-
}
69-
gp.m.mWaitList.acquireTicks = cputicks()
70-
63+
gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
7164
// On uniprocessor's, no point spinning.
7265
// On multiprocessors, spin for ACTIVE_SPIN attempts.
7366
spin := 0
@@ -95,19 +88,14 @@ Loop:
9588

9689
if v == old || atomic.Casuintptr(&l.key, old, v) {
9790
gp.m.mWaitList.clearLinks()
98-
gp.m.mWaitList.acquireTicks = 0
91+
gp.m.mWaitList.acquireTimes = timePair{}
9992
break
10093
}
10194
v = atomic.Loaduintptr(&l.key)
10295
}
10396
if gp == gp.m.curg {
10497
gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
10598
}
106-
107-
if sample {
108-
endNanos := nanotime()
109-
gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
110-
}
11199
return
112100
}
113101
i = 0
@@ -157,8 +145,7 @@ func unlock(l *mutex) {
157145
//
158146
//go:nowritebarrier
159147
func unlock2(l *mutex) {
160-
var claimed bool
161-
var cycles int64
148+
now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
162149
gp := getg()
163150
var mp *m
164151
for {
@@ -168,11 +155,9 @@ func unlock2(l *mutex) {
168155
break
169156
}
170157
} else {
171-
if !claimed {
172-
claimed = true
173-
nowTicks := cputicks()
174-
head := muintptr(v &^ locked)
175-
cycles = claimMutexWaitTime(nowTicks, head)
158+
if now != (timePair{}) {
159+
dt = claimMutexWaitTime(now, muintptr(v&^locked))
160+
now = timePair{}
176161
}
177162

178163
// Other M's are waiting for the lock.
@@ -186,7 +171,7 @@ func unlock2(l *mutex) {
186171
}
187172
}
188173

189-
gp.m.mLockProfile.recordUnlock(cycles)
174+
gp.m.mLockProfile.recordUnlock(dt)
190175
gp.m.locks--
191176
if gp.m.locks < 0 {
192177
throw("runtime·unlock: lock count")

src/runtime/mprof.go

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
590590
// mutex.
591591
//
592592
// Having found the head and tail nodes and a correct waiters count, the
593-
// unlocking M can read and update those two nodes' acquireTicks field and thus
593+
// unlocking M can read and update those two nodes' acquireTimes fields and thus
594594
// take responsibility for (an estimate of) the entire list's delay since the
595595
// last unlock call.
596596
//
@@ -603,16 +603,21 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
603603
// runtime controls the order of thread wakeups (it's a LIFO stack), but with
604604
// lock_futex.go the OS can wake an arbitrary thread.
605605
type mWaitList struct {
606-
acquireTicks int64 // start of current wait (set by us, updated by others during unlock)
606+
acquireTimes timePair // start of current wait (set by us, updated by others during unlock)
607607
next muintptr // next m waiting for lock (set by us, cleared by another during unlock)
608608
prev muintptr // previous m waiting for lock (an amortized hint, set by another during unlock)
609609
tail muintptr // final m waiting for lock (an amortized hint, set by others during unlock)
610610
waiters int32 // length of waiting m list (an amortized hint, set by another during unlock)
611611
}
612612

613+
type timePair struct {
614+
nanotime int64
615+
cputicks int64
616+
}
617+
613618
// clearLinks resets the fields related to the M's position in the list of Ms
614-
// waiting for a mutex. It leaves acquireTicks intact, since this M may still be
615-
// waiting and may have had its acquireTicks updated by an unlock2 call.
619+
// waiting for a mutex. It leaves acquireTimes intact, since this M may still be
620+
// waiting and may have had its acquireTimes updated by an unlock2 call.
616621
//
617622
// In lock_sema.go, the previous owner of the mutex dequeues an M and then wakes
618623
// it; with semaphore-based sleep, it's important that each M receives only one
@@ -731,8 +736,8 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
731736
hp := head.ptr()
732737
tail := hp.mWaitList.tail
733738
waiters := hp.mWaitList.waiters
734-
headTicks := hp.mWaitList.acquireTicks
735-
tailTicks := hp.mWaitList.tail.ptr().mWaitList.acquireTicks
739+
headTimes := hp.mWaitList.acquireTimes
740+
tailTimes := hp.mWaitList.tail.ptr().mWaitList.acquireTimes
736741

737742
mp.mWaitList.tail = 0
738743

@@ -768,40 +773,42 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
768773
hp.mWaitList.prev = 0
769774
hp.mWaitList.tail = tail
770775
hp.mWaitList.waiters = waiters - 1
771-
hp.mWaitList.acquireTicks = headTicks
776+
hp.mWaitList.acquireTimes = headTimes
772777
}
773778
if tp := tail.ptr(); tp != nil {
774-
tp.mWaitList.acquireTicks = tailTicks
779+
tp.mWaitList.acquireTimes = tailTimes
775780
}
776781
return head
777782
}
778783

779-
// claimMutexWaitTime advances the acquireTicks of the list of waiting Ms at
784+
// claimMutexWaitTime advances the acquireTimes of the list of waiting Ms at
780785
// head to now, returning an estimate of the total wait time claimed by that
781786
// action.
782-
func claimMutexWaitTime(nowTicks int64, head muintptr) int64 {
787+
func claimMutexWaitTime(now timePair, head muintptr) timePair {
783788
fixMutexWaitList(head)
784789
hp := head.ptr()
785790
if hp == nil {
786-
return 0
791+
return timePair{}
787792
}
788793
tp := hp.mWaitList.tail.ptr()
789794
waiters := hp.mWaitList.waiters
790-
headTicks := hp.mWaitList.acquireTicks
791-
tailTicks := tp.mWaitList.acquireTicks
795+
headTimes := hp.mWaitList.acquireTimes
796+
tailTimes := tp.mWaitList.acquireTimes
792797

793-
var cycles int64
794-
cycles = nowTicks - headTicks
798+
var dt timePair
799+
dt.nanotime = now.nanotime - headTimes.nanotime
800+
dt.cputicks = now.cputicks - headTimes.cputicks
795801
if waiters > 1 {
796-
cycles = int64(waiters) * (cycles + nowTicks - tailTicks) / 2
802+
dt.nanotime = int64(waiters) * (dt.nanotime + now.nanotime - tailTimes.nanotime) / 2
803+
dt.cputicks = int64(waiters) * (dt.cputicks + now.cputicks - tailTimes.cputicks) / 2
797804
}
798805

799806
// When removeMutexWaitList removes a head or tail node, it's responsible
800807
// for applying these changes to the new head or tail.
801-
hp.mWaitList.acquireTicks = nowTicks
802-
tp.mWaitList.acquireTicks = nowTicks
808+
hp.mWaitList.acquireTimes = now
809+
tp.mWaitList.acquireTimes = now
803810

804-
return cycles
811+
return dt
805812
}
806813

807814
// mLockProfile is part of the M struct to hold information relating to mutex
@@ -832,21 +839,26 @@ type mLockProfile struct {
832839
// From unlock2, we might not be holding a p in this code.
833840
//
834841
//go:nowritebarrierrec
835-
func (prof *mLockProfile) recordUnlock(cycles int64) {
836-
if cycles != 0 {
842+
func (prof *mLockProfile) recordUnlock(dt timePair) {
843+
if dt != (timePair{}) {
837844
// We could make a point of clearing out the local storage right before
838845
// this, to have a slightly better chance of being able to see the call
839846
// stack if the program has several (nested) contended locks. If apps
840847
// are seeing a lot of _LostContendedRuntimeLock samples, maybe that'll
841848
// be a worthwhile change.
842-
prof.proposeUnlock(cycles)
849+
prof.proposeUnlock(dt)
843850
}
844851
if getg().m.locks == 1 && prof.cycles != 0 {
845852
prof.store()
846853
}
847854
}
848855

849-
func (prof *mLockProfile) proposeUnlock(cycles int64) {
856+
func (prof *mLockProfile) proposeUnlock(dt timePair) {
857+
if nanos := dt.nanotime; nanos > 0 {
858+
prof.waitTime.Add(nanos)
859+
}
860+
861+
cycles := dt.cputicks
850862
if cycles <= 0 {
851863
return
852864
}

0 commit comments

Comments
 (0)