
Commit 8ab131f

rhysh authored and gopherbot committed
runtime: split mutex profile clocks
Mutex contention measurements work with two clocks: nanotime for use in runtime/metrics, and cputicks for the runtime/pprof profile. They're subject to different sampling rates: the runtime/metrics view is always enabled, but the profile is adjustable and is turned off by default. They have different levels of overhead: it can take as little as one instruction to read cputicks, while nanotime calls are more elaborate (although some platforms implement cputicks as a nanotime call). The use of the timestamps is also different: the profile's view needs to attach the delay in some Ms' lock2 calls to another M's unlock2 call stack, but the metric's view is only an int64.

Treat them differently. Don't bother threading the nanotime clock through to the unlock2 call; measure and report it directly within lock2. Sample nanotime at a constant gTrackingPeriod.

Don't consult any clocks unless the mutex is actually contended.

Continue liberal use of cputicks for now.

For #66999

Change-Id: I1c2085ea0e695bfa90c30fadedc99ced9eb1f69e
Reviewed-on: https://go-review.googlesource.com/c/go/+/586796
TryBot-Result: Gopher Robot <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Run-TryBot: Rhys Hiltner <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
1 parent 1d3d7d3 · commit 8ab131f
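As an aside, the sampled-nanotime scheme the commit message describes can be sketched as a small, self-contained Go program. This is an illustration only, not the runtime's code: timedWait, totalWaitNanos, and the sampleRate value of 16 are invented here; the runtime itself uses cheaprandn, gTrackingPeriod, and mLockProfile.waitTime, as the diffs below show.

package main

import (
	"fmt"
	"math/rand/v2"
	"sync/atomic"
	"time"
)

// sampleRate plays the role of gTrackingPeriod: only about 1 in sampleRate
// contended acquisitions pays for two wall-clock reads.
const sampleRate = 16

// totalWaitNanos plays the role of mLockProfile.waitTime, the always-on
// runtime/metrics view of mutex wait time.
var totalWaitNanos atomic.Int64

// timedWait wraps a blocking operation. It reads the wall clock on a sampled
// subset of calls and scales the observed delay by sampleRate, so the counter
// stays an estimate of the total wait time at a fraction of the clock cost.
func timedWait(block func()) {
	sampled := rand.IntN(sampleRate) == 0
	var start time.Time
	if sampled {
		start = time.Now()
	}
	block()
	if sampled {
		totalWaitNanos.Add(int64(time.Since(start)) * sampleRate)
	}
}

func main() {
	for i := 0; i < 100; i++ {
		timedWait(func() { time.Sleep(100 * time.Microsecond) })
	}
	fmt.Println("estimated total wait:", totalWaitNanos.Load(), "ns")
}

Scaling each sampled delay by sampleRate keeps the counter's expected value equal to the unsampled total, which is why this view can stay enabled unconditionally.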

File tree

3 files changed (+66 -49 lines):
src/runtime/lock_futex.go
src/runtime/lock_sema.go
src/runtime/mprof.go


src/runtime/lock_futex.go (+21 -7)

@@ -80,7 +80,14 @@ func lock2(l *mutex) {
 		gp.stackguard0, gp.throwsplit = stackPreempt, true
 	}
 
-	gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
+	var startNanos int64
+	const sampleRate = gTrackingPeriod
+	sample := cheaprandn(sampleRate) == 0
+	if sample {
+		startNanos = nanotime()
+	}
+	gp.m.mWaitList.acquireTicks = cputicks()
+
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -112,14 +119,19 @@ Loop:
 
 					if v == old || atomic.Casuintptr(&l.key, old, v) {
 						gp.m.mWaitList.clearLinks()
-						gp.m.mWaitList.acquireTimes = timePair{}
+						gp.m.mWaitList.acquireTicks = 0
 						break
 					}
 					v = atomic.Loaduintptr(&l.key)
 				}
 				if gp == gp.m.curg {
 					gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
 				}
+
+				if sample {
+					endNanos := nanotime()
+					gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
+				}
 				return
 			}
 			i = 0
@@ -161,7 +173,8 @@ func unlock(l *mutex) {
 }
 
 func unlock2(l *mutex) {
-	now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
+	var claimed bool
+	var cycles int64
 	for {
 		v := atomic.Loaduintptr(&l.key)
 		if v == mutex_locked {
@@ -171,10 +184,11 @@ func unlock2(l *mutex) {
 		} else if v&mutex_locked == 0 {
 			throw("unlock of unlocked lock")
 		} else {
-			if now != (timePair{}) {
+			if !claimed {
+				claimed = true
+				nowTicks := cputicks()
 				head := muintptr(v &^ (mutex_sleeping | mutex_locked))
-				dt = claimMutexWaitTime(now, head)
-				now = timePair{}
+				cycles = claimMutexWaitTime(nowTicks, head)
 			}
 
 			// Other M's are waiting for the lock.
@@ -186,7 +200,7 @@ func unlock2(l *mutex) {
 	}
 
 	gp := getg()
-	gp.m.mLockProfile.recordUnlock(dt)
+	gp.m.mLockProfile.recordUnlock(cycles)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")

src/runtime/lock_sema.go (+22 -7)

@@ -60,7 +60,14 @@ func lock2(l *mutex) {
 		gp.stackguard0, gp.throwsplit = stackPreempt, true
 	}
 
-	gp.m.mWaitList.acquireTimes = timePair{nanotime: nanotime(), cputicks: cputicks()}
+	var startNanos int64
+	const sampleRate = gTrackingPeriod
+	sample := cheaprandn(sampleRate) == 0
+	if sample {
+		startNanos = nanotime()
+	}
+	gp.m.mWaitList.acquireTicks = cputicks()
+
 	// On uniprocessor's, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -88,14 +95,19 @@ Loop:
 
 					if v == old || atomic.Casuintptr(&l.key, old, v) {
 						gp.m.mWaitList.clearLinks()
-						gp.m.mWaitList.acquireTimes = timePair{}
+						gp.m.mWaitList.acquireTicks = 0
 						break
 					}
 					v = atomic.Loaduintptr(&l.key)
 				}
 				if gp == gp.m.curg {
 					gp.stackguard0, gp.throwsplit = stackguard0, throwsplit
 				}
+
+				if sample {
+					endNanos := nanotime()
+					gp.m.mLockProfile.waitTime.Add((endNanos - startNanos) * sampleRate)
+				}
 				return
 			}
 			i = 0
@@ -145,7 +157,8 @@ func unlock(l *mutex) {
 //
 //go:nowritebarrier
 func unlock2(l *mutex) {
-	now, dt := timePair{nanotime: nanotime(), cputicks: cputicks()}, timePair{}
+	var claimed bool
+	var cycles int64
 	gp := getg()
 	var mp *m
 	for {
@@ -155,9 +168,11 @@ func unlock2(l *mutex) {
 				break
 			}
 		} else {
-			if now != (timePair{}) {
-				dt = claimMutexWaitTime(now, muintptr(v&^locked))
-				now = timePair{}
+			if !claimed {
+				claimed = true
+				nowTicks := cputicks()
+				head := muintptr(v &^ locked)
+				cycles = claimMutexWaitTime(nowTicks, head)
 			}
 
 			// Other M's are waiting for the lock.
@@ -171,7 +186,7 @@ func unlock2(l *mutex) {
 		}
 	}
 
-	gp.m.mLockProfile.recordUnlock(dt)
+	gp.m.mLockProfile.recordUnlock(cycles)
 	gp.m.locks--
 	if gp.m.locks < 0 {
 		throw("runtime·unlock: lock count")

src/runtime/mprof.go (+23 -35)

@@ -590,7 +590,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 // mutex.
 //
 // Having found the head and tail nodes and a correct waiters count, the
-// unlocking M can read and update those two nodes' acquireTimes fields and thus
+// unlocking M can read and update those two nodes' acquireTicks field and thus
 // take responsibility for (an estimate of) the entire list's delay since the
 // last unlock call.
 //
@@ -603,21 +603,16 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 // runtime controls the order of thread wakeups (it's a LIFO stack), but with
 // lock_futex.go the OS can wake an arbitrary thread.
 type mWaitList struct {
-	acquireTimes timePair // start of current wait (set by us, updated by others during unlock)
+	acquireTicks int64    // start of current wait (set by us, updated by others during unlock)
 	next         muintptr // next m waiting for lock (set by us, cleared by another during unlock)
 	prev         muintptr // previous m waiting for lock (an amortized hint, set by another during unlock)
 	tail         muintptr // final m waiting for lock (an amortized hint, set by others during unlock)
 	waiters      int32    // length of waiting m list (an amortized hint, set by another during unlock)
 }
 
-type timePair struct {
-	nanotime int64
-	cputicks int64
-}
-
 // clearLinks resets the fields related to the M's position in the list of Ms
-// waiting for a mutex. It leaves acquireTimes intact, since this M may still be
-// waiting and may have had its acquireTimes updated by an unlock2 call.
+// waiting for a mutex. It leaves acquireTicks intact, since this M may still be
+// waiting and may have had its acquireTicks updated by an unlock2 call.
 //
 // In lock_sema.go, the previous owner of the mutex dequeues an M and then wakes
 // it; with semaphore-based sleep, it's important that each M receives only one
@@ -736,8 +731,8 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
 	hp := head.ptr()
 	tail := hp.mWaitList.tail
 	waiters := hp.mWaitList.waiters
-	headTimes := hp.mWaitList.acquireTimes
-	tailTimes := hp.mWaitList.tail.ptr().mWaitList.acquireTimes
+	headTicks := hp.mWaitList.acquireTicks
+	tailTicks := hp.mWaitList.tail.ptr().mWaitList.acquireTicks
 
 	mp.mWaitList.tail = 0
 
@@ -773,42 +768,40 @@ func removeMutexWaitList(head muintptr, mp *m) muintptr {
 		hp.mWaitList.prev = 0
 		hp.mWaitList.tail = tail
 		hp.mWaitList.waiters = waiters - 1
-		hp.mWaitList.acquireTimes = headTimes
+		hp.mWaitList.acquireTicks = headTicks
 	}
 	if tp := tail.ptr(); tp != nil {
-		tp.mWaitList.acquireTimes = tailTimes
+		tp.mWaitList.acquireTicks = tailTicks
 	}
 	return head
 }
 
-// claimMutexWaitTime advances the acquireTimes of the list of waiting Ms at
+// claimMutexWaitTime advances the acquireTicks of the list of waiting Ms at
 // head to now, returning an estimate of the total wait time claimed by that
 // action.
-func claimMutexWaitTime(now timePair, head muintptr) timePair {
+func claimMutexWaitTime(nowTicks int64, head muintptr) int64 {
 	fixMutexWaitList(head)
 	hp := head.ptr()
 	if hp == nil {
-		return timePair{}
+		return 0
 	}
 	tp := hp.mWaitList.tail.ptr()
 	waiters := hp.mWaitList.waiters
-	headTimes := hp.mWaitList.acquireTimes
-	tailTimes := tp.mWaitList.acquireTimes
+	headTicks := hp.mWaitList.acquireTicks
+	tailTicks := tp.mWaitList.acquireTicks
 
-	var dt timePair
-	dt.nanotime = now.nanotime - headTimes.nanotime
-	dt.cputicks = now.cputicks - headTimes.cputicks
+	var cycles int64
+	cycles = nowTicks - headTicks
 	if waiters > 1 {
-		dt.nanotime = int64(waiters) * (dt.nanotime + now.nanotime - tailTimes.nanotime) / 2
-		dt.cputicks = int64(waiters) * (dt.cputicks + now.cputicks - tailTimes.cputicks) / 2
+		cycles = int64(waiters) * (cycles + nowTicks - tailTicks) / 2
	}
 
 	// When removeMutexWaitList removes a head or tail node, it's responsible
 	// for applying these changes to the new head or tail.
-	hp.mWaitList.acquireTimes = now
-	tp.mWaitList.acquireTimes = now
+	hp.mWaitList.acquireTicks = nowTicks
+	tp.mWaitList.acquireTicks = nowTicks
 
-	return dt
+	return cycles
 }
 
 // mLockProfile is part of the M struct to hold information relating to mutex
@@ -839,26 +832,21 @@ type mLockProfile struct {
 // From unlock2, we might not be holding a p in this code.
 //
 //go:nowritebarrierrec
-func (prof *mLockProfile) recordUnlock(dt timePair) {
-	if dt != (timePair{}) {
+func (prof *mLockProfile) recordUnlock(cycles int64) {
+	if cycles != 0 {
 		// We could make a point of clearing out the local storage right before
 		// this, to have a slightly better chance of being able to see the call
 		// stack if the program has several (nested) contended locks. If apps
 		// are seeing a lot of _LostContendedRuntimeLock samples, maybe that'll
 		// be a worthwhile change.
-		prof.proposeUnlock(dt)
+		prof.proposeUnlock(cycles)
 	}
 	if getg().m.locks == 1 && prof.cycles != 0 {
 		prof.store()
 	}
 }
 
-func (prof *mLockProfile) proposeUnlock(dt timePair) {
-	if nanos := dt.nanotime; nanos > 0 {
-		prof.waitTime.Add(nanos)
-	}
-
-	cycles := dt.cputicks
+func (prof *mLockProfile) proposeUnlock(cycles int64) {
 	if cycles <= 0 {
 		return
 	}
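The arithmetic in claimMutexWaitTime above can be read as: the head of the list has waited the longest, the tail the shortest, and the total delay is approximated as the number of waiters times the average of those two delays. A toy reimplementation with a worked example follows; estimateTotalWait is an invented name, and only the arithmetic mirrors the diff.

package main

import "fmt"

// estimateTotalWait approximates the summed delay of a wait list from the
// head waiter's delay, the tail waiter's delay, and the number of waiters:
// waiters times the average of the head and tail delays.
func estimateTotalWait(nowTicks, headTicks, tailTicks, waiters int64) int64 {
	cycles := nowTicks - headTicks // the head has waited the longest
	if waiters > 1 {
		cycles = waiters * (cycles + nowTicks - tailTicks) / 2
	}
	return cycles
}

func main() {
	// Head enqueued at tick 100, tail at tick 160, unlock at tick 200, with
	// 4 waiters: head waited 100, tail waited 40, so the estimate is
	// 4 * (100+40)/2 = 280 ticks of total delay.
	fmt.Println(estimateTotalWait(200, 100, 160, 4))
}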
