
Commit 591c4b5

rhyshgopherbot authored and committed

runtime: verify attribution of mutex delay

Have the test use the same clock (cputicks) as the profiler, and use the test's own measurements as hard bounds on the magnitude to expect in the profile. Compare the depiction of two users of the same lock: one where the critical section is fast, and one where it is slow. Confirm that the profile shows the slow critical section as a large source of delay (with #66999 fixed), rather than showing the fast critical section as a large recipient of delay.

Previously reviewed as https://go.dev/cl/586237.

For #66999

Change-Id: Ic2d78cc29153d5322577d84abdc448e95ed8f594
Reviewed-on: https://go-review.googlesource.com/c/go/+/667616
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
1 parent 4d0f955 commit 591c4b5

File tree: 2 files changed (+169 −35 lines)

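At heart, the change is a bounding argument: each worker times its own runtime.lock call with cputicks, which caps how much delay the profile may blame on the other worker's critical section, and times how long it held the lock while it knew a waiter was queued, which is a floor on the blame it should receive itself. Below is a minimal user-space sketch of that argument, using sync.Mutex and wall-clock time in place of the runtime's internal hooks; the channel handshake stands in for runtime.MutexContended, so the bracket holds only up to scheduling noise.

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var mu sync.Mutex
	waiting := make(chan struct{})
	lockDur := make(chan time.Duration)

	mu.Lock() // the "slow" worker acquires the lock first

	go func() { // the "fast" worker
		close(waiting) // announce that we are about to block
		t0 := time.Now()
		mu.Lock() // stalls until the slow worker unlocks
		d := time.Since(t0)
		mu.Unlock()
		lockDur <- d // total lock() duration: ceiling on the slow section's blame
	}()

	<-waiting // the waiter is at (or near) its Lock call
	t1 := time.Now()
	time.Sleep(400 * time.Microsecond) // the slow critical section
	minBlame := time.Since(t1)         // known-contended time: floor on the blame
	mu.Unlock()

	maxBlame := <-lockDur
	fmt.Printf("profile's blame for the slow section should land in [%v, %v]\n",
		minBlame, maxBlame)
}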

src/runtime/export_test.go
Lines changed: 2 additions & 0 deletions

@@ -40,6 +40,8 @@ var Atoi32 = atoi32
 var ParseByteCount = parseByteCount
 
 var Nanotime = nanotime
+var Cputicks = cputicks
+var CyclesPerSecond = pprof_cyclesPerSecond
 var NetpollBreak = netpollBreak
 var Usleep = usleep
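The two added lines follow the runtime's export_test.go convention: a _test.go file inside the package re-exports unexported identifiers so the black-box runtime_test package can reach them, without widening the public API. A generic sketch of the pattern, with an illustrative package name rather than anything from this commit:

// File ticks.go — the package's normal source:
package ticks

// cputicks is unexported; ordinary importers cannot call it.
func cputicks() int64 { return 0 /* platform counter elided */ }

// File export_test.go — compiled only during "go test", so the alias
// never leaks into the public API:
package ticks

var Cputicks = cputicks // reachable from the external ticks_test package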

src/runtime/metrics_test.go
Lines changed: 167 additions & 35 deletions
@@ -1020,9 +1020,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 		return metricGrowth, profileGrowth, p
 	}
 
-	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-			metricGrowth, profileGrowth, p := measureDelta(t, func() {
+	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
 				var started, stopped sync.WaitGroup
 				started.Add(workers)
 				stopped.Add(workers)
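measureDelta, defined earlier in this file and unchanged here, snapshots both views of contention (runtime/metrics and the pprof mutex profile) before and after running the workload. A simplified sketch of the metrics half of that measure-around pattern, assuming only the public runtime/metrics API:

package main

import (
	"fmt"
	"runtime/metrics"
)

// mutexWaitGrowth reads /sync/mutex/wait/total:seconds around a workload,
// giving the runtime/metrics view of contention growth. (The test's real
// measureDelta also snapshots the pprof mutex profile for comparison.)
func mutexWaitGrowth(body func()) float64 {
	s := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
	metrics.Read(s)
	before := s[0].Value.Float64()
	body()
	metrics.Read(s)
	return s[0].Value.Float64() - before
}

func main() {
	fmt.Printf("contention growth: %fs\n", mutexWaitGrowth(func() {}))
}
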
@@ -1042,7 +1042,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				stopped.Wait()
 			})
 
-			if profileGrowth == 0 {
+			if totalProfileGrowth == 0 {
 				t.Errorf("no increase in mutex profile")
 			}
 			if metricGrowth == 0 && strictTiming {
@@ -1062,7 +1062,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// together. It doesn't work as well for user-space contention, where the
 			// involved goroutines are not _Grunnable the whole time and so need to pass
 			// through the scheduler.
-			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
+			t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
 			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
 
 			acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1082,7 +1082,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			}
 
 			var stks [][]string
-			values := make([][2]int64, len(acceptStacks))
+			values := make([][2]int64, len(acceptStacks)+1)
 			for _, s := range p.Sample {
 				var have []string
 				for _, loc := range s.Location {
@@ -1091,16 +1091,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 					}
 				}
 				stks = append(stks, have)
+				found := false
 				for i, stk := range acceptStacks {
 					if slices.Equal(have, stk) {
 						values[i][0] += s.Value[0]
 						values[i][1] += s.Value[1]
+						found = true
+						break
 					}
 				}
+				if !found {
+					values[len(values)-1][0] += s.Value[0]
+					values[len(values)-1][1] += s.Value[1]
+				}
 			}
+			profileGrowth = make([]int64, len(acceptStacks)+1)
+			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
 			for i, stk := range acceptStacks {
 				n += values[i][0]
 				value += values[i][1]
+				profileGrowth[i] = values[i][1]
 				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
 			}
 			if n == 0 && value == 0 {
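The bookkeeping above gains a catch-all slot: samples whose stacks match none of the accepted ones are summed into the final element of values, and profileGrowth now carries one entry per accepted stack plus that overflow. The same bucketing, condensed into a standalone sketch:

package main

import (
	"fmt"
	"slices"
)

type sample struct {
	stack []string
	value int64
}

// bucketByStack sums sample values per accepted stack and routes anything
// unmatched into a final catch-all slot, mirroring the test's bookkeeping.
func bucketByStack(accept [][]string, samples []sample) []int64 {
	sums := make([]int64, len(accept)+1)
	for _, s := range samples {
		found := false
		for i, stk := range accept {
			if slices.Equal(s.stack, stk) {
				sums[i] += s.value
				found = true
				break
			}
		}
		if !found {
			sums[len(sums)-1] += s.value // overflow bucket
		}
	}
	return sums
}

func main() {
	accept := [][]string{{"a", "b"}, {"c"}}
	samples := []sample{
		{stack: []string{"a", "b"}, value: 10},
		{stack: []string{"x"}, value: 5}, // unmatched: goes to the catch-all
	}
	fmt.Println(bucketByStack(accept, samples)) // [10 0 5]
}
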
@@ -1122,11 +1132,6 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 	name := t.Name()
 
 	t.Run("runtime.lock", func(t *testing.T) {
-		mus := make([]runtime.Mutex, 200)
-		var needContention atomic.Int64
-		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
-		delayMicros := delay.Microseconds()
-
 		// The goroutine that acquires the lock will only proceed when it
 		// detects that its partner is contended for the lock. That will lead to
 		// live-lock if anything (such as a STW) prevents the partner goroutine
@@ -1136,46 +1141,107 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 		// will end up contended twice. Instead, disable the GC.
 		defer debug.SetGCPercent(debug.SetGCPercent(-1))
 
-		const workers = 2
-		if runtime.GOMAXPROCS(0) < workers {
-			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
+		mus := make([]runtime.Mutex, 200)
+		var needContention atomic.Int64
+
+		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+		fastDelayMicros := baseDelay.Microseconds()
+		slowDelayMicros := baseDelay.Microseconds() * 4
+
+		const (
+			fastRole = iota
+			slowRole
+			workerCount
+		)
+		if runtime.GOMAXPROCS(0) < workerCount {
+			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
 		}
 
+		minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
+		maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
+		for i := range minTicks {
+			minTicks[i] = make([]int64, len(mus))
+			maxTicks[i] = make([]int64, len(mus))
+		}
+		var id atomic.Int32
 		fn := func() bool {
 			n := int(needContention.Load())
 			if n < 0 {
 				return false
 			}
 			mu := &mus[n]
 
-			runtime.Lock(mu)
-			for int(needContention.Load()) == n {
-				if runtime.MutexContended(mu) {
-					// make them wait a little while
-					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-						runtime.Usleep(uint32(delayMicros))
+			// Each worker has a role: to have a fast or slow critical section.
+			// Rotate the role assignments as we step through the mutex slice so
+			// we don't end up with one M always claiming the same kind of work.
+			id := int(id.Add(1))
+			role := (id + n) % workerCount
+
+			marker, delayMicros := fastMarkerFrame, fastDelayMicros
+			if role == slowRole {
+				marker, delayMicros = slowMarkerFrame, slowDelayMicros
+			}
+
+			// Each lock is used by two different critical sections, one fast
+			// and one slow, identified in profiles by their different "marker"
+			// functions. We expect the profile to blame each for the amount of
+			// delay it inflicts on other users of the lock. We run one worker
+			// of each kind, so any contention in one would be due to the other.
+			//
+			// We measure how long our runtime.lock call takes, which sets an
+			// upper bound on how much blame to expect for the other worker type
+			// in the profile. And if we acquire the lock first, we wait for the
+			// other worker to announce its contention. We measure the
+			// known-contended time, to use as a lower bound on how much blame
+			// we expect of ourselves in the profile. Then we stall for a little
+			// while (different amounts for "fast" versus "slow") before
+			// unlocking the mutex.
+
+			marker(func() {
+				t0 := runtime.Cputicks()
+				runtime.Lock(mu)
+				maxTicks[role][n] = runtime.Cputicks() - t0
+				minTicks[role][n] = 0
+				for int(needContention.Load()) == n {
+					if runtime.MutexContended(mu) {
+						t1 := runtime.Cputicks()
+						// make them wait a little while
+						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
+							runtime.Usleep(uint32(1 + delayMicros/8))
+						}
+						minTicks[role][n] = runtime.Cputicks() - t1
+						break
 					}
-					break
 				}
-			}
-			runtime.Unlock(mu)
-			needContention.Store(int64(n - 1))
+				runtime.Unlock(mu)
+				needContention.Store(int64(n - 1))
+			})
 
 			return true
 		}
 
-		stks := [][]string{{
-			"runtime.unlock",
-			"runtime_test." + name + ".func5.1",
-			"runtime_test.(*contentionWorker).run",
-		}}
+		stks := make([][]string, 2)
+		for i := range stks {
+			marker := "runtime_test.fastMarkerFrame"
+			if i == slowRole {
+				marker = "runtime_test.slowMarkerFrame"
+			}
+
+			stks[i] = []string{
+				"runtime.unlock",
+				"runtime_test." + name + ".func5.1.1",
+				marker,
+				"runtime_test." + name + ".func5.1",
+				"runtime_test.(*contentionWorker).run",
+			}
+		}
 
 		t.Run("sample-1", func(t *testing.T) {
 			old := runtime.SetMutexProfileFraction(1)
 			defer runtime.SetMutexProfileFraction(old)
 
 			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
 			defer func() {
 				if t.Failed() {
 					explain()
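The marker functions exist purely to leave a distinguishing frame in every call stack captured inside them; that frame is what lets the profile's samples be attributed to the fast or the slow critical section, per the stks expectations above. A standalone sketch of the technique, observing the frames with runtime.Callers instead of a profiler:

package main

import (
	"fmt"
	"runtime"
)

// Wrapping a callback in a trivial named function inserts that function's
// name into every stack recorded inside the callback.
func slowMarkerFrame(fn func()) { fn() }
func fastMarkerFrame(fn func()) { fn() }

func printStack() {
	pc := make([]uintptr, 8)
	n := runtime.Callers(1, pc)
	frames := runtime.CallersFrames(pc[:n])
	for {
		f, more := frames.Next()
		fmt.Println(f.Function)
		if !more {
			break
		}
	}
}

func main() {
	// The printed stack includes main.slowMarkerFrame, distinguishing this
	// call from an otherwise identical one made via fastMarkerFrame.
	slowMarkerFrame(printStack)
}
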
@@ -1187,7 +1253,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// gTrackingPeriod, so we don't have a hard lower bound here.
 			testenv.SkipFlaky(t, 64253)
 
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
 				// The test imposes a delay with usleep, verified with calls to
 				// nanotime. Compare against the runtime/metrics package's view
 				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1199,10 +1265,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
 			}
 
+			var slowMinTicks, fastMinTicks int64
+			for role, ticks := range minTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMinTicks += delta
+					} else {
+						fastMinTicks += delta
+					}
+				}
+			}
+			var slowMaxTicks, fastMaxTicks int64
+			for role, ticks := range maxTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMaxTicks += delta
+					} else {
+						fastMaxTicks += delta
+					}
+				}
+			}
+
+			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
+			for _, set := range []struct {
+				name     string
+				profTime int64
+				minTime  int64
+				maxTime  int64
+			}{
+				{
+					name:     "slow",
+					profTime: profileGrowth[slowRole],
+					minTime:  int64(float64(slowMinTicks) / cpuGHz),
+					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
+				},
+				{
+					name:     "fast",
+					profTime: profileGrowth[fastRole],
+					minTime:  int64(float64(fastMinTicks) / cpuGHz),
+					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
+				},
+			} {
+				t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
+				t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
+				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+				if set.profTime < set.minTime {
+					t.Errorf("profile undercounted %q critical section", set.name)
+				}
+				if set.profTime > set.maxTime {
+					t.Errorf("profile overcounted %q critical section", set.name)
+				}
+			}
+
+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
 			const slop = 1.5 // account for nanotime vs cputicks
 			t.Run("compare timers", func(t *testing.T) {
 				testenv.SkipFlaky(t, 64253)
-				if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
					t.Errorf("views differ by more than %fx", slop)
 				}
 			})
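The bounds check runs in nanoseconds: a cputicks delta is divided by the cycle rate in GHz, i.e. cycles per nanosecond, before being compared with the profile's nanosecond values. Worked numbers with an assumed 3 GHz tick rate:

package main

import "fmt"

func main() {
	// Illustrative values only: a 3 GHz cycle counter and a measured
	// delta of 1,200,000 ticks.
	cyclesPerSecond := int64(3_000_000_000)
	ticks := int64(1_200_000)

	cpuGHz := float64(cyclesPerSecond) / 1e9 // 3.0 cycles per nanosecond
	nanos := int64(float64(ticks) / cpuGHz)  // 400,000 ns = 400 µs

	fmt.Printf("%d ticks at %.1f GHz = %d ns\n", ticks, cpuGHz, nanos)
}
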
@@ -1215,7 +1339,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			defer runtime.SetMutexProfileFraction(old)
 
 			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
 			defer func() {
 				if t.Failed() {
 					explain()
@@ -1228,7 +1352,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// 1e-9 chance of that, which is an acceptably low flakiness rate.
 			const samplingSlop = 2.5
 
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
 				// The test imposes a delay with usleep, verified with calls to
 				// nanotime. Compare against the runtime/metrics package's view
 				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1239,8 +1363,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
 			}
 
+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
 			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
 				t.Errorf("views differ by more than %fx", timerSlop)
 			}
 		})
@@ -1307,6 +1436,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 	})
 }
 
+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
 // contentionWorker provides cleaner call stacks for lock contention profile tests
 type contentionWorker struct {
 	before func()