Skip to content

Commit e6b8b2f

Browse files
rhysh authored and gopherbot committed

Revert "runtime: improve runtime-internal mutex profile tests"
This reverts commit f9ba2cf (CL 586237).

Reason for revert: This is part of a patch series that changed the handling of contended lock2/unlock2 calls, reducing the maximum throughput of contended runtime.mutex values, and causing a performance regression on applications where that is (or became) the bottleneck.

This test verifies that the semantics of the mutex profile for runtime.mutex values matches that of sync.Mutex values. Without the rest of the patch series, this test would correctly identify that Go 1.22's semantics are incorrect (issue #66999).

Updates #66999
Updates #67585

Change-Id: Id06ae01d7bc91c94054c80d273e6530cb2d59d10
Reviewed-on: https://go-review.googlesource.com/c/go/+/589096
Reviewed-by: Michael Pratt <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 1be701a commit e6b8b2f

File tree

2 files changed

+29
-148
lines changed

2 files changed

+29
-148
lines changed

src/runtime/export_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ var Atoi32 = atoi32
3939
var ParseByteCount = parseByteCount
4040

4141
var Nanotime = nanotime
42-
var Cputicks = cputicks
43-
var CyclesPerSecond = pprof_cyclesPerSecond
4442
var NetpollBreak = netpollBreak
4543
var Usleep = usleep
4644

src/runtime/metrics_test.go

Lines changed: 29 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,9 +1008,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
10081008
return metricGrowth, profileGrowth, p
10091009
}
10101010

1011-
testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
1012-
return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
1013-
metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
1011+
testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
1012+
return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
1013+
metricGrowth, profileGrowth, p := measureDelta(t, func() {
10141014
var started, stopped sync.WaitGroup
10151015
started.Add(workers)
10161016
stopped.Add(workers)
@@ -1030,7 +1030,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
10301030
stopped.Wait()
10311031
})
10321032

1033-
if totalProfileGrowth == 0 {
1033+
if profileGrowth == 0 {
10341034
t.Errorf("no increase in mutex profile")
10351035
}
10361036
if metricGrowth == 0 && strictTiming {
@@ -1050,7 +1050,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
10501050
// together. It doesn't work as well for user-space contention, where the
10511051
// involved goroutines are not _Grunnable the whole time and so need to pass
10521052
// through the scheduler.
1053-
t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
1053+
t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
10541054
t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
10551055

10561056
acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1070,7 +1070,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
10701070
}
10711071

10721072
var stks [][]string
1073-
values := make([][2]int64, len(acceptStacks)+1)
1073+
values := make([][2]int64, len(acceptStacks))
10741074
for _, s := range p.Sample {
10751075
var have []string
10761076
for _, loc := range s.Location {
@@ -1079,26 +1079,16 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
10791079
}
10801080
}
10811081
stks = append(stks, have)
1082-
found := false
10831082
for i, stk := range acceptStacks {
10841083
if slices.Equal(have, stk) {
10851084
values[i][0] += s.Value[0]
10861085
values[i][1] += s.Value[1]
1087-
found = true
1088-
break
10891086
}
10901087
}
1091-
if !found {
1092-
values[len(values)-1][0] += s.Value[0]
1093-
values[len(values)-1][1] += s.Value[1]
1094-
}
10951088
}
1096-
profileGrowth = make([]int64, len(acceptStacks)+1)
1097-
profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
10981089
for i, stk := range acceptStacks {
10991090
n += values[i][0]
11001091
value += values[i][1]
1101-
profileGrowth[i] = values[i][1]
11021092
t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
11031093
}
11041094
if n == 0 && value == 0 {
@@ -1120,14 +1110,8 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
11201110
t.Run("runtime.lock", func(t *testing.T) {
11211111
mus := make([]runtime.Mutex, 200)
11221112
var needContention atomic.Int64
1123-
baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
1124-
fastDelayMicros := baseDelay.Microseconds()
1125-
slowDelayMicros := baseDelay.Microseconds() * 4
1126-
1127-
const (
1128-
fastRole = 0
1129-
slowRole = 1
1130-
)
1113+
delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
1114+
delayMicros := delay.Microseconds()
11311115

11321116
// The goroutine that acquires the lock will only proceed when it
11331117
// detects that its partner is contended for the lock. That will lead to
@@ -1143,69 +1127,34 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
11431127
t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
11441128
}
11451129

1146-
minTicks := make([][]int64, workers) // lower bound, known-contended time, measured by cputicks
1147-
maxTicks := make([][]int64, workers) // upper bound, total lock() duration, measured by cputicks
1148-
for i := range minTicks {
1149-
minTicks[i] = make([]int64, len(mus))
1150-
maxTicks[i] = make([]int64, len(mus))
1151-
}
1152-
var id atomic.Int32
11531130
fn := func() bool {
11541131
n := int(needContention.Load())
11551132
if n < 0 {
11561133
return false
11571134
}
11581135
mu := &mus[n]
11591136

1160-
// Each worker has a role: to have a fast or slow critical section.
1161-
// Rotate the role assignments as we step through the mutex slice so
1162-
// we don't end up with one M always claiming the same kind of work.
1163-
id := int(id.Add(1))
1164-
role := (id + n) % workers
1165-
1166-
marker, delayMicros := fastMarkerFrame, fastDelayMicros
1167-
if role == slowRole {
1168-
marker, delayMicros = slowMarkerFrame, slowDelayMicros
1169-
}
1170-
1171-
marker(func() {
1172-
t0 := runtime.Cputicks()
1173-
runtime.Lock(mu)
1174-
maxTicks[role][n] = runtime.Cputicks() - t0
1175-
minTicks[role][n] = 0
1176-
for int(needContention.Load()) == n {
1177-
if runtime.MutexContended(mu) {
1178-
t1 := runtime.Cputicks()
1179-
// make them wait a little while
1180-
for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
1181-
runtime.Usleep(uint32(1 + delayMicros/8))
1182-
}
1183-
minTicks[role][n] = runtime.Cputicks() - t1
1184-
break
1137+
runtime.Lock(mu)
1138+
for int(needContention.Load()) == n {
1139+
if runtime.MutexContended(mu) {
1140+
// make them wait a little while
1141+
for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
1142+
runtime.Usleep(uint32(delayMicros))
11851143
}
1144+
break
11861145
}
1187-
runtime.Unlock(mu)
1188-
needContention.Store(int64(n - 1))
1189-
})
1146+
}
1147+
runtime.Unlock(mu)
1148+
needContention.Store(int64(n - 1))
11901149

11911150
return true
11921151
}
11931152

1194-
stks := make([][]string, 2)
1195-
for i := range stks {
1196-
marker := "runtime_test.fastMarkerFrame"
1197-
if i == slowRole {
1198-
marker = "runtime_test.slowMarkerFrame"
1199-
}
1200-
1201-
stks[i] = []string{
1202-
"runtime.unlock",
1203-
"runtime_test." + name + ".func4.1.1",
1204-
marker,
1205-
"runtime_test." + name + ".func4.1",
1206-
"runtime_test.(*contentionWorker).run",
1207-
}
1208-
}
1153+
stks := [][]string{{
1154+
"runtime.unlock",
1155+
"runtime_test." + name + ".func4.1",
1156+
"runtime_test.(*contentionWorker).run",
1157+
}}
12091158

12101159
t.Run("sample-1", func(t *testing.T) {
12111160
old := runtime.SetMutexProfileFraction(1)
@@ -1215,11 +1164,11 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
12151164
metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)
12161165

12171166
t.Run("metric", func(t *testing.T) {
1218-
// The runtime/metrics view is sampled at 1 per gTrackingPeriod,
1219-
// so we don't have a hard lower bound here.
1167+
// The runtime/metrics view may be sampled at 1 per
1168+
// gTrackingPeriod, so we don't have a hard lower bound here.
12201169
testenv.SkipFlaky(t, 64253)
12211170

1222-
if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
1171+
if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
12231172
// The test imposes a delay with usleep, verified with calls to
12241173
// nanotime. Compare against the runtime/metrics package's view
12251174
// (based on nanotime) rather than runtime/pprof's view (based
@@ -1231,68 +1180,10 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
12311180
t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
12321181
}
12331182

1234-
var slowMinTicks, fastMinTicks int64
1235-
for role, ticks := range minTicks {
1236-
for _, delta := range ticks {
1237-
if role == slowRole {
1238-
slowMinTicks += delta
1239-
} else {
1240-
fastMinTicks += delta
1241-
}
1242-
}
1243-
}
1244-
var slowMaxTicks, fastMaxTicks int64
1245-
for role, ticks := range maxTicks {
1246-
for _, delta := range ticks {
1247-
if role == slowRole {
1248-
slowMaxTicks += delta
1249-
} else {
1250-
fastMaxTicks += delta
1251-
}
1252-
}
1253-
}
1254-
1255-
cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
1256-
for _, set := range []struct {
1257-
name string
1258-
profTime int64
1259-
minTime int64
1260-
maxTime int64
1261-
}{
1262-
{
1263-
name: "slow",
1264-
profTime: profileGrowth[slowRole],
1265-
minTime: int64(float64(slowMinTicks) / cpuGHz),
1266-
maxTime: int64(float64(fastMaxTicks) / cpuGHz),
1267-
},
1268-
{
1269-
name: "fast",
1270-
profTime: profileGrowth[fastRole],
1271-
minTime: int64(float64(fastMinTicks) / cpuGHz),
1272-
maxTime: int64(float64(slowMaxTicks) / cpuGHz),
1273-
},
1274-
} {
1275-
t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
1276-
t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
1277-
t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
1278-
1279-
if set.profTime < set.minTime {
1280-
t.Errorf("profile undercounted %q critical section", set.name)
1281-
}
1282-
if set.profTime > set.maxTime {
1283-
t.Errorf("profile overcounted %q critical section", set.name)
1284-
}
1285-
}
1286-
1287-
var totalProfileGrowth float64
1288-
for _, growth := range profileGrowth {
1289-
totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
1290-
}
1291-
12921183
const slop = 1.5 // account for nanotime vs cputicks
12931184
t.Run("compare timers", func(t *testing.T) {
12941185
testenv.SkipFlaky(t, 64253)
1295-
if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
1186+
if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
12961187
t.Errorf("views differ by more than %fx", slop)
12971188
}
12981189
})
@@ -1313,7 +1204,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
13131204
// 1e-9 chance of that, which is an acceptably low flakiness rate.
13141205
const samplingSlop = 2.5
13151206

1316-
if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
1207+
if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
13171208
// The test imposes a delay with usleep, verified with calls to
13181209
// nanotime. Compare against the runtime/metrics package's view
13191210
// (based on nanotime) rather than runtime/pprof's view (based
@@ -1324,13 +1215,8 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
13241215
t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
13251216
}
13261217

1327-
var totalProfileGrowth float64
1328-
for _, growth := range profileGrowth {
1329-
totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
1330-
}
1331-
13321218
const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
1333-
if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
1219+
if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
13341220
t.Errorf("views differ by more than %fx", timerSlop)
13351221
}
13361222
})
@@ -1397,9 +1283,6 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
13971283
})
13981284
}
13991285

1400-
func slowMarkerFrame(fn func()) { fn() }
1401-
func fastMarkerFrame(fn func()) { fn() }
1402-
14031286
// contentionWorker provides cleaner call stacks for lock contention profile tests
14041287
type contentionWorker struct {
14051288
before func()

0 commit comments

Comments (0)