@@ -1020,9 +1020,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1020
1020
return metricGrowth , profileGrowth , p
1021
1021
}
1022
1022
1023
- testcase := func (strictTiming bool , acceptStacks [][]string , workers int , fn func () bool ) func (t * testing.T ) (metricGrowth , profileGrowth float64 , n , value int64 , explain func ()) {
1024
- return func (t * testing.T ) (metricGrowth , profileGrowth float64 , n , value int64 , explain func ()) {
1025
- metricGrowth , profileGrowth , p := measureDelta (t , func () {
1023
+ testcase := func (strictTiming bool , acceptStacks [][]string , workers int , fn func () bool ) func (t * testing.T ) (metricGrowth float64 , profileGrowth [] int64 , n , value int64 , explain func ()) {
1024
+ return func (t * testing.T ) (metricGrowth float64 , profileGrowth [] int64 , n , value int64 , explain func ()) {
1025
+ metricGrowth , totalProfileGrowth , p := measureDelta (t , func () {
1026
1026
var started , stopped sync.WaitGroup
1027
1027
started .Add (workers )
1028
1028
stopped .Add (workers )
@@ -1042,7 +1042,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1042
1042
stopped .Wait ()
1043
1043
})
1044
1044
1045
- if profileGrowth == 0 {
1045
+ if totalProfileGrowth == 0 {
1046
1046
t .Errorf ("no increase in mutex profile" )
1047
1047
}
1048
1048
if metricGrowth == 0 && strictTiming {
@@ -1062,7 +1062,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1062
1062
// together. It doesn't work as well for user-space contention, where the
1063
1063
// involved goroutines are not _Grunnable the whole time and so need to pass
1064
1064
// through the scheduler.
1065
- t .Logf ("lock contention growth in runtime/pprof's view (%fs)" , profileGrowth )
1065
+ t .Logf ("lock contention growth in runtime/pprof's view (%fs)" , totalProfileGrowth )
1066
1066
t .Logf ("lock contention growth in runtime/metrics' view (%fs)" , metricGrowth )
1067
1067
1068
1068
acceptStacks = append ([][]string (nil ), acceptStacks ... )
@@ -1082,7 +1082,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1082
1082
}
1083
1083
1084
1084
var stks [][]string
1085
- values := make ([][2 ]int64 , len (acceptStacks ))
1085
+ values := make ([][2 ]int64 , len (acceptStacks )+ 1 )
1086
1086
for _ , s := range p .Sample {
1087
1087
var have []string
1088
1088
for _ , loc := range s .Location {
@@ -1091,16 +1091,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1091
1091
}
1092
1092
}
1093
1093
stks = append (stks , have )
1094
+ found := false
1094
1095
for i , stk := range acceptStacks {
1095
1096
if slices .Equal (have , stk ) {
1096
1097
values [i ][0 ] += s .Value [0 ]
1097
1098
values [i ][1 ] += s .Value [1 ]
1099
+ found = true
1100
+ break
1098
1101
}
1099
1102
}
1103
+ if ! found {
1104
+ values [len (values )- 1 ][0 ] += s .Value [0 ]
1105
+ values [len (values )- 1 ][1 ] += s .Value [1 ]
1106
+ }
1100
1107
}
1108
+ profileGrowth = make ([]int64 , len (acceptStacks )+ 1 )
1109
+ profileGrowth [len (profileGrowth )- 1 ] = values [len (values )- 1 ][1 ]
1101
1110
for i , stk := range acceptStacks {
1102
1111
n += values [i ][0 ]
1103
1112
value += values [i ][1 ]
1113
+ profileGrowth [i ] = values [i ][1 ]
1104
1114
t .Logf ("stack %v has samples totaling n=%d value=%d" , stk , values [i ][0 ], values [i ][1 ])
1105
1115
}
1106
1116
if n == 0 && value == 0 {
@@ -1122,11 +1132,6 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1122
1132
name := t .Name ()
1123
1133
1124
1134
t .Run ("runtime.lock" , func (t * testing.T ) {
1125
- mus := make ([]runtime.Mutex , 200 )
1126
- var needContention atomic.Int64
1127
- delay := 100 * time .Microsecond // large relative to system noise, for comparison between clocks
1128
- delayMicros := delay .Microseconds ()
1129
-
1130
1135
// The goroutine that acquires the lock will only proceed when it
1131
1136
// detects that its partner is contended for the lock. That will lead to
1132
1137
// live-lock if anything (such as a STW) prevents the partner goroutine
@@ -1136,46 +1141,107 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1136
1141
// will end up contended twice. Instead, disable the GC.
1137
1142
defer debug .SetGCPercent (debug .SetGCPercent (- 1 ))
1138
1143
1139
- const workers = 2
1140
- if runtime .GOMAXPROCS (0 ) < workers {
1141
- t .Skipf ("contention on runtime-internal locks requires GOMAXPROCS >= %d" , workers )
1144
+ mus := make ([]runtime.Mutex , 200 )
1145
+ var needContention atomic.Int64
1146
+
1147
+ baseDelay := 100 * time .Microsecond // large relative to system noise, for comparison between clocks
1148
+ fastDelayMicros := baseDelay .Microseconds ()
1149
+ slowDelayMicros := baseDelay .Microseconds () * 4
1150
+
1151
+ const (
1152
+ fastRole = iota
1153
+ slowRole
1154
+ workerCount
1155
+ )
1156
+ if runtime .GOMAXPROCS (0 ) < workerCount {
1157
+ t .Skipf ("contention on runtime-internal locks requires GOMAXPROCS >= %d" , workerCount )
1142
1158
}
1143
1159
1160
+ minTicks := make ([][]int64 , workerCount ) // lower bound, known-contended time, measured by cputicks
1161
+ maxTicks := make ([][]int64 , workerCount ) // upper bound, total lock() duration, measured by cputicks
1162
+ for i := range minTicks {
1163
+ minTicks [i ] = make ([]int64 , len (mus ))
1164
+ maxTicks [i ] = make ([]int64 , len (mus ))
1165
+ }
1166
+ var id atomic.Int32
1144
1167
fn := func () bool {
1145
1168
n := int (needContention .Load ())
1146
1169
if n < 0 {
1147
1170
return false
1148
1171
}
1149
1172
mu := & mus [n ]
1150
1173
1151
- runtime .Lock (mu )
1152
- for int (needContention .Load ()) == n {
1153
- if runtime .MutexContended (mu ) {
1154
- // make them wait a little while
1155
- for start := runtime .Nanotime (); (runtime .Nanotime ()- start )/ 1000 < delayMicros ; {
1156
- runtime .Usleep (uint32 (delayMicros ))
1174
+ // Each worker has a role: to have a fast or slow critical section.
1175
+ // Rotate the role assignments as we step through the mutex slice so
1176
+ // we don't end up with one M always claiming the same kind of work.
1177
+ id := int (id .Add (1 ))
1178
+ role := (id + n ) % workerCount
1179
+
1180
+ marker , delayMicros := fastMarkerFrame , fastDelayMicros
1181
+ if role == slowRole {
1182
+ marker , delayMicros = slowMarkerFrame , slowDelayMicros
1183
+ }
1184
+
1185
+ // Each lock is used by two different critical sections, one fast
1186
+ // and one slow, identified in profiles by their different "marker"
1187
+ // functions. We expect the profile to blame each for the amount of
1188
+ // delay it inflicts on other users of the lock. We run one worker
1189
+ // of each kind, so any contention in one would be due to the other.
1190
+ //
1191
+ // We measure how long our runtime.lock call takes, which sets an
1192
+ // upper bound on how much blame to expect for the other worker type
1193
+ // in the profile. And if we acquire the lock first, we wait for the
1194
+ // other worker to announce its contention. We measure the
1195
+ // known-contended time, to use as a lower bound on how much blame
1196
+ // we expect of ourselves in the profile. Then we stall for a little
1197
+ // while (different amounts for "fast" versus "slow") before
1198
+ // unlocking the mutex.
1199
+
1200
+ marker (func () {
1201
+ t0 := runtime .Cputicks ()
1202
+ runtime .Lock (mu )
1203
+ maxTicks [role ][n ] = runtime .Cputicks () - t0
1204
+ minTicks [role ][n ] = 0
1205
+ for int (needContention .Load ()) == n {
1206
+ if runtime .MutexContended (mu ) {
1207
+ t1 := runtime .Cputicks ()
1208
+ // make them wait a little while
1209
+ for start := runtime .Nanotime (); (runtime .Nanotime ()- start )/ 1000 < delayMicros ; {
1210
+ runtime .Usleep (uint32 (1 + delayMicros / 8 ))
1211
+ }
1212
+ minTicks [role ][n ] = runtime .Cputicks () - t1
1213
+ break
1157
1214
}
1158
- break
1159
1215
}
1160
- }
1161
- runtime . Unlock ( mu )
1162
- needContention . Store ( int64 ( n - 1 ) )
1216
+ runtime . Unlock ( mu )
1217
+ needContention . Store ( int64 ( n - 1 ) )
1218
+ } )
1163
1219
1164
1220
return true
1165
1221
}
1166
1222
1167
- stks := [][]string {{
1168
- "runtime.unlock" ,
1169
- "runtime_test." + name + ".func5.1" ,
1170
- "runtime_test.(*contentionWorker).run" ,
1171
- }}
1223
+ stks := make ([][]string , 2 )
1224
+ for i := range stks {
1225
+ marker := "runtime_test.fastMarkerFrame"
1226
+ if i == slowRole {
1227
+ marker = "runtime_test.slowMarkerFrame"
1228
+ }
1229
+
1230
+ stks [i ] = []string {
1231
+ "runtime.unlock" ,
1232
+ "runtime_test." + name + ".func5.1.1" ,
1233
+ marker ,
1234
+ "runtime_test." + name + ".func5.1" ,
1235
+ "runtime_test.(*contentionWorker).run" ,
1236
+ }
1237
+ }
1172
1238
1173
1239
t .Run ("sample-1" , func (t * testing.T ) {
1174
1240
old := runtime .SetMutexProfileFraction (1 )
1175
1241
defer runtime .SetMutexProfileFraction (old )
1176
1242
1177
1243
needContention .Store (int64 (len (mus ) - 1 ))
1178
- metricGrowth , profileGrowth , n , _ , explain := testcase (true , stks , workers , fn )(t )
1244
+ metricGrowth , profileGrowth , n , _ , explain := testcase (true , stks , workerCount , fn )(t )
1179
1245
defer func () {
1180
1246
if t .Failed () {
1181
1247
explain ()
@@ -1187,7 +1253,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1187
1253
// gTrackingPeriod, so we don't have a hard lower bound here.
1188
1254
testenv .SkipFlaky (t , 64253 )
1189
1255
1190
- if have , want := metricGrowth , delay .Seconds ()* float64 (len (mus )); have < want {
1256
+ if have , want := metricGrowth , baseDelay .Seconds ()* float64 (len (mus )); have < want {
1191
1257
// The test imposes a delay with usleep, verified with calls to
1192
1258
// nanotime. Compare against the runtime/metrics package's view
1193
1259
// (based on nanotime) rather than runtime/pprof's view (based
@@ -1199,10 +1265,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1199
1265
t .Errorf ("mutex profile reported contention count different from the known true count (%d != %d)" , have , want )
1200
1266
}
1201
1267
1268
+ var slowMinTicks , fastMinTicks int64
1269
+ for role , ticks := range minTicks {
1270
+ for _ , delta := range ticks {
1271
+ if role == slowRole {
1272
+ slowMinTicks += delta
1273
+ } else {
1274
+ fastMinTicks += delta
1275
+ }
1276
+ }
1277
+ }
1278
+ var slowMaxTicks , fastMaxTicks int64
1279
+ for role , ticks := range maxTicks {
1280
+ for _ , delta := range ticks {
1281
+ if role == slowRole {
1282
+ slowMaxTicks += delta
1283
+ } else {
1284
+ fastMaxTicks += delta
1285
+ }
1286
+ }
1287
+ }
1288
+
1289
+ cpuGHz := float64 (runtime .CyclesPerSecond ()) / 1e9
1290
+ for _ , set := range []struct {
1291
+ name string
1292
+ profTime int64
1293
+ minTime int64
1294
+ maxTime int64
1295
+ }{
1296
+ {
1297
+ name : "slow" ,
1298
+ profTime : profileGrowth [slowRole ],
1299
+ minTime : int64 (float64 (slowMinTicks ) / cpuGHz ),
1300
+ maxTime : int64 (float64 (fastMaxTicks ) / cpuGHz ),
1301
+ },
1302
+ {
1303
+ name : "fast" ,
1304
+ profTime : profileGrowth [fastRole ],
1305
+ minTime : int64 (float64 (fastMinTicks ) / cpuGHz ),
1306
+ maxTime : int64 (float64 (slowMaxTicks ) / cpuGHz ),
1307
+ },
1308
+ } {
1309
+ t .Logf ("profile's view of delays due to %q critical section: %dns" , set .name , set .profTime )
1310
+ t .Logf ("test's view of known-contended time within %q critical section: %dns" , set .name , set .minTime )
1311
+ t .Logf ("test's view of lock duration before critical sections other than %q: %dns" , set .name , set .maxTime )
1312
+
1313
+ if set .profTime < set .minTime {
1314
+ t .Errorf ("profile undercounted %q critical section" , set .name )
1315
+ }
1316
+ if set .profTime > set .maxTime {
1317
+ t .Errorf ("profile overcounted %q critical section" , set .name )
1318
+ }
1319
+ }
1320
+
1321
+ var totalProfileGrowth float64
1322
+ for _ , growth := range profileGrowth {
1323
+ totalProfileGrowth += float64 (growth ) * time .Nanosecond .Seconds ()
1324
+ }
1325
+
1202
1326
const slop = 1.5 // account for nanotime vs cputicks
1203
1327
t .Run ("compare timers" , func (t * testing.T ) {
1204
1328
testenv .SkipFlaky (t , 64253 )
1205
- if profileGrowth > slop * metricGrowth || metricGrowth > slop * profileGrowth {
1329
+ if totalProfileGrowth > slop * metricGrowth || metricGrowth > slop * totalProfileGrowth {
1206
1330
t .Errorf ("views differ by more than %fx" , slop )
1207
1331
}
1208
1332
})
@@ -1215,7 +1339,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1215
1339
defer runtime .SetMutexProfileFraction (old )
1216
1340
1217
1341
needContention .Store (int64 (len (mus ) - 1 ))
1218
- metricGrowth , profileGrowth , n , _ , explain := testcase (true , stks , workers , fn )(t )
1342
+ metricGrowth , profileGrowth , n , _ , explain := testcase (true , stks , workerCount , fn )(t )
1219
1343
defer func () {
1220
1344
if t .Failed () {
1221
1345
explain ()
@@ -1228,7 +1352,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1228
1352
// 1e-9 chance of that, which is an acceptably low flakiness rate.
1229
1353
const samplingSlop = 2.5
1230
1354
1231
- if have , want := metricGrowth , delay .Seconds ()* float64 (len (mus )); samplingSlop * have < want {
1355
+ if have , want := metricGrowth , baseDelay .Seconds ()* float64 (len (mus )); samplingSlop * have < want {
1232
1356
// The test imposes a delay with usleep, verified with calls to
1233
1357
// nanotime. Compare against the runtime/metrics package's view
1234
1358
// (based on nanotime) rather than runtime/pprof's view (based
@@ -1239,8 +1363,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1239
1363
t .Errorf ("mutex profile reported contention count too different from the expected count (%d far from %d)" , have , want )
1240
1364
}
1241
1365
1366
+ var totalProfileGrowth float64
1367
+ for _ , growth := range profileGrowth {
1368
+ totalProfileGrowth += float64 (growth ) * time .Nanosecond .Seconds ()
1369
+ }
1370
+
1242
1371
const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
1243
- if profileGrowth > timerSlop * metricGrowth || metricGrowth > timerSlop * profileGrowth {
1372
+ if totalProfileGrowth > timerSlop * metricGrowth || metricGrowth > timerSlop * totalProfileGrowth {
1244
1373
t .Errorf ("views differ by more than %fx" , timerSlop )
1245
1374
}
1246
1375
})
@@ -1307,6 +1436,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
1307
1436
})
1308
1437
}
1309
1438
1439
+ func slowMarkerFrame (fn func ()) { fn () }
1440
+ func fastMarkerFrame (fn func ()) { fn () }
1441
+
1310
1442
// contentionWorker provides cleaner call stacks for lock contention profile tests
1311
1443
type contentionWorker struct {
1312
1444
before func ()
0 commit comments