Skip to content

Commit adc9c45

Browse files
rhysh authored and gopherbot committed
runtime: clean up new lock2 structure
Simplify some flow control, as suggested on https://go.dev/cl/620435.

The MutexCapture microbenchmark shows a bit of throughput improvement at
moderate levels of contention, and little change to capture and
starvation. (Note that the capture and starvation figures below are in
terms of power-of-two buckets multiplied by throughput, so they either
follow similar patterns or move by a factor of two.)

For #68578

goos: linux
goarch: amd64
pkg: runtime
cpu: 13th Gen Intel(R) Core(TM) i7-13700H

                 │     old      │                 new                 │
                 │    sec/op    │    sec/op     vs base               │
MutexCapture       18.21n ±  0%   18.35n ±  0%   +0.77% (p=0.000 n=10)
MutexCapture-2     21.46n ±  8%   21.05n ± 12%        ~ (p=0.796 n=10)
MutexCapture-3     22.56n ±  9%   22.59n ± 18%        ~ (p=0.631 n=10)
MutexCapture-4     22.85n ±  5%   22.74n ±  2%        ~ (p=0.565 n=10)
MutexCapture-5     22.84n ±  5%   22.50n ± 14%        ~ (p=0.912 n=10)
MutexCapture-6     23.33n ± 14%   22.22n ±  3%   -4.78% (p=0.004 n=10)
MutexCapture-7     27.04n ± 14%   23.78n ± 15%        ~ (p=0.089 n=10)
MutexCapture-8     25.44n ± 10%   23.03n ±  6%   -9.48% (p=0.004 n=10)
MutexCapture-9     25.56n ±  7%   24.39n ± 11%        ~ (p=0.218 n=10)
MutexCapture-10    26.77n ± 10%   24.00n ±  7%  -10.33% (p=0.023 n=10)
MutexCapture-11    27.02n ±  7%   24.55n ± 15%   -9.18% (p=0.035 n=10)
MutexCapture-12    26.71n ±  8%   24.96n ±  8%        ~ (p=0.148 n=10)
MutexCapture-13    25.58n ±  4%   25.82n ±  5%        ~ (p=0.271 n=10)
MutexCapture-14    26.86n ±  6%   25.91n ±  7%        ~ (p=0.529 n=10)
MutexCapture-15    25.12n ± 13%   26.16n ±  4%        ~ (p=0.353 n=10)
MutexCapture-16    26.18n ±  4%   26.21n ±  9%        ~ (p=0.838 n=10)
MutexCapture-17    26.04n ±  4%   25.85n ±  5%        ~ (p=0.363 n=10)
MutexCapture-18    26.02n ±  7%   25.93n ±  5%        ~ (p=0.853 n=10)
MutexCapture-19    25.67n ±  5%   26.21n ±  4%        ~ (p=0.631 n=10)
MutexCapture-20    25.50n ±  6%   25.99n ±  8%        ~ (p=0.404 n=10)
geomean            24.73n         24.02n         -2.88%

                 │      old       │                   new                   │
                 │ sec/streak-p90 │ sec/streak-p90  vs base                 │
MutexCapture        76.36m ±  0%     76.96m ±   0%   +0.79% (p=0.000 n=10)
MutexCapture-2     10.609µ ± 50%     5.390µ ± 119%        ~ (p=0.579 n=10)
MutexCapture-3      5.936µ ± 93%     5.782µ ±  18%        ~ (p=0.684 n=10)
MutexCapture-4      5.849µ ±  5%     5.820µ ±   2%        ~ (p=0.579 n=10)
MutexCapture-5      5.849µ ±  5%     5.759µ ±  14%        ~ (p=0.912 n=10)
MutexCapture-6      5.975µ ± 14%     5.687µ ±   3%   -4.81% (p=0.004 n=10)
MutexCapture-7      6.921µ ± 14%     6.086µ ±  18%        ~ (p=0.165 n=10)
MutexCapture-8      6.512µ ± 10%     5.894µ ±   6%   -9.50% (p=0.004 n=10)
MutexCapture-9      6.544µ ±  7%     6.245µ ±  11%        ~ (p=0.218 n=10)
MutexCapture-10     6.962µ ± 11%     6.144µ ±   7%  -11.76% (p=0.023 n=10)
MutexCapture-11     6.938µ ±  7%     6.284µ ± 130%        ~ (p=0.190 n=10)
MutexCapture-12     6.838µ ±  8%     6.408µ ±  13%        ~ (p=0.404 n=10)
MutexCapture-13     6.549µ ±  4%     6.608µ ±   5%        ~ (p=0.271 n=10)
MutexCapture-14     6.877µ ±  8%     6.634µ ±   7%        ~ (p=0.436 n=10)
MutexCapture-15     6.433µ ± 13%     6.697µ ±   4%        ~ (p=0.247 n=10)
MutexCapture-16     6.702µ ± 10%     6.711µ ± 116%        ~ (p=0.796 n=10)
MutexCapture-17     6.730µ ±  3%     6.619µ ±   5%        ~ (p=0.225 n=10)
MutexCapture-18     6.663µ ±  7%     6.716µ ±  13%        ~ (p=0.853 n=10)
MutexCapture-19     6.570µ ±  5%     6.710µ ±   4%        ~ (p=0.529 n=10)
MutexCapture-20     6.528µ ±  6%     6.775µ ±  11%        ~ (p=0.247 n=10)
geomean             10.66µ           10.00µ          -6.13%

                 │      old       │                   new                   │
                 │ sec/starve-p90 │ sec/starve-p90  vs base                 │
MutexCapture-2     10.609µ ±  50%    5.390µ ± 119%        ~ (p=0.579 n=10)
MutexCapture-3      184.8µ ±  91%    183.9µ ±  48%        ~ (p=0.436 n=10)
MutexCapture-4      388.8µ ± 270%    375.6µ ± 280%        ~ (p=0.436 n=10)
MutexCapture-5      807.2µ ±  83%   2880.9µ ±  85%        ~ (p=0.105 n=10)
MutexCapture-6      2.272m ±  61%    2.173m ±  34%        ~ (p=0.280 n=10)
MutexCapture-7      1.351m ± 125%    2.990m ±  70%        ~ (p=0.393 n=10)
MutexCapture-8      3.328m ±  97%    3.064m ±  96%        ~ (p=0.739 n=10)
MutexCapture-9      3.526m ±  91%    3.081m ±  47%  -12.62% (p=0.015 n=10)
MutexCapture-10     3.641m ±  86%    3.228m ±  90%  -11.34% (p=0.005 n=10)
MutexCapture-11     3.324m ± 109%    3.190m ±  71%        ~ (p=0.481 n=10)
MutexCapture-12     3.519m ±  77%    3.200m ± 106%        ~ (p=0.393 n=10)
MutexCapture-13     3.353m ±  91%    3.368m ±  99%        ~ (p=0.853 n=10)
MutexCapture-14     3.314m ± 101%    3.396m ± 286%        ~ (p=0.353 n=10)
MutexCapture-15     3.534m ±  83%    3.397m ±  91%        ~ (p=0.739 n=10)
MutexCapture-16     3.485m ±  90%    3.436m ± 116%        ~ (p=0.853 n=10)
MutexCapture-17     6.516m ±  48%    3.452m ±  88%        ~ (p=0.190 n=10)
MutexCapture-18     6.645m ± 105%    3.439m ± 108%        ~ (p=0.218 n=10)
MutexCapture-19     6.521m ±  46%    4.907m ±  42%        ~ (p=0.529 n=10)
MutexCapture-20     6.532m ±  47%    3.516m ±  89%        ~ (p=0.089 n=10)
geomean             1.919m           1.783m          -7.06%

Change-Id: I36106e1baf8afd132f1568748d1b83b797fa260e
Reviewed-on: https://go-review.googlesource.com/c/go/+/629415
Reviewed-by: Michael Knyszek <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
1 parent c315862 commit adc9c45

File tree

1 file changed

+30
-27
lines changed

1 file changed

+30
-27
lines changed

src/runtime/lock_spinbit.go

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,8 @@ func lock2(l *mutex) {
159159

160160
k8 := key8(&l.key)
161161

162-
var v8 uint8
163162
// Speculative grab for lock.
164-
v8 = atomic.Xchg8(k8, mutexLocked)
163+
v8 := atomic.Xchg8(k8, mutexLocked)
165164
if v8&mutexLocked == 0 {
166165
if v8&mutexSleeping != 0 {
167166
atomic.Or8(k8, mutexSleeping)
@@ -183,11 +182,13 @@ func lock2(l *mutex) {
183182
v := atomic.Loaduintptr(&l.key)
184183
tryAcquire:
185184
for i := 0; ; i++ {
186-
for v&mutexLocked == 0 {
185+
if v&mutexLocked == 0 {
187186
if weSpin {
188-
next := (v &^ mutexMMask) | (v & (mutexMMask &^ mutexSpinning)) | mutexLocked
189-
if next&^mutexMMask != 0 {
190-
next |= mutexSleeping
187+
next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
188+
if next&^mutexMMask == 0 {
189+
// The fast-path Xchg8 may have cleared mutexSleeping. Fix
190+
// the hint so unlock2 knows when to use its slow path.
191+
next = next &^ mutexSleeping
191192
}
192193
if atomic.Casuintptr(&l.key, v, next) {
193194
timer.end()
@@ -201,6 +202,7 @@ tryAcquire:
201202
}
202203
}
203204
v = atomic.Loaduintptr(&l.key)
205+
continue tryAcquire
204206
}
205207

206208
if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
@@ -214,35 +216,36 @@ tryAcquire:
214216
v = atomic.Loaduintptr(&l.key)
215217
continue tryAcquire
216218
} else if i < spin+mutexPassiveSpinCount {
217-
osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268
219+
osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
218220
v = atomic.Loaduintptr(&l.key)
219221
continue tryAcquire
220222
}
221223
}
222224

223225
// Go to sleep
224-
for v&mutexLocked != 0 {
225-
// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
226-
gp.m.mWaitList.next = mutexWaitListHead(v)
227-
228-
// Pack a (partial) pointer to this M with the current lock state bits
229-
next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
230-
if weSpin { // If we were spinning, prepare to retire
231-
next = next &^ mutexSpinning
232-
}
226+
if v&mutexLocked == 0 {
227+
throw("runtime·lock: sleeping while lock is available")
228+
}
233229

234-
if atomic.Casuintptr(&l.key, v, next) {
235-
weSpin = false
236-
// We've pushed ourselves onto the stack of waiters. Wait.
237-
semasleep(-1)
238-
atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
239-
gp.m.mWaitList.next = 0
240-
i = 0
241-
v = atomic.Loaduintptr(&l.key)
242-
continue tryAcquire
243-
}
244-
v = atomic.Loaduintptr(&l.key)
230+
// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
231+
gp.m.mWaitList.next = mutexWaitListHead(v)
232+
233+
// Pack a (partial) pointer to this M with the current lock state bits
234+
next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
235+
if weSpin { // If we were spinning, prepare to retire
236+
next = next &^ mutexSpinning
237+
}
238+
239+
if atomic.Casuintptr(&l.key, v, next) {
240+
weSpin = false
241+
// We've pushed ourselves onto the stack of waiters. Wait.
242+
semasleep(-1)
243+
atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
244+
i = 0
245245
}
246+
247+
gp.m.mWaitList.next = 0
248+
v = atomic.Loaduintptr(&l.key)
246249
}
247250
}
248251

0 commit comments

Comments
 (0)