Skip to content

Commit 62e4156

Browse files
committed
runtime: fix lock acquire cycles related to scavenge.lock
There are currently two edges in the lock cycle graph caused by scavenge.lock: with sched.lock and mheap_.lock. These edges appear because of the call to ready() and stack growths respectively. Furthermore, there's already an invariant in the code wherein mheap_.lock must be acquired before scavenge.lock, hence the cycle. The fix to this is to bring scavenge.lock higher in the lock cycle graph, such that sched.lock and mheap_.lock are only acquired once scavenge.lock is already held. To faciliate this change, we move scavenger waking outside of gcSetTriggerRatio such that it doesn't have to happen with the heap locked. Furthermore, we check scavenge generation numbers with the heap locked by using gopark instead of goparkunlock, and specify a function which aborts the park should there be any skew in generation count. Fixes #34047. Change-Id: I3519119214bac66375e2b1262b36ce376c820d12 Reviewed-on: https://go-review.googlesource.com/c/go/+/191977 Run-TryBot: Michael Knyszek <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent f7a00c9 commit 62e4156

File tree

3 files changed

+45
-29
lines changed

3 files changed

+45
-29
lines changed

src/runtime/mgc.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,8 @@ func setGCPercent(in int32) (out int32) {
229229
gcSetTriggerRatio(memstats.triggerRatio)
230230
unlock(&mheap_.lock)
231231
})
232+
// Pacing changed, so the scavenger should be awoken.
233+
wakeScavenger()
232234

233235
// If we just disabled GC, wait for any concurrent GC mark to
234236
// finish so we always return with no GC running.
@@ -1664,6 +1666,9 @@ func gcMarkTermination(nextTriggerRatio float64) {
16641666
// Update GC trigger and pacing for the next cycle.
16651667
gcSetTriggerRatio(nextTriggerRatio)
16661668

1669+
// Pacing changed, so the scavenger should be awoken.
1670+
wakeScavenger()
1671+
16671672
// Update timing memstats
16681673
now := nanotime()
16691674
sec, nsec, _ := time_now()

src/runtime/mgcscavenge.go

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -186,36 +186,41 @@ func gcPaceScavenger() {
186186

187187
now := nanotime()
188188

189-
lock(&scavenge.lock)
190-
191189
// Update all the pacing parameters in mheap with scavenge.lock held,
192190
// so that scavenge.gen is kept in sync with the updated values.
193191
mheap_.scavengeRetainedGoal = retainedGoal
194192
mheap_.scavengeRetainedBasis = retainedNow
195193
mheap_.scavengeTimeBasis = now
196194
mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
197-
scavenge.gen++ // increase scavenge generation
198-
199-
// Wake up background scavenger if needed, since the pacing was just updated.
200-
wakeScavengerLocked()
201-
202-
unlock(&scavenge.lock)
195+
mheap_.scavengeGen++ // increase scavenge generation
203196
}
204197

205-
// State of the background scavenger.
198+
// Sleep/wait state of the background scavenger.
206199
var scavenge struct {
207200
lock mutex
208201
g *g
209202
parked bool
210203
timer *timer
211-
gen uint32 // read with either lock or mheap_.lock, write with both
204+
205+
// Generation counter.
206+
//
207+
// It represents the last generation count (as defined by
208+
// mheap_.scavengeGen) checked by the scavenger and is updated
209+
// each time the scavenger checks whether it is on-pace.
210+
//
211+
// Skew between this field and mheap_.scavengeGen is used to
212+
// determine whether a new update is available.
213+
//
214+
// Protected by mheap_.lock.
215+
gen uint64
212216
}
213217

214-
// wakeScavengerLocked unparks the scavenger if necessary. It must be called
218+
// wakeScavenger unparks the scavenger if necessary. It must be called
215219
// after any pacing update.
216220
//
217-
// scavenge.lock must be held.
218-
func wakeScavengerLocked() {
221+
// mheap_.lock and scavenge.lock must not be held.
222+
func wakeScavenger() {
223+
lock(&scavenge.lock)
219224
if scavenge.parked {
220225
// Try to stop the timer but we don't really care if we succeed.
221226
// It's possible that either a timer was never started, or that
@@ -230,36 +235,43 @@ func wakeScavengerLocked() {
230235
scavenge.parked = false
231236
ready(scavenge.g, 0, true)
232237
}
238+
unlock(&scavenge.lock)
233239
}
234240

235241
// scavengeSleep attempts to put the scavenger to sleep for ns.
236-
// It also checks to see if gen != scavenge.gen before going to sleep,
237-
// and aborts if true (meaning an update had occurred).
238242
//
239243
// Note that this function should only be called by the scavenger.
240244
//
241245
// The scavenger may be woken up earlier by a pacing change, and it may not go
242246
// to sleep at all if there's a pending pacing change.
243247
//
244248
// Returns false if awoken early (i.e. true means a complete sleep).
245-
func scavengeSleep(gen uint32, ns int64) bool {
249+
func scavengeSleep(ns int64) bool {
246250
lock(&scavenge.lock)
247251

248-
// If there was an update, just abort the sleep.
249-
if scavenge.gen != gen {
252+
// First check if there's a pending update.
253+
// If there is one, don't bother sleeping.
254+
var hasUpdate bool
255+
systemstack(func() {
256+
lock(&mheap_.lock)
257+
hasUpdate = mheap_.scavengeGen != scavenge.gen
258+
unlock(&mheap_.lock)
259+
})
260+
if hasUpdate {
250261
unlock(&scavenge.lock)
251262
return false
252263
}
253264

254265
// Set the timer.
266+
//
267+
// This must happen here instead of inside gopark
268+
// because we can't close over any variables without
269+
// failing escape analysis.
255270
now := nanotime()
256271
scavenge.timer.when = now + ns
257272
startTimer(scavenge.timer)
258273

259-
// Park the goroutine. It's fine that we don't publish the
260-
// fact that the timer was set; even if the timer wakes up
261-
// and fire scavengeReady before we park, it'll block on
262-
// scavenge.lock.
274+
// Mark ourself as asleep and go to sleep.
263275
scavenge.parked = true
264276
goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
265277

@@ -280,9 +292,7 @@ func bgscavenge(c chan int) {
280292

281293
scavenge.timer = new(timer)
282294
scavenge.timer.f = func(_ interface{}, _ uintptr) {
283-
lock(&scavenge.lock)
284-
wakeScavengerLocked()
285-
unlock(&scavenge.lock)
295+
wakeScavenger()
286296
}
287297

288298
c <- 1
@@ -311,14 +321,14 @@ func bgscavenge(c chan int) {
311321
released := uintptr(0)
312322
park := false
313323
ttnext := int64(0)
314-
gen := uint32(0)
315324

316325
// Run on the system stack since we grab the heap lock,
317326
// and a stack growth with the heap lock means a deadlock.
318327
systemstack(func() {
319328
lock(&mheap_.lock)
320329

321-
gen = scavenge.gen
330+
// Update the last generation count that the scavenger has handled.
331+
scavenge.gen = mheap_.scavengeGen
322332

323333
// If background scavenging is disabled or if there's no work to do just park.
324334
retained := heapRetained()
@@ -375,7 +385,7 @@ func bgscavenge(c chan int) {
375385
if released == 0 {
376386
// If we were unable to release anything this may be because there's
377387
// no free memory available to scavenge. Go to sleep and try again.
378-
if scavengeSleep(gen, retryDelayNS) {
388+
if scavengeSleep(retryDelayNS) {
379389
// If we successfully slept through the delay, back off exponentially.
380390
retryDelayNS *= 2
381391
}
@@ -387,7 +397,7 @@ func bgscavenge(c chan int) {
387397
// If there's an appreciable amount of time until the next scavenging
388398
// goal, just sleep. We'll get woken up if anything changes and this
389399
// way we avoid spinning.
390-
scavengeSleep(gen, ttnext)
400+
scavengeSleep(ttnext)
391401
continue
392402
}
393403

src/runtime/mheap.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ type mheap struct {
107107
scavengeRetainedBasis uint64
108108
scavengeBytesPerNS float64
109109
scavengeRetainedGoal uint64
110+
scavengeGen uint64 // incremented on each pacing update
110111

111112
// Page reclaimer state
112113

0 commit comments

Comments
 (0)