Skip to content

Commit 15aa6bb

Browse files
committed
runtime: replace assist sleep loop with park/ready
GC assists must block until the assist can be satisfied (either through stealing credit or doing work) or the GC cycle ends. Currently, this is implemented as a retry loop with a 100 µs delay. This obviously isn't ideal, as it wastes CPU and delays mutator execution. It also has the somewhat peculiar downside that sleeping a G requires allocation, and this requires working around recursive allocation.

Replace this timed delay with a proper scheduling queue. When an assist can't be satisfied immediately, it adds the allocating G to a queue and parks it. Any time background scan credit is flushed, it consults this queue, directly satisfies the debt of queued assists, and wakes up satisfied assists before flushing any remaining credit to the background credit pool.

No effect on the go1 benchmarks. Slightly speeds up the garbage benchmark.

name              old time/op  new time/op  delta
XBenchGarbage-12  5.81ms ± 1%  5.72ms ± 4%  -1.65%  (p=0.011 n=20+20)

Updates #12041.

Change-Id: I8ee3b6274dd097b12b10a8030796a958a4b0e7b7
Reviewed-on: https://go-review.googlesource.com/15890
Reviewed-by: Rick Hudson <[email protected]>
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 0ca4488 commit 15aa6bb

File tree

2 files changed

+137
-23
lines changed

2 files changed

+137
-23
lines changed

src/runtime/mgc.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,14 @@ var work struct {
839839
// initialHeapLive is the value of memstats.heap_live at the
840840
// beginning of this GC cycle.
841841
initialHeapLive uint64
842+
843+
// assistQueue is a queue of assists that are blocked because
844+
// there was neither enough credit to steal nor enough work to
845+
// do.
846+
assistQueue struct {
847+
lock mutex
848+
head, tail guintptr
849+
}
842850
}
843851

844852
// GC runs a garbage collection and blocks the caller until the
@@ -1094,6 +1102,10 @@ func gc(mode gcMode) {
10941102
// in these caches.
10951103
gcFlushGCWork()
10961104

1105+
// Wake all blocked assists. These will run when we
1106+
// start the world again.
1107+
gcWakeAllAssists()
1108+
10971109
gcController.endCycle()
10981110
} else {
10991111
t := nanotime()

src/runtime/mgcmark.go

Lines changed: 125 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ retry:
364364
// stack to determine if we should perform an assist.
365365

366366
// GC is done, so ignore any remaining debt.
367-
scanWork = 0
367+
gp.gcAssistBytes = 0
368368
return
369369
}
370370
// Track time spent in this assist. Since we're on the
@@ -389,7 +389,7 @@ retry:
389389
}
390390

391391
// Record that we did this much scan work.
392-
scanWork -= workDone
392+
//
393393
// Back out the number of bytes of assist credit that
394394
// this scan work counts for. The "1+" is a poor man's
395395
// round-up, to ensure this adds credit even if
@@ -432,31 +432,133 @@ retry:
432432
// We called complete() above, so we should yield to
433433
// the now-runnable GC coordinator.
434434
Gosched()
435-
436-
// It's likely that this assist wasn't able to pay off
437-
// its debt, but it's also likely that the Gosched let
438-
// the GC finish this cycle and there's no point in
439-
// waiting. If the GC finished, skip the delay below.
440-
if atomicload(&gcBlackenEnabled) == 0 {
441-
scanWork = 0
442-
}
443435
}
444436

445-
if scanWork > 0 {
437+
if gp.gcAssistBytes < 0 {
446438
// We were unable to steal enough credit or perform
447439
// enough work to pay off the assist debt. We need to
448440
// do one of these before letting the mutator allocate
449-
// more, so go around again after performing an
450-
// interruptible sleep for 100 us (the same as the
451-
// getfull barrier) to let other mutators run.
452-
453-
// timeSleep may allocate, so avoid recursive assist.
454-
gcAssistBytes := gp.gcAssistBytes
455-
gp.gcAssistBytes = int64(^uint64(0) >> 1)
456-
timeSleep(100 * 1000)
457-
gp.gcAssistBytes = gcAssistBytes
458-
goto retry
441+
// more to prevent over-allocation.
442+
//
443+
// Add this G to an assist queue and park. When the GC
444+
// has more background credit, it will satisfy queued
445+
// assists before flushing to the global credit pool.
446+
//
447+
// Note that this does *not* get woken up when more
448+
// work is added to the work list. The theory is that
449+
// there wasn't enough work to do anyway, so we might
450+
// as well let background marking take care of the
451+
// work that is available.
452+
lock(&work.assistQueue.lock)
453+
454+
// If the GC cycle is over, just return. This is the
455+
// likely path if we called Gosched above. We do this
456+
// under the lock to prevent a GC cycle from ending
457+
// between this check and queuing the assist.
458+
if atomicload(&gcBlackenEnabled) == 0 {
459+
unlock(&work.assistQueue.lock)
460+
return
461+
}
462+
463+
oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
464+
if oldHead == 0 {
465+
work.assistQueue.head.set(gp)
466+
} else {
467+
oldTail.ptr().schedlink.set(gp)
468+
}
469+
work.assistQueue.tail.set(gp)
470+
gp.schedlink.set(nil)
471+
// Recheck for background credit now that this G is in
472+
// the queue, but can still back out. This avoids a
473+
// race in case background marking has flushed more
474+
// credit since we checked above.
475+
if atomicloadint64(&gcController.bgScanCredit) > 0 {
476+
work.assistQueue.head = oldHead
477+
work.assistQueue.tail = oldTail
478+
if oldTail != 0 {
479+
oldTail.ptr().schedlink.set(nil)
480+
}
481+
unlock(&work.assistQueue.lock)
482+
goto retry
483+
}
484+
// Park for real.
485+
goparkunlock(&work.assistQueue.lock, "GC assist", traceEvGoBlock, 2)
486+
487+
// At this point either background GC has satisfied
488+
// this G's assist debt, or the GC cycle is over.
489+
}
490+
}
491+
492+
// gcWakeAllAssists wakes all currently blocked assists. This is used
493+
// at the end of a GC cycle.
//
// The queue is handed off and reset while holding
// work.assistQueue.lock, the same lock an assist takes before it adds
// itself to the queue, so enqueueing cannot interleave with the reset.
494+
func gcWakeAllAssists() {
495+
lock(&work.assistQueue.lock)
// Pass the whole list, starting at the queue head, to injectglist.
// NOTE(review): injectglist is defined elsewhere; presumably it makes
// every G on the schedlink-chained list runnable -- confirm.
496+
injectglist(work.assistQueue.head.ptr())
// Clear both ends so the queue reads as empty (head == 0) afterwards.
497+
work.assistQueue.head.set(nil)
498+
work.assistQueue.tail.set(nil)
499+
unlock(&work.assistQueue.lock)
500+
}
501+
502+
// gcFlushBgCredit flushes scanWork units of background scan work
503+
// credit. This first satisfies blocked assists on the
504+
// work.assistQueue and then flushes any remaining credit to
505+
// gcController.bgScanCredit.
//
// scanWork is the amount of credit to flush, in units of scan work.
// When assists are queued, the credit is converted to bytes with
// gcController.assistBytesPerWork so it can be applied directly to each
// queued G's gcAssistBytes; whatever is left over is converted back
// with gcController.assistWorkPerByte before being added to the pool.
506+
func gcFlushBgCredit(scanWork int64) {
// Note that head is read here without holding
// work.assistQueue.lock; the lock is only taken on the slow path.
507+
if work.assistQueue.head == 0 {
508+
// Fast path; there are no blocked assists. There's a
509+
// small window here where an assist may add itself to
510+
// the blocked queue and park. If that happens, we'll
511+
// just get it on the next flush.
512+
xaddint64(&gcController.bgScanCredit, scanWork)
513+
return
514+
}
515+
// Convert the work credit to bytes, the unit that is compared with
// and added to gp.gcAssistBytes below.
516+
scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)
517+
518+
lock(&work.assistQueue.lock)
519+
gp := work.assistQueue.head.ptr()
// Walk the queue from the head, paying off debts until either the
// queue or the credit runs out.
520+
for gp != nil && scanBytes > 0 {
521+
// Note that gp.gcAssistBytes is negative because gp
522+
// is in debt. Think carefully about the signs below.
523+
if scanBytes+gp.gcAssistBytes >= 0 {
524+
// Satisfy this entire assist debt.
525+
scanBytes += gp.gcAssistBytes
526+
gp.gcAssistBytes = 0
// Step to the next queued G before readying this one.
// NOTE(review): presumably because xgp's schedlink may be
// reused once xgp is runnable -- confirm against ready().
527+
xgp := gp
528+
gp = gp.schedlink.ptr()
529+
ready(xgp, 0)
530+
} else {
531+
// Partially satisfy this assist.
532+
gp.gcAssistBytes += scanBytes
533+
scanBytes = 0
534+
// As a heuristic, we move this assist to the
535+
// back of the queue so that large assists
536+
// can't clog up the assist queue and
537+
// substantially delay small assists.
538+
xgp := gp
539+
gp = gp.schedlink.ptr()
540+
if gp == nil {
541+
// gp is the only assist in the queue.
542+
gp = xgp
543+
} else {
// xgp becomes the new tail: clear its link, append it
// after the current tail, then update the tail pointer.
544+
xgp.schedlink = 0
545+
work.assistQueue.tail.ptr().schedlink.set(xgp)
546+
work.assistQueue.tail.set(xgp)
547+
}
548+
break
549+
}
550+
}
// gp is now the first assist that is still in debt, or nil if every
// queued assist was satisfied; it becomes the new head.
551+
work.assistQueue.head.set(gp)
552+
if gp == nil {
553+
work.assistQueue.tail.set(nil)
554+
}
555+
556+
if scanBytes > 0 {
557+
// Convert from scan bytes back to work.
558+
scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
559+
xaddint64(&gcController.bgScanCredit, scanWork)
459560
}
561+
unlock(&work.assistQueue.lock)
460562
}
461563

462564
//go:nowritebarrier
@@ -725,7 +827,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
725827
if gcw.scanWork >= gcCreditSlack {
726828
xaddint64(&gcController.scanWork, gcw.scanWork)
727829
if flushBgCredit {
728-
xaddint64(&gcController.bgScanCredit, gcw.scanWork-initScanWork)
830+
gcFlushBgCredit(gcw.scanWork - initScanWork)
729831
initScanWork = 0
730832
}
731833
gcw.scanWork = 0
@@ -736,7 +838,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
736838
if gcw.scanWork > 0 {
737839
xaddint64(&gcController.scanWork, gcw.scanWork)
738840
if flushBgCredit {
739-
xaddint64(&gcController.bgScanCredit, gcw.scanWork-initScanWork)
841+
gcFlushBgCredit(gcw.scanWork - initScanWork)
740842
}
741843
gcw.scanWork = 0
742844
}

0 commit comments

Comments
 (0)