Skip to content

Commit 62ba520

Browse files
committed
runtime: eliminate getfull barrier from concurrent mark
Currently dedicated mark workers participate in the getfull barrier during concurrent mark. However, the getfull barrier wasn't designed for concurrent work and this causes no end of headaches. In the concurrent setting, participants come and go. This makes mark completion susceptible to live-lock: since dedicated workers are only periodically polling for completion, it's possible for the program to be in some transient worker each time one of the dedicated workers wakes up to check if it can exit the getfull barrier. It also complicates reasoning about the system because dedicated workers participate directly in the getfull barrier, but transient workers must instead use trygetfull because they have exit conditions that aren't captured by getfull (e.g., fractional workers exit when preempted). The complexity of implementing these exit conditions contributed to #11677. Furthermore, the getfull barrier is inefficient because we could be running user code instead of spinning on a P. In effect, we're dedicating 25% of the CPU to marking even if that means we have to spin to make that 25%. It also causes issues on Windows because we can't actually sleep for 100µs (#8687). Fix this by making dedicated workers no longer participate in the getfull barrier. Instead, dedicated workers simply return to the scheduler when they fail to get more work, regardless of what others workers are doing, and the scheduler only starts new dedicated workers if there's work available. Everything that needs to be handled by this barrier is already handled by detection of mark completion. This makes the system much more symmetric because all workers and assists now use trygetfull during concurrent mark. It also loosens the 25% CPU target so that we can give some of that 25% back to user code if there isn't enough work to keep the mark worker busy. And it eliminates the problematic 100µs sleep on Windows during concurrent mark (though not during mark termination). The downside of this is that if we hit a bottleneck in the heap graph that then expands back out, the system may shut down dedicated workers and take a while to start them back up. We'll address this in the next commit. Updates #12041 and #8687. No effect on the go1 benchmarks. This slows down the garbage benchmark by 9%, but we'll more than make it up in the next commit. name old time/op new time/op delta XBenchGarbage-12 5.80ms ± 2% 6.32ms ± 4% +9.03% (p=0.000 n=20+20) Change-Id: I65100a9ba005a8b5cf97940798918672ea9dd09b Reviewed-on: https://go-review.googlesource.com/16297 Reviewed-by: Rick Hudson <[email protected]>
1 parent afab771 commit 62ba520

File tree

2 files changed

+64
-68
lines changed

2 files changed

+64
-68
lines changed

src/runtime/mgc.go

Lines changed: 50 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,7 @@ type gcMarkWorkerMode int
250250
const (
251251
// gcMarkWorkerDedicatedMode indicates that the P of a mark
252252
// worker is dedicated to running that mark worker. The mark
253-
// worker should run without preemption until concurrent mark
254-
// is done.
253+
// worker should run without preemption.
255254
gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota
256255

257256
// gcMarkWorkerFractionalMode indicates that a P is currently
@@ -593,6 +592,36 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
593592
return nil
594593
}
595594

595+
if !gcMarkWorkAvailable(_p_) {
596+
// No work to be done right now. This can happen at
597+
// the end of the mark phase when there are still
598+
// assists tapering off. Don't bother running a worker
599+
// now because it'll just return immediately.
600+
if work.nwait == work.nproc {
601+
// There are also no workers, which
602+
// means we've reached a completion point.
603+
// There may not be any workers to
604+
// signal it, so signal it here.
605+
readied := false
606+
if gcBlackenPromptly {
607+
if work.bgMark1.done == 0 {
608+
throw("completing mark 2, but bgMark1.done == 0")
609+
}
610+
readied = work.bgMark2.complete()
611+
} else {
612+
readied = work.bgMark1.complete()
613+
}
614+
if readied {
615+
// complete just called ready,
616+
// but we're inside the
617+
// scheduler. Let it know that
618+
// that's okay.
619+
resetspinning()
620+
}
621+
}
622+
return nil
623+
}
624+
596625
decIfPositive := func(ptr *int64) bool {
597626
if *ptr > 0 {
598627
if xaddint64(ptr, -1) >= 0 {
@@ -612,36 +641,6 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
612641
// else for a while, so kick everything out of its run
613642
// queue.
614643
} else {
615-
if !gcMarkWorkAvailable(_p_) {
616-
// No work to be done right now. This can
617-
// happen at the end of the mark phase when
618-
// there are still assists tapering off. Don't
619-
// bother running background mark because
620-
// it'll just return immediately.
621-
if work.nwait == work.nproc {
622-
// There are also no workers, which
623-
// means we've reached a completion point.
624-
// There may not be any workers to
625-
// signal it, so signal it here.
626-
readied := false
627-
if gcBlackenPromptly {
628-
if work.bgMark1.done == 0 {
629-
throw("completing mark 2, but bgMark1.done == 0")
630-
}
631-
readied = work.bgMark2.complete()
632-
} else {
633-
readied = work.bgMark1.complete()
634-
}
635-
if readied {
636-
// complete just called ready,
637-
// but we're inside the
638-
// scheduler. Let it know that
639-
// that's okay.
640-
resetspinning()
641-
}
642-
}
643-
return nil
644-
}
645644
if !decIfPositive(&c.fractionalMarkWorkersNeeded) {
646645
// No more workers are need right now.
647646
return nil
@@ -1368,46 +1367,37 @@ func gcBgMarkWorker(p *p) {
13681367
throw("work.nwait was > work.nproc")
13691368
}
13701369

1371-
done := false
13721370
switch p.gcMarkWorkerMode {
13731371
default:
13741372
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
13751373
case gcMarkWorkerDedicatedMode:
1376-
gcDrain(&p.gcw, gcDrainBlock|gcDrainFlushBgCredit)
1377-
// gcDrain did the xadd(&work.nwait +1) to
1378-
// match the decrement above. It only returns
1379-
// at a mark completion point.
1380-
done = true
1381-
if !p.gcw.empty() {
1382-
throw("gcDrain returned with buffer")
1383-
}
1374+
gcDrain(&p.gcw, gcDrainNoBlock|gcDrainFlushBgCredit)
13841375
case gcMarkWorkerFractionalMode, gcMarkWorkerIdleMode:
13851376
gcDrain(&p.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
1377+
}
13861378

1387-
// If we are nearing the end of mark, dispose
1388-
// of the cache promptly. We must do this
1389-
// before signaling that we're no longer
1390-
// working so that other workers can't observe
1391-
// no workers and no work while we have this
1392-
// cached, and before we compute done.
1393-
if gcBlackenPromptly {
1394-
p.gcw.dispose()
1395-
}
1379+
// If we are nearing the end of mark, dispose
1380+
// of the cache promptly. We must do this
1381+
// before signaling that we're no longer
1382+
// working so that other workers can't observe
1383+
// no workers and no work while we have this
1384+
// cached, and before we compute done.
1385+
if gcBlackenPromptly {
1386+
p.gcw.dispose()
1387+
}
13961388

1397-
// Was this the last worker and did we run out
1398-
// of work?
1399-
incnwait := xadd(&work.nwait, +1)
1400-
if incnwait > work.nproc {
1401-
println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode,
1402-
"work.nwait=", incnwait, "work.nproc=", work.nproc)
1403-
throw("work.nwait > work.nproc")
1404-
}
1405-
done = incnwait == work.nproc && !gcMarkWorkAvailable(nil)
1389+
// Was this the last worker and did we run out
1390+
// of work?
1391+
incnwait := xadd(&work.nwait, +1)
1392+
if incnwait > work.nproc {
1393+
println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode,
1394+
"work.nwait=", incnwait, "work.nproc=", work.nproc)
1395+
throw("work.nwait > work.nproc")
14061396
}
14071397

14081398
// If this worker reached a background mark completion
14091399
// point, signal the main GC goroutine.
1410-
if done {
1400+
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
14111401
if gcBlackenPromptly {
14121402
if work.bgMark1.done == 0 {
14131403
throw("completing mark 2, but bgMark1.done == 0")

src/runtime/mgcmark.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -761,32 +761,38 @@ type gcDrainFlags int
761761

762762
const (
763763
gcDrainUntilPreempt gcDrainFlags = 1 << iota
764+
gcDrainNoBlock
764765
gcDrainFlushBgCredit
765766

766-
// gcDrainBlock is the opposite of gcDrainUntilPreempt. This
767-
// is the default, but callers should use the constant for
768-
// documentation purposes.
767+
// gcDrainBlock means neither gcDrainUntilPreempt or
768+
// gcDrainNoBlock. It is the default, but callers should use
769+
// the constant for documentation purposes.
769770
gcDrainBlock gcDrainFlags = 0
770771
)
771772

772773
// gcDrain scans roots and objects in work buffers, blackening grey
773774
// objects until all roots and work buffers have been drained.
774775
//
775-
// If flags&gcDrainUntilPreempt != 0, gcDrain also returns if
776-
// g.preempt is set. Otherwise, this will block until all dedicated
777-
// workers are blocked in gcDrain.
776+
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
777+
// is set. This implies gcDrainNoBlock.
778+
//
779+
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
780+
// unable to get more work. Otherwise, it will block until all
781+
// blocking calls are blocked in gcDrain.
778782
//
779783
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
780784
// credit to gcController.bgScanCredit every gcCreditSlack units of
781785
// scan work.
786+
//
782787
//go:nowritebarrier
783788
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
784789
if !writeBarrierEnabled {
785790
throw("gcDrain phase incorrect")
786791
}
787792

788793
gp := getg()
789-
blocking := flags&gcDrainUntilPreempt == 0
794+
preemtible := flags&gcDrainUntilPreempt != 0
795+
blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
790796
flushBgCredit := flags&gcDrainFlushBgCredit != 0
791797

792798
// Drain root marking jobs.
@@ -804,7 +810,7 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
804810
initScanWork := gcw.scanWork
805811

806812
// Drain heap marking jobs.
807-
for blocking || !gp.preempt {
813+
for !(preemtible && gp.preempt) {
808814
// If another proc wants a pointer, give it some.
809815
if work.nwait > 0 && work.full == 0 {
810816
gcw.balance()

0 commit comments

Comments
 (0)