Skip to content

Commit 3be48b4

Browse files
committed
runtime: pass gcWork to scanstack
Currently scanstack obtains its own gcWork from the P for the duration of the stack scan and then, if called during mark termination, disposes the gcWork. However, this means that the number of workbufs allocated will be at least the number of stacks scanned during mark termination, which may be very high (especially during a STW GC). This happens because, in steady state, each scanstack will obtain a fresh workbuf (either from the empty list or by allocating it), fill it with the scan results, and then dispose it to the full list. Nothing is consuming from the full list during this (and hence nothing is recycling them to the empty list), so the length of the full list by the time mark termination starts draining it is at least the number of stacks scanned. Fix this by pushing the gcWork acquisition up the stack to either the gcDrain that calls markroot that calls scanstack (which batches across many stack scans and is the path taken during STW GC) or to newstack (which is still a single scanstack call, but this is roughly bounded by the number of Ps). This fix reduces the workbuf allocation for the test program from issue #15319 from 213 MB (roughly 2KB * 1e5 goroutines) to 10 MB. Fixes #15319. Note that there's potentially a similar issue in write barriers during mark 2. Fixing that will be more difficult since there's no broader non-preemptible context, but it should also be less of a problem since the full list is being drained during mark 2. Some overall improvements in the go1 benchmarks, plus the usual noise. No significant change in the garbage benchmark (time/op or GC memory). name old time/op new time/op delta BinaryTree17-12 2.54s ± 1% 2.51s ± 1% -1.09% (p=0.000 n=20+19) Fannkuch11-12 2.12s ± 0% 2.17s ± 0% +2.18% (p=0.000 n=19+18) FmtFprintfEmpty-12 45.1ns ± 1% 45.2ns ± 0% ~ (p=0.078 n=19+18) FmtFprintfString-12 127ns ± 0% 128ns ± 0% +1.08% (p=0.000 n=19+16) FmtFprintfInt-12 125ns ± 0% 122ns ± 1% -2.71% (p=0.000 n=14+18) FmtFprintfIntInt-12 196ns ± 0% 190ns ± 1% -2.91% (p=0.000 n=12+20) FmtFprintfPrefixedInt-12 196ns ± 0% 194ns ± 1% -0.94% (p=0.000 n=13+18) FmtFprintfFloat-12 253ns ± 1% 251ns ± 1% -0.86% (p=0.000 n=19+20) FmtManyArgs-12 807ns ± 1% 784ns ± 1% -2.85% (p=0.000 n=20+20) GobDecode-12 7.13ms ± 1% 7.12ms ± 1% ~ (p=0.351 n=19+20) GobEncode-12 5.89ms ± 0% 5.95ms ± 0% +0.94% (p=0.000 n=19+19) Gzip-12 219ms ± 1% 221ms ± 1% +1.35% (p=0.000 n=18+20) Gunzip-12 37.5ms ± 1% 37.4ms ± 0% ~ (p=0.057 n=20+19) HTTPClientServer-12 81.4µs ± 4% 81.9µs ± 3% ~ (p=0.118 n=17+18) JSONEncode-12 15.7ms ± 1% 15.8ms ± 1% +0.73% (p=0.000 n=17+18) JSONDecode-12 57.9ms ± 1% 57.2ms ± 1% -1.34% (p=0.000 n=19+19) Mandelbrot200-12 4.12ms ± 1% 4.10ms ± 0% -0.33% (p=0.000 n=19+17) GoParse-12 3.22ms ± 2% 3.25ms ± 1% +0.72% (p=0.000 n=18+20) RegexpMatchEasy0_32-12 70.6ns ± 1% 71.1ns ± 2% +0.63% (p=0.005 n=19+20) RegexpMatchEasy0_1K-12 240ns ± 0% 239ns ± 1% -0.59% (p=0.000 n=19+20) RegexpMatchEasy1_32-12 71.3ns ± 1% 71.3ns ± 1% ~ (p=0.844 n=17+17) RegexpMatchEasy1_1K-12 384ns ± 2% 371ns ± 1% -3.45% (p=0.000 n=19+20) RegexpMatchMedium_32-12 109ns ± 1% 108ns ± 2% -0.48% (p=0.029 n=19+19) RegexpMatchMedium_1K-12 34.3µs ± 1% 34.5µs ± 2% ~ (p=0.160 n=18+20) RegexpMatchHard_32-12 1.79µs ± 9% 1.72µs ± 2% -3.83% (p=0.000 n=19+19) RegexpMatchHard_1K-12 53.3µs ± 4% 51.8µs ± 1% -2.82% (p=0.000 n=19+20) Revcomp-12 386ms ± 0% 388ms ± 0% +0.72% (p=0.000 n=17+20) Template-12 62.9ms ± 1% 62.5ms ± 1% -0.57% (p=0.010 n=18+19) TimeParse-12 325ns ± 0% 331ns ± 0% +1.84% (p=0.000 n=18+19) TimeFormat-12 338ns ± 0% 343ns ± 0% +1.34% (p=0.000 n=18+20) [Geo mean] 52.7µs 52.5µs -0.42% Change-Id: Ib2d34736c4ae2ec329605b0fbc44636038d8d018 Reviewed-on: https://go-review.googlesource.com/23391 Run-TryBot: Austin Clements <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Rick Hudson <[email protected]>
1 parent a1f7db8 commit 3be48b4

File tree

3 files changed

+11
-9
lines changed

3 files changed

+11
-9
lines changed

src/runtime/mgcmark.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ func markroot(gcw *gcWork, i uint32) {
231231
// we scan the stacks we can and ask running
232232
// goroutines to scan themselves; and the
233233
// second blocks.
234-
scang(gp)
234+
scang(gp, gcw)
235235

236236
if selfScan {
237237
casgstatus(userG, _Gwaiting, _Grunning)
@@ -653,7 +653,7 @@ func gcFlushBgCredit(scanWork int64) {
653653
//
654654
//go:nowritebarrier
655655
//go:systemstack
656-
func scanstack(gp *g) {
656+
func scanstack(gp *g, gcw *gcWork) {
657657
if gp.gcscanvalid {
658658
return
659659
}
@@ -742,7 +742,6 @@ func scanstack(gp *g) {
742742

743743
// Scan the stack.
744744
var cache pcvalueCache
745-
gcw := &getg().m.p.ptr().gcw
746745
n := 0
747746
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
748747
scanframeworker(frame, &cache, gcw)
@@ -770,9 +769,6 @@ func scanstack(gp *g) {
770769
}
771770
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
772771
tracebackdefers(gp, scanframe, nil)
773-
if gcphase == _GCmarktermination {
774-
gcw.dispose()
775-
}
776772
gcUnlockStackBarriers(gp)
777773
if gcphase == _GCmark {
778774
// gp may have added itself to the rescan list between

src/runtime/proc.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,7 @@ func casgcopystack(gp *g) uint32 {
792792
// scang blocks until gp's stack has been scanned.
793793
// It might be scanned by scang or it might be scanned by the goroutine itself.
794794
// Either way, the stack scan has completed when scang returns.
795-
func scang(gp *g) {
795+
func scang(gp *g, gcw *gcWork) {
796796
// Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone.
797797
// Nothing is racing with us now, but gcscandone might be set to true left over
798798
// from an earlier round of stack scanning (we scan twice per GC).
@@ -833,7 +833,7 @@ loop:
833833
// the goroutine until we're done.
834834
if castogscanstatus(gp, s, s|_Gscan) {
835835
if !gp.gcscandone {
836-
scanstack(gp)
836+
scanstack(gp, gcw)
837837
gp.gcscandone = true
838838
}
839839
restartg(gp)

src/runtime/stack.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,13 @@ func newstack() {
10101010
// return.
10111011
}
10121012
if !gp.gcscandone {
1013-
scanstack(gp)
1013+
// gcw is safe because we're on the
1014+
// system stack.
1015+
gcw := &gp.m.p.ptr().gcw
1016+
scanstack(gp, gcw)
1017+
if gcBlackenPromptly {
1018+
gcw.dispose()
1019+
}
10141020
gp.gcscandone = true
10151021
}
10161022
gp.preemptscan = false

0 commit comments

Comments
 (0)