Skip to content

Commit 2517f49

Browse files
committed
runtime: remove debugCachedWork
debugCachedWork and all of its dependent fields and code were added to aid in debugging issue #27993. Now that the source of the problem is known and mitigated (via the extra work check after STW in gcMarkDone), these extra checks are no longer required and simply make the code more difficult to follow. Remove it all. Updates #27993 Change-Id: I594beedd5ca61733ba9cc9eaad8f80ea92df1a0d Reviewed-on: https://go-review.googlesource.com/c/go/+/262350 Trust: Michael Pratt <[email protected]> Run-TryBot: Michael Pratt <[email protected]> TryBot-Result: Go Bot <[email protected]> Reviewed-by: Austin Clements <[email protected]>
1 parent aa161e7 commit 2517f49

File tree

5 files changed

+27
-211
lines changed

5 files changed

+27
-211
lines changed

src/cmd/compile/internal/gc/inl_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func TestIntendedInlining(t *testing.T) {
8383
"puintptr.ptr",
8484
"spanOf",
8585
"spanOfUnchecked",
86-
//"(*gcWork).putFast", // TODO(austin): For debugging #27993
86+
"(*gcWork).putFast",
8787
"(*gcWork).tryGetFast",
8888
"(*guintptr).set",
8989
"(*markBits).advance",

src/runtime/mgc.go

Lines changed: 24 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,19 +1407,6 @@ func gcStart(trigger gcTrigger) {
14071407
// This is protected by markDoneSema.
14081408
var gcMarkDoneFlushed uint32
14091409

1410-
// debugCachedWork enables extra checks for debugging premature mark
1411-
// termination.
1412-
//
1413-
// For debugging issue #27993.
1414-
const debugCachedWork = false
1415-
1416-
// gcWorkPauseGen is for debugging the mark completion algorithm.
1417-
// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen.
1418-
// Only used if debugCachedWork is true.
1419-
//
1420-
// For debugging issue #27993.
1421-
var gcWorkPauseGen uint32 = 1
1422-
14231410
// gcMarkDone transitions the GC from mark to mark termination if all
14241411
// reachable objects have been marked (that is, there are no grey
14251412
// objects and can be no more in the future). Otherwise, it flushes
@@ -1475,15 +1462,7 @@ top:
14751462
// Flush the write barrier buffer, since this may add
14761463
// work to the gcWork.
14771464
wbBufFlush1(_p_)
1478-
// For debugging, shrink the write barrier
1479-
// buffer so it flushes immediately.
1480-
// wbBuf.reset will keep it at this size as
1481-
// long as throwOnGCWork is set.
1482-
if debugCachedWork {
1483-
b := &_p_.wbBuf
1484-
b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
1485-
b.debugGen = gcWorkPauseGen
1486-
}
1465+
14871466
// Flush the gcWork, since this may create global work
14881467
// and set the flushedWork flag.
14891468
//
@@ -1494,29 +1473,12 @@ top:
14941473
if _p_.gcw.flushedWork {
14951474
atomic.Xadd(&gcMarkDoneFlushed, 1)
14961475
_p_.gcw.flushedWork = false
1497-
} else if debugCachedWork {
1498-
// For debugging, freeze the gcWork
1499-
// until we know whether we've reached
1500-
// completion or not. If we think
1501-
// we've reached completion, but
1502-
// there's a paused gcWork, then
1503-
// that's a bug.
1504-
_p_.gcw.pauseGen = gcWorkPauseGen
1505-
// Capture the G's stack.
1506-
for i := range _p_.gcw.pauseStack {
1507-
_p_.gcw.pauseStack[i] = 0
1508-
}
1509-
callers(1, _p_.gcw.pauseStack[:])
15101476
}
15111477
})
15121478
casgstatus(gp, _Gwaiting, _Grunning)
15131479
})
15141480

15151481
if gcMarkDoneFlushed != 0 {
1516-
if debugCachedWork {
1517-
// Release paused gcWorks.
1518-
atomic.Xadd(&gcWorkPauseGen, 1)
1519-
}
15201482
// More grey objects were discovered since the
15211483
// previous termination check, so there may be more
15221484
// work to do. Keep going. It's possible the
@@ -1526,13 +1488,6 @@ top:
15261488
goto top
15271489
}
15281490

1529-
if debugCachedWork {
1530-
throwOnGCWork = true
1531-
// Release paused gcWorks. If there are any, they
1532-
// should now observe throwOnGCWork and panic.
1533-
atomic.Xadd(&gcWorkPauseGen, 1)
1534-
}
1535-
15361491
// There was no global work, no local work, and no Ps
15371492
// communicated work since we took markDoneSema. Therefore
15381493
// there are no grey objects and no more objects can be
@@ -1549,59 +1504,33 @@ top:
15491504
// below. The important thing is that the wb remains active until
15501505
// all marking is complete. This includes writes made by the GC.
15511506

1552-
if debugCachedWork {
1553-
// For debugging, double check that no work was added after we
1554-
// went around above and disable write barrier buffering.
1507+
// There is sometimes work left over when we enter mark termination due
1508+
// to write barriers performed after the completion barrier above.
1509+
// Detect this and resume concurrent mark. This is obviously
1510+
// unfortunate.
1511+
//
1512+
// See issue #27993 for details.
1513+
//
1514+
// Switch to the system stack to call wbBufFlush1, though in this case
1515+
// it doesn't matter because we're non-preemptible anyway.
1516+
restart := false
1517+
systemstack(func() {
15551518
for _, p := range allp {
1556-
gcw := &p.gcw
1557-
if !gcw.empty() {
1558-
printlock()
1559-
print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork)
1560-
if gcw.wbuf1 == nil {
1561-
print(" wbuf1=<nil>")
1562-
} else {
1563-
print(" wbuf1.n=", gcw.wbuf1.nobj)
1564-
}
1565-
if gcw.wbuf2 == nil {
1566-
print(" wbuf2=<nil>")
1567-
} else {
1568-
print(" wbuf2.n=", gcw.wbuf2.nobj)
1569-
}
1570-
print("\n")
1571-
if gcw.pauseGen == gcw.putGen {
1572-
println("runtime: checkPut already failed at this generation")
1573-
}
1574-
throw("throwOnGCWork")
1519+
wbBufFlush1(p)
1520+
if !p.gcw.empty() {
1521+
restart = true
1522+
break
15751523
}
15761524
}
1577-
} else {
1578-
// For unknown reasons (see issue #27993), there is
1579-
// sometimes work left over when we enter mark
1580-
// termination. Detect this and resume concurrent
1581-
// mark. This is obviously unfortunate.
1582-
//
1583-
// Switch to the system stack to call wbBufFlush1,
1584-
// though in this case it doesn't matter because we're
1585-
// non-preemptible anyway.
1586-
restart := false
1525+
})
1526+
if restart {
1527+
getg().m.preemptoff = ""
15871528
systemstack(func() {
1588-
for _, p := range allp {
1589-
wbBufFlush1(p)
1590-
if !p.gcw.empty() {
1591-
restart = true
1592-
break
1593-
}
1594-
}
1529+
now := startTheWorldWithSema(true)
1530+
work.pauseNS += now - work.pauseStart
15951531
})
1596-
if restart {
1597-
getg().m.preemptoff = ""
1598-
systemstack(func() {
1599-
now := startTheWorldWithSema(true)
1600-
work.pauseNS += now - work.pauseStart
1601-
})
1602-
semrelease(&worldsema)
1603-
goto top
1604-
}
1532+
semrelease(&worldsema)
1533+
goto top
16051534
}
16061535

16071536
// Disable assists and background workers. We must do
@@ -2085,7 +2014,7 @@ func gcMark(start_time int64) {
20852014
// ensured all reachable objects were marked, all of
20862015
// these must be pointers to black objects. Hence we
20872016
// can just discard the write barrier buffer.
2088-
if debug.gccheckmark > 0 || throwOnGCWork {
2017+
if debug.gccheckmark > 0 {
20892018
// For debugging, flush the buffer and make
20902019
// sure it really was all marked.
20912020
wbBufFlush1(p)
@@ -2117,8 +2046,6 @@ func gcMark(start_time int64) {
21172046
gcw.dispose()
21182047
}
21192048

2120-
throwOnGCWork = false
2121-
21222049
cachestats()
21232050

21242051
// Update the marked heap stat.

src/runtime/mgcwork.go

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,6 @@ const (
2222
workbufAlloc = 32 << 10
2323
)
2424

25-
// throwOnGCWork causes any operations that add pointers to a gcWork
26-
// buffer to throw.
27-
//
28-
// TODO(austin): This is a temporary debugging measure for issue
29-
// #27993. To be removed before release.
30-
var throwOnGCWork bool
31-
3225
func init() {
3326
if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
3427
throw("bad workbufAlloc")
@@ -93,17 +86,6 @@ type gcWork struct {
9386
// termination check. Specifically, this indicates that this
9487
// gcWork may have communicated work to another gcWork.
9588
flushedWork bool
96-
97-
// pauseGen causes put operations to spin while pauseGen ==
98-
// gcWorkPauseGen if debugCachedWork is true.
99-
pauseGen uint32
100-
101-
// putGen is the pauseGen of the last putGen.
102-
putGen uint32
103-
104-
// pauseStack is the stack at which this P was paused if
105-
// debugCachedWork is true.
106-
pauseStack [16]uintptr
10789
}
10890

10991
// Most of the methods of gcWork are go:nowritebarrierrec because the
@@ -122,60 +104,10 @@ func (w *gcWork) init() {
122104
w.wbuf2 = wbuf2
123105
}
124106

125-
func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
126-
if debugCachedWork {
127-
alreadyFailed := w.putGen == w.pauseGen
128-
w.putGen = w.pauseGen
129-
if !canPreemptM(getg().m) {
130-
// If we were to spin, the runtime may
131-
// deadlock. Since we can't be preempted, the
132-
// spin could prevent gcMarkDone from
133-
// finishing the ragged barrier, which is what
134-
// releases us from the spin.
135-
return
136-
}
137-
for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
138-
}
139-
if throwOnGCWork {
140-
printlock()
141-
if alreadyFailed {
142-
println("runtime: checkPut already failed at this generation")
143-
}
144-
println("runtime: late gcWork put")
145-
if ptr != 0 {
146-
gcDumpObject("ptr", ptr, ^uintptr(0))
147-
}
148-
for _, ptr := range ptrs {
149-
gcDumpObject("ptrs", ptr, ^uintptr(0))
150-
}
151-
println("runtime: paused at")
152-
for _, pc := range w.pauseStack {
153-
if pc == 0 {
154-
break
155-
}
156-
f := findfunc(pc)
157-
if f.valid() {
158-
// Obviously this doesn't
159-
// relate to ancestor
160-
// tracebacks, but this
161-
// function prints what we
162-
// want.
163-
printAncestorTracebackFuncInfo(f, pc)
164-
} else {
165-
println("\tunknown PC ", hex(pc), "\n")
166-
}
167-
}
168-
throw("throwOnGCWork")
169-
}
170-
}
171-
}
172-
173107
// put enqueues a pointer for the garbage collector to trace.
174108
// obj must point to the beginning of a heap object or an oblet.
175109
//go:nowritebarrierrec
176110
func (w *gcWork) put(obj uintptr) {
177-
w.checkPut(obj, nil)
178-
179111
flushed := false
180112
wbuf := w.wbuf1
181113
// Record that this may acquire the wbufSpans or heap lock to
@@ -214,8 +146,6 @@ func (w *gcWork) put(obj uintptr) {
214146
// otherwise it returns false and the caller needs to call put.
215147
//go:nowritebarrierrec
216148
func (w *gcWork) putFast(obj uintptr) bool {
217-
w.checkPut(obj, nil)
218-
219149
wbuf := w.wbuf1
220150
if wbuf == nil {
221151
return false
@@ -237,8 +167,6 @@ func (w *gcWork) putBatch(obj []uintptr) {
237167
return
238168
}
239169

240-
w.checkPut(0, obj)
241-
242170
flushed := false
243171
wbuf := w.wbuf1
244172
if wbuf == nil {
@@ -360,12 +288,10 @@ func (w *gcWork) balance() {
360288
return
361289
}
362290
if wbuf := w.wbuf2; wbuf.nobj != 0 {
363-
w.checkPut(0, wbuf.obj[:wbuf.nobj])
364291
putfull(wbuf)
365292
w.flushedWork = true
366293
w.wbuf2 = getempty()
367294
} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
368-
w.checkPut(0, wbuf.obj[:wbuf.nobj])
369295
w.wbuf1 = handoff(wbuf)
370296
w.flushedWork = true // handoff did putfull
371297
} else {

src/runtime/mwbbuf.go

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,6 @@ type wbBuf struct {
5757
// on. This must be a multiple of wbBufEntryPointers because
5858
// the write barrier only checks for overflow once per entry.
5959
buf [wbBufEntryPointers * wbBufEntries]uintptr
60-
61-
// debugGen causes the write barrier buffer to flush after
62-
// every write barrier if equal to gcWorkPauseGen. This is for
63-
// debugging #27993. This is only set if debugCachedWork is
64-
// set.
65-
debugGen uint32
6660
}
6761

6862
const (
@@ -86,7 +80,7 @@ const (
8680
func (b *wbBuf) reset() {
8781
start := uintptr(unsafe.Pointer(&b.buf[0]))
8882
b.next = start
89-
if writeBarrier.cgo || (debugCachedWork && (throwOnGCWork || b.debugGen == atomic.Load(&gcWorkPauseGen))) {
83+
if writeBarrier.cgo {
9084
// Effectively disable the buffer by forcing a flush
9185
// on every barrier.
9286
b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
@@ -204,32 +198,10 @@ func wbBufFlush(dst *uintptr, src uintptr) {
204198
// Switch to the system stack so we don't have to worry about
205199
// the untyped stack slots or safe points.
206200
systemstack(func() {
207-
if debugCachedWork {
208-
// For debugging, include the old value of the
209-
// slot and some other data in the traceback.
210-
wbBuf := &getg().m.p.ptr().wbBuf
211-
var old uintptr
212-
if dst != nil {
213-
// dst may be nil in direct calls to wbBufFlush.
214-
old = *dst
215-
}
216-
wbBufFlush1Debug(old, wbBuf.buf[0], wbBuf.buf[1], &wbBuf.buf[0], wbBuf.next)
217-
} else {
218-
wbBufFlush1(getg().m.p.ptr())
219-
}
201+
wbBufFlush1(getg().m.p.ptr())
220202
})
221203
}
222204

223-
// wbBufFlush1Debug is a temporary function for debugging issue
224-
// #27993. It exists solely to add some context to the traceback.
225-
//
226-
//go:nowritebarrierrec
227-
//go:systemstack
228-
//go:noinline
229-
func wbBufFlush1Debug(old, buf1, buf2 uintptr, start *uintptr, next uintptr) {
230-
wbBufFlush1(getg().m.p.ptr())
231-
}
232-
233205
// wbBufFlush1 flushes p's write barrier buffer to the GC work queue.
234206
//
235207
// This must not have write barriers because it is part of the write

src/runtime/panic.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -421,15 +421,6 @@ func newdefer(siz int32) *_defer {
421421
total := roundupsize(totaldefersize(uintptr(siz)))
422422
d = (*_defer)(mallocgc(total, deferType, true))
423423
})
424-
if debugCachedWork {
425-
// Duplicate the tail below so if there's a
426-
// crash in checkPut we can tell if d was just
427-
// allocated or came from the pool.
428-
d.siz = siz
429-
d.link = gp._defer
430-
gp._defer = d
431-
return d
432-
}
433424
}
434425
d.siz = siz
435426
d.heap = true

0 commit comments

Comments
 (0)