Skip to content

Commit 9eba17f

Browse files
committed
runtime: for deep stacks, print both the top 50 and bottom 50 frames
This is relatively easy using the new traceback iterator. Ancestor tracebacks are now limited to 50 frames. We could keep that at 100, but the fact that it used 100 before seemed arbitrary and unnecessary. Fixes #7181 Updates #54466 Change-Id: If693045881d84848f17e568df275a5105b6f1cb0 Reviewed-on: https://go-review.googlesource.com/c/go/+/475960 Run-TryBot: Austin Clements <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Michael Pratt <[email protected]>
1 parent 6be7fd3 commit 9eba17f

File tree

6 files changed

+291
-44
lines changed

6 files changed

+291
-44
lines changed

doc/go1.21.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ <h2 id="runtime">Runtime</h2>
4949
TODO: complete this section, or delete if not needed
5050
</p>
5151

52+
<p><!-- https://go.dev/issue/7181 -->
53+
When printing very deep stacks, the runtime now prints the first 50
54+
(innermost) frames followed by the bottom 50 (outermost) frames,
55+
rather than just printing the first 100 frames. This makes it easier
56+
to see how deeply recursive stacks started, and is especially
57+
valuable for debugging stack overflows.
58+
</p>
59+
5260
<h2 id="compiler">Compiler</h2>
5361

5462
<p>

src/runtime/export_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ var NetpollGenericInit = netpollGenericInit
4747
var Memmove = memmove
4848
var MemclrNoHeapPointers = memclrNoHeapPointers
4949

50+
const TracebackInnerFrames = tracebackInnerFrames
51+
const TracebackOuterFrames = tracebackOuterFrames
52+
5053
var LockPartialOrder = lockPartialOrder
5154

5255
type LockRank lockRank

src/runtime/proc.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4356,7 +4356,7 @@ func saveAncestors(callergp *g) *[]ancestorInfo {
43564356
ancestors := make([]ancestorInfo, n)
43574357
copy(ancestors[1:], callerAncestors)
43584358

4359-
var pcs [_TracebackMaxFrames]uintptr
4359+
var pcs [tracebackInnerFrames]uintptr
43604360
npcs := gcallers(callergp, 0, pcs[:])
43614361
ipcs := make([]uintptr, npcs)
43624362
copy(ipcs, pcs[:])

src/runtime/runtime2.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,9 +1049,6 @@ type ancestorInfo struct {
10491049
gopc uintptr // pc of go statement that created this goroutine
10501050
}
10511051

1052-
// The maximum number of frames we print for a traceback
1053-
const _TracebackMaxFrames = 100
1054-
10551052
// A waitReason explains why a goroutine has been stopped.
10561053
// See gopark. Do not re-use waitReasons, add new ones.
10571054
type waitReason uint8

src/runtime/traceback.go

Lines changed: 142 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ import (
2121

2222
const usesLR = sys.MinFrameSize > 0
2323

24+
const (
25+
// tracebackInnerFrames is the number of innermost frames to print in a
26+
// stack trace. The total maximum frames is tracebackInnerFrames +
27+
// tracebackOuterFrames.
28+
tracebackInnerFrames = 50
29+
30+
// tracebackOuterFrames is the number of outermost frames to print in a
31+
// stack trace.
32+
tracebackOuterFrames = 50
33+
)
34+
2435
// unwindFlags control the behavior of various unwinders.
2536
type unwindFlags uint8
2637

@@ -812,18 +823,80 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) {
812823
flags &^= unwindTrap
813824
}
814825

815-
// Print traceback. By default, omits runtime frames.
816-
// If that means we print nothing at all, repeat forcing all frames printed.
826+
// Print traceback.
827+
//
828+
// We print the first tracebackInnerFrames frames, and the last
829+
// tracebackOuterFrames frames. There are many possible approaches to this.
830+
// There are various complications to this:
831+
//
832+
// - We'd prefer to walk the stack once because in really bad situations
833+
// traceback may crash (and we want as much output as possible) or the stack
834+
// may be changing.
835+
//
836+
// - Each physical frame can represent several logical frames, so we might
837+
// have to pause in the middle of a physical frame and pick up in the middle
838+
// of a physical frame.
839+
//
840+
// - The cgo symbolizer can expand a cgo PC to more than one logical frame,
841+
// and involves juggling state on the C side that we don't manage. Since its
842+
// expansion state is managed on the C side, we can't capture the expansion
843+
// state part way through, and because the output strings are managed on the
844+
// C side, we can't capture the output. Thus, our only choice is to replay a
845+
// whole expansion, potentially discarding some of it.
846+
//
847+
// Rejected approaches:
848+
//
849+
// - Do two passes where the first pass just counts and the second pass does
850+
// all the printing. This is undesireable if the stack is corrupted or changing
851+
// because we won't see a partial stack if we panic.
852+
//
853+
// - Keep a ring buffer of the last N logical frames and use this to print
854+
// the bottom frames once we reach the end of the stack. This works, but
855+
// requires keeping a surprising amount of state on the stack, and we have
856+
// to run the cgo symbolizer twice—once to count frames, and a second to
857+
// print them—since we can't retain the strings it returns.
858+
//
859+
// Instead, we print the outer frames, and if we reach that limit, we clone
860+
// the unwinder, count the remaining frames, and then skip forward and
861+
// finish printing from the clone. This makes two passes over the outer part
862+
// of the stack, but the single pass over the inner part ensures that's
863+
// printed immediately and not revisited. It keeps minimal state on the
864+
// stack. And through a combination of skip counts and limits, we can do all
865+
// of the steps we need with a single traceback printer implementation.
866+
//
867+
// We could be more lax about exactly how many frames we print, for example
868+
// always stopping and resuming on physical frame boundaries, or at least
869+
// cgo expansion boundaries. It's not clear that's much simpler.
817870
flags |= unwindPrintErrors
818871
var u unwinder
819-
u.initAt(pc, sp, lr, gp, flags)
820-
n := traceback2(&u, false)
821-
if n == 0 {
872+
tracebackWithRuntime := func(showRuntime bool) int {
873+
const maxInt int = 0x7fffffff
822874
u.initAt(pc, sp, lr, gp, flags)
823-
n = traceback2(&u, true)
875+
n, lastN := traceback2(&u, showRuntime, 0, tracebackInnerFrames)
876+
if n < tracebackInnerFrames {
877+
// We printed the whole stack.
878+
return n
879+
}
880+
// Clone the unwinder and figure out how many frames are left. This
881+
// count will include any logical frames already printed for u's current
882+
// physical frame.
883+
u2 := u
884+
remaining, _ := traceback2(&u, showRuntime, maxInt, 0)
885+
elide := remaining - lastN - tracebackOuterFrames
886+
if elide > 0 {
887+
print("...", elide, " frames elided...\n")
888+
traceback2(&u2, showRuntime, lastN+elide, tracebackOuterFrames)
889+
} else if elide <= 0 {
890+
// There are tracebackOuterFrames or fewer frames left to print.
891+
// Just print the rest of the stack.
892+
traceback2(&u2, showRuntime, lastN, tracebackOuterFrames)
893+
}
894+
return n
824895
}
825-
if n == _TracebackMaxFrames {
826-
print("...additional frames elided...\n")
896+
// By default, omits runtime frames. If that means we print nothing at all,
897+
// repeat forcing all frames printed.
898+
if tracebackWithRuntime(false) == 0 {
899+
tracebackWithRuntime(true)
827900
}
828901
printcreatedby(gp)
829902

@@ -835,22 +908,51 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags unwindFlags) {
835908
}
836909
}
837910

838-
func traceback2(u *unwinder, showRuntime bool) int {
911+
// traceback2 prints a stack trace starting at u. It skips the first "skip"
912+
// logical frames, after which it prints at most "max" logical frames. It
913+
// returns n, which is the number of logical frames skipped and printed, and
914+
// lastN, which is the number of logical frames skipped or printed just in the
915+
// phyiscal frame that u references.
916+
func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) {
917+
// commitFrame commits to a logical frame and returns whether this frame
918+
// should be printed and whether iteration should stop.
919+
commitFrame := func() (pr, stop bool) {
920+
if skip == 0 && max == 0 {
921+
// Stop
922+
return false, true
923+
}
924+
n++
925+
lastN++
926+
if skip > 0 {
927+
// Skip
928+
skip--
929+
return false, false
930+
}
931+
// Print
932+
max--
933+
return true, false
934+
}
935+
839936
gp := u.g.ptr()
840937
level, _, _ := gotraceback()
841-
n := 0
842-
const max = _TracebackMaxFrames
843938
var cgoBuf [32]uintptr
844-
for ; n < max && u.valid(); u.next() {
939+
for ; u.valid(); u.next() {
940+
lastN = 0
845941
f := u.frame.fn
846-
for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); n < max && uf.valid(); uf = iu.next(uf) {
942+
for iu, uf := newInlineUnwinder(f, u.symPC(), noEscapePtr(&u.cache)); uf.valid(); uf = iu.next(uf) {
847943
sf := iu.srcFunc(uf)
848944
callee := u.calleeFuncID
849945
u.calleeFuncID = sf.funcID
850946
if !(showRuntime || showframe(sf, gp, n == 0, callee)) {
851947
continue
852948
}
853949

950+
if pr, stop := commitFrame(); stop {
951+
return
952+
} else if !pr {
953+
continue
954+
}
955+
854956
name := sf.name()
855957
file, line := iu.fileLine(uf)
856958
if name == "runtime.gopanic" {
@@ -878,34 +980,39 @@ func traceback2(u *unwinder, showRuntime bool) int {
878980
}
879981
}
880982
print("\n")
881-
n++
882983
}
883984

884985
// Print cgo frames.
885986
if cgoN := u.cgoCallers(cgoBuf[:]); cgoN > 0 {
886987
var arg cgoSymbolizerArg
887988
anySymbolized := false
989+
stop := false
888990
for _, pc := range cgoBuf[:cgoN] {
889-
if n >= max {
890-
break
891-
}
892991
if cgoSymbolizer == nil {
893-
print("non-Go function at pc=", hex(pc), "\n")
992+
if pr, stop := commitFrame(); stop {
993+
break
994+
} else if pr {
995+
print("non-Go function at pc=", hex(pc), "\n")
996+
}
894997
} else {
895-
c := printOneCgoTraceback(pc, max-n, &arg)
896-
n += c - 1 // +1 a few lines down
998+
stop = printOneCgoTraceback(pc, commitFrame, &arg)
897999
anySymbolized = true
1000+
if stop {
1001+
break
1002+
}
8981003
}
899-
n++
9001004
}
9011005
if anySymbolized {
9021006
// Free symbolization state.
9031007
arg.pc = 0
9041008
callCgoSymbolizer(&arg)
9051009
}
1010+
if stop {
1011+
return
1012+
}
9061013
}
9071014
}
908-
return n
1015+
return n, 0
9091016
}
9101017

9111018
// printAncestorTraceback prints the traceback of the given ancestor.
@@ -918,7 +1025,7 @@ func printAncestorTraceback(ancestor ancestorInfo) {
9181025
printAncestorTracebackFuncInfo(f, pc)
9191026
}
9201027
}
921-
if len(ancestor.pcs) == _TracebackMaxFrames {
1028+
if len(ancestor.pcs) == tracebackInnerFrames {
9221029
print("...additional frames elided...\n")
9231030
}
9241031
// Show what created goroutine, except main goroutine (goid 1).
@@ -1405,24 +1512,30 @@ func printCgoTraceback(callers *cgoCallers) {
14051512
return
14061513
}
14071514

1515+
commitFrame := func() (pr, stop bool) { return true, false }
14081516
var arg cgoSymbolizerArg
14091517
for _, c := range callers {
14101518
if c == 0 {
14111519
break
14121520
}
1413-
printOneCgoTraceback(c, 0x7fffffff, &arg)
1521+
printOneCgoTraceback(c, commitFrame, &arg)
14141522
}
14151523
arg.pc = 0
14161524
callCgoSymbolizer(&arg)
14171525
}
14181526

14191527
// printOneCgoTraceback prints the traceback of a single cgo caller.
14201528
// This can print more than one line because of inlining.
1421-
// Returns the number of frames printed.
1422-
func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int {
1423-
c := 0
1529+
// It returns the "stop" result of commitFrame.
1530+
func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *cgoSymbolizerArg) bool {
14241531
arg.pc = pc
1425-
for c <= max {
1532+
for {
1533+
if pr, stop := commitFrame(); stop {
1534+
return true
1535+
} else if !pr {
1536+
continue
1537+
}
1538+
14261539
callCgoSymbolizer(arg)
14271540
if arg.funcName != nil {
14281541
// Note that we don't print any argument
@@ -1437,12 +1550,10 @@ func printOneCgoTraceback(pc uintptr, max int, arg *cgoSymbolizerArg) int {
14371550
print(gostringnocopy(arg.file), ":", arg.lineno, " ")
14381551
}
14391552
print("pc=", hex(pc), "\n")
1440-
c++
14411553
if arg.more == 0 {
1442-
break
1554+
return false
14431555
}
14441556
}
1445-
return c
14461557
}
14471558

14481559
// callCgoSymbolizer calls the cgoSymbolizer function.

0 commit comments

Comments
 (0)