Skip to content

Commit cc96b6e

Browse files
committed
internal/crashmonitor: adjust trap frames
Typically all frames in the middle of a stack trace are calls, and report the "return PC". That is, the instruction following the CALL where the callee will eventually return to. runtime.CallersFrames is aware of this property and will decrement each PC by 1 to "back up" to the location of the CALL, which is the actual line number the user expects. This does not work for traps (such as a SIGSEGV), as a trap is not a call, so the reported PC is not the return PC, but the actual PC of the trap. runtime.Callers is aware of this and will intentionally increment trap PCs in order to correct for the decrement performed by runtime.CallersFrames. See runtime.tracebackPCs and runtume.(*unwinder).symPC. We must emulate the same behavior, otherwise we will report the location of the instruction immediately prior to the trap, which may be on a different line, or even a different inlined functions. Fixes golang/go#70637. Change-Id: I7cd6ffe783a81aa91dcdca313d910341c0bb0440 Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/637755 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Alan Donovan <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent 773b2e3 commit cc96b6e

File tree

2 files changed

+217
-50
lines changed

2 files changed

+217
-50
lines changed

internal/crashmonitor/monitor.go

Lines changed: 100 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,35 @@ func telemetryCounterName(crash []byte) (string, error) {
170170
// there is no possibility of strings from the crash report (which may
171171
// contain PII) leaking into the telemetry system.
172172
func parseStackPCs(crash string) ([]uintptr, error) {
173+
// getSymbol parses the symbol name out of a line of the form:
174+
// SYMBOL(ARGS)
175+
//
176+
// Note: SYMBOL may contain parens "pkg.(*T).method". However, type
177+
// parameters are always replaced with ..., so they cannot introduce
178+
// more parens. e.g., "pkg.(*T[...]).method".
179+
//
180+
// ARGS can contain parens. We want the first paren that is not
181+
// immediately preceded by a ".".
182+
//
183+
// TODO(prattmic): This is mildly complicated and is only used to find
184+
// runtime.sigpanic, so perhaps simplify this by checking explicitly
185+
// for sigpanic.
186+
getSymbol := func(line string) (string, error) {
187+
var prev rune
188+
for i, c := range line {
189+
if line[i] != '(' {
190+
prev = c
191+
continue
192+
}
193+
if prev == '.' {
194+
prev = c
195+
continue
196+
}
197+
return line[:i], nil
198+
}
199+
return "", fmt.Errorf("no symbol for stack frame: %s", line)
200+
}
201+
173202
// getPC parses the PC out of a line of the form:
174203
// \tFILE:LINE +0xRELPC sp=... fp=... pc=...
175204
getPC := func(line string) (uint64, error) {
@@ -186,6 +215,9 @@ func parseStackPCs(crash string) ([]uintptr, error) {
186215
childSentinel = sentinel()
187216
on = false // are we in the first running goroutine?
188217
lines = strings.Split(crash, "\n")
218+
symLine = true // within a goroutine, every other line is a symbol or file/line/pc location, starting with symbol.
219+
currSymbol string
220+
prevSymbol string // symbol of the most recent previous frame with a PC.
189221
)
190222
for i := 0; i < len(lines); i++ {
191223
line := lines[i]
@@ -228,21 +260,76 @@ func parseStackPCs(crash string) ([]uintptr, error) {
228260
// Note: SYMBOL may contain parens "pkg.(*T).method"
229261
// The RELPC is sometimes missing.
230262

231-
// Skip the symbol(args) line.
232-
i++
233-
if i == len(lines) {
234-
break
235-
}
236-
line = lines[i]
263+
if symLine {
264+
var err error
265+
currSymbol, err = getSymbol(line)
266+
if err != nil {
267+
return nil, fmt.Errorf("error extracting symbol: %v", err)
268+
}
237269

238-
// Parse the PC, and correct for the parent and child's
239-
// different mappings of the text section.
240-
pc, err := getPC(line)
241-
if err != nil {
242-
// Inlined frame, perhaps; skip it.
243-
continue
270+
symLine = false // Next line is FILE:LINE.
271+
} else {
272+
// Parse the PC, and correct for the parent and child's
273+
// different mappings of the text section.
274+
pc, err := getPC(line)
275+
if err != nil {
276+
// Inlined frame, perhaps; skip it.
277+
278+
// Done with this frame. Next line is a new frame.
279+
//
280+
// Don't update prevSymbol; we only want to
281+
// track frames with a PC.
282+
currSymbol = ""
283+
symLine = true
284+
continue
285+
}
286+
287+
pc = pc-parentSentinel+childSentinel
288+
289+
// If the previous frame was sigpanic, then this frame
290+
// was a trap (e.g., SIGSEGV).
291+
//
292+
// Typically all middle frames are calls, and report
293+
// the "return PC". That is, the instruction following
294+
// the CALL where the callee will eventually return to.
295+
//
296+
// runtime.CallersFrames is aware of this property and
297+
// will decrement each PC by 1 to "back up" to the
298+
// location of the CALL, which is the actual line
299+
// number the user expects.
300+
//
301+
// This does not work for traps, as a trap is not a
302+
// call, so the reported PC is not the return PC, but
303+
// the actual PC of the trap.
304+
//
305+
// runtime.Callers is aware of this and will
306+
// intentionally increment trap PCs in order to correct
307+
// for the decrement performed by
308+
// runtime.CallersFrames. See runtime.tracebackPCs and
309+
// runtume.(*unwinder).symPC.
310+
//
311+
// We must emulate the same behavior, otherwise we will
312+
// report the location of the instruction immediately
313+
// prior to the trap, which may be on a different line,
314+
// or even a different inlined functions.
315+
//
316+
// TODO(prattmic): The runtime applies the same trap
317+
// behavior for other "injected calls", see injectCall
318+
// in runtime.(*unwinder).next. Do we want to handle
319+
// those as well? I don't believe we'd ever see
320+
// runtime.asyncPreempt or runtime.debugCallV2 in a
321+
// typical crash.
322+
if prevSymbol == "runtime.sigpanic" {
323+
pc++
324+
}
325+
326+
pcs = append(pcs, uintptr(pc))
327+
328+
// Done with this frame. Next line is a new frame.
329+
prevSymbol = currSymbol
330+
currSymbol = ""
331+
symLine = true
244332
}
245-
pcs = append(pcs, uintptr(pc-parentSentinel+childSentinel))
246333
}
247334
return pcs, nil
248335
}

internal/crashmonitor/monitor_test.go

Lines changed: 117 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,20 @@ import (
2424
func TestMain(m *testing.M) {
2525
entry := os.Getenv("CRASHMONITOR_TEST_ENTRYPOINT")
2626
switch entry {
27-
case "via-stderr":
27+
case "via-stderr.panic", "via-stderr.trap":
2828
// This mode bypasses Start and debug.SetCrashOutput;
2929
// the crash is printed to stderr.
3030
debug.SetTraceback("system")
3131
crashmonitor.WriteSentinel(os.Stderr)
3232

33-
child() // this line is "TestMain:+9"
33+
if entry == "via-stderr.panic" {
34+
childPanic() // this line is "TestMain:+10"
35+
} else {
36+
childTrap() // this line is "TestMain:+12"
37+
}
3438
panic("unreachable")
3539

36-
case "start.panic", "start.exit":
40+
case "start.panic", "start.trap", "start.exit":
3741
// These modes uses Start and debug.SetCrashOutput.
3842
// We stub the actual telemetry by instead writing to a file.
3943
crashmonitor.SetIncrementCounter(func(name string) {
@@ -46,12 +50,18 @@ func TestMain(m *testing.M) {
4650
ReportCrashes: true,
4751
TelemetryDir: os.Getenv("CRASHMONITOR_TELEMETRY_DIR"),
4852
})
49-
if entry == "start.panic" {
53+
switch entry {
54+
case "start.panic":
5055
go func() {
51-
child() // this line is "TestMain.func2:1"
56+
childPanic() // this line is "TestMain.func3:+1"
5257
}()
5358
select {} // deadlocks when reached
54-
} else {
59+
case "start.trap":
60+
go func() {
61+
childTrap() // this line is "TestMain.func4:+1"
62+
}()
63+
select {} // deadlocks when reached
64+
case "start.exit":
5565
os.Exit(42)
5666
}
5767

@@ -60,48 +70,94 @@ func TestMain(m *testing.M) {
6070
}
6171
}
6272

63-
func child() {
73+
func childPanic() {
74+
fmt.Println("hello")
75+
grandchildPanic() // this line is "childPanic:+2"
76+
}
77+
78+
func grandchildPanic() {
79+
panic("oops") // this line is "grandchildPanic:=79" (the call from child is inlined)
80+
}
81+
82+
var sinkPtr *int
83+
84+
func childTrap() {
6485
fmt.Println("hello")
65-
grandchild() // this line is "child:+2"
86+
grandchildTrap(sinkPtr) // this line is "childTrap:+2"
6687
}
6788

68-
func grandchild() {
69-
panic("oops") // this line is "grandchild:=92" (the call from child is inlined)
89+
func grandchildTrap(i *int) {
90+
*i = 42 // this line is "grandchildTrap:=90" (the call from childTrap is inlined)
7091
}
7192

7293
// TestViaStderr is an internal test that asserts that the telemetry
7394
// stack generated by the panic in grandchild is correct. It uses
7495
// stderr, and does not rely on [start.Start] or [debug.SetCrashOutput].
7596
func TestViaStderr(t *testing.T) {
76-
_, _, stderr := runSelf(t, "via-stderr")
77-
got, err := crashmonitor.TelemetryCounterName(stderr)
78-
if err != nil {
79-
t.Fatal(err)
80-
}
81-
got = sanitize(counter.DecodeStack(got))
82-
want := "crash/crash\n" +
97+
// Standard panic.
98+
t.Run("panic", func(t *testing.T) {
99+
_, _, stderr := runSelf(t, "via-stderr.panic")
100+
got, err := crashmonitor.TelemetryCounterName(stderr)
101+
if err != nil {
102+
t.Fatal(err)
103+
}
104+
got = sanitize(counter.DecodeStack(got))
105+
want := "crash/crash\n" +
83106
"runtime.gopanic:--\n" +
84-
"golang.org/x/telemetry/internal/crashmonitor_test.grandchild:=69\n" +
85-
"golang.org/x/telemetry/internal/crashmonitor_test.child:+2\n" +
86-
"golang.org/x/telemetry/internal/crashmonitor_test.TestMain:+9\n" +
107+
"golang.org/x/telemetry/internal/crashmonitor_test.grandchildPanic:=79\n" +
108+
"golang.org/x/telemetry/internal/crashmonitor_test.childPanic:+2\n" +
109+
"golang.org/x/telemetry/internal/crashmonitor_test.TestMain:+10\n" +
87110
"main.main:--\n" +
88111
"runtime.main:--\n" +
89112
"runtime.goexit:--"
90113

91-
if !crashmonitor.Supported() { // !go1.23
92-
// Traceback excludes PCs for inlined frames. Before go1.23
93-
// (https://go.dev/cl/571798 specifically), passing the set of
94-
// PCs in the traceback to runtime.CallersFrames, would report
95-
// only the innermost inlined frame and none of the inline
96-
// "callers".
97-
//
98-
// Thus, here we must drop the caller of the inlined frame.
99-
want = strings.ReplaceAll(want, "golang.org/x/telemetry/internal/crashmonitor_test.child:+2\n", "")
100-
}
114+
if !crashmonitor.Supported() { // !go1.23
115+
// Traceback excludes PCs for inlined frames. Before
116+
// go1.23 (https://go.dev/cl/571798 specifically),
117+
// passing the set of PCs in the traceback to
118+
// runtime.CallersFrames, would report only the
119+
// innermost inlined frame and none of the inline
120+
// "callers".
121+
//
122+
// Thus, here we must drop the caller of the inlined
123+
// frame.
124+
want = strings.ReplaceAll(want, "golang.org/x/telemetry/internal/crashmonitor_test.childPanic:+2\n", "")
125+
}
101126

102-
if got != want {
103-
t.Errorf("got counter name <<%s>>, want <<%s>>", got, want)
104-
}
127+
if got != want {
128+
t.Errorf("got counter name <<%s>>, want <<%s>>", got, want)
129+
}
130+
})
131+
132+
// Panic via trap.
133+
t.Run("trap", func(t *testing.T) {
134+
_, _, stderr := runSelf(t, "via-stderr.trap")
135+
got, err := crashmonitor.TelemetryCounterName(stderr)
136+
if err != nil {
137+
t.Fatal(err)
138+
}
139+
got = sanitize(counter.DecodeStack(got))
140+
want := "crash/crash\n" +
141+
"runtime.gopanic:--\n" +
142+
"runtime.panicmem:--\n" +
143+
"runtime.sigpanic:--\n" +
144+
"golang.org/x/telemetry/internal/crashmonitor_test.grandchildTrap:=90\n" +
145+
"golang.org/x/telemetry/internal/crashmonitor_test.childTrap:+2\n" +
146+
"golang.org/x/telemetry/internal/crashmonitor_test.TestMain:+12\n" +
147+
"main.main:--\n" +
148+
"runtime.main:--\n" +
149+
"runtime.goexit:--"
150+
151+
if !crashmonitor.Supported() { // !go1.23
152+
// See above.
153+
want = strings.ReplaceAll(want, "runtime.sigpanic:--\n", "")
154+
want = strings.ReplaceAll(want, "golang.org/x/telemetry/internal/crashmonitor_test.childTrap:+2\n", "")
155+
}
156+
157+
if got != want {
158+
t.Errorf("got counter name <<%s>>, want <<%s>>", got, want)
159+
}
160+
})
105161
}
106162

107163
func waitForExitFile(t *testing.T, exitFile string) {
@@ -144,8 +200,8 @@ func TestStart(t *testing.T) {
144200
}
145201
})
146202

147-
// Assert that the crash monitor increments a telemetry
148-
// counter of the correct name when the child process panics.
203+
// Assert that the crash monitor increments a telemetry counter of the
204+
// correct name when the child process panics.
149205
t.Run("panic", func(t *testing.T) {
150206
// Gather a stack trace from executing the panic statement above.
151207
telemetryFile, exitFile, _ := runSelf(t, "start.panic")
@@ -157,14 +213,38 @@ func TestStart(t *testing.T) {
157213
got := sanitize(counter.DecodeStack(string(data)))
158214
want := "crash/crash\n" +
159215
"runtime.gopanic:--\n" +
160-
"golang.org/x/telemetry/internal/crashmonitor_test.grandchild:=69\n" +
161-
"golang.org/x/telemetry/internal/crashmonitor_test.child:+2\n" +
216+
"golang.org/x/telemetry/internal/crashmonitor_test.grandchildPanic:=79\n" +
217+
"golang.org/x/telemetry/internal/crashmonitor_test.childPanic:+2\n" +
162218
"golang.org/x/telemetry/internal/crashmonitor_test.TestMain.func3:+1\n" +
163219
"runtime.goexit:--"
164220
if got != want {
165221
t.Errorf("got counter name <<%s>>, want <<%s>>", got, want)
166222
}
167223
})
224+
225+
// Assert that the crash monitor increments a telemetry counter of the
226+
// correct name when the child process panics via trap.
227+
t.Run("trap", func(t *testing.T) {
228+
// Gather a stack trace from executing the panic statement above.
229+
telemetryFile, exitFile, _ := runSelf(t, "start.trap")
230+
waitForExitFile(t, exitFile)
231+
data, err := os.ReadFile(telemetryFile)
232+
if err != nil {
233+
t.Fatalf("failed to read file: %v", err)
234+
}
235+
got := sanitize(counter.DecodeStack(string(data)))
236+
want := "crash/crash\n" +
237+
"runtime.gopanic:--\n" +
238+
"runtime.panicmem:--\n" +
239+
"runtime.sigpanic:--\n" +
240+
"golang.org/x/telemetry/internal/crashmonitor_test.grandchildTrap:=90\n" +
241+
"golang.org/x/telemetry/internal/crashmonitor_test.childTrap:+2\n" +
242+
"golang.org/x/telemetry/internal/crashmonitor_test.TestMain.func4:+1\n" +
243+
"runtime.goexit:--"
244+
if got != want {
245+
t.Errorf("got counter name <<%s>>, want <<%s>>", got, want)
246+
}
247+
})
168248
}
169249

170250
// runSelf fork+exec's this test executable using an alternate entry point.

0 commit comments

Comments
 (0)