Skip to content

Commit bd77d6e

Browse files
rhysh authored and prattmic committed
runtime/pprof: check if PC is reused for inlining
When describing call stacks that include inlined function calls, the runtime uses "fake" PCs to represent the frames that inlining removed. Those PCs correspond to real NOP instructions that the compiler inserts for this purpose.

Describing the call stack in a protobuf-formatted profile requires the runtime/pprof package to collapse any sequences of fake call sites back into single PCs, removing the NOPs but retaining their line info.

But because the NOP instructions are part of the function, they can appear as leaf nodes in a CPU profile. That results in an address that should sometimes be ignored (when it appears as a call site) and that sometimes should be present in the profile (when it is observed consuming CPU time).

When processing a PC address, consider it first as a fake PC to add to the current inlining deck, and then as a previously-seen (real) PC.

Fixes #50996

Change-Id: I80802369978bd7ac9969839ecfc9995ea4f84ab4
Reviewed-on: https://go-review.googlesource.com/c/go/+/384239
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent c3c7477 commit bd77d6e

File tree

1 file changed

+49
-1
lines changed

1 file changed

+49
-1
lines changed

src/runtime/pprof/proto.go

Lines changed: 49 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,10 @@ type locInfo struct {
244244
// to represent inlined functions
245245
// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
246246
pcs []uintptr
247+
248+
// results of allFrames call for this PC
249+
frames []runtime.Frame
250+
symbolizeResult symbolizeFlag
247251
}
248252

249253
// newProfileBuilder returns a new profileBuilder.
@@ -399,6 +403,24 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
399403
for len(stk) > 0 {
400404
addr := stk[0]
401405
if l, ok := b.locs[addr]; ok {
406+
// When generating code for an inlined function, the compiler adds
407+
// NOP instructions to the outermost function as a placeholder for
408+
// each layer of inlining. When the runtime generates tracebacks for
409+
// stacks that include inlined functions, it uses the addresses of
410+
// those NOPs as "fake" PCs on the stack as if they were regular
411+
// function call sites. But if a profiling signal arrives while the
412+
// CPU is executing one of those NOPs, its PC will show up as a leaf
413+
// in the profile with its own Location entry. So, always check
414+
// whether addr is a "fake" PC in the context of the current call
415+
// stack by trying to add it to the inlining deck before assuming
416+
// that the deck is complete.
417+
if len(b.deck.pcs) > 0 {
418+
if added := b.deck.tryAdd(addr, l.frames, l.symbolizeResult); added {
419+
stk = stk[1:]
420+
continue
421+
}
422+
}
423+
402424
// first record the location if there is any pending accumulated info.
403425
if id := b.emitLocation(); id > 0 {
404426
locs = append(locs, id)
@@ -451,6 +473,27 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
451473
return locs
452474
}
453475

476+
// Here's an example of how Go 1.17 writes out inlined functions, compiled for
477+
// linux/amd64. The disassembly of main.main shows two levels of inlining: main
478+
// calls b, b calls a, a does some work.
479+
//
480+
// inline.go:9 0x4553ec 90 NOPL // func main() { b(v) }
481+
// inline.go:6 0x4553ed 90 NOPL // func b(v *int) { a(v) }
482+
// inline.go:5 0x4553ee 48c7002a000000 MOVQ $0x2a, 0(AX) // func a(v *int) { *v = 42 }
483+
//
484+
// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line
485+
// 5), the runtime will report the stack as the MOVQ frame being called by the
486+
// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line
487+
// 9).
488+
//
489+
// The role of pcDeck is to collapse those three frames back into a single
490+
// location at 0x4553ee, with file/line/function symbolization info representing
491+
// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd
492+
// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be
493+
// for the caller of main.main. Because main.main was not inlined in its caller,
494+
// the deck will reject the addition, and the fourth PC on the stack will get
495+
// its own location.
496+
454497
// pcDeck is a helper to detect a sequence of inlined functions from
455498
// a stack trace returned by the runtime.
456499
//
@@ -535,7 +578,12 @@ func (b *profileBuilder) emitLocation() uint64 {
535578
newFuncs := make([]newFunc, 0, 8)
536579

537580
id := uint64(len(b.locs)) + 1
538-
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
581+
b.locs[addr] = locInfo{
582+
id: id,
583+
pcs: append([]uintptr{}, b.deck.pcs...),
584+
symbolizeResult: b.deck.symbolizeResult,
585+
frames: append([]runtime.Frame{}, b.deck.frames...),
586+
}
539587

540588
start := b.pb.startMessage()
541589
b.pb.uint64Opt(tagLocation_ID, id)

0 commit comments

Comments
 (0)