From: Rhys Hiltner Date: Tue, 8 Feb 2022 22:51:41 +0000 (-0800) Subject: runtime/pprof: check if PC is reused for inlining X-Git-Tag: go1.19beta1~1122 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=bd77d6e24048e5a8b7b07d2d0b7cf552d21905f5;p=gostls13.git runtime/pprof: check if PC is reused for inlining When describing call stacks that include inlined function calls, the runtime uses "fake" PCs to represent the frames that inlining removed. Those PCs correspond to real NOP instructions that the compiler inserts for this purpose. Describing the call stack in a protobuf-formatted profile requires the runtime/pprof package to collapse any sequences of fake call sites back into single PCs, removing the NOPs but retaining their line info. But because the NOP instructions are part of the function, they can appear as leaf nodes in a CPU profile. That results in an address that should sometimes be ignored (when it appears as a call site) and that sometimes should be present in the profile (when it is observed consuming CPU time). When processing a PC address, consider it first as a fake PC to add to the current inlining deck, and then as a previously-seen (real) PC. Fixes #50996 Change-Id: I80802369978bd7ac9969839ecfc9995ea4f84ab4 Reviewed-on: https://go-review.googlesource.com/c/go/+/384239 Reviewed-by: Cherry Mui Reviewed-by: Michael Pratt --- diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 073a076802..215bd0bf96 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -244,6 +244,10 @@ type locInfo struct { // to represent inlined functions // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368 pcs []uintptr + + // results of allFrames call for this PC + frames []runtime.Frame + symbolizeResult symbolizeFlag } // newProfileBuilder returns a new profileBuilder. @@ -399,6 +403,24 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo for len(stk) > 0 { addr := stk[0] if l, ok := b.locs[addr]; ok { + // When generating code for an inlined function, the compiler adds + // NOP instructions to the outermost function as a placeholder for + // each layer of inlining. When the runtime generates tracebacks for + // stacks that include inlined functions, it uses the addresses of + // those NOPs as "fake" PCs on the stack as if they were regular + // function call sites. But if a profiling signal arrives while the + // CPU is executing one of those NOPs, its PC will show up as a leaf + // in the profile with its own Location entry. So, always check + // whether addr is a "fake" PC in the context of the current call + // stack by trying to add it to the inlining deck before assuming + // that the deck is complete. + if len(b.deck.pcs) > 0 { + if added := b.deck.tryAdd(addr, l.frames, l.symbolizeResult); added { + stk = stk[1:] + continue + } + } + // first record the location if there is any pending accumulated info. if id := b.emitLocation(); id > 0 { locs = append(locs, id) @@ -451,6 +473,27 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo return locs } +// Here's an example of how Go 1.17 writes out inlined functions, compiled for +// linux/amd64. The disassembly of main.main shows two levels of inlining: main +// calls b, b calls a, a does some work. +// +// inline.go:9 0x4553ec 90 NOPL // func main() { b(v) } +// inline.go:6 0x4553ed 90 NOPL // func b(v *int) { a(v) } +// inline.go:5 0x4553ee 48c7002a000000 MOVQ $0x2a, 0(AX) // func a(v *int) { *v = 42 } +// +// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line +// 5), the runtime will report the stack as the MOVQ frame being called by the +// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line +// 9). +// +// The role of pcDeck is to collapse those three frames back into a single +// location at 0x4553ee, with file/line/function symbolization info representing +// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd +// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be +// for the caller of main.main. Because main.main was not inlined in its caller, +// the deck will reject the addition, and the fourth PC on the stack will get +// its own location. + // pcDeck is a helper to detect a sequence of inlined functions from // a stack trace returned by the runtime. // @@ -535,7 +578,12 @@ func (b *profileBuilder) emitLocation() uint64 { newFuncs := make([]newFunc, 0, 8) id := uint64(len(b.locs)) + 1 - b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)} + b.locs[addr] = locInfo{ + id: id, + pcs: append([]uintptr{}, b.deck.pcs...), + symbolizeResult: b.deck.symbolizeResult, + frames: append([]runtime.Frame{}, b.deck.frames...), + } start := b.pb.startMessage() b.pb.uint64Opt(tagLocation_ID, id)