From 9e83c11fca44282e4b8b1e099f437f557b73b43e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 20 Apr 2017 15:54:12 -0400 Subject: [PATCH] runtime/pprof: expand inlined frames in symbolized proto profiles Currently proto symbolization uses runtime.FuncForPC and assumes each PC maps to a single frame. This isn't true in the presence of inlining (even with leaf-only inlining this can get incorrect results). Change PC symbolization to use runtime.CallersFrames to expand each PC to all of the frames at that PC. Change-Id: I8d20dff7495a5de495ae07f569122c225d433ced Reviewed-on: https://go-review.googlesource.com/41256 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Michael Matloob --- src/runtime/pprof/pprof.go | 4 +- src/runtime/pprof/pprof_test.go | 27 +++++++++ src/runtime/pprof/proto.go | 98 ++++++++++++++++++++------------- src/runtime/pprof/protomem.go | 5 +- 4 files changed, 91 insertions(+), 43 deletions(-) diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 74092d2565..b6253b1aa5 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -400,9 +400,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro locs = locs[:0] for _, addr := range p.Stack(index[k]) { // For count profiles, all stack addresses are - // return PCs. Adjust them to be call PCs for - // locForPC. - addr-- + // return PCs, which is what locForPC expects. locs = append(locs, b.locForPC(addr)) } b.pbSample(values, locs, nil) diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index f762fa5a6c..0d957ad848 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -87,6 +87,33 @@ func TestCPUProfileMultithreaded(t *testing.T) { }) } +func TestCPUProfileInlining(t *testing.T) { + testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) { + cpuHogger(inlinedCaller, dur) + }) +} + +func inlinedCaller() { + inlinedCallee() +} + +func inlinedCallee() { + // We could just use cpuHog1, but for loops prevent inlining + // right now. :( + foo := salt1 + i := 0 +loop: + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + if i++; i < 1e5 { + goto loop + } + salt1 = foo +} + func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) { p, err := profile.Parse(bytes.NewReader(valBytes)) if err != nil { diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 5210706f4e..9e16e580ee 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -43,7 +43,7 @@ type profileBuilder struct { strings []string stringMap map[string]int locs map[uintptr]int - funcs map[*runtime.Func]int + funcs map[string]int // Package path-qualified function name to Function.ID mem []memMap } @@ -183,24 +183,60 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file } // locForPC returns the location ID for addr. -// addr must be a call address (not a return address). +// addr must be a return PC. This returns the location of the call. // It may emit to b.pb, so there must be no message encoding in progress. func (b *profileBuilder) locForPC(addr uintptr) uint64 { id := uint64(b.locs[addr]) if id != 0 { return id } - f := runtime.FuncForPC(addr) - if f != nil && f.Name() == "runtime.goexit" { + + // Expand this one address using CallersFrames so we can cache + // each expansion. In general, CallersFrames takes a whole + // stack, but in this case we know there will be no skips in + // the stack and we have return PCs anyway. + frames := runtime.CallersFrames([]uintptr{addr}) + frame, more := frames.Next() + if frame.Function == "runtime.goexit" { + // Short-circuit if we see runtime.goexit so the loop + // below doesn't allocate a useless empty location. return 0 } - funcID, lineno := b.funcForPC(addr) + + if frame.PC == 0 { + // If we failed to resolve the frame, at least make up + // a reasonable call PC. This mostly happens in tests. + frame.PC = addr - 1 + } + + // We can't write out functions while in the middle of the + // Location message, so record new functions we encounter and + // write them out after the Location. + type newFunc struct { + id uint64 + name, file string + } + newFuncs := make([]newFunc, 0, 8) + id = uint64(len(b.locs)) + 1 b.locs[addr] = int(id) start := b.pb.startMessage() b.pb.uint64Opt(tagLocation_ID, id) - b.pb.uint64Opt(tagLocation_Address, uint64(addr)) - b.pbLine(tagLocation_Line, funcID, int64(lineno)) + b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC)) + for frame.Function != "runtime.goexit" { + // Write out each line in frame expansion. + funcID := uint64(b.funcs[frame.Function]) + if funcID == 0 { + funcID = uint64(len(b.funcs)) + 1 + b.funcs[frame.Function] = int(funcID) + newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File}) + } + b.pbLine(tagLocation_Line, funcID, int64(frame.Line)) + if !more { + break + } + frame, more = frames.Next() + } if len(b.mem) > 0 { i := sort.Search(len(b.mem), func(i int) bool { return b.mem[i].end > addr @@ -210,34 +246,19 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { } } b.pb.endMessage(tagProfile_Location, start) - b.flush() - return id -} -// funcForPC returns the func ID and line number for addr. -// It may emit to b.pb, so there must be no message encoding in progress. -func (b *profileBuilder) funcForPC(addr uintptr) (funcID uint64, lineno int) { - f := runtime.FuncForPC(addr) - if f == nil { - return 0, 0 - } - file, lineno := f.FileLine(addr) - funcID = uint64(b.funcs[f]) - if funcID != 0 { - return funcID, lineno + // Write out functions we found during frame expansion. + for _, fn := range newFuncs { + start := b.pb.startMessage() + b.pb.uint64Opt(tagFunction_ID, fn.id) + b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name)) + b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file)) + b.pb.endMessage(tagProfile_Function, start) } - funcID = uint64(len(b.funcs)) + 1 - b.funcs[f] = int(funcID) - name := f.Name() - start := b.pb.startMessage() - b.pb.uint64Opt(tagFunction_ID, funcID) - b.pb.int64Opt(tagFunction_Name, b.stringIndex(name)) - b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(name)) - b.pb.int64Opt(tagFunction_Filename, b.stringIndex(file)) - b.pb.endMessage(tagProfile_Function, start) b.flush() - return funcID, lineno + return id } // newProfileBuilder returns a new profileBuilder. @@ -253,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder { strings: []string{""}, stringMap: map[string]int{"": 0}, locs: map[uintptr]int{}, - funcs: map[*runtime.Func]int{}, + funcs: map[string]int{}, } b.readMapping() return b @@ -351,11 +372,14 @@ func (b *profileBuilder) build() error { locs = locs[:0] for i, addr := range e.stk { - // Addresses from stack traces point to the next instruction after - // each call. Adjust by -1 to land somewhere on the actual call - // (except for the leaf, which is not a call). - if i > 0 { - addr-- + // Addresses from stack traces point to the + // next instruction after each call, except + // for the leaf, which points to where the + // signal occurred. locForPC expects return + // PCs, so increment the leaf address to look + // like a return PC. + if i == 0 { + addr++ } l := b.locForPC(addr) if l == 0 { // runtime.goexit diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go index 86c7dacfe6..2756cfd28d 100644 --- a/src/runtime/pprof/protomem.go +++ b/src/runtime/pprof/protomem.go @@ -29,9 +29,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error for tries := 0; tries < 2; tries++ { for _, addr := range r.Stack() { // For heap profiles, all stack - // addresses are return PCs. Adjust - // them to be call PCs for locForPC. - addr-- + // addresses are return PCs, which is + // what locForPC expects. if hideRuntime { if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") { continue -- 2.50.0