]> Cypherpunks repositories - gostls13.git/commitdiff
runtime/pprof: correct inlined function location encoding for non-CPU profiles
authorHana (Hyang-Ah) Kim <hyangah@gmail.com>
Sun, 3 Nov 2019 18:51:25 +0000 (03:51 +0900)
committerHyang-Ah Hana Kim <hyangah@gmail.com>
Fri, 8 Nov 2019 04:08:53 +0000 (04:08 +0000)
Change-Id: Id270a3477bf1a581755c4311eb12f990aa2260b5
Reviewed-on: https://go-review.googlesource.com/c/go/+/205097
Run-TryBot: Hyang-Ah Hana Kim <hyangah@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/runtime/pprof/mprof_test.go
src/runtime/pprof/pprof.go
src/runtime/pprof/proto.go
src/runtime/pprof/protomem.go

index 4c14527e5b1f682b5138bc421d409eeae666a905..8bc1ae41f2e21c8881f803488cc9948acfdf58d9 100644 (file)
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// +build !js
+
 package pprof
 
 import (
@@ -10,6 +12,7 @@ import (
        "reflect"
        "regexp"
        "runtime"
+       "runtime/pprof/internal/profile"
        "testing"
        "unsafe"
 )
@@ -27,6 +30,10 @@ func allocateTransient2M() {
        memSink = make([]byte, 2<<20)
 }
 
+func allocateTransient2MInline() {
+       memSink = make([]byte, 2<<20)
+}
+
 type Obj32 struct {
        link *Obj32
        pad  [32 - unsafe.Sizeof(uintptr(0))]byte
@@ -71,42 +78,93 @@ func TestMemoryProfiler(t *testing.T) {
        // Do the interesting allocations.
        allocateTransient1M()
        allocateTransient2M()
+       allocateTransient2MInline()
        allocatePersistent1K()
        allocateReflect()
        memSink = nil
 
        runtime.GC() // materialize stats
-       var buf bytes.Buffer
-       if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
-               t.Fatalf("failed to write heap profile: %v", err)
-       }
 
        memoryProfilerRun++
 
-       tests := []string{
-               fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#      0x[0-9,a-f]+    runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+       .*/runtime/pprof/mprof_test\.go:40
-#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:74
+       tests := []struct {
+               stk    []string
+               legacy string
+       }{{
+               stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
+               legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+#      0x[0-9,a-f]+    runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+       .*/runtime/pprof/mprof_test\.go:47
+#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:82
 `, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
-
-               fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#      0x[0-9,a-f]+    runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+        .*/runtime/pprof/mprof_test.go:21
-#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:72
+       }, {
+               stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
+               legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+#      0x[0-9,a-f]+    runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+        .*/runtime/pprof/mprof_test.go:24
+#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:79
 `, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
-
-               fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#      0x[0-9,a-f]+    runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+        .*/runtime/pprof/mprof_test.go:27
-#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
+       }, {
+               stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
+               legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+#      0x[0-9,a-f]+    runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+        .*/runtime/pprof/mprof_test.go:30
+#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:80
 `, memoryProfilerRun, (2<<20)*memoryProfilerRun),
-
-               fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
-#      0x[0-9,a-f]+    runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+   .*/runtime/pprof/mprof_test.go:48
+       }, {
+               stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
+               legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
+#      0x[0-9,a-f]+    runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+  .*/runtime/pprof/mprof_test.go:34
+#      0x[0-9,a-f]+    runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:81
 `, memoryProfilerRun, (2<<20)*memoryProfilerRun),
-       }
+       }, {
+               stk: []string{"runtime/pprof.allocateReflectTransient"},
+               legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
+#      0x[0-9,a-f]+    runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+   .*/runtime/pprof/mprof_test.go:55
+`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
+       }}
 
-       for _, test := range tests {
-               if !regexp.MustCompile(test).Match(buf.Bytes()) {
-                       t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
+       t.Run("debug=1", func(t *testing.T) {
+               var buf bytes.Buffer
+               if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
+                       t.Fatalf("failed to write heap profile: %v", err)
                }
-       }
+
+               for _, test := range tests {
+                       if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) {
+                               t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String())
+                       }
+               }
+       })
+
+       t.Run("proto", func(t *testing.T) {
+               var buf bytes.Buffer
+               if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
+                       t.Fatalf("failed to write heap profile: %v", err)
+               }
+               p, err := profile.Parse(&buf)
+               if err != nil {
+                       t.Fatalf("failed to parse heap profile: %v", err)
+               }
+               t.Logf("Profile = %v", p)
+
+               stks := stacks(p)
+               for _, test := range tests {
+                       if !containsStack(stks, test.stk) {
+                               t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p)
+                       }
+               }
+               // Check the inlined function location is encoded correctly.
+               for _, loc := range p.Location {
+                       inlinedCaller, inlinedCallee := false, false
+                       for _, line := range loc.Line {
+                               if line.Function.Name == "runtime/pprof.allocateTransient2MInline" {
+                                       inlinedCallee = true
+                               }
+                               if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" {
+                                       inlinedCaller = true
+                               }
+                       }
+                       if inlinedCallee != inlinedCaller {
+                               t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc)
+                       }
+               }
+       })
 }
index 2fc567ef3416d1f5e550e9399c44cc6f1241dc0b..a7916bf6fb9d3fe213c1b1c2366750aa829e93fc 100644 (file)
@@ -386,16 +386,9 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler fun
                count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
                values[0] = count
                values[1] = int64(nanosec)
-               locs = locs[:0]
-               for _, addr := range r.Stack() {
-                       // For count profiles, all stack addresses are
-                       // return PCs, which is what locForPC expects.
-                       l := b.locForPC(addr)
-                       if l == 0 { // runtime.goexit
-                               continue
-                       }
-                       locs = append(locs, l)
-               }
+               // For count profiles, all stack addresses are
+               // return PCs, which is what appendLocsForStack expects.
+               locs = b.appendLocsForStack(locs[:0], r.Stack())
                b.pbSample(values, locs, nil)
        }
        b.build()
@@ -451,16 +444,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
        var locs []uint64
        for _, k := range keys {
                values[0] = int64(count[k])
-               locs = locs[:0]
-               for _, addr := range p.Stack(index[k]) {
-                       // For count profiles, all stack addresses are
-                       // return PCs, which is what locForPC expects.
-                       l := b.locForPC(addr)
-                       if l == 0 { // runtime.goexit
-                               continue
-                       }
-                       locs = append(locs, l)
-               }
+               // For count profiles, all stack addresses are
+               // return PCs, which is what appendLocsForStack expects.
+               locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
                b.pbSample(values, locs, nil)
        }
        b.build()
index 688df4b942fb8b8bac23207d34c1d70a76752a9b..bc4697a8ab5f86f45b562684c1533b9000635d89 100644 (file)
@@ -44,6 +44,7 @@ type profileBuilder struct {
        locs      map[uintptr]locInfo // list of locInfo starting with the given PC.
        funcs     map[string]int      // Package path-qualified function name to Function.ID
        mem       []memMap
+       deck      pcDeck
 }
 
 type memMap struct {
@@ -428,8 +429,6 @@ func (b *profileBuilder) build() {
        }
 
        values := []int64{0, 0}
-
-       var deck = &pcDeck{}
        var locs []uint64
 
        for e := b.m.all; e != nil; e = e.nextAll {
@@ -445,61 +444,13 @@ func (b *profileBuilder) build() {
                        }
                }
 
-               deck.reset()
-               locs = locs[:0]
-
                // Addresses from stack traces point to the next instruction after each call,
                // except for the leaf, which points to where the signal occurred.
-               // deck.add+emitLocation expects return PCs so increment the leaf address to
+               // appendLocsForStack expects return PCs so increment the leaf address to
                // look like a return PC.
                e.stk[0] += 1
-               for stk := e.stk; len(stk) > 0; {
-                       addr := stk[0]
-                       if l, ok := b.locs[addr]; ok {
-                               // first record the location if there is any pending accumulated info.
-                               if id := b.emitLocation(deck); id > 0 {
-                                       locs = append(locs, id)
-                               }
-
-                               // then, record the cached location.
-                               locs = append(locs, l.id)
-                               stk = stk[len(l.pcs):] // skip the matching pcs.
-                               continue
-                       }
-
-                       frames, symbolizeResult := allFrames(addr)
-                       if len(frames) == 0 { // runtime.goexit.
-                               if id := b.emitLocation(deck); id > 0 {
-                                       locs = append(locs, id)
-                               }
-                               stk = stk[1:]
-                               continue
-                       }
-
-                       if added := deck.tryAdd(addr, frames, symbolizeResult); added {
-                               stk = stk[1:]
-                               continue
-                       }
-                       // add failed because this addr is not inlined with
-                       // the existing PCs in the deck. Flush the deck and retry to
-                       // handle this pc.
-                       if id := b.emitLocation(deck); id > 0 {
-                               locs = append(locs, id)
-                       }
-
-                       // check cache again - previous emitLocation added a new entry
-                       if l, ok := b.locs[addr]; ok {
-                               locs = append(locs, l.id)
-                               stk = stk[len(l.pcs):] // skip the matching pcs.
-                       } else {
-                               deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
-                               stk = stk[1:]
-                       }
-               }
-               if id := b.emitLocation(deck); id > 0 { // emit remaining location.
-                       locs = append(locs, id)
-               }
-               e.stk[0] -= 1 // undo the adjustment on the leaf done before the loop.
+               locs = b.appendLocsForStack(locs[:0], e.stk)
+               e.stk[0] -= 1 // undo the adjustment on the leaf.
 
                b.pbSample(values, locs, labels)
        }
@@ -517,6 +468,62 @@ func (b *profileBuilder) build() {
        b.zw.Close()
 }
 
+// appendLocsForStack appends the location IDs for the given stack trace to the given
+// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
+// an inline marker as the runtime traceback function returns.
+//
+// It may emit to b.pb, so there must be no message encoding in progress.
+func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
+       b.deck.reset()
+       for len(stk) > 0 {
+               addr := stk[0]
+               if l, ok := b.locs[addr]; ok {
+                       // first record the location if there is any pending accumulated info.
+                       if id := b.emitLocation(); id > 0 {
+                               locs = append(locs, id)
+                       }
+
+                       // then, record the cached location.
+                       locs = append(locs, l.id)
+                       stk = stk[len(l.pcs):] // skip the matching pcs.
+                       continue
+               }
+
+               frames, symbolizeResult := allFrames(addr)
+               if len(frames) == 0 { // runtime.goexit.
+                       if id := b.emitLocation(); id > 0 {
+                               locs = append(locs, id)
+                       }
+                       stk = stk[1:]
+                       continue
+               }
+
+               if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
+                       stk = stk[1:]
+                       continue
+               }
+               // add failed because this addr is not inlined with
+               // the existing PCs in the deck. Flush the deck and retry to
+               // handle this pc.
+               if id := b.emitLocation(); id > 0 {
+                       locs = append(locs, id)
+               }
+
+               // check cache again - previous emitLocation added a new entry
+               if l, ok := b.locs[addr]; ok {
+                       locs = append(locs, l.id)
+                       stk = stk[len(l.pcs):] // skip the matching pcs.
+               } else {
+                       b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
+                       stk = stk[1:]
+               }
+       }
+       if id := b.emitLocation(); id > 0 { // emit remaining location.
+               locs = append(locs, id)
+       }
+       return locs
+}
+
 // pcDeck is a helper to detect a sequence of inlined functions from
 // a stack trace returned by the runtime.
 //
@@ -583,15 +590,14 @@ func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symb
 // and returns the location ID encoded in the profile protobuf.
 // It emits to b.pb, so there must be no message encoding in progress.
 // It resets the deck.
-func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
-       defer deck.reset()
-
-       if len(deck.pcs) == 0 {
+func (b *profileBuilder) emitLocation() uint64 {
+       if len(b.deck.pcs) == 0 {
                return 0
        }
+       defer b.deck.reset()
 
-       addr := deck.pcs[0]
-       firstFrame := deck.frames[0]
+       addr := b.deck.pcs[0]
+       firstFrame := b.deck.frames[0]
 
        // We can't write out functions while in the middle of the
        // Location message, so record new functions we encounter and
@@ -603,12 +609,12 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
        newFuncs := make([]newFunc, 0, 8)
 
        id := uint64(len(b.locs)) + 1
-       b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
+       b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
 
        start := b.pb.startMessage()
        b.pb.uint64Opt(tagLocation_ID, id)
        b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
-       for _, frame := range deck.frames {
+       for _, frame := range b.deck.frames {
                // Write out each line in frame expansion.
                funcID := uint64(b.funcs[frame.Function])
                if funcID == 0 {
@@ -623,7 +629,7 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
                        b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
 
                        m := b.mem[i]
-                       m.funcs |= deck.symbolizeResult
+                       m.funcs |= b.deck.symbolizeResult
                        b.mem[i] = m
                        break
                }
index 1c88aae43a0aa2da185a012e5b24062b02323b6e..fa75a28c6263c1be8aedf0ce997f9d77396552e1 100644 (file)
@@ -27,30 +27,27 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau
        values := []int64{0, 0, 0, 0}
        var locs []uint64
        for _, r := range p {
-               locs = locs[:0]
                hideRuntime := true
                for tries := 0; tries < 2; tries++ {
-                       for _, addr := range r.Stack() {
-                               // For heap profiles, all stack
-                               // addresses are return PCs, which is
-                               // what locForPC expects.
-                               if hideRuntime {
+                       stk := r.Stack()
+                       // For heap profiles, all stack
+                       // addresses are return PCs, which is
+                       // what appendLocsForStack expects.
+                       if hideRuntime {
+                               for i, addr := range stk {
                                        if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
                                                continue
                                        }
                                        // Found non-runtime. Show any runtime uses above it.
-                                       hideRuntime = false
+                                       stk = stk[i:]
+                                       break
                                }
-                               l := b.locForPC(addr)
-                               if l == 0 { // runtime.goexit
-                                       continue
-                               }
-                               locs = append(locs, l)
                        }
+                       locs = b.appendLocsForStack(locs[:0], stk)
                        if len(locs) > 0 {
                                break
                        }
-                       hideRuntime = false // try again, and show all frames
+                       hideRuntime = false // try again, and show all frames next time.
                }
 
                values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)