// Must not call other functions nor access heap/globals in the loop,
// otherwise, under the race detector, the samples will be in the race runtime.
func cpuHog1(x int) int {
+ return cpuHog0(x, 1e5)
+}
+
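+// cpuHog0 is the shared busy-loop body; its callers (such as cpuHog1 and
+// inlinedCallee) control the iteration count n.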
+func cpuHog0(x, n int) int {
foo := x
- for i := 0; i < 1e5; i++ {
+ for i := 0; i < n; i++ {
if foo > 0 {
foo *= foo
} else {
}
func TestCPUProfileInlining(t *testing.T) {
- testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
+ p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
cpuHogger(inlinedCaller, &salt1, dur)
})
+
+ // Check that the inlined function locations are encoded correctly.
+ // inlinedCallee and inlinedCaller should appear in one Location.
+ for _, loc := range p.Location {
+ hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
+ for _, line := range loc.Line {
+ if line.Function.Name == "runtime/pprof.inlinedCallee" {
+ hasInlinedCallee = true
+ }
+ if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
+ hasInlinedCallerAfterInlinedCallee = true
+ }
+ }
+ if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
+ t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
+ }
+ }
}
func inlinedCaller(x int) int {
- x = inlinedCallee(x)
+ x = inlinedCallee(x, 1e5)
return x
}
-func inlinedCallee(x int) int {
- // We could just use cpuHog1, but for loops prevent inlining
- // right now. :(
- foo := x
- i := 0
-loop:
- if foo > 0 {
- foo *= foo
- } else {
- foo *= foo + 1
+func inlinedCallee(x, n int) int {
+ return cpuHog0(x, n)
+}
+
+func TestCPUProfileRecursion(t *testing.T) {
+ p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
+ cpuHogger(recursionCaller, &salt1, dur)
+ })
+
+ // Check that the Location encoding was not confused by recursive calls.
+ for i, loc := range p.Location {
+ recursionFunc := 0
+ for _, line := range loc.Line {
+ if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
+ recursionFunc++
+ }
+ }
+ if recursionFunc > 1 {
+ t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
+ }
}
- if i++; i < 1e5 {
- goto loop
+}
+
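+// recursionCaller and recursionCallee produce stacks that mix recursive calls
+// with the inlinable inlinedCallee, letting TestCPUProfileRecursion check that
+// recursion is not mistaken for inlining when Locations are encoded.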
+func recursionCaller(x int) int {
+ y := recursionCallee(3, x)
+ return y
+}
+
+func recursionCallee(n, x int) int {
+ if n == 0 {
+ return 1
}
- return foo
+ y := inlinedCallee(x, 1e4)
+ return y * recursionCallee(n-1, x)
}
-func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
+func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil {
t.Fatal(err)
count := uintptr(sample.Value[0])
f(count, sample.Location, sample.Label)
}
+ return p
}
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
-// as interpreted by matches.
-func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) {
+// as interpreted by matches, and returns the parsed profile.
+func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
switch runtime.GOOS {
case "darwin":
switch runtime.GOARCH {
f(duration)
StopCPUProfile()
- if profileOk(t, matches, need, avoid, prof, duration) {
- return
+ if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
+ return p
}
duration *= 2
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
}
t.FailNow()
+ return nil
}
func contains(slice []string, s string) bool {
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
-func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
+func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true
// Check that profile is well formed, contains 'need', and does not contain
avoidSamples := make([]uintptr, len(avoid))
var samples uintptr
var buf bytes.Buffer
- parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
+ p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
samples += count
// not enough samples due to coarse timer
// resolution. Let it go.
t.Log("too few samples on Windows (golang.org/issue/10842)")
- return false
+ return p, false
}
// Check that we got a reasonable number of samples.
}
if len(need) == 0 {
- return ok
+ return p, ok
}
var total uintptr
ok = false
}
}
- return ok
+ return p, ok
}
// Fork can hang if preempted with signals frequently enough (see issue 5517).
pb protobuf
strings []string
stringMap map[string]int
- locs map[uintptr]int
- funcs map[string]int // Package path-qualified function name to Function.ID
+ locs map[uintptr]locInfo // location info for the Location whose pc sequence starts with the given PC.
+ funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
}
b.pb.endMessage(tag, start)
}
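+// allFrames expands a single return PC into its runtime.Frames (one frame,
+// plus additional frames for any calls inlined at that PC) and returns a
+// symbolizeFlag recording whether symbolization was attempted and whether it failed.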
+func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
+ // Expand this one address using CallersFrames so we can cache
+ // each expansion. In general, CallersFrames takes a whole
+ // stack, but in this case we know there will be no skips in
+ // the stack and we have return PCs anyway.
+ frames := runtime.CallersFrames([]uintptr{addr})
+ frame, more := frames.Next()
+ if frame.Function == "runtime.goexit" {
+ // Short-circuit if we see runtime.goexit so the loop
+ // below doesn't allocate a useless empty location.
+ return nil, 0
+ }
+
+ symbolizeResult := lookupTried
+ if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
+ symbolizeResult |= lookupFailed
+ }
+
+ if frame.PC == 0 {
+ // If we failed to resolve the frame, at least make up
+ // a reasonable call PC. This mostly happens in tests.
+ frame.PC = addr - 1
+ }
+ ret := []runtime.Frame{frame}
+ for frame.Function != "runtime.goexit" && more {
+ frame, more = frames.Next()
+ ret = append(ret, frame)
+ }
+ return ret, symbolizeResult
+}
+
// locForPC returns the location ID for addr.
// addr must be a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) locForPC(addr uintptr) uint64 {
- id := uint64(b.locs[addr])
- if id != 0 {
- return id
+ if loc, ok := b.locs[addr]; ok {
+ return loc.id
}
// Expand this one address using CallersFrames so we can cache
}
newFuncs := make([]newFunc, 0, 8)
- id = uint64(len(b.locs)) + 1
- b.locs[addr] = int(id)
+ id := uint64(len(b.locs)) + 1
+ b.locs[addr] = locInfo{id: id, pcs: []uintptr{addr}}
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
return id
}
+type locInfo struct {
+ // location id assigned by the profileBuilder
+ id uint64
+
+ // sequence of PCs, including the fake PCs returned by the traceback
+ // to represent inlined functions
+ // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
+ pcs []uintptr
+}
+
// newProfileBuilder returns a new profileBuilder.
// CPU profiling data obtained from the runtime can be added
// by calling b.addCPUData, and then the eventual profile
start: time.Now(),
strings: []string{""},
stringMap: map[string]int{"": 0},
- locs: map[uintptr]int{},
+ locs: map[uintptr]locInfo{},
funcs: map[string]int{},
}
b.readMapping()
}
values := []int64{0, 0}
+
+ deck := &pcDeck{}
var locs []uint64
+
for e := b.m.all; e != nil; e = e.nextAll {
values[0] = e.count
values[1] = e.count * b.period
}
}
+ deck.reset()
locs = locs[:0]
- for i, addr := range e.stk {
- // Addresses from stack traces point to the
- // next instruction after each call, except
- // for the leaf, which points to where the
- // signal occurred. locForPC expects return
- // PCs, so increment the leaf address to look
- // like a return PC.
- if i == 0 {
- addr++
+
+ // Addresses from stack traces point to the next instruction after each call,
+ // except for the leaf, which points to where the signal occurred.
+ // deck.tryAdd and emitLocation expect return PCs, so increment the leaf
+ // address to look like a return PC.
+ e.stk[0]++
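+
+ // Walk the stack from leaf to root: reuse cached Locations where possible;
+ // otherwise accumulate PCs in the deck until a pc that cannot be an inline
+ // continuation forces a flush via emitLocation.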
+ for stk := e.stk; len(stk) > 0; {
+ addr := stk[0]
+ if l, ok := b.locs[addr]; ok {
+ // first record the location if there is any pending accumulated info.
+ if id := b.emitLocation(deck); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // then, record the cached location.
+ locs = append(locs, l.id)
+ stk = stk[len(l.pcs):] // skip the matching pcs.
+ continue
}
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
+
+ frames, symbolizeResult := allFrames(addr)
+ if len(frames) == 0 { // runtime.goexit.
+ if id := b.emitLocation(deck); id > 0 {
+ locs = append(locs, id)
+ }
+ stk = stk[1:]
continue
}
- locs = append(locs, l)
+
+ if added := deck.tryAdd(addr, frames, symbolizeResult); added {
+ stk = stk[1:]
+ continue
+ }
+ // tryAdd failed because this addr is not inlined with the
+ // existing PCs in the deck. Flush the deck and retry
+ // handling this pc.
+ if id := b.emitLocation(deck); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // check cache again - previous emitLocation added a new entry
+ if l, ok := b.locs[addr]; ok {
+ locs = append(locs, l.id)
+ stk = stk[len(l.pcs):] // skip the matching pcs.
+ } else {
+ deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
+ stk = stk[1:]
+ }
+ }
+ if id := b.emitLocation(deck); id > 0 { // emit remaining location.
+ locs = append(locs, id)
}
+ e.stk[0]-- // undo the leaf adjustment made before the loop.
+
b.pbSample(values, locs, labels)
}
b.zw.Close()
}
+// pcDeck is a helper to detect a sequence of inlined functions from
+// a stack trace returned by the runtime.
+//
+// The stack traces returned by runtime's traceback functions are fully
+// expanded (at least for Go functions) and include the fake pcs representing
+// inlined functions. The profile proto expects the inlined functions to be
+// encoded in one Location message.
+// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
+//
+// Runtime does not directly expose whether a frame is for an inlined function
+// and looking up debug info is not ideal, so we use a heuristic to filter
+// the fake pcs and restore the inlined and entry functions. Inlined functions
+// have the following properties:
+//   Frame's Func is nil (note: also true for non-Go functions), and
+//   Frame's Entry matches its entry function frame's Entry (note: this could also be true for recursive calls and non-Go functions), and
+//   Frame's Name does not match its entry function frame's name.
+//
+// While reading and processing the pcs in a stack trace one by one (from leaf to root),
+// we use pcDeck to temporarily hold the observed pcs and their expanded frames
+// until we observe the entry function frame.
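+//
+// For example, if f is inlined into g, the expanded stack contains a fake pc
+// for f's frame followed by a pc in g. f's frame has a nil Func and shares
+// g's Entry, so tryAdd keeps accumulating, and the two frames are later
+// emitted together as a single Location with two Line entries.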
+type pcDeck struct {
+ pcs []uintptr
+ frames []runtime.Frame
+ symbolizeResult symbolizeFlag
+}
+
+func (d *pcDeck) reset() {
+ d.pcs = d.pcs[:0]
+ d.frames = d.frames[:0]
+ d.symbolizeResult = 0
+}
+
+// tryAdd tries to add the pc and Frames expanded from it (most likely one,
+// since the stack trace is already fully expanded) and the symbolizeResult
+// to the deck. If it fails, the caller needs to flush the deck and retry.
+func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
+ if existing := len(d.pcs); existing > 0 {
+ // 'frames' are all expanded from one 'pc' and represent all inlined functions
+ // so we check only the first one.
+ newFrame := frames[0]
+ last := d.frames[existing-1]
+ if last.Func != nil && newFrame.Func != nil { // Can't be an inlined frame.
+ return false
+ }
+
+ if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
+ return false
+ }
+
+ if last.Entry != newFrame.Entry { // newFrame is for a different function.
+ return false
+ }
+ if last.Function == newFrame.Function { // maybe recursion.
+ return false
+ }
+ }
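+ // The pc either starts an empty deck or looks like an inline continuation
+ // of the frames already in it; absorb it.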
+ d.pcs = append(d.pcs, pc)
+ d.frames = append(d.frames, frames...)
+ d.symbolizeResult |= symbolizeResult
+ return true
+}
+
+// emitLocation emits the new location and function information recorded in the deck
+// and returns the location ID encoded in the profile protobuf.
+// It emits to b.pb, so there must be no message encoding in progress.
+// It resets the deck.
+func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
+ defer deck.reset()
+
+ if len(deck.pcs) == 0 {
+ return 0
+ }
+
+ addr := deck.pcs[0]
+ firstFrame := deck.frames[0]
+
+ // We can't write out functions while in the middle of the
+ // Location message, so record new functions we encounter and
+ // write them out after the Location.
+ type newFunc struct {
+ id uint64
+ name, file string
+ }
+ newFuncs := make([]newFunc, 0, 8)
+
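+ // Cache the new Location under its first PC so that later samples can reuse
+ // the id and skip the entire run of PCs it covers.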
+ id := uint64(len(b.locs)) + 1
+ b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
+
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagLocation_ID, id)
+ b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
+ for _, frame := range deck.frames {
+ // Write out each line in frame expansion.
+ funcID := uint64(b.funcs[frame.Function])
+ if funcID == 0 {
+ funcID = uint64(len(b.funcs)) + 1
+ b.funcs[frame.Function] = int(funcID)
+ newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+ }
+ b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+ }
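+ // Associate the Location with the memory mapping that contains addr (or the
+ // fake mapping) and record the symbolization outcome on that mapping.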
+ for i := range b.mem {
+ if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
+ b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
+
+ m := b.mem[i]
+ m.funcs |= deck.symbolizeResult
+ b.mem[i] = m
+ break
+ }
+ }
+ b.pb.endMessage(tagProfile_Location, start)
+
+ // Write out functions we found during frame expansion.
+ for _, fn := range newFuncs {
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagFunction_ID, fn.id)
+ b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+ b.pb.endMessage(tagProfile_Function, start)
+ }
+
+ b.flush()
+ return id
+}
+
// readMapping reads /proc/self/maps and writes mappings to b.pb.
// It saves the address ranges of the mappings in b.mem for use
// when emitting locations.