import (
"internal/abi"
- "internal/goarch"
"internal/profilerecord"
"internal/runtime/atomic"
"runtime/internal/sys"
gp := getg()
mp := acquirem() // we must not be preempted while accessing profstack
- var nstk int
+ nstk := 1
if tracefpunwindoff() || gp.m.hasCgoOnStack() {
+ mp.profStack[0] = logicalStackSentinel
if gp.m.curg == nil || gp.m.curg == gp {
- nstk = callers(skip, mp.profStack)
+ nstk += callers(skip, mp.profStack[1:])
} else {
- nstk = gcallers(gp.m.curg, skip, mp.profStack)
+ nstk += gcallers(gp.m.curg, skip, mp.profStack[1:])
}
} else {
+ mp.profStack[0] = uintptr(skip)
if gp.m.curg == nil || gp.m.curg == gp {
if skip > 0 {
// We skip one fewer frame than the provided value, since the
// skip count includes saveblockevent's own frame, whereas the
// saved frame pointer will give us the caller's return address
// first (so, not including saveblockevent)
- skip -= 1
+ mp.profStack[0] -= 1
}
- nstk = fpTracebackPartialExpand(skip, unsafe.Pointer(getfp()), mp.profStack)
+ nstk += fpTracebackPCs(unsafe.Pointer(getfp()), mp.profStack[1:])
} else {
- mp.profStack[0] = gp.m.curg.sched.pc
- nstk = 1 + fpTracebackPartialExpand(skip, unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[1:])
+ mp.profStack[1] = gp.m.curg.sched.pc
+ nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[2:])
}
}
releasem(mp)
}
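// Illustrative sketch (assumption, not part of this change): the header word
// written into profStack[0] above is decoded later when records are copied
// out via fpunwindExpand. In the runtime, logicalStackSentinel is ^uintptr(0);
// the standalone names below are for exposition only.
const exampleSentinel = ^uintptr(0) // mirrors runtime's logicalStackSentinel

// exampleDecodeProfStack reports whether stk holds an already-logical stack
// (the callers/gcallers path) and, if not, how many logical frames the
// expansion step must skip before emitting PCs.
func exampleDecodeProfStack(stk []uintptr) (logical bool, skip int, pcs []uintptr) {
	if len(stk) == 0 {
		return true, 0, nil
	}
	if stk[0] == exampleSentinel {
		return true, 0, stk[1:] // PCs already inline-expanded; use as-is
	}
	return false, int(stk[0]), stk[1:] // physical PCs; expand and skip later
}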
-// fpTracebackPartialExpand records a call stack obtained starting from fp.
-// This function will skip the given number of frames, properly accounting for
-// inlining, and save remaining frames as "physical" return addresses. The
-// consumer should later use CallersFrames or similar to expand inline frames.
-func fpTracebackPartialExpand(skip int, fp unsafe.Pointer, pcBuf []uintptr) int {
- var n int
- lastFuncID := abi.FuncIDNormal
- skipOrAdd := func(retPC uintptr) bool {
- if skip > 0 {
- skip--
- } else if n < len(pcBuf) {
- pcBuf[n] = retPC
- n++
- }
- return n < len(pcBuf)
- }
- for n < len(pcBuf) && fp != nil {
- // return addr sits one word above the frame pointer
- pc := *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
-
- if skip > 0 {
- callPC := pc - 1
- fi := findfunc(callPC)
- u, uf := newInlineUnwinder(fi, callPC)
- for ; uf.valid(); uf = u.next(uf) {
- sf := u.srcFunc(uf)
- if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
- // ignore wrappers
- } else if more := skipOrAdd(uf.pc + 1); !more {
- return n
- }
- lastFuncID = sf.funcID
- }
- } else {
- // We've skipped the desired number of frames, so no need
- // to perform further inline expansion now.
- pcBuf[n] = pc
- n++
- }
-
- // follow the frame pointer to the next one
- fp = unsafe.Pointer(*(*uintptr)(fp))
- }
- return n
-}
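// Illustrative sketch (assumption, not part of this change): the consumer-side
// expansion that the fpTracebackPartialExpand doc comment refers to. Given the
// physical return PCs it records, runtime.CallersFrames recovers the logical
// stack, with inlined callees appearing as separate frames. Needs
// import "runtime"; expandPhysicalPCs is a hypothetical helper name.
func expandPhysicalPCs(pcs []uintptr) []string {
	var funcs []string
	frames := runtime.CallersFrames(pcs)
	for {
		f, more := frames.Next()
		funcs = append(funcs, f.Function) // inlined callees get their own frames
		if !more {
			break
		}
	}
	return funcs
}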
-
// lockTimer assists with profiling contention on runtime-internal locks.
//
// There are several steps between the time that an M experiences contention and
// the [testing] package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
- n, ok = blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&p[0], r)
p = p[1:]
})
- if !ok {
- return
- }
- expandFrames(p[:n])
- return
-}
-
-func expandFrames(p []BlockProfileRecord) {
- expandedStack := makeProfStack()
- for i := range p {
- cf := CallersFrames(p[i].Stack())
- j := 0
- for j < len(expandedStack) {
- f, more := cf.Next()
- // f.PC is a "call PC", but later consumers will expect
- // "return PCs"
- expandedStack[j] = f.PC + 1
- j++
- if !more {
- break
- }
- }
- k := copy(p[i].Stack0[:], expandedStack[:j])
- clear(p[i].Stack0[k:])
- }
}
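// Illustrative usage (assumption, not part of this change): the grow-and-retry
// calling convention for BlockProfile (and likewise MutexProfile below),
// matching the pattern used by the test removed at the end of this diff;
// collectBlockProfile is a hypothetical helper name.
func collectBlockProfile() []runtime.BlockProfileRecord {
	n, _ := runtime.BlockProfile(nil) // first call just sizes the buffer
	for {
		p := make([]runtime.BlockProfileRecord, n+50) // headroom for new records
		var ok bool
		n, ok = runtime.BlockProfile(p)
		if ok {
			return p[:n]
		}
	}
}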
// blockProfileInternal returns the number of records n in the profile. If there
return
}
-// copyBlockProfileRecord copies the sample values and call stack from src to dst.
-// The call stack is copied as-is. The caller is responsible for handling inline
-// expansion, needed when the call stack was collected with frame pointer unwinding.
func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
dst.Count = src.Count
dst.Cycles = src.Cycles
if asanenabled {
asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
}
- // We just copy the stack here without inline expansion
- // (needed if frame pointer unwinding is used)
- // since this function is called under the profile lock,
- // and doing something that might allocate can violate lock ordering.
- i := copy(dst.Stack0[:], src.Stack)
+ i := fpunwindExpand(dst.Stack0[:], src.Stack)
clear(dst.Stack0[i:])
}
// Most clients should use the [runtime/pprof] package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
- n, ok = mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&p[0], r)
p = p[1:]
})
- if !ok {
- return
- }
- expandFrames(p[:n])
- return
}
// mutexProfileInternal returns the number of records n in the profile. If there
Label(i int) *labelMap
}
-// expandInlinedFrames copies the call stack from pcs into dst, expanding any
-// PCs corresponding to inlined calls into the corresponding PCs for the inlined
-// functions. Returns the number of frames copied to dst.
-func expandInlinedFrames(dst, pcs []uintptr) int {
- cf := runtime.CallersFrames(pcs)
- var n int
- for n < len(dst) {
- f, more := cf.Next()
- // f.PC is a "call PC", but later consumers will expect
- // "return PCs"
- dst[n] = f.PC + 1
- n++
- if !more {
- break
- }
- }
- return n
-}
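// Illustrative sketch (assumption, not part of this change): why
// expandInlinedFrames added 1 to f.PC. runtime.Callers produces return PCs,
// while Frame.PC is the corresponding call PC (one instruction earlier), so
// consumers that expect return PCs need the +1 restored. Standalone example;
// returnPCsAfterInlineExpansion is a hypothetical name.
func returnPCsAfterInlineExpansion() []uintptr {
	pcs := make([]uintptr, 32)
	n := runtime.Callers(1, pcs) // return PCs, starting at our caller
	out := make([]uintptr, 0, n)
	frames := runtime.CallersFrames(pcs[:n])
	for {
		f, more := frames.Next()
		out = append(out, f.PC+1) // back to a "return PC" for downstream use
		if !more {
			break
		}
	}
	return out
}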
-
// printCountCycleProfile outputs block profile records (for block or mutex profiles)
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
values[1] = int64(float64(r.Cycles) / cpuGHz)
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
- n := expandInlinedFrames(expandedStack, r.Stack)
+ n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
b.pbSample(values, locs, nil)
}
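// Worked example of the conversion above (illustrative numbers): on a 3 GHz
// machine cpuGHz is 3.0, so a record with r.Cycles = 6e9 yields
// values[1] = int64(6e9 / 3.0) = 2e9 nanoseconds, i.e. two seconds of
// contention reported against a sample count of r.Count.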
for i := range p {
r := &p[i]
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
- n := expandInlinedFrames(expandedStack, r.Stack)
+ n := pprof_fpunwindExpand(expandedStack, r.Stack)
stack := expandedStack[:n]
for _, pc := range stack {
fmt.Fprintf(w, " %#x", pc)
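// Illustrative output (values invented): with r.Cycles = 1200, r.Count = 3,
// and an expanded stack of two PCs, the two Fprintf calls above emit a legacy
// text-format line such as:
//
//	1200 3 @ 0x4a3f12 0x4a3e01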
runtime.GC()
goroutineDeep(t, depth-4) // -4 for produceProfileEvents, **, chanrecv1, chanrecv, gopark
}
-
-func TestMutexBlockFullAggregation(t *testing.T) {
- // This regression test is adapted from
- // https://github.com/grafana/pyroscope-go/issues/103,
- // authored by Tolya Korniltsev
-
- var m sync.Mutex
-
- prev := runtime.SetMutexProfileFraction(-1)
- defer runtime.SetMutexProfileFraction(prev)
-
- const fraction = 1
- const iters = 100
- const workers = 2
-
- runtime.SetMutexProfileFraction(fraction)
- runtime.SetBlockProfileRate(1)
- defer runtime.SetBlockProfileRate(0)
-
- wg := sync.WaitGroup{}
- wg.Add(workers)
- for j := 0; j < workers; j++ {
- go func() {
- for i := 0; i < iters; i++ {
- m.Lock()
- // Wait at least 1 millisecond to pass the
- // starvation threshold for the mutex
- time.Sleep(time.Millisecond)
- m.Unlock()
- }
- wg.Done()
- }()
- }
- wg.Wait()
-
- assertNoDuplicates := func(name string, collect func([]runtime.BlockProfileRecord) (int, bool)) {
- var p []runtime.BlockProfileRecord
- n, ok := collect(nil)
- for {
- p = make([]runtime.BlockProfileRecord, n+50)
- n, ok = collect(p)
- if ok {
- p = p[:n]
- break
- }
- }
- seen := make(map[string]struct{})
- for _, r := range p {
- cf := runtime.CallersFrames(r.Stack())
- var stack strings.Builder
- for {
- f, more := cf.Next()
- stack.WriteString(f.Func.Name())
- if !more {
- break
- }
- stack.WriteByte('\n')
- }
- s := stack.String()
- if !strings.Contains(s, "TestMutexBlockFullAggregation") {
- continue
- }
- if _, ok := seen[s]; ok {
- t.Errorf("saw duplicate entry in %s profile with stack:\n%s", name, s)
- }
- seen[s] = struct{}{}
- }
- if len(seen) == 0 {
- t.Errorf("did not see any samples in %s profile for this test", name)
- }
- }
- t.Run("mutex", func(t *testing.T) {
- assertNoDuplicates("mutex", runtime.MutexProfile)
- })
- t.Run("block", func(t *testing.T) {
- assertNoDuplicates("block", runtime.BlockProfile)
- })
-}