Cypherpunks repositories - gostls13.git/commitdiff
Revert "runtime: avoid multiple records with identical stacks from MutexProfile"
author     Cherry Mui <cherryyz@google.com>
           Mon, 15 Jul 2024 19:49:44 +0000 (19:49 +0000)
committer  Cherry Mui <cherryyz@google.com>
           Mon, 15 Jul 2024 20:09:02 +0000 (20:09 +0000)
This reverts CL 595966.

Reason for revert: This CL contains a bug. See the comment in https://go-review.googlesource.com/c/go/+/595966/8#message-57f4c1f9570b5fe912e06f4ae3b52817962533c0
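
For background, the reverted CL expanded inlined frames when profile records are read, by walking raw PCs with runtime.CallersFrames and converting the resulting call PCs back into return PCs. The sketch below is illustrative only and is not part of this commit; the helper name expandPCs is invented here, but the technique mirrors the expandFrames/expandInlinedFrames helpers removed in the diff below.

    package main

    import (
        "fmt"
        "runtime"
    )

    // expandPCs copies frames from pcs into dst, expanding any PC that
    // corresponds to inlined calls into one entry per logical frame.
    // CallersFrames yields "call PCs", so 1 is added back to produce the
    // "return PCs" that downstream profile consumers expect.
    func expandPCs(dst, pcs []uintptr) int {
        frames := runtime.CallersFrames(pcs)
        n := 0
        for n < len(dst) {
            f, more := frames.Next()
            dst[n] = f.PC + 1
            n++
            if !more {
                break
            }
        }
        return n
    }

    func main() {
        pcs := make([]uintptr, 32)
        n := runtime.Callers(1, pcs) // capture raw PCs of the current stack
        expanded := make([]uintptr, 64)
        m := expandPCs(expanded, pcs[:n])
        fmt.Printf("captured %d raw PCs, %d expanded frames\n", n, m)
    }

The +1 restores the return-PC convention that later consumers such as appendLocsForStack expect, as noted in the comments removed below.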

Change-Id: I48030907ded173ae20a8965bf1b41a713dd06059
Reviewed-on: https://go-review.googlesource.com/c/go/+/598219
Reviewed-by: Than McIntosh <thanm@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
src/runtime/mprof.go
src/runtime/pprof/pprof.go
src/runtime/pprof/pprof_test.go

diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index e5620aec11dfd013b45256c57007cf24df1cd411..b4fe0e5549c7cd8f1b90f40500434a2cff29757a 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -9,7 +9,6 @@ package runtime
 
 import (
        "internal/abi"
-       "internal/goarch"
        "internal/profilerecord"
        "internal/runtime/atomic"
        "runtime/internal/sys"
@@ -543,14 +542,16 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
        gp := getg()
        mp := acquirem() // we must not be preempted while accessing profstack
 
-       var nstk int
+       nstk := 1
        if tracefpunwindoff() || gp.m.hasCgoOnStack() {
+               mp.profStack[0] = logicalStackSentinel
                if gp.m.curg == nil || gp.m.curg == gp {
-                       nstk = callers(skip, mp.profStack)
+                       nstk = callers(skip, mp.profStack[1:])
                } else {
-                       nstk = gcallers(gp.m.curg, skip, mp.profStack)
+                       nstk = gcallers(gp.m.curg, skip, mp.profStack[1:])
                }
        } else {
+               mp.profStack[0] = uintptr(skip)
                if gp.m.curg == nil || gp.m.curg == gp {
                        if skip > 0 {
                                // We skip one fewer frame than the provided value for frame
@@ -558,12 +559,12 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
                                // frame, whereas the saved frame pointer will give us the
                                // caller's return address first (so, not including
                                // saveblockevent)
-                               skip -= 1
+                               mp.profStack[0] -= 1
                        }
-                       nstk = fpTracebackPartialExpand(skip, unsafe.Pointer(getfp()), mp.profStack)
+                       nstk += fpTracebackPCs(unsafe.Pointer(getfp()), mp.profStack[1:])
                } else {
-                       mp.profStack[0] = gp.m.curg.sched.pc
-                       nstk = 1 + fpTracebackPartialExpand(skip, unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[1:])
+                       mp.profStack[1] = gp.m.curg.sched.pc
+                       nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.m.curg.sched.bp), mp.profStack[2:])
                }
        }
 
@@ -571,52 +572,6 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
        releasem(mp)
 }
 
-// fpTracebackPartialExpand records a call stack obtained starting from fp.
-// This function will skip the given number of frames, properly accounting for
-// inlining, and save remaining frames as "physical" return addresses. The
-// consumer should later use CallersFrames or similar to expand inline frames.
-func fpTracebackPartialExpand(skip int, fp unsafe.Pointer, pcBuf []uintptr) int {
-       var n int
-       lastFuncID := abi.FuncIDNormal
-       skipOrAdd := func(retPC uintptr) bool {
-               if skip > 0 {
-                       skip--
-               } else if n < len(pcBuf) {
-                       pcBuf[n] = retPC
-                       n++
-               }
-               return n < len(pcBuf)
-       }
-       for n < len(pcBuf) && fp != nil {
-               // return addr sits one word above the frame pointer
-               pc := *(*uintptr)(unsafe.Pointer(uintptr(fp) + goarch.PtrSize))
-
-               if skip > 0 {
-                       callPC := pc - 1
-                       fi := findfunc(callPC)
-                       u, uf := newInlineUnwinder(fi, callPC)
-                       for ; uf.valid(); uf = u.next(uf) {
-                               sf := u.srcFunc(uf)
-                               if sf.funcID == abi.FuncIDWrapper && elideWrapperCalling(lastFuncID) {
-                                       // ignore wrappers
-                               } else if more := skipOrAdd(uf.pc + 1); !more {
-                                       return n
-                               }
-                               lastFuncID = sf.funcID
-                       }
-               } else {
-                       // We've skipped the desired number of frames, so no need
-                       // to perform further inline expansion now.
-                       pcBuf[n] = pc
-                       n++
-               }
-
-               // follow the frame pointer to the next one
-               fp = unsafe.Pointer(*(*uintptr)(fp))
-       }
-       return n
-}
-
 // lockTimer assists with profiling contention on runtime-internal locks.
 //
 // There are several steps between the time that an M experiences contention and
@@ -1120,34 +1075,10 @@ type BlockProfileRecord struct {
 // the [testing] package's -test.blockprofile flag instead
 // of calling BlockProfile directly.
 func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
-       n, ok = blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+       return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
                copyBlockProfileRecord(&p[0], r)
                p = p[1:]
        })
-       if !ok {
-               return
-       }
-       expandFrames(p[:n])
-       return
-}
-
-func expandFrames(p []BlockProfileRecord) {
-       expandedStack := makeProfStack()
-       for i := range p {
-               cf := CallersFrames(p[i].Stack())
-               j := 0
-               for ; j < len(expandedStack); j++ {
-                       f, more := cf.Next()
-                       // f.PC is a "call PC", but later consumers will expect
-                       // "return PCs"
-                       expandedStack[i] = f.PC + 1
-                       if !more {
-                               break
-                       }
-               }
-               k := copy(p[i].Stack0[:], expandedStack[:j])
-               clear(p[i].Stack0[k:])
-       }
 }
 
 // blockProfileInternal returns the number of records n in the profile. If there
@@ -1180,9 +1111,6 @@ func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord
        return
 }
 
-// copyBlockProfileRecord copies the sample values and call stack from src to dst.
-// The call stack is copied as-is. The caller is responsible for handling inline
-// expansion, needed when the call stack was collected with frame pointer unwinding.
 func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
        dst.Count = src.Count
        dst.Cycles = src.Cycles
@@ -1195,11 +1123,7 @@ func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProf
        if asanenabled {
                asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
        }
-       // We just copy the stack here without inline expansion
-       // (needed if frame pointer unwinding is used)
-       // since this function is called under the profile lock,
-       // and doing something that might allocate can violate lock ordering.
-       i := copy(dst.Stack0[:], src.Stack)
+       i := fpunwindExpand(dst.Stack0[:], src.Stack)
        clear(dst.Stack0[i:])
 }
 
@@ -1218,15 +1142,10 @@ func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok
 // Most clients should use the [runtime/pprof] package
 // instead of calling MutexProfile directly.
 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
-       n, ok = mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+       return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
                copyBlockProfileRecord(&p[0], r)
                p = p[1:]
        })
-       if !ok {
-               return
-       }
-       expandFrames(p[:n])
-       return
 }
 
 // mutexProfileInternal returns the number of records n in the profile. If there
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 4b7a9f63c6500608b0262e730ad6bf4a0facb3e3..d3af5bba914ecc1ee11a4a18836851ce58bae07d 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -404,25 +404,6 @@ type countProfile interface {
        Label(i int) *labelMap
 }
 
-// expandInlinedFrames copies the call stack from pcs into dst, expanding any
-// PCs corresponding to inlined calls into the corresponding PCs for the inlined
-// functions. Returns the number of frames copied to dst.
-func expandInlinedFrames(dst, pcs []uintptr) int {
-       cf := runtime.CallersFrames(pcs)
-       var n int
-       for n < len(dst) {
-               f, more := cf.Next()
-               // f.PC is a "call PC", but later consumers will expect
-               // "return PCs"
-               dst[n] = f.PC + 1
-               n++
-               if !more {
-                       break
-               }
-       }
-       return n
-}
-
 // printCountCycleProfile outputs block profile records (for block or mutex profiles)
 // as the pprof-proto format output. Translations from cycle count to time duration
 // are done because The proto expects count and time (nanoseconds) instead of count
@@ -445,7 +426,7 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records []
                values[1] = int64(float64(r.Cycles) / cpuGHz)
                // For count profiles, all stack addresses are
                // return PCs, which is what appendLocsForStack expects.
-               n := expandInlinedFrames(expandedStack, r.Stack)
+               n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
                locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
                b.pbSample(values, locs, nil)
        }
@@ -954,7 +935,7 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
        for i := range p {
                r := &p[i]
                fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
-               n := expandInlinedFrames(expandedStack, r.Stack)
+               n := pprof_fpunwindExpand(expandedStack, r.Stack)
                stack := expandedStack[:n]
                for _, pc := range stack {
                        fmt.Fprintf(w, " %#x", pc)
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index bd11914544fbc007ad89ac7111f621aa9406ea44..09abbb31ae6ccd02a31df2e62edc250da8ec62db 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -2578,81 +2578,3 @@ func produceProfileEvents(t *testing.T, depth int) {
        runtime.GC()
        goroutineDeep(t, depth-4) // -4 for produceProfileEvents, **, chanrecv1, chanrev, gopark
 }
-
-func TestMutexBlockFullAggregation(t *testing.T) {
-       // This regression test is adapted from
-       // https://github.com/grafana/pyroscope-go/issues/103,
-       // authored by Tolya Korniltsev
-
-       var m sync.Mutex
-
-       prev := runtime.SetMutexProfileFraction(-1)
-       defer runtime.SetMutexProfileFraction(prev)
-
-       const fraction = 1
-       const iters = 100
-       const workers = 2
-
-       runtime.SetMutexProfileFraction(fraction)
-       runtime.SetBlockProfileRate(1)
-       defer runtime.SetBlockProfileRate(0)
-
-       wg := sync.WaitGroup{}
-       wg.Add(workers)
-       for j := 0; j < workers; j++ {
-               go func() {
-                       for i := 0; i < iters; i++ {
-                               m.Lock()
-                               // Wait at least 1 millisecond to pass the
-                               // starvation threshold for the mutex
-                               time.Sleep(time.Millisecond)
-                               m.Unlock()
-                       }
-                       wg.Done()
-               }()
-       }
-       wg.Wait()
-
-       assertNoDuplicates := func(name string, collect func([]runtime.BlockProfileRecord) (int, bool)) {
-               var p []runtime.BlockProfileRecord
-               n, ok := collect(nil)
-               for {
-                       p = make([]runtime.BlockProfileRecord, n+50)
-                       n, ok = collect(p)
-                       if ok {
-                               p = p[:n]
-                               break
-                       }
-               }
-               seen := make(map[string]struct{})
-               for _, r := range p {
-                       cf := runtime.CallersFrames(r.Stack())
-                       var stack strings.Builder
-                       for {
-                               f, more := cf.Next()
-                               stack.WriteString(f.Func.Name())
-                               if !more {
-                                       break
-                               }
-                               stack.WriteByte('\n')
-                       }
-                       s := stack.String()
-                       if !strings.Contains(s, "TestMutexBlockFullAggregation") {
-                               continue
-                       }
-                       if _, ok := seen[s]; ok {
-                               t.Errorf("saw duplicate entry in %s profile with stack:\n%s", name, s)
-                       }
-                       seen[s] = struct{}{}
-               }
-               if len(seen) == 0 {
-                       t.Errorf("did not see any samples in %s profile for this test", name)
-               }
-       }
-       t.Run("mutex", func(t *testing.T) {
-               assertNoDuplicates("mutex", runtime.MutexProfile)
-       })
-       t.Run("block", func(t *testing.T) {
-               assertNoDuplicates("block", runtime.BlockProfile)
-       })
-}
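
The deleted TestMutexBlockFullAggregation collects complete mutex and block profiles with a grow-and-retry loop. A minimal standalone sketch of that collection pattern (names here are invented for illustration, not taken from the test) looks like this:

    package main

    import (
        "fmt"
        "runtime"
    )

    // collectMutexProfile grows its buffer until MutexProfile reports that
    // every record fit, mirroring the loop in the removed test.
    func collectMutexProfile() []runtime.BlockProfileRecord {
        n, _ := runtime.MutexProfile(nil) // ask for the current record count
        for {
            p := make([]runtime.BlockProfileRecord, n+50) // headroom for new records
            var ok bool
            n, ok = runtime.MutexProfile(p)
            if ok {
                return p[:n]
            }
        }
    }

    func main() {
        runtime.SetMutexProfileFraction(1)
        records := collectMutexProfile()
        fmt.Printf("%d mutex profile records\n", len(records))
    }

The retry is needed because new records can appear between the sizing call and the copying call; the removed test grows the buffer by 50 records per attempt for the same reason.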