import (
"internal/abi"
+ "internal/profilerecord"
"internal/runtime/atomic"
"runtime/internal/sys"
"unsafe"
// includes inlined frames. We may record more than this many
// "physical" frames when using frame pointer unwinding to account
// for deferred handling of skipping frames & inline expansion.
- maxLogicalStack = 32
+ maxLogicalStack = 128
// maxSkip is to account for deferred inline expansion
// when using frame pointer unwinding. We record the stack
// with "physical" frame pointers but handle skipping "logical"
// Called by malloc to record a profiled block.
func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
- nstk := callers(4, mp.profStack)
+ if mp.profStack == nil {
+ // mp.profStack is nil if we happen to sample an allocation during the
+ // initialization of mp. This case is rare, so we just ignore such
+ // allocations. Change MemProfileRate to 1 if you need to reproduce such
+ // cases for testing purposes.
+ return
+ }
+ // Only use the part of mp.profStack we need and ignore the extra space
+ // reserved for delayed inline expansion with frame pointer unwinding.
+ nstk := callers(4, mp.profStack[:maxLogicalStack])
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
print("requested skip=", skip)
throw("invalid skip value")
}
-
gp := getg()
mp := acquirem() // we must not be preempted while accessing profstack
nstk := 1
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
+ return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) {
+ copyMemProfileRecord(&p[0], r)
+ p = p[1:]
+ })
+}
+
+// memProfileInternal returns the number of records n in the profile. If there
+// are at most size records, copyFn is invoked for each record, and ok returns
+// true.
+func memProfileInternal(size int, inuseZero bool, copyFn func(profilerecord.MemProfileRecord)) (n int, ok bool) {
cycle := mProfCycle.read()
// If we're between mProf_NextCycle and mProf_Flush, take care
// of flushing to the active profile so we only have to look
}
}
}
- if n <= len(p) {
+ if n <= size {
ok = true
- idx := 0
for b := head; b != nil; b = b.allnext {
mp := b.mp()
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
- record(&p[idx], b)
- idx++
+ r := profilerecord.MemProfileRecord{
+ AllocBytes: int64(mp.active.alloc_bytes),
+ FreeBytes: int64(mp.active.free_bytes),
+ AllocObjects: int64(mp.active.allocs),
+ FreeObjects: int64(mp.active.frees),
+ Stack: b.stk(),
+ }
+ copyFn(r)
}
}
}
return
}
-// Write b's data to r.
-func record(r *MemProfileRecord, b *bucket) {
- mp := b.mp()
- r.AllocBytes = int64(mp.active.alloc_bytes)
- r.FreeBytes = int64(mp.active.free_bytes)
- r.AllocObjects = int64(mp.active.allocs)
- r.FreeObjects = int64(mp.active.frees)
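+// copyMemProfileRecord copies the sample counts and stack from src, which uses
+// the internal profilerecord representation, into the public MemProfileRecord dst.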
+func copyMemProfileRecord(dst *MemProfileRecord, src profilerecord.MemProfileRecord) {
+ dst.AllocBytes = src.AllocBytes
+ dst.FreeBytes = src.FreeBytes
+ dst.AllocObjects = src.AllocObjects
+ dst.FreeObjects = src.FreeObjects
if raceenabled {
- racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
+ racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(MemProfile))
}
if msanenabled {
- msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
+ msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
}
if asanenabled {
- asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
+ asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
}
- i := copy(r.Stack0[:], b.stk())
- clear(r.Stack0[i:])
+ i := copy(dst.Stack0[:], src.Stack)
+ clear(dst.Stack0[i:])
+}
+
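+// pprof_memProfileInternal is called via linkname by the runtime/pprof package
+// to read the memory profile as profilerecord.MemProfileRecord values, which
+// carry variable-length stacks instead of the fixed-size Stack0 array.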
+//go:linkname pprof_memProfileInternal
+func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool) {
+ return memProfileInternal(len(p), inuseZero, func(r profilerecord.MemProfileRecord) {
+ p[0] = r
+ p = p[1:]
+ })
}
func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
// the [testing] package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
+ return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ copyBlockProfileRecord(&p[0], r)
+ p = p[1:]
+ })
+}
+
+// blockProfileInternal returns the number of records n in the profile. If there
+// are at most size records, copyFn is invoked for each record, and ok returns
+// true.
+func blockProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) {
lock(&profBlockLock)
head := (*bucket)(bbuckets.Load())
for b := head; b != nil; b = b.allnext {
n++
}
- if n <= len(p) {
+ if n <= size {
ok = true
for b := head; b != nil; b = b.allnext {
bp := b.bp()
- r := &p[0]
- r.Count = int64(bp.count)
+ r := profilerecord.BlockProfileRecord{
+ Count: int64(bp.count),
+ Cycles: bp.cycles,
+ Stack: b.stk(),
+ }
// Prevent callers from having to worry about division by zero errors.
// See discussion on http://golang.org/cl/299991.
if r.Count == 0 {
r.Count = 1
}
- r.Cycles = bp.cycles
- if raceenabled {
- racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
- }
- if msanenabled {
- msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
- }
- if asanenabled {
- asanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
- }
- i := fpunwindExpand(r.Stack0[:], b.stk())
- clear(r.Stack0[i:])
- p = p[1:]
+ copyFn(r)
}
}
unlock(&profBlockLock)
return
}
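+
+// copyBlockProfileRecord copies src into the public BlockProfileRecord dst,
+// expanding the frame-pointer call stack with fpunwindExpand.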
+func copyBlockProfileRecord(dst *BlockProfileRecord, src profilerecord.BlockProfileRecord) {
+ dst.Count = src.Count
+ dst.Cycles = src.Cycles
+ if raceenabled {
+ racewriterangepc(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0), getcallerpc(), abi.FuncPCABIInternal(BlockProfile))
+ }
+ if msanenabled {
+ msanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
+ }
+ if asanenabled {
+ asanwrite(unsafe.Pointer(&dst.Stack0[0]), unsafe.Sizeof(dst.Stack0))
+ }
+ i := fpunwindExpand(dst.Stack0[:], src.Stack)
+ clear(dst.Stack0[i:])
+}
+
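+// pprof_blockProfileInternal is the runtime/pprof entry point for reading the
+// block profile in the internal profilerecord format.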
+//go:linkname pprof_blockProfileInternal
+func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) {
+ return blockProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ p[0] = r
+ p = p[1:]
+ })
+}
+
// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
// Most clients should use the [runtime/pprof] package
// instead of calling MutexProfile directly.
func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
+ return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ copyBlockProfileRecord(&p[0], r)
+ p = p[1:]
+ })
+}
+
+// mutexProfileInternal returns the number of records n in the profile. If there
+// are at most size records, copyFn is invoked for each record, and ok returns
+// true.
+func mutexProfileInternal(size int, copyFn func(profilerecord.BlockProfileRecord)) (n int, ok bool) {
lock(&profBlockLock)
head := (*bucket)(xbuckets.Load())
for b := head; b != nil; b = b.allnext {
n++
}
- if n <= len(p) {
+ if n <= size {
ok = true
for b := head; b != nil; b = b.allnext {
bp := b.bp()
- r := &p[0]
- r.Count = int64(bp.count)
- r.Cycles = bp.cycles
- i := fpunwindExpand(r.Stack0[:], b.stk())
- clear(r.Stack0[i:])
- p = p[1:]
+ r := profilerecord.BlockProfileRecord{
+ Count: int64(bp.count),
+ Cycles: bp.cycles,
+ Stack: b.stk(),
+ }
+ copyFn(r)
}
}
unlock(&profBlockLock)
return
}
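+
+// pprof_mutexProfileInternal is the runtime/pprof entry point for reading the
+// mutex profile in the internal profilerecord format.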
+//go:linkname pprof_mutexProfileInternal
+func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool) {
+ return mutexProfileInternal(len(p), func(r profilerecord.BlockProfileRecord) {
+ p[0] = r
+ p = p[1:]
+ })
+}
+
// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
+ return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) {
+ copy(p[0].Stack0[:], r.Stack)
+ p = p[1:]
+ })
+}
+
+// threadCreateProfileInternal returns the number of records n in the profile.
+// If there are at most size records, copyFn is invoked for each record, and
+// ok returns true.
+func threadCreateProfileInternal(size int, copyFn func(profilerecord.StackRecord)) (n int, ok bool) {
first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
for mp := first; mp != nil; mp = mp.alllink {
n++
}
- if n <= len(p) {
+ if n <= size {
ok = true
- i := 0
for mp := first; mp != nil; mp = mp.alllink {
- p[i].Stack0 = mp.createstack
- i++
+ r := profilerecord.StackRecord{Stack: mp.createstack[:]}
+ copyFn(r)
}
}
return
}
-//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
-func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
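+// pprof_threadCreateInternal is the runtime/pprof entry point for reading the
+// thread creation profile in the internal profilerecord format.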
+//go:linkname pprof_threadCreateInternal
+func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) {
+ return threadCreateProfileInternal(len(p), func(r profilerecord.StackRecord) {
+ p[0] = r
+ p = p[1:]
+ })
+}
+
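+// pprof_goroutineProfileWithLabels is linknamed from runtime/pprof and returns
+// goroutine profile records together with their label pointers.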
+//go:linkname pprof_goroutineProfileWithLabels
+func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
return goroutineProfileWithLabels(p, labels)
}
// labels may be nil. If labels is non-nil, it must have the same length as p.
-func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
if labels != nil && len(labels) != len(p) {
labels = nil
}
sema uint32
active bool
offset atomic.Int64
- records []StackRecord
+ records []profilerecord.StackRecord
labels []unsafe.Pointer
}{
sema: 1,
return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
}
-func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
if len(p) == 0 {
// An empty slice is obviously too small. Return a rough
// allocation estimate without bothering to STW. As long as
ourg := getg()
+ pcbuf := makeProfStack() // see saveg() for explanation
stw := stopTheWorld(stwGoroutineProfile)
// Using gcount while the world is stopped should give us a consistent view
// of the number of live goroutines, minus the number of goroutines that are
sp := getcallersp()
pc := getcallerpc()
systemstack(func() {
- saveg(pc, sp, ourg, &p[0])
+ saveg(pc, sp, ourg, &p[0], pcbuf)
})
if labels != nil {
labels[0] = ourg.labels
if fing != nil {
fing.goroutineProfiled.Store(goroutineProfileSatisfied)
if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {
- doRecordGoroutineProfile(fing)
+ doRecordGoroutineProfile(fing, pcbuf)
}
}
startTheWorld(stw)
// call will start by adding itself to the profile (before the act of
// executing can cause any changes in its stack).
forEachGRace(func(gp1 *g) {
- tryRecordGoroutineProfile(gp1, Gosched)
+ tryRecordGoroutineProfile(gp1, pcbuf, Gosched)
})
stw = stopTheWorld(stwGoroutineProfileCleanup)
if getg().m.p.ptr() == nil {
throw("no P available, write barriers are forbidden")
}
- tryRecordGoroutineProfile(gp1, osyield)
+ tryRecordGoroutineProfile(gp1, nil, osyield)
}
// tryRecordGoroutineProfile ensures that gp1 has the appropriate representation
// in the current goroutine profile: either that it should not be profiled, or
// that a snapshot of its call stack and labels are now in the profile.
-func tryRecordGoroutineProfile(gp1 *g, yield func()) {
+func tryRecordGoroutineProfile(gp1 *g, pcbuf []uintptr, yield func()) {
if readgstatus(gp1) == _Gdead {
// Dead goroutines should not appear in the profile. Goroutines that
// start while profile collection is active will get goroutineProfiled
// in this limbo.
mp := acquirem()
if gp1.goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {
- doRecordGoroutineProfile(gp1)
+ doRecordGoroutineProfile(gp1, pcbuf)
gp1.goroutineProfiled.Store(goroutineProfileSatisfied)
}
releasem(mp)
// goroutine that is coordinating the goroutine profile (running on its own
// stack), or from the scheduler in preparation to execute gp1 (running on the
// system stack).
-func doRecordGoroutineProfile(gp1 *g) {
+func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
if readgstatus(gp1) == _Grunning {
print("doRecordGoroutineProfile gp1=", gp1.goid, "\n")
throw("cannot read stack of running goroutine")
// set gp1.goroutineProfiled to goroutineProfileInProgress and so are still
// preventing it from being truly _Grunnable. So we'll use the system stack
// to avoid schedule delays.
- systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset]) })
+ systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) })
if goroutineProfile.labels != nil {
goroutineProfile.labels[offset] = gp1.labels
}
}
-func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
gp := getg()
isOK := func(gp1 *g) bool {
return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
}
+ pcbuf := makeProfStack() // see saveg() for explanation
stw := stopTheWorld(stwGoroutineProfile)
// World is stopped, no locking required.
sp := getcallersp()
pc := getcallerpc()
systemstack(func() {
- saveg(pc, sp, gp, &r[0])
+ saveg(pc, sp, gp, &r[0], pcbuf)
})
r = r[1:]
// The world is stopped, so it cannot use cgocall (which will be
// blocked at exitsyscall). Do it on the system stack so it won't
// call into the scheduler (see traceback.go:cgoContextPCs).
- systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0]) })
+ systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &r[0], pcbuf) })
if labels != nil {
lbl[0] = gp1.labels
lbl = lbl[1:]
// Most clients should use the [runtime/pprof] package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {
+ records := make([]profilerecord.StackRecord, len(p))
+ n, ok = goroutineProfileInternal(records)
+ if !ok {
+ return
+ }
+ for i, mr := range records[0:n] {
+ copy(p[i].Stack0[:], mr.Stack)
+ }
+ return
+}
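+
+// goroutineProfileInternal is the profilerecord-based core of GoroutineProfile;
+// it collects goroutine stacks without labels.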
+func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) {
return goroutineProfileWithLabels(p, nil)
}
-func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
+func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) {
+ // To reduce memory usage, we want to allocate an r.Stack that is just big
+ // enough to hold gp's stack trace. Naively we might achieve this by
+ // recording our stack trace into mp.profStack, and then allocating an
+ // r.Stack of the right size. However, mp.profStack is also used for
+ // allocation profiling, so it could get overwritten if the slice allocation
+ // gets profiled. So instead we record the stack trace into a temporary
+ // pcbuf which is usually given to us by our caller. When it's not, we have
+ // to allocate one here. This will only happen for goroutines that were in a
+ // syscall when the goroutine profile started or for goroutines that manage
+ // to execute before we finish iterating over all the goroutines.
+ if pcbuf == nil {
+ pcbuf = makeProfStack()
+ }
+
var u unwinder
u.initAt(pc, sp, 0, gp, unwindSilentErrors)
- n := tracebackPCs(&u, 0, r.Stack0[:])
- if n < len(r.Stack0) {
- r.Stack0[n] = 0
- }
+ n := tracebackPCs(&u, 0, pcbuf)
+ r.Stack = make([]uintptr, n)
+ copy(r.Stack, pcbuf)
}
// Stack formats a stack trace of the calling goroutine into buf
"bufio"
"fmt"
"internal/abi"
+ "internal/profilerecord"
"io"
"runtime"
"sort"
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block and contention profiles.
-func printCountCycleProfile(w io.Writer, countName, cycleName string, records []runtime.BlockProfileRecord) error {
+func printCountCycleProfile(w io.Writer, countName, cycleName string, records []profilerecord.BlockProfileRecord) error {
// Output profile in protobuf form.
b := newProfileBuilder(w)
b.pbValueType(tagProfile_PeriodType, countName, "count")
b.pbValueType(tagProfile_SampleType, countName, "count")
b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")
- cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9
+ cpuGHz := float64(pprof_cyclesPerSecond()) / 1e9
values := []int64{0, 0}
var locs []uint64
+ expandedStack := pprof_makeProfStack()
for _, r := range records {
values[0] = r.Count
values[1] = int64(float64(r.Cycles) / cpuGHz)
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
- locs = b.appendLocsForStack(locs[:0], r.Stack())
+ n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
+ locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
b.pbSample(values, locs, nil)
}
b.build()
// the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case.
- var p []runtime.MemProfileRecord
- n, ok := runtime.MemProfile(nil, true)
+ var p []profilerecord.MemProfileRecord
+ n, ok := pprof_memProfileInternal(nil, true)
for {
// Allocate room for a slightly bigger profile,
// in case a few more entries have been added
// since the call to MemProfile.
- p = make([]runtime.MemProfileRecord, n+50)
- n, ok = runtime.MemProfile(p, true)
+ p = make([]profilerecord.MemProfileRecord, n+50)
+ n, ok = pprof_memProfileInternal(p, true)
if ok {
p = p[0:n]
break
fmt.Fprintf(w, "%d: %d [%d: %d] @",
r.InUseObjects(), r.InUseBytes(),
r.AllocObjects, r.AllocBytes)
- for _, pc := range r.Stack() {
+ for _, pc := range r.Stack {
fmt.Fprintf(w, " %#x", pc)
}
fmt.Fprintf(w, "\n")
- printStackRecord(w, r.Stack(), false)
+ printStackRecord(w, r.Stack, false)
}
// Print memstats information too.
// Until https://golang.org/issues/6104 is addressed, wrap
// ThreadCreateProfile because there's no point in tracking labels when we
// don't get any stack-traces.
- return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
- return runtime.ThreadCreateProfile(p)
+ return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
+ return pprof_threadCreateInternal(p)
})
}
return runtime.NumGoroutine()
}
-// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go
-func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
-
// writeGoroutine writes the current runtime GoroutineProfile to w.
func writeGoroutine(w io.Writer, debug int) error {
if debug >= 2 {
return writeGoroutineStacks(w)
}
- return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels)
+ return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels)
}
func writeGoroutineStacks(w io.Writer) error {
return err
}
-func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error {
+func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
// Find out how many records there are (fetch(nil)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
// the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case.
- var p []runtime.StackRecord
+ var p []profilerecord.StackRecord
var labels []unsafe.Pointer
n, ok := fetch(nil, nil)
// Allocate room for a slightly bigger profile,
// in case a few more entries have been added
// since the call to ThreadProfile.
- p = make([]runtime.StackRecord, n+10)
+ p = make([]profilerecord.StackRecord, n+10)
labels = make([]unsafe.Pointer, n+10)
n, ok = fetch(p, labels)
if ok {
}
type runtimeProfile struct {
- stk []runtime.StackRecord
+ stk []profilerecord.StackRecord
labels []unsafe.Pointer
}
func (p *runtimeProfile) Len() int { return len(p.stk) }
-func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() }
+func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack }
func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }
var cpu struct {
// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
- return writeProfileInternal(w, debug, "contention", runtime.BlockProfile)
+ return writeProfileInternal(w, debug, "contention", pprof_blockProfileInternal)
}
// writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error {
- return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile)
+ return writeProfileInternal(w, debug, "mutex", pprof_mutexProfileInternal)
}
// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters.
-func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool)) error {
- var p []runtime.BlockProfileRecord
+func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]profilerecord.BlockProfileRecord) (int, bool)) error {
+ var p []profilerecord.BlockProfileRecord
n, ok := runtimeProfile(nil)
for {
- p = make([]runtime.BlockProfileRecord, n+50)
+ p = make([]profilerecord.BlockProfileRecord, n+50)
n, ok = runtimeProfile(p)
if ok {
p = p[:n]
w = tw
fmt.Fprintf(w, "--- %v:\n", name)
- fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
+ fmt.Fprintf(w, "cycles/second=%v\n", pprof_cyclesPerSecond())
if name == "mutex" {
fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
}
+ expandedStack := pprof_makeProfStack()
for i := range p {
r := &p[i]
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
- for _, pc := range r.Stack() {
+ n := pprof_fpunwindExpand(expandedStack, r.Stack)
+ stack := expandedStack[:n]
+ for _, pc := range stack {
fmt.Fprintf(w, " %#x", pc)
}
fmt.Fprint(w, "\n")
if debug > 0 {
- printStackRecord(w, r.Stack(), true)
+ printStackRecord(w, stack, true)
}
}
return b.Flush()
}
-func runtime_cyclesPerSecond() int64
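+// The functions below are implemented in the runtime package and made
+// available to this package via go:linkname.
+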
+//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
+func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
+
+//go:linkname pprof_cyclesPerSecond runtime.pprof_cyclesPerSecond
+func pprof_cyclesPerSecond() int64
+
+//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
+func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)
+
+//go:linkname pprof_blockProfileInternal runtime.pprof_blockProfileInternal
+func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)
+
+//go:linkname pprof_mutexProfileInternal runtime.pprof_mutexProfileInternal
+func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)
+
+//go:linkname pprof_threadCreateInternal runtime.pprof_threadCreateInternal
+func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool)
+
+//go:linkname pprof_fpunwindExpand runtime.pprof_fpunwindExpand
+func pprof_fpunwindExpand(dst, src []uintptr) int
+
+//go:linkname pprof_makeProfStack runtime.pprof_makeProfStack
+func pprof_makeProfStack() []uintptr