]> Cypherpunks repositories - gostls13.git/commitdiff
runtime/pprof: remove hard-coded sleep in CPU profile reader
authorNick Ripley <nick.ripley@datadoghq.com>
Fri, 21 Nov 2025 16:01:29 +0000 (16:01 +0000)
committerNick Ripley <nick.ripley@datadoghq.com>
Fri, 21 Nov 2025 21:25:29 +0000 (13:25 -0800)
The CPU profiler reader goroutine has a hard-coded 100ms sleep between
reads of the CPU profile sample buffer. This is done because waking up
the CPU profile reader is not signal-safe on some platforms. As a
consequence, stopping the profiler takes 200ms (one iteration to read
the last samples and one to see the "eof"), and on many-core systems the
reader does not wake up frequently enought to keep up with incoming
data.

This CL removes the sleep where it is safe to do so, following a
suggestion by Austin Clements in the comments on CL 445375. We let the
reader fully block, and wake up the reader when the buffer is over
half-full.

Fixes #63043
Updates #56029

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest
Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f
Reviewed-on: https://go-review.googlesource.com/c/go/+/610815
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
src/runtime/pprof/pprof.go
src/runtime/profbuf.go
src/runtime/profbuf_test.go

index 78d00af6cac2fd649e0ae24a524c8bf9db3bd0cd..c617a8b26a4968a44aeecafa20d710488b4da8eb 100644 (file)
@@ -924,7 +924,10 @@ func profileWriter(w io.Writer) {
        b := newProfileBuilder(w)
        var err error
        for {
-               time.Sleep(100 * time.Millisecond)
+               if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
+                       // see runtime_pprof_readProfile
+                       time.Sleep(100 * time.Millisecond)
+               }
                data, tags, eof := readProfile()
                if e := b.addCPUData(data, tags); e != nil && err == nil {
                        err = e
index 40aff9e1d2425131764446bc50bd8d56907a0e8b..147212206fc1259d9edb76ed2fe76caa2cfdedd3 100644 (file)
@@ -38,6 +38,10 @@ import (
 // be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
 // and rNext is only used by the reader, so it can be accessed without atomics.
 //
+// If the reader is blocked waiting for more data, the writer will wake it up if
+// either the buffer is more than half full, or when the writer sets the eof
+// marker or writes overflow entries (described below.)
+//
 // If the writer gets ahead of the reader, so that the buffer fills,
 // future writes are discarded and replaced in the output stream by an
 // overflow entry, which has size 2+hdrsize+1, time set to the time of
@@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u
                // Racing with reader setting flag bits in b.w, to avoid lost wakeups.
                old := b.w.load()
                new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
+               // We re-load b.r here to reduce the likelihood of early wakeups
+               // if the reader already consumed some data between the last
+               // time we read b.r and now. This isn't strictly necessary.
+               unread := countSub(new.dataCount(), b.r.load().dataCount())
+               if unread < 0 {
+                       // The new count overflowed and wrapped around.
+                       unread += len(b.data)
+               }
+               wakeupThreshold := len(b.data) / 2
+               if unread < wakeupThreshold {
+                       // Carry over the sleeping flag since we're not planning
+                       // to wake the reader yet
+                       new |= old & profReaderSleeping
+               }
                if !b.w.cas(old, new) {
                        continue
                }
-               // If there was a reader, wake it up.
-               if old&profReaderSleeping != 0 {
+               // If we've hit our high watermark for data in the buffer,
+               // and there is a reader, wake it up.
+               if unread >= wakeupThreshold && old&profReaderSleeping != 0 {
+                       // NB: if we reach this point, then the sleeping bit is
+                       // cleared in the new b.w value
                        notewakeup(&b.wait)
                }
                break
index 7a435c9e5f9f3de8c815556c8c599391ba2b10df..2f068ac386ded861d83fae4a6e89ed0677f298b4 100644 (file)
@@ -5,8 +5,12 @@
 package runtime_test
 
 import (
+       "fmt"
+       "regexp"
+       "runtime"
        . "runtime"
        "slices"
+       "sync"
        "testing"
        "time"
        "unsafe"
@@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) {
                }
        }
 }
+
+func TestProfBufWakeup(t *testing.T) {
+       b := NewProfBuf(2, 16, 2)
+       var wg sync.WaitGroup
+       wg.Go(func() {
+               read := 0
+               for {
+                       rdata, _, eof := b.Read(ProfBufBlocking)
+                       if read == 0 && len(rdata) < 8 {
+                               t.Errorf("first wake up at less than half full, got %x", rdata)
+                       }
+                       read += len(rdata)
+                       if eof {
+                               return
+                       }
+               }
+       })
+
+       // Under the hood profBuf uses notetsleepg when the reader blocks.
+       // Different platforms have different implementations, leading to
+       // different statuses we need to look for to determine whether the
+       // reader is blocked.
+       var waitStatus string
+       switch runtime.GOOS {
+       case "js":
+               waitStatus = "waiting"
+       case "wasip1":
+               waitStatus = "runnable"
+       default:
+               waitStatus = "syscall"
+       }
+
+       // Ensure that the reader is blocked
+       awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+       // NB: this writes 6 words not 4. Fine for the test.
+       // The reader shouldn't wake up for this
+       b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4})
+
+       // The reader should still be blocked
+       //
+       // TODO(nick): this is racy. We could Gosched and still have the reader
+       // blocked in a buggy implementation because it just didn't get a chance
+       // to run
+       awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+       b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8})
+       b.Close()
+
+       // Wait here so we can be sure the reader got the data
+       wg.Wait()
+}
+
+// see also runtime/pprof tests
+func awaitBlockedGoroutine(state, fName string) {
+       re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName)
+       r := regexp.MustCompile(re)
+
+       buf := make([]byte, 64<<10)
+       for {
+               Gosched()
+               n := Stack(buf, true)
+               if n == len(buf) {
+                       // Buffer wasn't large enough for a full goroutine dump.
+                       // Resize it and try again.
+                       buf = make([]byte, 2*len(buf))
+                       continue
+               }
+               const count = 1
+               if len(r.FindAll(buf[:n], -1)) >= count {
+                       return
+               }
+       }
+}