From: Michael Anthony Knyszek Date: Fri, 13 May 2022 15:14:54 +0000 (+0000) Subject: runtime/metrics: add the last GC cycle that had the limiter enabled X-Git-Tag: go1.19beta1~256 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=cfccb5cb7cea77c5272115556e59dea374e4dbd1;p=gostls13.git runtime/metrics: add the last GC cycle that had the limiter enabled This metric exports the the last GC cycle index that the GC limiter was enabled. This metric is useful for debugging and identifying the root cause of OOMs, especially when SetMemoryLimit is in use. For #48409. Change-Id: Ic6383b19e88058366a74f6ede1683b8ffb30a69c Reviewed-on: https://go-review.googlesource.com/c/go/+/403614 TryBot-Result: Gopher Robot Reviewed-by: David Chase Run-TryBot: Michael Knyszek --- diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 2bff44c466..75ad318b07 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -165,6 +165,12 @@ func initMetrics() { out.scalar = uint64(in.heapStats.tinyAllocCount) }, }, + "/gc/limiter/last-enabled:gc-cycle": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(gcCPULimiter.lastEnabledCycle.Load()) + }, + }, "/gc/pauses:seconds": { compute: func(_ *statAggregate, out *metricValue) { hist := out.float64HistOrInit(timeHistBuckets) diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index a33d9a2c35..ee99d3938d 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -140,6 +140,15 @@ var allDesc = []Description{ Kind: KindUint64, Cumulative: true, }, + { + Name: "/gc/limiter/last-enabled:gc-cycle", + Description: "GC cycle the last time the GC CPU limiter was enabled. " + + "This metric is useful for diagnosing the root cause of an out-of-memory " + + "error, because the limiter trades memory for CPU time when the GC's CPU " + + "time gets too high. This is most likely to occur with use of SetMemoryLimit. " + + "The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.", + Kind: KindUint64, + }, { Name: "/gc/pauses:seconds", Description: "Distribution individual GC-related stop-the-world pause latencies.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index b4d99f72bb..28c9f6abb5 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -102,6 +102,13 @@ Below is the full list of supported metrics, ordered lexicographically. only their block. Each block is already accounted for in allocs-by-size and frees-by-size. + /gc/limiter/last-enabled:gc-cycle + GC cycle the last time the GC CPU limiter was enabled. + This metric is useful for diagnosing the root cause of an out-of-memory + error, because the limiter trades memory for CPU time when the GC's CPU + time gets too high. This is most likely to occur with use of SetMemoryLimit. + The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled. + /gc/pauses:seconds Distribution individual GC-related stop-the-world pause latencies. diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go index b930af3340..cbe5500be6 100644 --- a/src/runtime/mgclimit.go +++ b/src/runtime/mgclimit.go @@ -40,8 +40,8 @@ type gcCPULimiterState struct { // - fill <= capacity fill, capacity uint64 } - // TODO(mknyszek): Export this as a runtime/metric to provide an estimate of - // how much GC work is being dropped on the floor. + // overflow is the cumulative amount of GC CPU time that we tried to fill the + // bucket with but exceeded its capacity. overflow uint64 // gcEnabled is an internal copy of gcBlackenEnabled that determines @@ -65,6 +65,9 @@ type gcCPULimiterState struct { // Updated under lock, but may be read concurrently. lastUpdate atomic.Int64 + // lastEnabledCycle is the GC cycle that last had the limiter enabled. + lastEnabledCycle atomic.Uint32 + // nprocs is an internal copy of gomaxprocs, used to determine total available // CPU time. // @@ -203,6 +206,7 @@ func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) { l.bucket.fill = l.bucket.capacity if !enabled { l.enabled.Store(true) + l.lastEnabledCycle.Store(memstats.numgc + 1) } return } @@ -254,6 +258,7 @@ func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) { if l.bucket.fill > l.bucket.capacity { l.bucket.fill = l.bucket.capacity l.enabled.Store(true) + l.lastEnabledCycle.Store(memstats.numgc + 1) } else if l.bucket.fill < l.bucket.capacity { l.enabled.Store(false) }