]> Cypherpunks repositories - gostls13.git/commitdiff
runtime/metrics: add the last GC cycle that had the limiter enabled
author Michael Anthony Knyszek <mknyszek@google.com>
Fri, 13 May 2022 15:14:54 +0000 (15:14 +0000)
committer Michael Knyszek <mknyszek@google.com>
Fri, 13 May 2022 20:45:19 +0000 (20:45 +0000)
This metric exports the last GC cycle index that the GC limiter was
enabled. This metric is useful for debugging and identifying the root
cause of OOMs, especially when SetMemoryLimit is in use.

For #48409.

Change-Id: Ic6383b19e88058366a74f6ede1683b8ffb30a69c
Reviewed-on: https://go-review.googlesource.com/c/go/+/403614
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>

src/runtime/metrics.go
src/runtime/metrics/description.go
src/runtime/metrics/doc.go
src/runtime/mgclimit.go

index 2bff44c46686b402081d35f77aeebc68f9c39646..75ad318b07bc4b0ab62ad46cb12a841e4d7cf949 100644 (file)
@@ -165,6 +165,12 @@ func initMetrics() {
                                out.scalar = uint64(in.heapStats.tinyAllocCount)
                        },
                },
+               "/gc/limiter/last-enabled:gc-cycle": {
+                       compute: func(_ *statAggregate, out *metricValue) {
+                               out.kind = metricKindUint64
+                               out.scalar = uint64(gcCPULimiter.lastEnabledCycle.Load())
+                       },
+               },
                "/gc/pauses:seconds": {
                        compute: func(_ *statAggregate, out *metricValue) {
                                hist := out.float64HistOrInit(timeHistBuckets)
index a33d9a2c35af236c0f9f524ca02c79c2fb5e2699..ee99d3938df607c87b69fe7e44c9c61a486be7cd 100644 (file)
@@ -140,6 +140,15 @@ var allDesc = []Description{
                Kind:       KindUint64,
                Cumulative: true,
        },
+       {
+               Name: "/gc/limiter/last-enabled:gc-cycle",
+               Description: "GC cycle the last time the GC CPU limiter was enabled. " +
+                       "This metric is useful for diagnosing the root cause of an out-of-memory " +
+                       "error, because the limiter trades memory for CPU time when the GC's CPU " +
+                       "time gets too high. This is most likely to occur with use of SetMemoryLimit. " +
+                       "The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.",
+               Kind: KindUint64,
+       },
        {
                Name:        "/gc/pauses:seconds",
                Description: "Distribution individual GC-related stop-the-world pause latencies.",
index b4d99f72bb5a10ffdb78ce44eabd8d0d2ea77894..28c9f6abb5eb3ac71759bd679c75efb3ff3b0c35 100644 (file)
@@ -102,6 +102,13 @@ Below is the full list of supported metrics, ordered lexicographically.
                only their block. Each block is already accounted for in
                allocs-by-size and frees-by-size.
 
+       /gc/limiter/last-enabled:gc-cycle
+               GC cycle the last time the GC CPU limiter was enabled.
+               This metric is useful for diagnosing the root cause of an out-of-memory
+               error, because the limiter trades memory for CPU time when the GC's CPU
+               time gets too high. This is most likely to occur with use of SetMemoryLimit.
+               The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.
+
        /gc/pauses:seconds
                Distribution individual GC-related stop-the-world pause latencies.
 
index b930af3340eb95438a339077da1104c84a70a6a1..cbe5500be61b980c208fda70324fb56894128049 100644 (file)
@@ -40,8 +40,8 @@ type gcCPULimiterState struct {
                // - fill <= capacity
                fill, capacity uint64
        }
-       // TODO(mknyszek): Export this as a runtime/metric to provide an estimate of
-       // how much GC work is being dropped on the floor.
+       // overflow is the cumulative amount of GC CPU time that we tried to fill the
+       // bucket with but exceeded its capacity.
        overflow uint64
 
        // gcEnabled is an internal copy of gcBlackenEnabled that determines
@@ -65,6 +65,9 @@ type gcCPULimiterState struct {
        // Updated under lock, but may be read concurrently.
        lastUpdate atomic.Int64
 
+       // lastEnabledCycle is the GC cycle that last had the limiter enabled.
+       lastEnabledCycle atomic.Uint32
+
        // nprocs is an internal copy of gomaxprocs, used to determine total available
        // CPU time.
        //
@@ -203,6 +206,7 @@ func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
                l.bucket.fill = l.bucket.capacity
                if !enabled {
                        l.enabled.Store(true)
+                       l.lastEnabledCycle.Store(memstats.numgc + 1)
                }
                return
        }
@@ -254,6 +258,7 @@ func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
        if l.bucket.fill > l.bucket.capacity {
                l.bucket.fill = l.bucket.capacity
                l.enabled.Store(true)
+               l.lastEnabledCycle.Store(memstats.numgc + 1)
        } else if l.bucket.fill < l.bucket.capacity {
                l.enabled.Store(false)
        }