]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: add ReadMemStats latency benchmark
authorMichael Anthony Knyszek <mknyszek@google.com>
Wed, 19 Feb 2020 20:54:25 +0000 (20:54 +0000)
committerMichael Knyszek <mknyszek@google.com>
Wed, 18 Mar 2020 17:59:19 +0000 (17:59 +0000)
This change adds a benchmark to the runtime which measures ReadMemStats
latencies. It generates allocations with lots of pointers to keep the GC
busy while hitting ReadMemStats and measuring the time it takes to
complete.

Updates #19812.

Change-Id: I7a76aaf497ba5324d3c7a7b3df32461b3e6c3ac8
Reviewed-on: https://go-review.googlesource.com/c/go/+/220177
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
src/runtime/gc_test.go

index 54a0e7d604b5537bdd9c8db62c408d2ff5285e32..8ffb4f4a0f4690baa84235c759f1a8c5b246cc61 100644 (file)
@@ -10,6 +10,7 @@ import (
        "reflect"
        "runtime"
        "runtime/debug"
+       "sort"
        "sync"
        "sync/atomic"
        "testing"
@@ -506,6 +507,90 @@ func BenchmarkReadMemStats(b *testing.B) {
        hugeSink = nil
 }
 
+func BenchmarkReadMemStatsLatency(b *testing.B) {
+       // We’ll apply load to the runtime with maxProcs-1 goroutines
+       // and use one more to actually benchmark. It doesn't make sense
+       // to try to run this test with only 1 P (that's what
+       // BenchmarkReadMemStats is for).
+       maxProcs := runtime.GOMAXPROCS(-1)
+       if maxProcs == 1 {
+               b.Skip("This benchmark can only be run with GOMAXPROCS > 1")
+       }
+
+       // Code to build a big tree with lots of pointers.
+       type node struct {
+               children [16]*node
+       }
+       var buildTree func(depth int) *node
+       buildTree = func(depth int) *node {
+               tree := new(node)
+               if depth != 0 {
+                       for i := range tree.children {
+                               tree.children[i] = buildTree(depth - 1)
+                       }
+               }
+               return tree
+       }
+
+       // Keep the GC busy by continuously generating large trees.
+       done := make(chan struct{})
+       var wg sync.WaitGroup
+       for i := 0; i < maxProcs-1; i++ {
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       var hold *node
+               loop:
+                       for {
+                               hold = buildTree(5)
+                               select {
+                               case <-done:
+                                       break loop
+                               default:
+                               }
+                       }
+                       runtime.KeepAlive(hold)
+               }()
+       }
+
+       // Spend this much time measuring latencies.
+       latencies := make([]time.Duration, 0, 1024)
+
+       // Run for timeToBench hitting ReadMemStats continuously
+       // and measuring the latency.
+       b.ResetTimer()
+       var ms runtime.MemStats
+       for i := 0; i < b.N; i++ {
+               // Sleep for a bit, otherwise we're just going to keep
+               // stopping the world and no one will get to do anything.
+               time.Sleep(100 * time.Millisecond)
+               start := time.Now()
+               runtime.ReadMemStats(&ms)
+               latencies = append(latencies, time.Now().Sub(start))
+       }
+       close(done)
+       // Make sure to stop the timer before we wait! The goroutines above
+       // are very heavy-weight and not easy to stop, so we could end up
+       // confusing the benchmarking framework for small b.N.
+       b.StopTimer()
+       wg.Wait()
+
+       // Disable the default */op metrics.
+       // ns/op doesn't mean anything because it's an average, but we
+       // have a sleep in our b.N loop above which skews this significantly.
+       b.ReportMetric(0, "ns/op")
+       b.ReportMetric(0, "B/op")
+       b.ReportMetric(0, "allocs/op")
+
+       // Sort latencies then report percentiles.
+       sort.Slice(latencies, func(i, j int) bool {
+               return latencies[i] < latencies[j]
+       })
+       b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
+       b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
+       b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
+}
+
 func TestUserForcedGC(t *testing.T) {
        // Test that runtime.GC() triggers a GC even if GOGC=off.
        defer debug.SetGCPercent(debug.SetGCPercent(-1))