From 7cfb814b0a43831ec6f847d7bb6ff00020393fcf Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 19 Feb 2020 20:54:25 +0000 Subject: [PATCH] runtime: add ReadMemStats latency benchmark This change adds a benchmark to the runtime which measures ReadMemStats latencies. It generates allocations with lots of pointers to keep the GC busy while hitting ReadMemStats and measuring the time it takes to complete. Updates #19812. Change-Id: I7a76aaf497ba5324d3c7a7b3df32461b3e6c3ac8 Reviewed-on: https://go-review.googlesource.com/c/go/+/220177 Run-TryBot: Michael Knyszek TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: Emmanuel Odeke --- src/runtime/gc_test.go | 85 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 54a0e7d604..8ffb4f4a0f 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -10,6 +10,7 @@ import ( "reflect" "runtime" "runtime/debug" + "sort" "sync" "sync/atomic" "testing" @@ -506,6 +507,90 @@ func BenchmarkReadMemStats(b *testing.B) { hugeSink = nil } +func BenchmarkReadMemStatsLatency(b *testing.B) { + // We’ll apply load to the runtime with maxProcs-1 goroutines + // and use one more to actually benchmark. It doesn't make sense + // to try to run this test with only 1 P (that's what + // BenchmarkReadMemStats is for). + maxProcs := runtime.GOMAXPROCS(-1) + if maxProcs == 1 { + b.Skip("This benchmark can only be run with GOMAXPROCS > 1") + } + + // Code to build a big tree with lots of pointers. + type node struct { + children [16]*node + } + var buildTree func(depth int) *node + buildTree = func(depth int) *node { + tree := new(node) + if depth != 0 { + for i := range tree.children { + tree.children[i] = buildTree(depth - 1) + } + } + return tree + } + + // Keep the GC busy by continuously generating large trees. + done := make(chan struct{}) + var wg sync.WaitGroup + for i := 0; i < maxProcs-1; i++ { + wg.Add(1) + go func() { + defer wg.Done() + var hold *node + loop: + for { + hold = buildTree(5) + select { + case <-done: + break loop + default: + } + } + runtime.KeepAlive(hold) + }() + } + + // Spend this much time measuring latencies. + latencies := make([]time.Duration, 0, 1024) + + // Run for timeToBench hitting ReadMemStats continuously + // and measuring the latency. + b.ResetTimer() + var ms runtime.MemStats + for i := 0; i < b.N; i++ { + // Sleep for a bit, otherwise we're just going to keep + // stopping the world and no one will get to do anything. + time.Sleep(100 * time.Millisecond) + start := time.Now() + runtime.ReadMemStats(&ms) + latencies = append(latencies, time.Now().Sub(start)) + } + close(done) + // Make sure to stop the timer before we wait! The goroutines above + // are very heavy-weight and not easy to stop, so we could end up + // confusing the benchmarking framework for small b.N. + b.StopTimer() + wg.Wait() + + // Disable the default */op metrics. + // ns/op doesn't mean anything because it's an average, but we + // have a sleep in our b.N loop above which skews this significantly. + b.ReportMetric(0, "ns/op") + b.ReportMetric(0, "B/op") + b.ReportMetric(0, "allocs/op") + + // Sort latencies then report percentiles. + sort.Slice(latencies, func(i, j int) bool { + return latencies[i] < latencies[j] + }) + b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") + b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") + b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") +} + func TestUserForcedGC(t *testing.T) { // Test that runtime.GC() triggers a GC even if GOGC=off. defer debug.SetGCPercent(debug.SetGCPercent(-1)) -- 2.50.0