From: Austin Clements
Date: Fri, 8 Mar 2019 20:01:34 +0000 (-0500)
Subject: sync: add Pool benchmarks to stress STW and reuse
X-Git-Tag: go1.13beta1~784
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=59f2704dabf0c68bc645adb0ed4a8d94cdbcd7bb;p=gostls13.git

sync: add Pool benchmarks to stress STW and reuse

This adds two benchmarks that will highlight two problems in Pool
that we're about to address.

The first benchmark measures the impact of large Pools on GC STW
time. Currently, STW time is O(# of items in Pools), and this
benchmark demonstrates 70µs STW times.

The second benchmark measures the impact of fully clearing all Pools
on each GC. Typically this is a problem in heavily-loaded systems
because it causes a spike in allocation. This benchmark stresses this
by simulating an expensive "New" function, so the cost of creating
new objects is reflected in the ns/op of the benchmark.

For #22950, #22331.

Change-Id: I0c8853190d23144026fa11837b6bf42adc461722
Reviewed-on: https://go-review.googlesource.com/c/go/+/166959
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: David Chase
---

diff --git a/src/sync/pool_test.go b/src/sync/pool_test.go
index 62085b5c96..5649a9dc83 100644
--- a/src/sync/pool_test.go
+++ b/src/sync/pool_test.go
@@ -10,6 +10,7 @@ package sync_test
 import (
 	"runtime"
 	"runtime/debug"
+	"sort"
 	. "sync"
 	"sync/atomic"
 	"testing"
@@ -253,3 +254,84 @@ func BenchmarkPoolOverflow(b *testing.B) {
 		}
 	})
 }
+
+var globalSink interface{}
+
+func BenchmarkPoolSTW(b *testing.B) {
+	// Take control of GC.
+	defer debug.SetGCPercent(debug.SetGCPercent(-1))
+
+	var mstats runtime.MemStats
+	var pauses []uint64
+
+	var p Pool
+	for i := 0; i < b.N; i++ {
+		// Put a large number of items into a pool.
+		const N = 100000
+		var item interface{} = 42
+		for i := 0; i < N; i++ {
+			p.Put(item)
+		}
+		// Do a GC.
+		runtime.GC()
+		// Record pause time.
+		runtime.ReadMemStats(&mstats)
+		pauses = append(pauses, mstats.PauseNs[(mstats.NumGC+255)%256])
+	}
+
+	// Get pause time stats.
+	sort.Slice(pauses, func(i, j int) bool { return pauses[i] < pauses[j] })
+	var total uint64
+	for _, ns := range pauses {
+		total += ns
+	}
+	// ns/op for this benchmark is average STW time.
+	b.ReportMetric(float64(total)/float64(b.N), "ns/op")
+	b.ReportMetric(float64(pauses[len(pauses)*95/100]), "p95-ns/STW")
+	b.ReportMetric(float64(pauses[len(pauses)*50/100]), "p50-ns/STW")
+}
+
+func BenchmarkPoolExpensiveNew(b *testing.B) {
+	// Populate a pool with items that are expensive to construct
+	// to stress pool cleanup and subsequent reconstruction.
+
+	// Create a ballast so the GC has a non-zero heap size and
+	// runs at reasonable times.
+	globalSink = make([]byte, 8<<20)
+	defer func() { globalSink = nil }()
+
+	// Create a pool that's "expensive" to fill.
+	var p Pool
+	var nNew uint64
+	p.New = func() interface{} {
+		atomic.AddUint64(&nNew, 1)
+		time.Sleep(time.Millisecond)
+		return 42
+	}
+	var mstats1, mstats2 runtime.MemStats
+	runtime.ReadMemStats(&mstats1)
+	b.RunParallel(func(pb *testing.PB) {
+		// Simulate 100X the number of goroutines having items
+		// checked out from the Pool simultaneously.
+		items := make([]interface{}, 100)
+		var sink []byte
+		for pb.Next() {
+			// Stress the pool.
+			for i := range items {
+				items[i] = p.Get()
+				// Simulate doing some work with this
+				// item checked out.
+				sink = make([]byte, 32<<10)
+			}
+			for i, v := range items {
+				p.Put(v)
+				items[i] = nil
+			}
+		}
+		_ = sink
+	})
+	runtime.ReadMemStats(&mstats2)
+
+	b.ReportMetric(float64(mstats2.NumGC-mstats1.NumGC)/float64(b.N), "GCs/op")
+	b.ReportMetric(float64(nNew)/float64(b.N), "New/op")
+}
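
A note on the pause sampling in BenchmarkPoolSTW: runtime.MemStats.PauseNs
is a circular buffer of the most recent 256 GC pause times, so the latest
pause lives at index (NumGC+255)%256. A minimal standalone sketch of that
sampling technique (lastGCPause is an illustrative helper, not part of
this CL):

	package main

	import (
		"fmt"
		"runtime"
	)

	// lastGCPause reports the most recent GC stop-the-world pause in
	// nanoseconds. PauseNs is a circular buffer of the last 256 pause
	// times; the newest entry is at index (NumGC+255)%256.
	func lastGCPause() uint64 {
		var mstats runtime.MemStats
		runtime.ReadMemStats(&mstats)
		return mstats.PauseNs[(mstats.NumGC+255)%256]
	}

	func main() {
		runtime.GC() // force a collection so there is a pause to read
		fmt.Printf("last STW pause: %d ns\n", lastGCPause())
	}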
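
The New/op metric in BenchmarkPoolExpensiveNew counts how often Get finds
the pool empty and falls back to New, which is exactly what happens after
a GC clears all pools. A minimal sketch of the Get/Put pattern being
stressed (bufPool and logLine are illustrative names, not part of this CL):

	package main

	import (
		"bytes"
		"io"
		"os"
		"sync"
	)

	// bufPool reuses buffers across calls. Get falls back to New only
	// when the pool is empty, e.g. right after a GC has cleared it;
	// that fallback is the cost the New/op metric captures.
	var bufPool = sync.Pool{
		New: func() interface{} {
			return new(bytes.Buffer)
		},
	}

	func logLine(w io.Writer, msg string) {
		buf := bufPool.Get().(*bytes.Buffer)
		buf.Reset()
		buf.WriteString(msg)
		w.Write(buf.Bytes())
		bufPool.Put(buf) // return the buffer so later calls can reuse it
	}

	func main() {
		logLine(os.Stdout, "hello\n")
	}

Both benchmarks can be run with something like:

	go test -run=NONE -bench='PoolSTW|PoolExpensiveNew' sync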