From: Michael Anthony Knyszek Date: Thu, 23 May 2024 20:26:02 +0000 (+0000) Subject: iter: deflake TestPull by letting exiting goroutines finish X-Git-Tag: go1.23rc1~137 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=54efef99b2b9432c2eb6cd63a287a13d43b9bb7b;p=gostls13.git iter: deflake TestPull by letting exiting goroutines finish Currently TestPull is flaky because goroutines spawned to run subtests exit asynchronously when they finish and TestPull has explicit checks for the number of existing goroutines. This is pretty much only a problem between subtests executing, because within each subtest the coroutine goroutine spawned for iter.Pull always exits fully synchronously before the final `next` or `stop` returns. So, we can resolve the problem by ensuring the first goroutine count the test takes likely doesn't contain any exiting goroutines. The trick is to set GOMAXPROCS=1 and spin in runtime.Gosched until the number of goroutines stabilizes to some reasonable degree (we pick 100 consecutive iterations; there are only a handful of possible goroutines that can run, so this is giving that handful around 20 chances to actually run to completion). When running TestPull under stress2, this issue is easily reproducible before this CL. After this CL, it no longer reproduces under these conditions. Fixes #66017. Change-Id: I4bf0a9771f7364df7dd58f8aeb3ae26742d5746f Reviewed-on: https://go-review.googlesource.com/c/go/+/587917 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- diff --git a/src/iter/pull_test.go b/src/iter/pull_test.go index 21db1029af..c39574b959 100644 --- a/src/iter/pull_test.go +++ b/src/iter/pull_test.go @@ -34,7 +34,7 @@ func squares(n int) Seq2[int, int64] { func TestPull(t *testing.T) { for end := 0; end <= 3; end++ { t.Run(fmt.Sprint(end), func(t *testing.T) { - ng := runtime.NumGoroutine() + ng := stableNumGoroutine() wantNG := func(want int) { if xg := runtime.NumGoroutine() - ng; xg != want { t.Helper() @@ -76,7 +76,7 @@ func TestPull(t *testing.T) { func TestPull2(t *testing.T) { for end := 0; end <= 3; end++ { t.Run(fmt.Sprint(end), func(t *testing.T) { - ng := runtime.NumGoroutine() + ng := stableNumGoroutine() wantNG := func(want int) { if xg := runtime.NumGoroutine() - ng; xg != want { t.Helper() @@ -115,6 +115,39 @@ func TestPull2(t *testing.T) { } } +// stableNumGoroutine is like NumGoroutine but tries to ensure stability of +// the value by letting any exiting goroutines finish exiting. +func stableNumGoroutine() int { + // The idea behind stablizing the value of NumGoroutine is to + // see the same value enough times in a row in between calls to + // runtime.Gosched. With GOMAXPROCS=1, we're trying to make sure + // that other goroutines run, so that they reach a stable point. + // It's not guaranteed, because it is still possible for a goroutine + // to Gosched back into itself, so we require NumGoroutine to be + // the same 100 times in a row. This should be more than enough to + // ensure all goroutines get a chance to run to completion (or to + // some block point) for a small group of test goroutines. + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + + c := 0 + ng := runtime.NumGoroutine() + for i := 0; i < 1000; i++ { + nng := runtime.NumGoroutine() + if nng == ng { + c++ + } else { + c = 0 + ng = nng + } + if c >= 100 { + // The same value 100 times in a row is good enough. + return ng + } + runtime.Gosched() + } + panic("failed to stabilize NumGoroutine after 1000 iterations") +} + func TestPullDoubleNext(t *testing.T) { next, _ := Pull(doDoubleNext()) nextSlot = next