From 2e0cd2aef5924e48e1ceb74e3d52e76c56dd34cc Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 27 Jun 2019 20:30:28 -0400 Subject: [PATCH] net: deflake TestVariousDeadlines TestVariousDeadlines starts a client and server. The client dials the server, sets a timeout on the connection, reads from it, gets a timeout error, closes the connection. The server writes an infinite stream of a's to each connection it accepts. The test was trying to run these in lockstep: run a client dial+read+timeout+close, wait for server to accept+write+error out on write to closed connection, repeat. On FreeBSD 11.2 and less frequently on macOS we see the test timeout waiting for the server to do its half of the lockstep dance. I believe the problem is that the client can do its step of the dance with such a short timeout that the read, timeout, and close happens before the server ever returns from the accept(2) system call. For the purposes of testing the client-side read timeout, this is fine. But I suspect that under some circumstances, the "TCP-accepted" connection does not translate into a "socket-layer-accepted" connection that triggers a return from accept(2). That is, the Go server never sees the connection at all. And the test sits there waiting for it to acknowledge being done with a connection it never started with. Fix the problem by not trying to lockstep with the server. This definitely fixes the flake, since the specific line that was calling t.Fatal is now deleted. This exposes a different flake, seen on a trybot run for an early version of this CL, in which the client's io.Copy does not stop within the time allotted. The problem now is that there is no guarantee that a read beyond the deadline with available data returns an error instead of the available data, yet the test assumes this guarantee, and in fact the opposite is usually true - we don't bother checking the deadline unless the read needs to block. That is, deadlines don't cut off a flood of available data, yet this test thinks they do. This CL therefore also changes the server not to send an infinite flood of data - don't send any data at all - so that the read deadline is guaranteed to be exercised. Fixes #19519. Change-Id: I58057c3ed94ac2aebab140ea597f317abae6e65e Reviewed-on: https://go-review.googlesource.com/c/go/+/184137 Run-TryBot: Russ Cox TryBot-Result: Gobot Gobot Reviewed-by: Bryan C. Mills --- src/net/timeout_test.go | 47 ++++++++++++----------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/src/net/timeout_test.go b/src/net/timeout_test.go index 4b9fe7eba9..93e46025a2 100644 --- a/src/net/timeout_test.go +++ b/src/net/timeout_test.go @@ -826,24 +826,14 @@ func testVariousDeadlines(t *testing.T) { d time.Duration } - ch := make(chan error, 1) - pasvch := make(chan result) handler := func(ls *localServer, ln Listener) { for { c, err := ln.Accept() if err != nil { - ch <- err - return + break } - // The server, with no timeouts of its own, - // sending bytes to clients as fast as it can. - go func() { - t0 := time.Now() - n, err := io.Copy(c, neverEnding('a')) - dt := time.Since(t0) - c.Close() - pasvch <- result{n, err, dt} - }() + c.Read(make([]byte, 1)) // wait for client to close connection + c.Close() } } ls, err := newLocalServer("tcp") @@ -884,18 +874,18 @@ func testVariousDeadlines(t *testing.T) { } } for run := 0; run < numRuns; run++ { - name := fmt.Sprintf("%v run %d/%d", timeout, run+1, numRuns) + name := fmt.Sprintf("%v %d/%d", timeout, run, numRuns) t.Log(name) + tooSlow := time.NewTimer(5 * time.Second) + defer tooSlow.Stop() + c, err := Dial(ls.Listener.Addr().Network(), ls.Listener.Addr().String()) if err != nil { t.Fatal(err) } - tooLong := 5 * time.Second - max := time.NewTimer(tooLong) - defer max.Stop() - actvch := make(chan result) + ch := make(chan result, 1) go func() { t0 := time.Now() if err := c.SetDeadline(t0.Add(timeout)); err != nil { @@ -904,27 +894,18 @@ func testVariousDeadlines(t *testing.T) { n, err := io.Copy(ioutil.Discard, c) dt := time.Since(t0) c.Close() - actvch <- result{n, err, dt} + ch <- result{n, err, dt} }() select { - case res := <-actvch: + case res := <-ch: if nerr, ok := res.err.(Error); ok && nerr.Timeout() { - t.Logf("for %v, good client timeout after %v, reading %d bytes", name, res.d, res.n) + t.Logf("%v: good timeout after %v; %d bytes", name, res.d, res.n) } else { - t.Fatalf("for %v, client Copy = %d, %v; want timeout", name, res.n, res.err) + t.Fatalf("%v: Copy = %d, %v; want timeout", name, res.n, res.err) } - case <-max.C: - t.Fatalf("for %v, timeout (%v) waiting for client to timeout (%v) reading", name, tooLong, timeout) - } - - select { - case res := <-pasvch: - t.Logf("for %v, server in %v wrote %d: %v", name, res.d, res.n, res.err) - case err := <-ch: - t.Fatalf("for %v, Accept = %v", name, err) - case <-max.C: - t.Fatalf("for %v, timeout waiting for server to finish writing", name) + case <-tooSlow.C: + t.Fatalf("%v: client stuck in Dial+Copy", name) } } } -- 2.50.0