From 082afccebfc068207328fcaeb4b01e540784ff24 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Tue, 1 Nov 2022 08:26:58 -0400 Subject: [PATCH] internal/testenv: adjust timeout calculations in CommandContext I noticed some test failures in the build dashboard after CL 445597 that made me realize the grace period should be based on the test timeout, not the Context timeout: if the test itself sets a short timeout for a command, we still want to give the test process enough time to consume and log its output. I also put some more thought into how one might debug a test hang, and realized that in that case we don't want to set a WaitDelay at all: instead, we want to leave the processes in their stuck state so that they can be investigated with tools like `ps` and 'lsof'. Updates #50436. Change-Id: I65421084f44eeaaaec5dd2741cd836e9e68dd380 Reviewed-on: https://go-review.googlesource.com/c/go/+/446875 Auto-Submit: Bryan Mills Reviewed-by: Ian Lance Taylor Run-TryBot: Bryan Mills TryBot-Result: Gopher Robot --- src/internal/testenv/exec.go | 76 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/src/internal/testenv/exec.go b/src/internal/testenv/exec.go index c78df45382..77de59c70a 100644 --- a/src/internal/testenv/exec.go +++ b/src/internal/testenv/exec.go @@ -79,8 +79,8 @@ func CleanCmdEnv(cmd *exec.Cmd) *exec.Cmd { // - skips t if the platform does not support os/exec, // - sends SIGQUIT (if supported by the platform) instead of SIGKILL // in its Cancel function -// - adds a timeout (with an arbitrary grace period) before the test's deadline expires, -// - sets a WaitDelay for an arbitrary grace period, +// - if the test has a deadline, adds a Context timeout and WaitDelay +// for an arbitrary grace period before the test's deadline expires, // - fails the test if the command does not complete before the test's deadline, and // - sets a Cleanup function that verifies that the test did not leak a subprocess. func CommandContext(t testing.TB, ctx context.Context, name string, args ...string) *exec.Cmd { @@ -88,54 +88,56 @@ func CommandContext(t testing.TB, ctx context.Context, name string, args ...stri MustHaveExec(t) var ( - gracePeriod = 100 * time.Millisecond - cancel context.CancelFunc + cancelCtx context.CancelFunc + gracePeriod time.Duration // unlimited unless the test has a deadline (to allow for interactive debugging) ) - if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" { - scale, err := strconv.Atoi(s) - if err != nil { - t.Fatalf("invalid GO_TEST_TIMEOUT_SCALE: %v", err) - } - gracePeriod *= time.Duration(scale) - } if t, ok := t.(interface { testing.TB Deadline() (time.Time, bool) }); ok { if td, ok := t.Deadline(); ok { - if cd, ok := ctx.Deadline(); !ok || cd.Sub(td) > gracePeriod { - // Either ctx doesn't have a deadline, or its deadline would expire - // after (or too close before) the test has already timed out. - // Compute a new timeout that will expire before the test does so that - // we can terminate the subprocess with a more useful signal. - - timeout := time.Until(td) - - // If time allows, increase the termination grace period to 5% of the - // remaining time. - if gp := timeout / 20; gp > gracePeriod { - gracePeriod = gp + // Start with a minimum grace period, just long enough to consume the + // output of a reasonable program after it terminates. + gracePeriod = 100 * time.Millisecond + if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" { + scale, err := strconv.Atoi(s) + if err != nil { + t.Fatalf("invalid GO_TEST_TIMEOUT_SCALE: %v", err) } + gracePeriod *= time.Duration(scale) + } + + // If time allows, increase the termination grace period to 5% of the + // test's remaining time. + testTimeout := time.Until(td) + if gp := testTimeout / 20; gp > gracePeriod { + gracePeriod = gp + } - // When we run commands that execute subprocesses, we want to reserve two - // grace periods to clean up. We will send the first termination signal when - // the context expires, then wait one grace period for the process to - // produce whatever useful output it can (such as a stack trace). After the - // first grace period expires, we'll escalate to os.Kill, leaving the second - // grace period for the test function to record its output before the test - // process itself terminates. - timeout -= 2 * gracePeriod - - ctx, cancel = context.WithTimeout(ctx, timeout) - t.Cleanup(cancel) + // When we run commands that execute subprocesses, we want to reserve two + // grace periods to clean up: one for the delay between the first + // termination signal being sent (via the Cancel callback when the Context + // expires) and the process being forcibly terminated (via the WaitDelay + // field), and a second one for the delay becween the process being + // terminated and and the test logging its output for debugging. + // + // (We want to ensure that the test process itself has enough time to + // log the output before it is also terminated.) + cmdTimeout := testTimeout - 2*gracePeriod + + if cd, ok := ctx.Deadline(); !ok || time.Until(cd) > cmdTimeout { + // Either ctx doesn't have a deadline, or its deadline would expire + // after (or too close before) the test has already timed out. + // Add a shorter timeout so that the test will produce useful output. + ctx, cancelCtx = context.WithTimeout(ctx, cmdTimeout) } } } cmd := exec.CommandContext(ctx, name, args...) cmd.Cancel = func() error { - if cancel != nil && ctx.Err() == context.DeadlineExceeded { + if cancelCtx != nil && ctx.Err() == context.DeadlineExceeded { // The command timed out due to running too close to the test's deadline. // There is no way the test did that intentionally — it's too close to the // wire! — so mark it as a test failure. That way, if the test expects the @@ -154,8 +156,8 @@ func CommandContext(t testing.TB, ctx context.Context, name string, args ...stri cmd.WaitDelay = gracePeriod t.Cleanup(func() { - if cancel != nil { - cancel() + if cancelCtx != nil { + cancelCtx() } if cmd.Process != nil && cmd.ProcessState == nil { t.Errorf("command was started, but test did not wait for it to complete: %v", cmd) -- 2.50.0