From 84cd7ab3c388fb3280cf426a35045d85cda99ff8 Mon Sep 17 00:00:00 2001
From: "Bryan C. Mills" <bcmills@google.com>
Date: Wed, 26 Oct 2022 12:18:13 -0400
Subject: [PATCH] internal/testenv: add CommandContext and Command

This adds a testenv.CommandContext function, with timeout behavior
based on the existing logic in cmd/go.TestScript: namely, the command
is terminated with SIGQUIT (if supported) with an arbitrary grace
period remaining until the test's deadline.

If the test environment does not support executing subprocesses,
CommandContext skips the test.

If the command is terminated due to the timout expiring or the test
fails to wait for the command after starting it, CommandContext marks
the test as failing.

For tests where a shorter timeout is desired (such as for fail-fast
behavior), one may be supplied using context.WithTimeout.

The more concise Command helper is like CommandContext but without
the need to supply an explicit Context.

Updates #50436.

Change-Id: Ifd81fb86c402f034063c9e9c03045b4106eab81a
Reviewed-on: https://go-review.googlesource.com/c/go/+/445596
Run-TryBot: Bryan Mills <bcmills@google.com>
Reviewed-by: Austin Clements <austin@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
---
 src/internal/testenv/testenv.go | 98 +++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/src/internal/testenv/testenv.go b/src/internal/testenv/testenv.go
index ae8413efb6..f71f5cdd41 100644
--- a/src/internal/testenv/testenv.go
+++ b/src/internal/testenv/testenv.go
@@ -12,6 +12,7 @@ package testenv
 
 import (
 	"bytes"
+	"context"
 	"errors"
 	"flag"
 	"fmt"
@@ -379,6 +380,103 @@ func CleanCmdEnv(cmd *exec.Cmd) *exec.Cmd {
 	return cmd
 }
 
+// CommandContext is like exec.CommandContext, but:
+//   - skips t if the platform does not support os/exec,
+//   - sends SIGQUIT (if supported by the platform) instead of SIGKILL
+//     in its Cancel function
+//   - adds a timeout (with an arbitrary grace period) before the test's deadline expires,
+//   - sets a WaitDelay for an arbitrary grace period,
+//   - fails the test if the command does not complete before the test's deadline, and
+//   - sets a Cleanup function that verifies that the test did not leak a subprocess.
+func CommandContext(t testing.TB, ctx context.Context, name string, args ...string) *exec.Cmd {
+	t.Helper()
+	MustHaveExec(t)
+
+	var (
+		gracePeriod = 100 * time.Millisecond
+		cancel      context.CancelFunc
+	)
+	if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
+		scale, err := strconv.Atoi(s)
+		if err != nil {
+			t.Fatalf("invalid GO_TEST_TIMEOUT_SCALE: %v", err)
+		}
+		gracePeriod *= time.Duration(scale)
+	}
+
+	if t, ok := t.(interface {
+		testing.TB
+		Deadline() (time.Time, bool)
+	}); ok {
+		if td, ok := t.Deadline(); ok {
+			if cd, ok := ctx.Deadline(); !ok || cd.Sub(td) > gracePeriod {
+				// Either ctx doesn't have a deadline, or its deadline would expire
+				// after (or too close before) the test has already timed out.
+				// Compute a new timeout that will expire before the test does so that
+				// we can terminate the subprocess with a more useful signal.
+
+				timeout := time.Until(td)
+
+				// If time allows, increase the termination grace period to 5% of the
+				// remaining time.
+				if gp := timeout / 20; gp > gracePeriod {
+					gracePeriod = gp
+				}
+
+				// When we run commands that execute subprocesses, we want to reserve two
+				// grace periods to clean up. We will send the first termination signal when
+				// the context expires, then wait one grace period for the process to
+				// produce whatever useful output it can (such as a stack trace). After the
+				// first grace period expires, we'll escalate to os.Kill, leaving the second
+				// grace period for the test function to record its output before the test
+				// process itself terminates.
+				timeout -= 2 * gracePeriod
+
+				ctx, cancel = context.WithTimeout(ctx, timeout)
+				t.Cleanup(cancel)
+			}
+		}
+	}
+
+	cmd := exec.CommandContext(ctx, name, args...)
+	cmd.Cancel = func() error {
+		if cancel != nil && ctx.Err() == context.DeadlineExceeded {
+			// The command timed out due to running too close to the test's deadline.
+			// There is no way the test did that intentionally â it's too close to the
+			// wire! â so mark it as a test failure. That way, if the test expects the
+			// command to fail for some other reason, it doesn't have to distinguish
+			// between that reason and a timeout.
+			t.Errorf("test timed out while running command: %v", cmd)
+		} else {
+			// The command is being terminated due to ctx being canceled, but
+			// apparently not due to an explicit test deadline that we added.
+			// Log that information in case it is useful for diagnosing a failure,
+			// but don't actually fail the test because of it.
+			t.Logf("%v: terminating command: %v", ctx.Err(), cmd)
+		}
+		return cmd.Process.Signal(Sigquit)
+	}
+	cmd.WaitDelay = gracePeriod
+
+	t.Cleanup(func() {
+		if cancel != nil {
+			cancel()
+		}
+		if cmd.Process != nil && cmd.ProcessState == nil {
+			t.Errorf("command was started, but test did not wait for it to complete: %v", cmd)
+		}
+	})
+
+	return cmd
+}
+
+// Command is like exec.Command, but applies the same changes as
+// testenv.CommandContext (with a default Context).
+func Command(t testing.TB, name string, args ...string) *exec.Cmd {
+	t.Helper()
+	return CommandContext(t, context.Background(), name, args...)
+}
+
 // CPUIsSlow reports whether the CPU running the test is suspected to be slow.
 func CPUIsSlow() bool {
 	switch runtime.GOARCH {
-- 
2.52.0