cmd/go: use Cancel and WaitDelay to terminate test subprocesses

author Bryan C. Mills <bcmills@google.com>

Wed, 7 Dec 2022 22:26:17 +0000 (17:26 -0500)

committer Gopher Robot <gobot@golang.org>

Thu, 19 Jan 2023 20:45:56 +0000 (20:45 +0000)
author Bryan C. Mills <bcmills@google.com>
Wed, 7 Dec 2022 22:26:17 +0000 (17:26 -0500)
committer Gopher Robot <gobot@golang.org>
Thu, 19 Jan 2023 20:45:56 +0000 (20:45 +0000)
diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go

index fe6e733538a3fa8a3b7d91a7f662736e9d2f5478..be024f44647e74ceaf0f7e2290a3ab5b1ca77482 100644 (file)
--- a/src/cmd/go/internal/test/test.go
+++ b/src/cmd/go/internal/test/test.go
@@ -1276,7 +1276,12 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
                 }
         }
  
-       cmd := exec.Command(args[0], args[1:]...)
+       // Normally, the test will terminate itself when the timeout expires,
+       // but add a last-ditch deadline to detect and stop wedged binaries.
+       ctx, cancel := context.WithTimeout(ctx, testKillTimeout)
+       defer cancel()
+
+       cmd := exec.CommandContext(ctx, args[0], args[1:]...)
         cmd.Dir = a.Package.Dir
  
         env := cfg.OrigEnv[:len(cfg.OrigEnv):len(cfg.OrigEnv)]
@@ -1309,42 +1314,33 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
                 cmd.Env = env
         }
  
-       base.StartSigHandlers()
-       t0 := time.Now()
-       err = cmd.Start()
-
-       // This is a last-ditch deadline to detect and
-       // stop wedged test binaries, to keep the builders
-       // running.
-       if err == nil {
-               tick := time.NewTimer(testKillTimeout)
-               done := make(chan error)
-               go func() {
-                       done <- cmd.Wait()
-               }()
-       Outer:
-               select {
-               case err = <-done:
-                       // ok
-               case <-tick.C:
-                       if base.SignalTrace != nil {
-                               // Send a quit signal in the hope that the program will print
-                               // a stack trace and exit. Give it five seconds before resorting
-                               // to Kill.
-                               cmd.Process.Signal(base.SignalTrace)
-                               select {
-                               case err = <-done:
-                                       fmt.Fprintf(cmd.Stdout, "*** Test killed with %v: ran too long (%v).\n", base.SignalTrace, testKillTimeout)
-                                       break Outer
-                               case <-time.After(5 * time.Second):
-                               }
+       var (
+               cancelKilled   = false
+               cancelSignaled = false
+       )
+       cmd.Cancel = func() error {
+               if base.SignalTrace == nil {
+                       err := cmd.Process.Kill()
+                       if err == nil {
+                               cancelKilled = true
                         }
-                       cmd.Process.Kill()
-                       err = <-done
-                       fmt.Fprintf(cmd.Stdout, "*** Test killed: ran too long (%v).\n", testKillTimeout)
+                       return err
                 }
-               tick.Stop()
+
+               // Send a quit signal in the hope that the program will print
+               // a stack trace and exit.
+               err := cmd.Process.Signal(base.SignalTrace)
+               if err == nil {
+                       cancelSignaled = true
+               }
+               return err
         }
+       // Give the test five seconds to exit after the signal before resorting to Kill.
+       cmd.WaitDelay = 5 * time.Second
+
+       base.StartSigHandlers()
+       t0 := time.Now()
+       err = cmd.Run()
         out := buf.Bytes()
         a.TestOutput = &buf
         t := fmt.Sprintf("%.3fs", time.Since(t0).Seconds())
@@ -1374,7 +1370,13 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
                 r.c.saveOutput(a)
         } else {
                 base.SetExitStatus(1)
-               if len(out) == 0 {
+               if cancelSignaled {
+                       fmt.Fprintf(cmd.Stdout, "*** Test killed with %v: ran too long (%v).\n", base.SignalTrace, testKillTimeout)
+               } else if cancelKilled {
+                       fmt.Fprintf(cmd.Stdout, "*** Test killed: ran too long (%v).\n", testKillTimeout)
+               }
+               var ee *exec.ExitError
+               if len(out) == 0 || !errors.As(err, &ee) || !ee.Exited() {
                         // If there was no test output, print the exit status so that the reason
                         // for failure is clear.
                         fmt.Fprintf(cmd.Stdout, "%s\n", err)
diff --git a/src/cmd/go/testdata/script/test_timeout_stdin.txt b/src/cmd/go/testdata/script/test_timeout_stdin.txt

new file mode 100644 (file)

index 0000000..606ffa7
--- /dev/null
+++ b/src/cmd/go/testdata/script/test_timeout_stdin.txt
@@ -0,0 +1,88 @@
+# Regression test for https://go.dev/issue/24050:
+# a test that exits with an I/O stream held open
+# should fail after a reasonable delay, not wait forever.
+# (As of the time of writing, that delay is 5 seconds,
+# but this test does not depend on its specific value.)
+
+[short] skip 'runs a test that hangs until its WaitDelay expires'
+
+! go test -v -timeout=1m .
+
+       # After the test process itself prints PASS and exits,
+       # the kernel closes its stdin pipe to to the orphaned subprocess.
+       # At that point, we expect the subprocess to print 'stdin closed'
+       # and periodically log to stderr until the WaitDelay expires.
+       #
+       # Once the WaitDelay expires, the copying goroutine for 'go test' stops and
+       # closes the read side of the stderr pipe, and the subprocess will eventually
+       # exit due to a failed write to that pipe.
+
+stdout '^--- PASS: TestOrphanCmd .*\nPASS\nstdin closed'
+stdout '^exec: WaitDelay expired before I/O complete\nFAIL\s+example\s+\d+(\.\d+)?s'
+
+-- go.mod --
+module example
+
+go 1.20
+-- main_test.go --
+package main
+
+import (
+       "fmt"
+       "io"
+       "os"
+       "os/exec"
+       "testing"
+       "time"
+)
+
+func TestMain(m *testing.M) {
+       if os.Getenv("TEST_TIMEOUT_HANG") == "1" {
+               io.Copy(io.Discard, os.Stdin)
+               if _, err := os.Stderr.WriteString("stdin closed\n"); err != nil {
+                       os.Exit(1)
+               }
+
+               ticker := time.NewTicker(100 * time.Millisecond)
+               for t := range ticker.C {
+                       _, err := fmt.Fprintf(os.Stderr, "still alive at %v\n", t)
+                       if err != nil {
+                               os.Exit(1)
+                       }
+               }
+       }
+
+       m.Run()
+}
+
+func TestOrphanCmd(t *testing.T) {
+       exe, err := os.Executable()
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       cmd := exec.Command(exe)
+       cmd.Env = append(cmd.Environ(), "TEST_TIMEOUT_HANG=1")
+
+       // Hold stdin open until this (parent) process exits.
+       if _, err := cmd.StdinPipe(); err != nil {
+               t.Fatal(err)
+       }
+
+       // Forward stderr to the subprocess so that it can hold the stream open.
+       cmd.Stderr = os.Stderr
+
+       if err := cmd.Start(); err != nil {
+               t.Fatal(err)
+       }
+       t.Logf("started %v", cmd)
+
+       // Intentionally leak cmd when the test completes.
+       // This will allow the test process itself to exit, but (at least on Unix
+       // platforms) will keep the parent process's stderr stream open.
+       go func() {
+               if err := cmd.Wait(); err != nil {
+                       os.Exit(3)
+               }
+       }()
+}
author	Bryan C. Mills <bcmills@google.com>
	Wed, 7 Dec 2022 22:26:17 +0000 (17:26 -0500)
committer	Gopher Robot <gobot@golang.org>
	Thu, 19 Jan 2023 20:45:56 +0000 (20:45 +0000)
src/cmd/go/internal/test/test.go		patch \| blob \| history
src/cmd/go/testdata/script/test_timeout_stdin.txt	[new file with mode: 0644]	patch \| blob