close(doneCh)
}()
- // Wait for an arbitrary timeout longer than one second. The subprocess itself
- // attempts to sleep for one second, but if the machine running the test is
- // heavily loaded that subprocess may not schedule very quickly even if the
- // bug remains fixed. (This is fine, because if the bug really is unfixed we
- // can keep the process hung indefinitely, as long as we signal it often
- // enough.)
- timeout := 10 * time.Second
-
// The subprocess begins sleeping for 1s after it writes to stdout, so measure
// the timeout from here (not from when we started creating the process).
// That should reduce noise from process startup overhead.
t.Fatalf("The program stopped too quickly.")
}
}
+ signaled := 0
for {
select {
- case now := <-ticker.C:
- if now.Sub(ready) > timeout {
- // If we got paused for a long time, for example if GODEBUG=gcstoptheworld=2,
- // it could be that the subprocess did actually finish and not deadlock, we
- // just got stuck as runnable or our wakeup was delayed. Double-check that
- // we don't have anything from doneCh before declaring failure.
- select {
- case err := <-doneCh:
- checkDoneErr(err)
- return
- default:
- }
- t.Error("Program failed to return on time and has to be killed, issue #27520 still exists")
- // Send SIGQUIT to get a goroutine dump.
- // Stop sending SIGIO so that the program can clean up and actually terminate.
- cmd.Process.Signal(syscall.SIGQUIT)
+ // Wait for an arbitrary timeout longer than one second (we use the test binary's
+ // timeout). The subprocess itself attempts to sleep for one second, but if the
+ // machine running the test is heavily loaded that subprocess may not schedule
+ // very quickly even if the bug remains fixed. (This is fine, because if the bug
+ // really is unfixed we can keep the process hung indefinitely, as long as we
+ // signal it often enough.)
+ case <-t.Context().Done():
+ // If we got paused for a long time for any reason (we're running in parallel
+ // with other tests after all) it could be that the subprocess did actually
+ // finish and not deadlock, we just got stuck as runnable or our wakeup was
+ // delayed. Double-check that we don't have anything from doneCh before
+ // declaring failure.
+ select {
+ case err := <-doneCh:
+ checkDoneErr(err)
return
+ default:
}
+ // Send SIGQUIT to get a goroutine dump.
+ // Stop sending SIGIO so that the program can clean up and actually terminate.
+ cmd.Process.Signal(syscall.SIGQUIT)
+
+ // For failure, we require that we have waited at least 2 seconds. Otherwise
+ // if another test just ran for a long time and we just happened to push the
+ // overall binary to timeout, we will report a failure here that will just
+ // muddy the waters for whoever is trying to investigate the timeout.
+ const minTime = 2 * time.Second
+ if dur := time.Now().Sub(ready); dur > minTime {
+ t.Logf("signaled program %d times", signaled)
+ t.Logf("time waited: %v", dur)
+ t.Error("program failed to return on time and has to be killed, issue #27520 still exists")
+ } else {
+ t.Skipf("test context deadline ended, but test ran for %s<%s, cutting it short", dur, minTime)
+ }
+ return
+ case <-ticker.C:
// Send the pesky signal that toggles spinning
// indefinitely if #27520 is not fixed.
cmd.Process.Signal(syscall.SIGIO)
-
+ signaled++
case err := <-doneCh:
checkDoneErr(err)
return