From 07ede7a54379eef959cf29af5a87ea19c78a31fd Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Fri, 30 Jun 2023 11:50:47 -0400 Subject: [PATCH] syscall: serialize locks on ForkLock on platforms where forkExecPipe is not atomic In CL 421441, we changed syscall to allow concurrent calls to forkExec. On platforms that support the pipe2 syscall that is the right behavior, because pipe2 atomically opens the pipe with CLOEXEC already set. However, on platforms that do not support pipe2 (currently aix and darwin), syscall.forkExecPipe is not atomic, and the pipes do not initially have CLOEXEC set. If two calls to forkExec proceed concurrently, a pipe intended for one child process can be accidentally inherited by the other. If the process is long-lived, the pipe can be held open unexpectedly and prevent the parent process from reaching EOF reading the child's status from the pipe. Fixes #61080. Updates #23558. Updates #54162. Change-Id: I83edcc80674ff267a39d06260c5697c654ff5a4b Reviewed-on: https://go-review.googlesource.com/c/go/+/507355 TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Run-TryBot: Bryan Mills Auto-Submit: Bryan Mills --- src/os/exec/exec_test.go | 72 +++++++++++++++++++++++++++++++ src/syscall/exec_linux.go | 5 --- src/syscall/exec_unix.go | 83 ------------------------------------ src/syscall/forkpipe.go | 11 ++++- src/syscall/forkpipe2.go | 89 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 170 insertions(+), 90 deletions(-) diff --git a/src/os/exec/exec_test.go b/src/os/exec/exec_test.go index 67cd446f42..d37fffd39d 100644 --- a/src/os/exec/exec_test.go +++ b/src/os/exec/exec_test.go @@ -1708,3 +1708,75 @@ func TestCancelErrors(t *testing.T) { } }) } + +// TestConcurrentExec is a regression test for https://go.dev/issue/61080. +// +// Forking multiple child processes concurrently would sometimes hang on darwin. +// (This test hung on a gomote with -count=100 after only a few iterations.) +func TestConcurrentExec(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + + // This test will spawn nHangs subprocesses that hang reading from stdin, + // and nExits subprocesses that exit immediately. + // + // When issue #61080 was present, a long-lived "hang" subprocess would + // occasionally inherit the fork/exec status pipe from an "exit" subprocess, + // causing the parent process (which expects to see an EOF on that pipe almost + // immediately) to unexpectedly block on reading from the pipe. + var ( + nHangs = runtime.GOMAXPROCS(0) + nExits = runtime.GOMAXPROCS(0) + hangs, exits sync.WaitGroup + ) + hangs.Add(nHangs) + exits.Add(nExits) + + // ready is done when the goroutines have done as much work as possible to + // prepare to create subprocesses. It isn't strictly necessary for the test, + // but helps to increase the repro rate by making it more likely that calls to + // syscall.StartProcess for the "hang" and "exit" goroutines overlap. + var ready sync.WaitGroup + ready.Add(nHangs + nExits) + + for i := 0; i < nHangs; i++ { + go func() { + defer hangs.Done() + + cmd := helperCommandContext(t, ctx, "pipetest") + stdin, err := cmd.StdinPipe() + if err != nil { + ready.Done() + t.Error(err) + return + } + cmd.Cancel = stdin.Close + ready.Done() + + ready.Wait() + if err := cmd.Start(); err != nil { + t.Error(err) + return + } + + cmd.Wait() + }() + } + + for i := 0; i < nExits; i++ { + go func() { + defer exits.Done() + + cmd := helperCommandContext(t, ctx, "exit", "0") + ready.Done() + + ready.Wait() + if err := cmd.Run(); err != nil { + t.Error(err) + } + }() + } + + exits.Wait() + cancel() + hangs.Wait() +} diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index feb1e26432..dfbb38ac16 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -641,11 +641,6 @@ childerror: } } -// Try to open a pipe with O_CLOEXEC set on both file descriptors. -func forkExecPipe(p []int) (err error) { - return Pipe2(p, O_CLOEXEC) -} - func formatIDMappings(idMap []SysProcIDMap) []byte { var data []byte for _, im := range idMap { diff --git a/src/syscall/exec_unix.go b/src/syscall/exec_unix.go index 14edd023d3..9a5f2d3295 100644 --- a/src/syscall/exec_unix.go +++ b/src/syscall/exec_unix.go @@ -241,89 +241,6 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) return pid, nil } -var ( - // Guard the forking variable. - forkingLock sync.Mutex - // Number of goroutines currently forking, and thus the - // number of goroutines holding a conceptual write lock - // on ForkLock. - forking int -) - -// hasWaitingReaders reports whether any goroutine is waiting -// to acquire a read lock on rw. It is defined in the sync package. -func hasWaitingReaders(rw *sync.RWMutex) bool - -// acquireForkLock acquires a write lock on ForkLock. -// ForkLock is exported and we've promised that during a fork -// we will call ForkLock.Lock, so that no other threads create -// new fds that are not yet close-on-exec before we fork. -// But that forces all fork calls to be serialized, which is bad. -// But we haven't promised that serialization, and it is essentially -// undetectable by other users of ForkLock, which is good. -// Avoid the serialization by ensuring that ForkLock is locked -// at the first fork and unlocked when there are no more forks. -func acquireForkLock() { - forkingLock.Lock() - defer forkingLock.Unlock() - - if forking == 0 { - // There is no current write lock on ForkLock. - ForkLock.Lock() - forking++ - return - } - - // ForkLock is currently locked for writing. - - if hasWaitingReaders(&ForkLock) { - // ForkLock is locked for writing, and at least one - // goroutine is waiting to read from it. - // To avoid lock starvation, allow readers to proceed. - // The simple way to do this is for us to acquire a - // read lock. That will block us until all current - // conceptual write locks are released. - // - // Note that this case is unusual on modern systems - // with O_CLOEXEC and SOCK_CLOEXEC. On those systems - // the standard library should never take a read - // lock on ForkLock. - - forkingLock.Unlock() - - ForkLock.RLock() - ForkLock.RUnlock() - - forkingLock.Lock() - - // Readers got a chance, so now take the write lock. - - if forking == 0 { - ForkLock.Lock() - } - } - - forking++ -} - -// releaseForkLock releases the conceptual write lock on ForkLock -// acquired by acquireForkLock. -func releaseForkLock() { - forkingLock.Lock() - defer forkingLock.Unlock() - - if forking <= 0 { - panic("syscall.releaseForkLock: negative count") - } - - forking-- - - if forking == 0 { - // No more conceptual write locks. - ForkLock.Unlock() - } -} - // Combination of fork and exec, careful to be thread safe. func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { return forkExec(argv0, argv, attr) diff --git a/src/syscall/forkpipe.go b/src/syscall/forkpipe.go index 5082abc41c..1f4292f686 100644 --- a/src/syscall/forkpipe.go +++ b/src/syscall/forkpipe.go @@ -6,7 +6,8 @@ package syscall -// Try to open a pipe with O_CLOEXEC set on both file descriptors. +// forkExecPipe opens a pipe and non-atomically sets O_CLOEXEC on both file +// descriptors. func forkExecPipe(p []int) error { err := Pipe(p) if err != nil { @@ -19,3 +20,11 @@ func forkExecPipe(p []int) error { _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) return err } + +func acquireForkLock() { + ForkLock.Lock() +} + +func releaseForkLock() { + ForkLock.Unlock() +} diff --git a/src/syscall/forkpipe2.go b/src/syscall/forkpipe2.go index 6ab1391c12..bbecfdabf8 100644 --- a/src/syscall/forkpipe2.go +++ b/src/syscall/forkpipe2.go @@ -2,10 +2,97 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build dragonfly || freebsd || netbsd || openbsd || solaris +//go:build dragonfly || freebsd || linux || netbsd || openbsd || solaris package syscall +import "sync" + +// forkExecPipe atomically opens a pipe with O_CLOEXEC set on both file +// descriptors. func forkExecPipe(p []int) error { return Pipe2(p, O_CLOEXEC) } + +var ( + // Guard the forking variable. + forkingLock sync.Mutex + // Number of goroutines currently forking, and thus the + // number of goroutines holding a conceptual write lock + // on ForkLock. + forking int +) + +// hasWaitingReaders reports whether any goroutine is waiting +// to acquire a read lock on rw. It is defined in the sync package. +func hasWaitingReaders(rw *sync.RWMutex) bool + +// acquireForkLock acquires a write lock on ForkLock. +// ForkLock is exported and we've promised that during a fork +// we will call ForkLock.Lock, so that no other threads create +// new fds that are not yet close-on-exec before we fork. +// But that forces all fork calls to be serialized, which is bad. +// But we haven't promised that serialization, and it is essentially +// undetectable by other users of ForkLock, which is good. +// Avoid the serialization by ensuring that ForkLock is locked +// at the first fork and unlocked when there are no more forks. +func acquireForkLock() { + forkingLock.Lock() + defer forkingLock.Unlock() + + if forking == 0 { + // There is no current write lock on ForkLock. + ForkLock.Lock() + forking++ + return + } + + // ForkLock is currently locked for writing. + + if hasWaitingReaders(&ForkLock) { + // ForkLock is locked for writing, and at least one + // goroutine is waiting to read from it. + // To avoid lock starvation, allow readers to proceed. + // The simple way to do this is for us to acquire a + // read lock. That will block us until all current + // conceptual write locks are released. + // + // Note that this case is unusual on modern systems + // with O_CLOEXEC and SOCK_CLOEXEC. On those systems + // the standard library should never take a read + // lock on ForkLock. + + forkingLock.Unlock() + + ForkLock.RLock() + ForkLock.RUnlock() + + forkingLock.Lock() + + // Readers got a chance, so now take the write lock. + + if forking == 0 { + ForkLock.Lock() + } + } + + forking++ +} + +// releaseForkLock releases the conceptual write lock on ForkLock +// acquired by acquireForkLock. +func releaseForkLock() { + forkingLock.Lock() + defer forkingLock.Unlock() + + if forking <= 0 { + panic("syscall.releaseForkLock: negative count") + } + + forking-- + + if forking == 0 { + // No more conceptual write locks. + ForkLock.Unlock() + } +} -- 2.48.1