From: Ian Lance Taylor Date: Sat, 6 Aug 2022 01:06:51 +0000 (-0700) Subject: syscall: avoid serializing forks on ForkLock X-Git-Tag: go1.21rc1~346 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d6473a12637945ca88966f6658da663abcbd508b;p=gostls13.git syscall: avoid serializing forks on ForkLock Fixes #23558 Fixes #54162 Change-Id: I3cf6efe466080cdb17e171218e9385ccb272c301 Reviewed-on: https://go-review.googlesource.com/c/go/+/421441 Run-TryBot: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Brad Fitzpatrick TryBot-Result: Gopher Robot Reviewed-by: David Chase Reviewed-by: Bryan Mills Reviewed-by: Michael Knyszek --- diff --git a/src/sync/rwmutex.go b/src/sync/rwmutex.go index ad52951311..1317624035 100644 --- a/src/sync/rwmutex.go +++ b/src/sync/rwmutex.go @@ -219,6 +219,19 @@ func (rw *RWMutex) Unlock() { } } +// syscall_hasWaitingReaders reports whether any goroutine is waiting +// to acquire a read lock on rw. This exists because syscall.ForkLock +// is an RWMutex, and we can't change that without breaking compatibility. +// We don't need or want RWMutex semantics for ForkLock, and we use +// this private API to avoid having to change the type of ForkLock. +// For more details see the syscall package. +// +//go:linkname syscall_hasWaitingReaders syscall.hasWaitingReaders +func syscall_hasWaitingReaders(rw *RWMutex) bool { + r := rw.readerCount.Load() + return r < 0 && r+rwmutexMaxReaders > 0 +} + // RLocker returns a Locker interface that implements // the Lock and Unlock methods by calling rw.RLock and rw.RUnlock. func (rw *RWMutex) RLocker() Locker { diff --git a/src/syscall/exec_unix.go b/src/syscall/exec_unix.go index 40e9b9feda..4b9c04db83 100644 --- a/src/syscall/exec_unix.go +++ b/src/syscall/exec_unix.go @@ -16,7 +16,8 @@ import ( "unsafe" ) -// Lock synchronizing creation of new file descriptors with fork. +// ForkLock is used to synchronize creation of new file descriptors +// with fork. // // We want the child in a fork/exec sequence to inherit only the // file descriptors we intend. To do that, we mark all file @@ -53,16 +54,14 @@ import ( // The rules for which file descriptor-creating operations use the // ForkLock are as follows: // -// 1) Pipe. Does not block. Use the ForkLock. -// 2) Socket. Does not block. Use the ForkLock. -// 3) Accept. If using non-blocking mode, use the ForkLock. -// Otherwise, live with the race. -// 4) Open. Can block. Use O_CLOEXEC if available (Linux). -// Otherwise, live with the race. -// 5) Dup. Does not block. Use the ForkLock. -// On Linux, could use fcntl F_DUPFD_CLOEXEC -// instead of the ForkLock, but only for dup(fd, -1). - +// - Pipe. Use pipe2 if available. Otherwise, does not block, +// so use ForkLock. +// - Socket. Use SOCK_CLOEXEC if available. Otherwise, does not +// block, so use ForkLock. +// - Open. Use O_CLOEXEC if available. Otherwise, may block, +// so live with the race. +// - Dup. Use F_DUPFD_CLOEXEC or dup3 if available. Otherwise, +// does not block, so use ForkLock. var ForkLock sync.RWMutex // StringSlicePtr converts a slice of strings to a slice of pointers @@ -194,14 +193,11 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) return 0, errorspkg.New("Setctty set but Ctty not valid in child") } - // Acquire the fork lock so that no other threads - // create new fds that are not yet close-on-exec - // before we fork. - ForkLock.Lock() + acquireForkLock() // Allocate child status pipe close on exec. if err = forkExecPipe(p[:]); err != nil { - ForkLock.Unlock() + releaseForkLock() return 0, err } @@ -210,10 +206,10 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) if err1 != 0 { Close(p[0]) Close(p[1]) - ForkLock.Unlock() + releaseForkLock() return 0, Errno(err1) } - ForkLock.Unlock() + releaseForkLock() // Read child error status from pipe. Close(p[1]) @@ -245,6 +241,89 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) return pid, nil } +var ( + // Guard the forking variable. + forkingLock sync.Mutex + // Number of goroutines currently forking, and thus the + // number of goroutines holding a conceptual write lock + // on ForkLock. + forking int +) + +// hasWaitingReaders reports whether any goroutine is waiting +// to acquire a read lock on rw. It is defined in the sync package. +func hasWaitingReaders(rw *sync.RWMutex) bool + +// acquireForkLock acquires a write lock on ForkLock. +// ForkLock is exported and we've promised that during a fork +// we will call ForkLock.Lock, so that no other threads create +// new fds that are not yet close-on-exec before we fork. +// But that forces all fork calls to be serialized, which is bad. +// But we haven't promised that serialization, and it is essentially +// undetectable by other users of ForkLock, which is good. +// Avoid the serialization by ensuring that ForkLock is locked +// at the first fork and unlocked when there are no more forks. +func acquireForkLock() { + forkingLock.Lock() + defer forkingLock.Unlock() + + if forking == 0 { + // There is no current write lock on ForkLock. + ForkLock.Lock() + forking++ + return + } + + // ForkLock is currently locked for writing. + + if hasWaitingReaders(&ForkLock) { + // ForkLock is locked for writing, and at least one + // goroutine is waiting to read from it. + // To avoid lock starvation, allow readers to proceed. + // The simple way to do this is for us to acquire a + // read lock. That will block us until all current + // conceptual write locks are released. + // + // Note that this case is unusual on modern systems + // with O_CLOEXEC and SOCK_CLOEXEC. On those systems + // the standard library should never take a read + // lock on ForkLock. + + forkingLock.Unlock() + + ForkLock.RLock() + ForkLock.RUnlock() + + forkingLock.Lock() + + // Readers got a chance, so now take the write lock. + + if forking == 0 { + ForkLock.Lock() + } + } + + forking++ +} + +// releaseForkLock releases the conceptual write lock on ForkLock +// acquired by acquireForkLock. +func releaseForkLock() { + forkingLock.Lock() + defer forkingLock.Unlock() + + if forking <= 0 { + panic("syscall.releaseForkLock: negative count") + } + + forking-- + + if forking == 0 { + // No more conceptual write locks. + ForkLock.Unlock() + } +} + // Combination of fork and exec, careful to be thread safe. func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { return forkExec(argv0, argv, attr)