syscall: avoid serializing forks on ForkLock

author Ian Lance Taylor <iant@golang.org>

Sat, 6 Aug 2022 01:06:51 +0000 (18:06 -0700)

committer Gopher Robot <gobot@golang.org>

Tue, 23 May 2023 17:25:09 +0000 (17:25 +0000)
author Ian Lance Taylor <iant@golang.org>
Sat, 6 Aug 2022 01:06:51 +0000 (18:06 -0700)
committer Gopher Robot <gobot@golang.org>
Tue, 23 May 2023 17:25:09 +0000 (17:25 +0000)
diff --git a/src/sync/rwmutex.go b/src/sync/rwmutex.go

index ad52951311e6641c91382461cdacf9d9ce930b58..1317624035d307c8542d1b2ef930dd0b51877875 100644 (file)
--- a/src/sync/rwmutex.go
+++ b/src/sync/rwmutex.go
@@ -219,6 +219,19 @@ func (rw *RWMutex) Unlock() {
         }
  }
  
+// syscall_hasWaitingReaders reports whether any goroutine is waiting
+// to acquire a read lock on rw. This exists because syscall.ForkLock
+// is an RWMutex, and we can't change that without breaking compatibility.
+// We don't need or want RWMutex semantics for ForkLock, and we use
+// this private API to avoid having to change the type of ForkLock.
+// For more details see the syscall package.
+//
+//go:linkname syscall_hasWaitingReaders syscall.hasWaitingReaders
+func syscall_hasWaitingReaders(rw *RWMutex) bool {
+       r := rw.readerCount.Load()
+       return r < 0 && r+rwmutexMaxReaders > 0
+}
+
  // RLocker returns a Locker interface that implements
  // the Lock and Unlock methods by calling rw.RLock and rw.RUnlock.
  func (rw *RWMutex) RLocker() Locker {
diff --git a/src/syscall/exec_unix.go b/src/syscall/exec_unix.go

index 40e9b9feda5493886d32e11c326928b79a2d3010..4b9c04db83ee2c40a8ebab200b29dedd99a0c2a6 100644 (file)
--- a/src/syscall/exec_unix.go
+++ b/src/syscall/exec_unix.go
@@ -16,7 +16,8 @@ import (
         "unsafe"
  )
  
-// Lock synchronizing creation of new file descriptors with fork.
+// ForkLock is used to synchronize creation of new file descriptors
+// with fork.
  //
  // We want the child in a fork/exec sequence to inherit only the
  // file descriptors we intend. To do that, we mark all file
@@ -53,16 +54,14 @@ import (
  // The rules for which file descriptor-creating operations use the
  // ForkLock are as follows:
  //
-// 1) Pipe. Does not block. Use the ForkLock.
-// 2) Socket. Does not block. Use the ForkLock.
-// 3) Accept. If using non-blocking mode, use the ForkLock.
-//             Otherwise, live with the race.
-// 4) Open. Can block. Use O_CLOEXEC if available (Linux).
-//             Otherwise, live with the race.
-// 5) Dup. Does not block. Use the ForkLock.
-//             On Linux, could use fcntl F_DUPFD_CLOEXEC
-//             instead of the ForkLock, but only for dup(fd, -1).
-
+//   - Pipe. Use pipe2 if available. Otherwise, does not block,
+//     so use ForkLock.
+//   - Socket. Use SOCK_CLOEXEC if available. Otherwise, does not
+//     block, so use ForkLock.
+//   - Open. Use O_CLOEXEC if available. Otherwise, may block,
+//     so live with the race.
+//   - Dup. Use F_DUPFD_CLOEXEC or dup3 if available. Otherwise,
+//     does not block, so use ForkLock.
  var ForkLock sync.RWMutex
  
  // StringSlicePtr converts a slice of strings to a slice of pointers
@@ -194,14 +193,11 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error)
                 return 0, errorspkg.New("Setctty set but Ctty not valid in child")
         }
  
-       // Acquire the fork lock so that no other threads
-       // create new fds that are not yet close-on-exec
-       // before we fork.
-       ForkLock.Lock()
+       acquireForkLock()
  
         // Allocate child status pipe close on exec.
         if err = forkExecPipe(p[:]); err != nil {
-               ForkLock.Unlock()
+               releaseForkLock()
                 return 0, err
         }
  
@@ -210,10 +206,10 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error)
         if err1 != 0 {
                 Close(p[0])
                 Close(p[1])
-               ForkLock.Unlock()
+               releaseForkLock()
                 return 0, Errno(err1)
         }
-       ForkLock.Unlock()
+       releaseForkLock()
  
         // Read child error status from pipe.
         Close(p[1])
@@ -245,6 +241,89 @@ func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error)
         return pid, nil
  }
  
+var (
+       // Guard the forking variable.
+       forkingLock sync.Mutex
+       // Number of goroutines currently forking, and thus the
+       // number of goroutines holding a conceptual write lock
+       // on ForkLock.
+       forking int
+)
+
+// hasWaitingReaders reports whether any goroutine is waiting
+// to acquire a read lock on rw. It is defined in the sync package.
+func hasWaitingReaders(rw *sync.RWMutex) bool
+
+// acquireForkLock acquires a write lock on ForkLock.
+// ForkLock is exported and we've promised that during a fork
+// we will call ForkLock.Lock, so that no other threads create
+// new fds that are not yet close-on-exec before we fork.
+// But that forces all fork calls to be serialized, which is bad.
+// But we haven't promised that serialization, and it is essentially
+// undetectable by other users of ForkLock, which is good.
+// Avoid the serialization by ensuring that ForkLock is locked
+// at the first fork and unlocked when there are no more forks.
+func acquireForkLock() {
+       forkingLock.Lock()
+       defer forkingLock.Unlock()
+
+       if forking == 0 {
+               // There is no current write lock on ForkLock.
+               ForkLock.Lock()
+               forking++
+               return
+       }
+
+       // ForkLock is currently locked for writing.
+
+       if hasWaitingReaders(&ForkLock) {
+               // ForkLock is locked for writing, and at least one
+               // goroutine is waiting to read from it.
+               // To avoid lock starvation, allow readers to proceed.
+               // The simple way to do this is for us to acquire a
+               // read lock. That will block us until all current
+               // conceptual write locks are released.
+               //
+               // Note that this case is unusual on modern systems
+               // with O_CLOEXEC and SOCK_CLOEXEC. On those systems
+               // the standard library should never take a read
+               // lock on ForkLock.
+
+               forkingLock.Unlock()
+
+               ForkLock.RLock()
+               ForkLock.RUnlock()
+
+               forkingLock.Lock()
+
+               // Readers got a chance, so now take the write lock.
+
+               if forking == 0 {
+                       ForkLock.Lock()
+               }
+       }
+
+       forking++
+}
+
+// releaseForkLock releases the conceptual write lock on ForkLock
+// acquired by acquireForkLock.
+func releaseForkLock() {
+       forkingLock.Lock()
+       defer forkingLock.Unlock()
+
+       if forking <= 0 {
+               panic("syscall.releaseForkLock: negative count")
+       }
+
+       forking--
+
+       if forking == 0 {
+               // No more conceptual write locks.
+               ForkLock.Unlock()
+       }
+}
+
  // Combination of fork and exec, careful to be thread safe.
  func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
         return forkExec(argv0, argv, attr)
author	Ian Lance Taylor <iant@golang.org>
	Sat, 6 Aug 2022 01:06:51 +0000 (18:06 -0700)
committer	Gopher Robot <gobot@golang.org>
	Tue, 23 May 2023 17:25:09 +0000 (17:25 +0000)
src/sync/rwmutex.go		patch \| blob \| history
src/syscall/exec_unix.go		patch \| blob \| history