From: Akshat Kumar Date: Fri, 18 Jan 2013 21:43:25 +0000 (-0500) Subject: syscall, os: fix a fork-exec/wait race in Plan 9. X-Git-Tag: go1.1rc2~1352 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b62847000b3c9c3f7837b54dab0d1234944b0722;p=gostls13.git syscall, os: fix a fork-exec/wait race in Plan 9. On Plan 9, only the parent of a given process can enter its wait queue. When a Go program tries to fork-exec a child process and subsequently waits for it to finish, the goroutines doing these two tasks do not necessarily tie themselves to the same (or any single) OS thread. In the case that the fork and the wait system calls happen on different OS threads (say, due to a goroutine being rescheduled somewhere along the way), the wait() will either return an error or end up waiting for a completely different child than was intended. This change forces the fork and wait syscalls to happen in the same goroutine and ties that goroutine to its OS thread until the child exits. The PID of the child is recorded upon fork and exit, and de-queued once the child's wait message has been read. The Wait API, then, is translated into a synthetic implementation that simply waits for the requested PID to show up in the queue and then reads the associated stats. R=rsc, rminnich, npe, mirtchovski, ality CC=golang-dev https://golang.org/cl/6545051 --- diff --git a/src/pkg/os/exec_plan9.go b/src/pkg/os/exec_plan9.go index 2a7a597637..2bd5b6888d 100644 --- a/src/pkg/os/exec_plan9.go +++ b/src/pkg/os/exec_plan9.go @@ -75,20 +75,12 @@ func (p *Process) wait() (ps *ProcessState, err error) { if p.Pid == -1 { return nil, ErrInvalid } - - for true { - err = syscall.Await(&waitmsg) - - if err != nil { - return nil, NewSyscallError("wait", err) - } - - if waitmsg.Pid == p.Pid { - p.setDone() - break - } + err = syscall.WaitProcess(p.Pid, &waitmsg) + if err != nil { + return nil, NewSyscallError("wait", err) } + p.setDone() ps = &ProcessState{ pid: waitmsg.Pid, status: &waitmsg, diff --git a/src/pkg/syscall/exec_plan9.go b/src/pkg/syscall/exec_plan9.go index 26531514ad..ae0cd0d4b5 100644 --- a/src/pkg/syscall/exec_plan9.go +++ b/src/pkg/syscall/exec_plan9.go @@ -7,6 +7,7 @@ package syscall import ( + "runtime" "sync" "unsafe" ) @@ -499,9 +500,68 @@ func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) return forkExec(argv0, argv, attr) } +type waitErr struct { + Waitmsg + err error +} + +var procs struct { + sync.Mutex + waits map[int]chan *waitErr +} + +// startProcess starts a new goroutine, tied to the OS +// thread, which runs the process and subsequently waits +// for it to finish, communicating the process stats back +// to any goroutines that may have been waiting on it. +// +// Such a dedicated goroutine is needed because on +// Plan 9, only the parent thread can wait for a child, +// whereas goroutines tend to jump OS threads (e.g., +// between starting a process and running Wait(), the +// goroutine may have been rescheduled). +func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) { + type forkRet struct { + pid int + err error + } + + forkc := make(chan forkRet, 1) + go func() { + runtime.LockOSThread() + var ret forkRet + + ret.pid, ret.err = forkExec(argv0, argv, attr) + // If fork fails there is nothing to wait for. + if ret.err != nil || ret.pid == 0 { + forkc <- ret + return + } + + waitc := make(chan *waitErr, 1) + + // Mark that the process is running. + procs.Lock() + if procs.waits == nil { + procs.waits = make(map[int]chan *waitErr) + } + procs.waits[ret.pid] = waitc + procs.Unlock() + + forkc <- ret + + var w waitErr + w.err = Await(&w.Waitmsg) + waitc <- &w + close(waitc) + }() + ret := <-forkc + return ret.pid, ret.err +} + // StartProcess wraps ForkExec for package os. func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) { - pid, err = forkExec(argv0, argv, attr) + pid, err = startProcess(argv0, argv, attr) return pid, 0, err } @@ -548,3 +608,35 @@ func Exec(argv0 string, argv []string, envv []string) (err error) { return e1 } + +// WaitProcess waits until the pid of a +// running process is found in the queue of +// wait messages. It is used in conjunction +// with StartProcess to wait for a running +// process to exit. +func WaitProcess(pid int, w *Waitmsg) (err error) { + procs.Lock() + ch := procs.waits[pid] + procs.Unlock() + + var wmsg *waitErr + if ch != nil { + wmsg = <-ch + procs.Lock() + if procs.waits[pid] == ch { + delete(procs.waits, pid) + } + procs.Unlock() + } + if wmsg == nil { + // ch was missing or ch is closed + return NewError("process not found") + } + if wmsg.err != nil { + return wmsg.err + } + if w != nil { + *w = wmsg.Waitmsg + } + return nil +}