From: Dmitry Vyukov Date: Tue, 13 Jan 2015 17:12:50 +0000 (+0300) Subject: runtime: fix spurious deadlock in netpoll X-Git-Tag: go1.5beta1~2357 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=776aecaf6e16076bde940124c309dc6e5027c6e2;p=gostls13.git runtime: fix spurious deadlock in netpoll There is a small possibility that runtime deadlocks when netpoll is just activated. Consider the following scenario: GOMAXPROCS=1 epfd=-1 (netpoll is not activated yet) A thread is in findrunnable, sets sched.lastpoll=0, calls netpoll(true), which returns nil. Now the thread is descheduled for some time. Then sysmon retakes a P from syscall and calls handoffp. The "If this is the last running P and nobody is polling network" check in handoffp fails, since the first thread set sched.lastpoll=0. So handoffp decides that there is already a thread that polls network and so it calls pidleput. Now the first thread is scheduled again, finds no work and calls stopm. There is no thread that polls network and so checkdead reports deadlock. To fix this, don't set sched.lastpoll=0 when netpoll is not activated. The deadlock can happen if cgo is disabled (-tag=netgo) and only on program startup (when netpoll is just activated). The test is from issue 5216 that lead to addition of the "If this is the last running P and nobody is polling network" check in handoffp. Update issue 9576. Change-Id: I9405f627a4d37bd6b99d5670d4328744aeebfc7a Reviewed-on: https://go-review.googlesource.com/2750 Reviewed-by: Ian Lance Taylor --- diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 24fe338b91..43cea9008a 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -526,3 +526,31 @@ func TestRecoverBeforePanicAfterGoexit(t *testing.T) { }() runtime.Goexit() } + +func TestNetpollDeadlock(t *testing.T) { + output := executeTest(t, netpollDeadlockSource, nil) + want := "done\n" + if !strings.HasSuffix(output, want) { + t.Fatalf("output does not start with %q:\n%s", want, output) + } +} + +const netpollDeadlockSource = ` +package main +import ( + "fmt" + "net" +) +func init() { + fmt.Println("dialing") + c, err := net.Dial("tcp", "localhost:14356") + if err == nil { + c.Close() + } else { + fmt.Println("error: ", err) + } +} +func main() { + fmt.Println("done") +} +` diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index 0bd372319a..3ef4506491 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -69,11 +69,19 @@ type pollCache struct { // seq is incremented when deadlines are changed or descriptor is reused. } -var pollcache pollCache +var ( + netpollInited uint32 + pollcache pollCache +) //go:linkname net_runtime_pollServerInit net.runtime_pollServerInit func net_runtime_pollServerInit() { netpollinit() + atomicstore(&netpollInited, 1) +} + +func netpollinited() bool { + return atomicload(&netpollInited) != 0 } //go:linkname net_runtime_pollOpen net.runtime_pollOpen diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go index 22ea7a9d27..6fa407f0ce 100644 --- a/src/runtime/proc1.go +++ b/src/runtime/proc1.go @@ -1338,7 +1338,7 @@ stop: } // poll network - if xchg64(&sched.lastpoll, 0) != 0 { + if netpollinited() && xchg64(&sched.lastpoll, 0) != 0 { if _g_.m.p != nil { throw("findrunnable: netpoll with p") }