[release-branch.go1.14] runtime: ensure startm new M is consistently visible to checkdead

author Michael Pratt <mpratt@google.com>

Mon, 27 Jul 2020 19:04:17 +0000 (15:04 -0400)

committer Dmitri Shuralyov <dmitshur@golang.org>

Sat, 22 Aug 2020 01:48:02 +0000 (01:48 +0000)
author Michael Pratt <mpratt@google.com>
Mon, 27 Jul 2020 19:04:17 +0000 (15:04 -0400)
committer Dmitri Shuralyov <dmitshur@golang.org>
Sat, 22 Aug 2020 01:48:02 +0000 (01:48 +0000)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go

index a66b4d02515fd654c0797376e58978427ac34a16..378d02dea7022f0987b3176312f3b2de8465ee88 100644 (file)
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -131,7 +131,7 @@ func main() {
  
         if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
                 systemstack(func() {
-                       newm(sysmon, nil)
+                       newm(sysmon, nil, -1)
                 })
         }
  
@@ -544,7 +544,7 @@ func schedinit() {
         stackinit()
         mallocinit()
         fastrandinit() // must run before mcommoninit
-       mcommoninit(_g_.m)
+       mcommoninit(_g_.m, -1)
         cpuinit()       // must run before alginit
         alginit()       // maps must not be used before this call
         modulesinit()   // provides activeModules
@@ -605,7 +605,22 @@ func checkmcount() {
         }
  }
  
-func mcommoninit(mp *m) {
+// mReserveID returns the next ID to use for a new m. This new m is immediately
+// considered 'running' by checkdead.
+//
+// sched.lock must be held.
+func mReserveID() int64 {
+       if sched.mnext+1 < sched.mnext {
+               throw("runtime: thread ID overflow")
+       }
+       id := sched.mnext
+       sched.mnext++
+       checkmcount()
+       return id
+}
+
+// A pre-allocated ID may be passed as 'id', or omitted by passing -1.
+func mcommoninit(mp *m, id int64) {
         _g_ := getg()
  
         // g0 stack won't make sense for user (and is not necessary unwindable).
@@ -614,12 +629,12 @@ func mcommoninit(mp *m) {
         }
  
         lock(&sched.lock)
-       if sched.mnext+1 < sched.mnext {
-               throw("runtime: thread ID overflow")
+
+       if id >= 0 {
+               mp.id = id
+       } else {
+               mp.id = mReserveID()
         }
-       mp.id = sched.mnext
-       sched.mnext++
-       checkmcount()
  
         mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
         mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
@@ -1006,7 +1021,7 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 {
                         notewakeup(&mp.park)
                 } else {
                         // Start M to run P.  Do not start another M below.
-                       newm(nil, p)
+                       newm(nil, p, -1)
                 }
         }
  
@@ -1353,12 +1368,13 @@ type cgothreadstart struct {
  // Allocate a new m unassociated with any thread.
  // Can use p for allocation context if needed.
  // fn is recorded as the new m's m.mstartfn.
+// id is an optional pre-allocated m ID. Omit it by passing -1.
  //
  // This function is allowed to have write barriers even if the caller
  // isn't because it borrows _p_.
  //
  //go:yeswritebarrierrec
-func allocm(_p_ *p, fn func()) *m {
+func allocm(_p_ *p, fn func(), id int64) *m {
         _g_ := getg()
         acquirem() // disable GC because it can be called from sysmon
         if _g_.m.p == 0 {
@@ -1387,7 +1403,7 @@ func allocm(_p_ *p, fn func()) *m {
  
         mp := new(m)
         mp.mstartfn = fn
-       mcommoninit(mp)
+       mcommoninit(mp, id)
  
         // In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
         // Windows and Plan 9 will layout sched stack on OS stack.
@@ -1526,7 +1542,7 @@ func oneNewExtraM() {
         // The sched.pc will never be returned to, but setting it to
         // goexit makes clear to the traceback routines where
         // the goroutine stack ends.
-       mp := allocm(nil, nil)
+       mp := allocm(nil, nil, -1)
         gp := malg(4096)
         gp.sched.pc = funcPC(goexit) + sys.PCQuantum
         gp.sched.sp = gp.stack.hi
@@ -1699,9 +1715,11 @@ var newmHandoff struct {
  // Create a new m. It will start off with a call to fn, or else the scheduler.
  // fn needs to be static and not a heap allocated closure.
  // May run with m.p==nil, so write barriers are not allowed.
+//
+// id is an optional pre-allocated m ID. Omit it by passing -1.
  //go:nowritebarrierrec
-func newm(fn func(), _p_ *p) {
-       mp := allocm(_p_, fn)
+func newm(fn func(), _p_ *p, id int64) {
+       mp := allocm(_p_, fn, id)
         mp.nextp.set(_p_)
         mp.sigmask = initSigmask
         if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
@@ -1770,7 +1788,7 @@ func startTemplateThread() {
                 releasem(mp)
                 return
         }
-       newm(templateThread, nil)
+       newm(templateThread, nil, -1)
         releasem(mp)
  }
  
@@ -1865,16 +1883,31 @@ func startm(_p_ *p, spinning bool) {
                 }
         }
         mp := mget()
-       unlock(&sched.lock)
         if mp == nil {
+               // No M is available, we must drop sched.lock and call newm.
+               // However, we already own a P to assign to the M.
+               //
+               // Once sched.lock is released, another G (e.g., in a syscall)
+               // could find no idle P while checkdead finds a runnable G but
+               // no running M's because this new M hasn't started yet, thus
+               // throwing in an apparent deadlock.
+               //
+               // Avoid this situation by pre-allocating the ID for the new M,
+               // thus marking it as 'running' before we drop sched.lock. This
+               // new M will eventually run the scheduler to execute any
+               // queued G's.
+               id := mReserveID()
+               unlock(&sched.lock)
+
                 var fn func()
                 if spinning {
                         // The caller incremented nmspinning, so set m.spinning in the new M.
                         fn = mspinning
                 }
-               newm(fn, _p_)
+               newm(fn, _p_, id)
                 return
         }
+       unlock(&sched.lock)
         if mp.spinning {
                 throw("startm: m is spinning")
         }
author	Michael Pratt <mpratt@google.com>
	Mon, 27 Jul 2020 19:04:17 +0000 (15:04 -0400)
committer	Dmitri Shuralyov <dmitshur@golang.org>
	Sat, 22 Aug 2020 01:48:02 +0000 (01:48 +0000)