runtime: support preemption on windows/{386,amd64}
author    Austin Clements <austin@google.com>    Fri, 25 Oct 2019 20:17:41 +0000 (16:17 -0400)
committer Austin Clements <austin@google.com>    Wed, 20 Nov 2019 17:13:59 +0000 (17:13 +0000)
This implements preemptM on Windows using SuspendThread and
ResumeThread.

Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need to make a few other tweaks to suspendG, sketched
below:

1. We need to CAS the G back to _Grunning before doing the preemptM,
   or there's a good chance we'll just catch the G spinning on its
   status in the runtime, which won't be preemptible.

2. We need to rate-limit preemptM attempts. Otherwise, if the first
   attempt catches the G at a non-preemptible point, the busy loop in
   suspendG may hammer it so hard that it never makes it past that
   non-preemptible point.
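
In sketch form, the _Grunning case of suspendG's loop becomes the
following (a condensed rendering of the preempt.go hunk below, with
the surrounding loop bookkeeping elided):

	gp.preempt = true
	gp.stackguard0 = stackPreempt

	// Tweak 1: set the G back to _Grunning before preemptM, so a
	// synchronous suspend doesn't just catch it spinning on its
	// status in the runtime.
	casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

	// Tweak 2: rate-limit the preemptM attempts.
	if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
		if now := nanotime(); now >= nextPreemptM {
			nextPreemptM = now + yieldDelay/2
			preemptM(asyncM)
		}
	}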

Updates #10958, #24543.

Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/runtime/os_windows.go
src/runtime/preempt.go

diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 00f4c6ec2841dc5afe13d1bffd038465e6029dd0..cf5837c1f0c86901320e6a64eb9bcf147db8d802 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -6,6 +6,7 @@ package runtime
 
 import (
        "runtime/internal/atomic"
+       "runtime/internal/sys"
        "unsafe"
 )
 
@@ -32,6 +33,7 @@ const (
 //go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
@@ -79,6 +81,7 @@ var (
        _GetSystemInfo,
        _GetSystemTimeAsFileTime,
        _GetThreadContext,
+       _SetThreadContext,
        _LoadLibraryW,
        _LoadLibraryA,
        _PostQueuedCompletionStatus,
@@ -539,6 +542,11 @@ var exiting uint32
 
 //go:nosplit
 func exit(code int32) {
+       // Disallow thread suspension for preemption. Otherwise,
+       // ExitProcess and SuspendThread can race: SuspendThread
+       // queues a suspension request for this thread, ExitProcess
+       // kills the suspending thread, and then this thread suspends.
+       lock(&suspendLock)
        atomic.Store(&exiting, 1)
        stdcall1(_ExitProcess, uintptr(code))
 }
@@ -1003,19 +1011,22 @@ func profilem(mp *m, thread uintptr) {
        r.contextflags = _CONTEXT_CONTROL
        stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
 
-       var gp *g
+       gp := gFromTLS(mp)
+
+       sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+}
+
+func gFromTLS(mp *m) *g {
        switch GOARCH {
-       default:
-               panic("unsupported architecture")
        case "arm":
                tls := &mp.tls[0]
-               gp = **((***g)(unsafe.Pointer(tls)))
+               return **((***g)(unsafe.Pointer(tls)))
        case "386", "amd64":
                tls := &mp.tls[0]
-               gp = *((**g)(unsafe.Pointer(tls)))
+               return *((**g)(unsafe.Pointer(tls)))
        }
-
-       sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+       throw("unsupported architecture")
+       return nil
 }
 
 func profileloop1(param uintptr) uint32 {
@@ -1081,10 +1092,95 @@ func setThreadCPUProfiler(hz int32) {
        atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
 }
 
-const preemptMSupported = false
+const preemptMSupported = GOARCH != "arm"
+
+// suspendLock protects simultaneous SuspendThread operations from
+// suspending each other.
+var suspendLock mutex
 
 func preemptM(mp *m) {
-       // Not currently supported.
-       //
-       // TODO: Use SuspendThread/GetThreadContext/ResumeThread
+       if GOARCH == "arm" {
+               // TODO: Implement call injection
+               return
+       }
+
+       if mp == getg().m {
+               throw("self-preempt")
+       }
+
+       // Acquire our own handle to mp's thread.
+       lock(&mp.threadLock)
+       if mp.thread == 0 {
+               // The M hasn't been minit'd yet (or was just unminit'd).
+               unlock(&mp.threadLock)
+               atomic.Xadd(&mp.preemptGen, 1)
+               return
+       }
+       var thread uintptr
+       stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+       unlock(&mp.threadLock)
+
+       // Prepare thread context buffer.
+       var c *context
+       cbuf := make([]byte, unsafe.Sizeof(*c)+15)
+       // Align Context to 16 bytes.
+       c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
+       c.contextflags = _CONTEXT_CONTROL
+
+       // Serialize thread suspension. SuspendThread is asynchronous,
+       // so it's otherwise possible for two threads to suspend each
+       // other and deadlock. We must hold this lock until after
+       // GetThreadContext, since that blocks until the thread is
+       // actually suspended.
+       lock(&suspendLock)
+
+       // Suspend the thread.
+       if int32(stdcall1(_SuspendThread, thread)) == -1 {
+               unlock(&suspendLock)
+               stdcall1(_CloseHandle, thread)
+               // The thread no longer exists. This shouldn't be
+               // possible, but just acknowledge the request.
+               atomic.Xadd(&mp.preemptGen, 1)
+               return
+       }
+
+       // We have to be very careful between this point and once
+       // we've shown mp is at an async safe-point. This is like a
+       // signal handler in the sense that mp could have been doing
+       // anything when we stopped it, including holding arbitrary
+       // locks.
+
+       // We have to get the thread context before inspecting the M
+       // because SuspendThread only requests a suspend.
+       // GetThreadContext actually blocks until it's suspended.
+       stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+
+       unlock(&suspendLock)
+
+       // Does it want a preemption and is it safe to preempt?
+       gp := gFromTLS(mp)
+       if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
+               // Inject call to asyncPreempt
+               targetPC := funcPC(asyncPreempt)
+               switch GOARCH {
+               default:
+                       throw("unsupported architecture")
+               case "386", "amd64":
+                       // Make it look like the thread called targetPC.
+                       pc := c.ip()
+                       sp := c.sp()
+                       sp -= sys.PtrSize
+                       *(*uintptr)(unsafe.Pointer(sp)) = pc
+                       c.set_sp(sp)
+                       c.set_ip(targetPC)
+               }
+
+               stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+       }
+
+       // Acknowledge the preemption.
+       atomic.Xadd(&mp.preemptGen, 1)
+
+       stdcall1(_ResumeThread, thread)
+       stdcall1(_CloseHandle, thread)
 }
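
The injection works because, on 386/amd64, a CALL is just "push the
return address, then jump". Replaying that by hand on the suspended
thread's context makes it resume as if it had called asyncPreempt
voluntarily (illustrative sketch; the 8-byte word size assumes amd64,
386 uses 4 via sys.PtrSize):

	// Suspended:     After injection (as if the thread ran CALL asyncPreempt):
	//   IP = pc        IP = funcPC(asyncPreempt)
	//   SP = sp        SP = sp - 8, with *(sp - 8) = pc
	//
	// asyncPreempt saves the register state, yields to the scheduler,
	// restores it, and returns; its RET pops pc, so the thread resumes
	// at the interrupted instruction transparently.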
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
index f1546149139fcbf7185359374c8c4e4251746179..60e1bcef5fa2a94ab0b3163e90dc13586bc2afe7 100644
--- a/src/runtime/preempt.go
+++ b/src/runtime/preempt.go
@@ -118,6 +118,7 @@ func suspendG(gp *g) suspendGState {
        stopped := false
        var asyncM *m
        var asyncGen uint32
+       var nextPreemptM int64
        for i := 0; ; i++ {
                switch s := readgstatus(gp); s {
                default:
@@ -205,14 +206,32 @@ func suspendG(gp *g) suspendGState {
                        gp.preempt = true
                        gp.stackguard0 = stackPreempt
 
-                       // Send asynchronous preemption.
-                       asyncM = gp.m
-                       asyncGen = atomic.Load(&asyncM.preemptGen)
-                       if preemptMSupported && debug.asyncpreemptoff == 0 {
-                               preemptM(asyncM)
-                       }
+                       // Prepare for asynchronous preemption.
+                       asyncM2 := gp.m
+                       asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+                       needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+                       asyncM = asyncM2
+                       asyncGen = asyncGen2
 
                        casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+                       // Send asynchronous preemption. We do this
+                       // after CASing the G back to _Grunning
+                       // because preemptM may be synchronous and we
+                       // don't want to catch the G just spinning on
+                       // its status.
+                       if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+                               // Rate limit preemptM calls. This is
+                               // particularly important on Windows
+                               // where preemptM is actually
+                               // synchronous and the spin loop here
+                               // can lead to live-lock.
+                               now := nanotime()
+                               if now >= nextPreemptM {
+                                       nextPreemptM = now + yieldDelay/2
+                                       preemptM(asyncM)
+                               }
+                       }
                }
 
                // TODO: Don't busy wait. This loop should really only
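
The rate limit guards against a failure mode specific to a synchronous
preemptM: because suspendG spins, an unthrottled preemptM could suspend
the target at the same non-preemptible PC on every iteration and never
let it reach a safe point. A sketch of the degenerate loop it prevents
(hypothetical, simplified):

	// Without the nextPreemptM check, the _Grunning case degenerates to:
	for !stopped {
		preemptM(asyncM) // suspend, see a non-preemptible PC, resume
		// The target barely executes between suspensions, so it may
		// never get past the non-preemptible region: live-lock.
	}
	// With the check, preemptM fires at most once per yieldDelay/2
	// nanoseconds, giving the target time to make real progress.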