runtime: support preemption on windows/{386,amd64}
author    Austin Clements <austin@google.com>    Fri, 25 Oct 2019 20:17:41 +0000 (16:17 -0400)
committer Austin Clements <austin@google.com>    Wed, 20 Nov 2019 17:13:59 +0000 (17:13 +0000)
This implements preemptM on Windows using SuspendThread and
ResumeThread.

Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need to make a few other tweaks to suspendG, sketched
below:

1. We need to CAS the G back to _Grunning before doing the preemptM,
   or there's a good chance we'll just catch the G spinning on its
   status in the runtime, which won't be preemptible.

2. We need to rate-limit preemptM attempts. Otherwise, if the first
   attempt catches the G at a non-preemptible point, the busy loop in
   suspendG may hammer it so hard that it never makes it past that
   non-preemptible point.
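
In sketch form, the _Grunning case of suspendG's loop becomes the
following (a condensed rendering of the preempt.go hunk below, with
the surrounding loop bookkeeping elided):

	gp.preempt = true
	gp.stackguard0 = stackPreempt

	// Tweak 1: set the G back to _Grunning before preemptM, so a
	// synchronous suspend doesn't just catch it spinning on its
	// status in the runtime.
	casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

	// Tweak 2: rate-limit the preemptM attempts.
	if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
		if now := nanotime(); now >= nextPreemptM {
			nextPreemptM = now + yieldDelay/2
			preemptM(asyncM)
		}
	}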

Updates #10958, #24543.

Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/runtime/os_windows.go
src/runtime/preempt.go

diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 00f4c6ec2841dc5afe13d1bffd038465e6029dd0..cf5837c1f0c86901320e6a64eb9bcf147db8d802 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -6,6 +6,7 @@ package runtime
 
 import (
        "runtime/internal/atomic"
+       "runtime/internal/sys"
        "unsafe"
 )
 
@@ -32,6 +33,7 @@ const (
 //go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
@@ -79,6 +81,7 @@ var (
        _GetSystemInfo,
        _GetSystemTimeAsFileTime,
        _GetThreadContext,
+       _SetThreadContext,
        _LoadLibraryW,
        _LoadLibraryA,
        _PostQueuedCompletionStatus,
@@ -539,6 +542,11 @@ var exiting uint32
 
 //go:nosplit
 func exit(code int32) {
+       // Disallow thread suspension for preemption. Otherwise,
+       // ExitProcess and SuspendThread can race: SuspendThread
+       // queues a suspension request for this thread, ExitProcess
+       // kills the suspending thread, and then this thread suspends.
+       lock(&suspendLock)
        atomic.Store(&exiting, 1)
        stdcall1(_ExitProcess, uintptr(code))
 }
@@ -1003,19 +1011,22 @@ func profilem(mp *m, thread uintptr) {
        r.contextflags = _CONTEXT_CONTROL
        stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
 
-       var gp *g
+       gp := gFromTLS(mp)
+
+       sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+}
+
+func gFromTLS(mp *m) *g {
        switch GOARCH {
-       default:
-               panic("unsupported architecture")
        case "arm":
                tls := &mp.tls[0]
-               gp = **((***g)(unsafe.Pointer(tls)))
+               return **((***g)(unsafe.Pointer(tls)))
        case "386", "amd64":
                tls := &mp.tls[0]
-               gp = *((**g)(unsafe.Pointer(tls)))
+               return *((**g)(unsafe.Pointer(tls)))
        }
-
-       sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+       throw("unsupported architecture")
+       return nil
 }
 
 func profileloop1(param uintptr) uint32 {
@@ -1081,10 +1092,95 @@ func setThreadCPUProfiler(hz int32) {
        atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
 }
 
-const preemptMSupported = false
+const preemptMSupported = GOARCH != "arm"
+
+// suspendLock protects simultaneous SuspendThread operations from
+// suspending each other.
+var suspendLock mutex
 
 func preemptM(mp *m) {
-       // Not currently supported.
-       //
-       // TODO: Use SuspendThread/GetThreadContext/ResumeThread
+       if GOARCH == "arm" {
+               // TODO: Implement call injection
+               return
+       }
+
+       if mp == getg().m {
+               throw("self-preempt")
+       }
+
+       // Acquire our own handle to mp's thread.
+       lock(&mp.threadLock)
+       if mp.thread == 0 {
+               // The M hasn't been minit'd yet (or was just unminit'd).
+               unlock(&mp.threadLock)
+               atomic.Xadd(&mp.preemptGen, 1)
+               return
+       }
+       var thread uintptr
+       stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+       unlock(&mp.threadLock)
+
+       // Prepare thread context buffer.
+       var c *context
+       cbuf := make([]byte, unsafe.Sizeof(*c)+15)
+       // Align Context to 16 bytes.
+       c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
+       c.contextflags = _CONTEXT_CONTROL
+
+       // Serialize thread suspension. SuspendThread is asynchronous,
+       // so it's otherwise possible for two threads to suspend each
+       // other and deadlock. We must hold this lock until after
+       // GetThreadContext, since that blocks until the thread is
+       // actually suspended.
+       lock(&suspendLock)
+
+       // Suspend the thread.
+       if int32(stdcall1(_SuspendThread, thread)) == -1 {
+               unlock(&suspendLock)
+               stdcall1(_CloseHandle, thread)
+               // The thread no longer exists. This shouldn't be
+               // possible, but just acknowledge the request.
+               atomic.Xadd(&mp.preemptGen, 1)
+               return
+       }
+
+       // We have to be very careful between this point and once
+       // we've shown mp is at an async safe-point. This is like a
+       // signal handler in the sense that mp could have been doing
+       // anything when we stopped it, including holding arbitrary
+       // locks.
+
+       // We have to get the thread context before inspecting the M
+       // because SuspendThread only requests a suspend.
+       // GetThreadContext actually blocks until it's suspended.
+       stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+
+       unlock(&suspendLock)
+
+       // Does it want a preemption and is it safe to preempt?
+       gp := gFromTLS(mp)
+       if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
+               // Inject call to asyncPreempt
+               targetPC := funcPC(asyncPreempt)
+               switch GOARCH {
+               default:
+                       throw("unsupported architecture")
+               case "386", "amd64":
+                       // Make it look like the thread called targetPC.
+                       pc := c.ip()
+                       sp := c.sp()
+                       sp -= sys.PtrSize
+                       *(*uintptr)(unsafe.Pointer(sp)) = pc
+                       c.set_sp(sp)
+                       c.set_ip(targetPC)
+               }
+
+               stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+       }
+
+       // Acknowledge the preemption.
+       atomic.Xadd(&mp.preemptGen, 1)
+
+       stdcall1(_ResumeThread, thread)
+       stdcall1(_CloseHandle, thread)
 }
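
The injection works because, on 386/amd64, a CALL is just "push the
return address, then jump". Replaying that by hand on the suspended
thread's context makes it resume as if it had called asyncPreempt
voluntarily (illustrative sketch; the 8-byte word size assumes amd64,
386 uses 4 via sys.PtrSize):

	// Suspended:     After injection (as if the thread ran CALL asyncPreempt):
	//   IP = pc        IP = funcPC(asyncPreempt)
	//   SP = sp        SP = sp - 8, with *(sp - 8) = pc
	//
	// asyncPreempt saves the register state, yields to the scheduler,
	// restores it, and returns; its RET pops pc, so the thread resumes
	// at the interrupted instruction transparently.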
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
index f1546149139fcbf7185359374c8c4e4251746179..60e1bcef5fa2a94ab0b3163e90dc13586bc2afe7 100644
--- a/src/runtime/preempt.go
+++ b/src/runtime/preempt.go
@@ -118,6 +118,7 @@ func suspendG(gp *g) suspendGState {
        stopped := false
        var asyncM *m
        var asyncGen uint32
+       var nextPreemptM int64
        for i := 0; ; i++ {
                switch s := readgstatus(gp); s {
                default:
@@ -205,14 +206,32 @@ func suspendG(gp *g) suspendGState {
                        gp.preempt = true
                        gp.stackguard0 = stackPreempt
 
-                       // Send asynchronous preemption.
-                       asyncM = gp.m
-                       asyncGen = atomic.Load(&asyncM.preemptGen)
-                       if preemptMSupported && debug.asyncpreemptoff == 0 {
-                               preemptM(asyncM)
-                       }
+                       // Prepare for asynchronous preemption.
+                       asyncM2 := gp.m
+                       asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+                       needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+                       asyncM = asyncM2
+                       asyncGen = asyncGen2
 
                        casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+                       // Send asynchronous preemption. We do this
+                       // after CASing the G back to _Grunning
+                       // because preemptM may be synchronous and we
+                       // don't want to catch the G just spinning on
+                       // its status.
+                       if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+                               // Rate limit preemptM calls. This is
+                               // particularly important on Windows
+                               // where preemptM is actually
+                               // synchronous and the spin loop here
+                               // can lead to live-lock.
+                               now := nanotime()
+                               if now >= nextPreemptM {
+                                       nextPreemptM = now + yieldDelay/2
+                                       preemptM(asyncM)
+                               }
+                       }
                }
 
                // TODO: Don't busy wait. This loop should really only
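
The rate limit guards against a failure mode specific to a synchronous
preemptM: because suspendG spins, an unthrottled preemptM could suspend
the target at the same non-preemptible PC on every iteration and never
let it reach a safe point. A sketch of the degenerate loop it prevents
(hypothetical, simplified):

	// Without the nextPreemptM check, the _Grunning case degenerates to:
	for !stopped {
		preemptM(asyncM) // suspend, see a non-preemptible PC, resume
		// The target barely executes between suspensions, so it may
		// never get past the non-preemptible region: live-lock.
	}
	// With the check, preemptM fires at most once per yieldDelay/2
	// nanoseconds, giving the target time to make real progress.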