From: Austin Clements
Date: Fri, 25 Oct 2019 20:17:41 +0000 (-0400)
Subject: runtime: support preemption on windows/{386,amd64}
X-Git-Tag: go1.14beta1~140
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b89b4623eb70cbdc6b0aea43a5a826b7a26f20a7;p=gostls13.git

runtime: support preemption on windows/{386,amd64}

This implements preemptM on Windows using SuspendThread and
ResumeThread.

Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need to make a few other tweaks to suspendG:

1. We need to CAS the G back to _Grunning before doing the preemptM,
   or there's a good chance we'll just catch the G spinning on its
   status in the runtime, which won't be preemptible.

2. We need to rate-limit preemptM attempts. Otherwise, if the first
   attempt catches the G at a non-preemptible point, the busy loop in
   suspendG may hammer it so hard that it never makes it past that
   non-preemptible point.

Updates #10958, #24543.

Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: Cherry Zhang
---
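In outline, the new preemptM works like this; this is a condensed
sketch of the code in the diff below, with error handling, thread
handle acquisition, and the ARM early-out omitted (c is the aligned
CONTEXT buffer prepared earlier in the function):

	lock(&suspendLock)               // serialize suspensions (see below)
	stdcall1(_SuspendThread, thread) // only queues a suspend request
	// GetThreadContext blocks until the thread is actually suspended.
	stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
	unlock(&suspendLock)
	gp := gFromTLS(mp)
	if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
		// Rewrite the context so the thread resumes in asyncPreempt.
		stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
	}
	atomic.Xadd(&mp.preemptGen, 1) // acknowledge, injected or not
	stdcall1(_ResumeThread, thread)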
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 00f4c6ec28..cf5837c1f0 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -6,6 +6,7 @@ package runtime
 
 import (
 	"runtime/internal/atomic"
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -32,6 +33,7 @@ const (
 //go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
 //go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
@@ -79,6 +81,7 @@ var (
 	_GetSystemInfo,
 	_GetSystemTimeAsFileTime,
 	_GetThreadContext,
+	_SetThreadContext,
 	_LoadLibraryW,
 	_LoadLibraryA,
 	_PostQueuedCompletionStatus,
@@ -539,6 +542,11 @@ var exiting uint32
 
 //go:nosplit
 func exit(code int32) {
+	// Disallow thread suspension for preemption. Otherwise,
+	// ExitProcess and SuspendThread can race: SuspendThread
+	// queues a suspension request for this thread, ExitProcess
+	// kills the suspending thread, and then this thread suspends.
+	lock(&suspendLock)
 	atomic.Store(&exiting, 1)
 	stdcall1(_ExitProcess, uintptr(code))
 }
@@ -1003,19 +1011,22 @@ func profilem(mp *m, thread uintptr) {
 	r.contextflags = _CONTEXT_CONTROL
 	stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
 
-	var gp *g
+	gp := gFromTLS(mp)
+
+	sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+}
+
+func gFromTLS(mp *m) *g {
 	switch GOARCH {
-	default:
-		panic("unsupported architecture")
 	case "arm":
 		tls := &mp.tls[0]
-		gp = **((***g)(unsafe.Pointer(tls)))
+		return **((***g)(unsafe.Pointer(tls)))
 	case "386", "amd64":
 		tls := &mp.tls[0]
-		gp = *((**g)(unsafe.Pointer(tls)))
+		return *((**g)(unsafe.Pointer(tls)))
 	}
-
-	sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
+	throw("unsupported architecture")
+	return nil
 }
 
 func profileloop1(param uintptr) uint32 {
@@ -1081,10 +1092,95 @@ func setThreadCPUProfiler(hz int32) {
 	atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
 }
 
-const preemptMSupported = false
+const preemptMSupported = GOARCH != "arm"
+
+// suspendLock protects simultaneous SuspendThread operations from
+// suspending each other.
+var suspendLock mutex
 
 func preemptM(mp *m) {
-	// Not currently supported.
-	//
-	// TODO: Use SuspendThread/GetThreadContext/ResumeThread
+	if GOARCH == "arm" {
+		// TODO: Implement call injection
+		return
+	}
+
+	if mp == getg().m {
+		throw("self-preempt")
+	}
+
+	// Acquire our own handle to mp's thread.
+	lock(&mp.threadLock)
+	if mp.thread == 0 {
+		// The M hasn't been minit'd yet (or was just unminit'd).
+		unlock(&mp.threadLock)
+		atomic.Xadd(&mp.preemptGen, 1)
+		return
+	}
+	var thread uintptr
+	stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+	unlock(&mp.threadLock)
+
+	// Prepare thread context buffer.
+	var c *context
+	cbuf := make([]byte, unsafe.Sizeof(*c)+15)
+	// Align Context to 16 bytes.
+	c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
+	c.contextflags = _CONTEXT_CONTROL
+
+	// Serialize thread suspension. SuspendThread is asynchronous,
+	// so it's otherwise possible for two threads to suspend each
+	// other and deadlock. We must hold this lock until after
+	// GetThreadContext, since that blocks until the thread is
+	// actually suspended.
+	lock(&suspendLock)
+
+	// Suspend the thread.
+	if int32(stdcall1(_SuspendThread, thread)) == -1 {
+		unlock(&suspendLock)
+		stdcall1(_CloseHandle, thread)
+		// The thread no longer exists. This shouldn't be
+		// possible, but just acknowledge the request.
+		atomic.Xadd(&mp.preemptGen, 1)
+		return
+	}
+
+	// We have to be very careful between this point and once
+	// we've shown mp is at an async safe-point. This is like a
+	// signal handler in the sense that mp could have been doing
+	// anything when we stopped it, including holding arbitrary
+	// locks.
+
+	// We have to get the thread context before inspecting the M
+	// because SuspendThread only requests a suspend.
+	// GetThreadContext actually blocks until it's suspended.
+	stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+
+	unlock(&suspendLock)
+
+	// Does it want a preemption and is it safe to preempt?
+	gp := gFromTLS(mp)
+	if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
+		// Inject call to asyncPreempt
+		targetPC := funcPC(asyncPreempt)
+		switch GOARCH {
+		default:
+			throw("unsupported architecture")
+		case "386", "amd64":
+			// Make it look like the thread called targetPC.
+			pc := c.ip()
+			sp := c.sp()
+			sp -= sys.PtrSize
+			*(*uintptr)(unsafe.Pointer(sp)) = pc
+			c.set_sp(sp)
+			c.set_ip(targetPC)
+		}
+
+		stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+	}
+
+	// Acknowledge the preemption.
+	atomic.Xadd(&mp.preemptGen, 1)
+
+	stdcall1(_ResumeThread, thread)
+	stdcall1(_CloseHandle, thread)
 }
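Two details of the os_windows.go change are easy to miss. First, the
CONTEXT record passed to GetThreadContext must be 16-byte aligned on
amd64, which a Go byte-slice allocation does not promise, so preemptM
over-allocates by 15 bytes and rounds the address up. The same trick
as a standalone sketch (alignedContext is a hypothetical name, not
part of the patch):

	// Over-allocate by align-1 bytes, then round up. For example, if
	// buf starts at 0x1008, &buf[15] is 0x1017, and 0x1017 &^ 15 =
	// 0x1010: the first 16-byte boundary inside buf.
	func alignedContext() *context {
		var c *context
		buf := make([]byte, unsafe.Sizeof(*c)+15)
		return (*context)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[15])) &^ 15))
	}

Second, suspendLock does double duty. It keeps two threads from
suspending each other: SuspendThread only queues a request, so
without the lock both threads could end up suspended with no one left
to call ResumeThread. And exit acquires it without ever releasing it,
so no thread can be mid-suspension while ExitProcess tears the
process down.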
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
index f154614913..60e1bcef5f 100644
--- a/src/runtime/preempt.go
+++ b/src/runtime/preempt.go
@@ -118,6 +118,7 @@ func suspendG(gp *g) suspendGState {
 	stopped := false
 	var asyncM *m
 	var asyncGen uint32
+	var nextPreemptM int64
 	for i := 0; ; i++ {
 		switch s := readgstatus(gp); s {
 		default:
@@ -205,14 +206,32 @@ func suspendG(gp *g) suspendGState {
 			gp.preempt = true
 			gp.stackguard0 = stackPreempt
 
-			// Send asynchronous preemption.
-			asyncM = gp.m
-			asyncGen = atomic.Load(&asyncM.preemptGen)
-			if preemptMSupported && debug.asyncpreemptoff == 0 {
-				preemptM(asyncM)
-			}
+			// Prepare for asynchronous preemption.
+			asyncM2 := gp.m
+			asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+			asyncM = asyncM2
+			asyncGen = asyncGen2
 
 			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+			// Send asynchronous preemption. We do this
+			// after CASing the G back to _Grunning
+			// because preemptM may be synchronous and we
+			// don't want to catch the G just spinning on
+			// its status.
+			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+				// Rate limit preemptM calls. This is
+				// particularly important on Windows
+				// where preemptM is actually
+				// synchronous and the spin loop here
+				// can lead to live-lock.
+				now := nanotime()
+				if now >= nextPreemptM {
+					nextPreemptM = now + yieldDelay/2
+					preemptM(asyncM)
+				}
+			}
 		}
 
 		// TODO: Don't busy wait. This loop should really only
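The preempt.go side amounts to a request/acknowledge protocol around
mp.preemptGen: preemptM bumps the counter on every exit path, whether
or not it injected a call, and suspendG re-sends only once the counter
has moved (or the G has migrated to another M) and a minimum delay has
passed. A condensed view of the resend test, using the names from the
diff (runtime-internal code, not a standalone example):

	asyncM2 := gp.m
	asyncGen2 := atomic.Load(&asyncM2.preemptGen)
	// Re-send only if the G is on a new M, or the previous request
	// was acknowledged (preemptGen advanced).
	needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
	...
	if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
		now := nanotime()
		if now >= nextPreemptM { // at most one attempt per yieldDelay/2
			nextPreemptM = now + yieldDelay/2
			preemptM(asyncM)
		}
	}

Since asyncM starts out nil, the first pass always sends a request;
after that, a G sitting at a non-preemptible point is retried at a
bounded rate rather than being hammered by the suspendG busy loop.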