From 50048a4e8ee11016227c283be2d073e14e1c006b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 18 Jul 2016 23:00:43 -0700 Subject: [PATCH] runtime: add as many extra M's as needed When a non-Go thread calls into Go, the runtime needs an M to run the Go code. The runtime keeps a list of extra M's available. When the last extra M is allocated, the needextram field is set to tell it to allocate a new extra M as soon as it is running in Go. This ensures that an extra M will always be available for the next thread. However, if many threads need an extra M at the same time, this serializes them all. One thread will get an extra M with the needextram field set. All the other threads will see that there is no M available and will go to sleep. The one thread that succeeded will create a new extra M. One lucky thread will get it. All the other threads will see that there is no M available and will go to sleep. The effect is thundering herd, as all the threads looking for an extra M go through the process one by one. This seems to have a particularly bad effect on the FreeBSD scheduler for some reason. With this change, we track the number of threads waiting for an M, and create all of them as soon as one thread gets through. This still means that all the threads will fight for the lock to pick up the next M. But at least each thread that gets the lock will succeed, instead of going to sleep only to fight again. This smooths out the performance greatly on FreeBSD, reducing the average wall time of `testprogcgo CgoCallbackGC` by 74%. On GNU/Linux the average wall time goes down by 9%. Fixes #13926 Fixes #16396 Change-Id: I6dc42a4156085a7ed4e5334c60b39db8f8ef8fea Reviewed-on: https://go-review.googlesource.com/25047 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Dmitry Vyukov --- src/runtime/cgocall.go | 3 ++- src/runtime/proc.go | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index d7e20ebc1d..f8d693060d 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -80,6 +80,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -176,7 +177,7 @@ func cgocallbackg(ctxt uintptr) { func cgocallbackg1(ctxt uintptr) { gp := getg() - if gp.m.needextram { + if gp.m.needextram || atomic.Load(&extraMWaiters) > 0 { gp.m.needextram = false systemstack(newextram) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 2c0b3df167..1d00930ac5 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1389,10 +1389,27 @@ func needm(x byte) { var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n") -// newextram allocates an m and puts it on the extra list. +// newextram allocates m's and puts them on the extra list. // It is called with a working local m, so that it can do things // like call schedlock and allocate. func newextram() { + c := atomic.Xchg(&extraMWaiters, 0) + if c > 0 { + for i := uint32(0); i < c; i++ { + oneNewExtraM() + } + } else { + // Make sure there is at least one extra M. + mp := lockextra(true) + unlockextra(mp) + if mp == nil { + oneNewExtraM() + } + } +} + +// oneNewExtraM allocates an m and puts it on the extra list. +func oneNewExtraM() { // Create extra goroutine locked to extra m. // The goroutine is the context in which the cgo callback will run. // The sched.pc will never be returned to, but setting it to @@ -1485,6 +1502,7 @@ func getm() uintptr { } var extram uintptr +var extraMWaiters uint32 // lockextra locks the extra list and returns the list head. // The caller must unlock the list by storing a new list head @@ -1495,6 +1513,7 @@ var extram uintptr func lockextra(nilokay bool) *m { const locked = 1 + incr := false for { old := atomic.Loaduintptr(&extram) if old == locked { @@ -1503,6 +1522,13 @@ func lockextra(nilokay bool) *m { continue } if old == 0 && !nilokay { + if !incr { + // Add 1 to the number of threads + // waiting for an M. + // This is cleared by newextram. + atomic.Xadd(&extraMWaiters, 1) + incr = true + } usleep(1) continue } -- 2.50.0