From 44497ebacb6336e4cc9ce2934840bdd68e8c46c0 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Tue, 17 May 2016 18:46:03 -0400
Subject: [PATCH] runtime: fix goroutine priority elevation

Currently it's possible for user code to exploit the high scheduler
priority of the GC worker in conjunction with the runnext optimization
to elevate a user goroutine to high priority so it will always run even
if there are other runnable goroutines.

For example, if a goroutine is in a tight allocation loop, the
following can happen:

1. Goroutine 1 allocates, triggering a GC.
2. G 1 attempts an assist, but fails and blocks.
3. The scheduler runs the GC worker, since it is high priority.
   Note that this also starts a new scheduler quantum.
4. The GC worker does enough work to satisfy the assist.
5. The GC worker readies G 1, putting it in runnext.
6. GC finishes and the scheduler runs G 1 from runnext, giving it
   the rest of the GC worker's quantum.
7. Go to 1.

Even if there are other goroutines on the run queue, they never get a
chance to run in the above sequence. This requires a confluence of
circumstances that make it unlikely, though not impossible, that it
would happen in "real" code. In the test added by this commit, we force
this confluence by setting GOMAXPROCS to 1 and GOGC to 1 so it's easy
for the test to repeatedly trigger GC and wake from a blocked assist.

We fix this by making GC always put user goroutines at the end of the
run queue, instead of in runnext. This makes it so user code can't
piggy-back on the GC's high priority to make a user goroutine act like
it has high priority. The only other situation where GC wakes user
goroutines is waking all blocked assists at the end, but this uses the
global run queue and hence doesn't have this problem.

Fixes #15706.

Change-Id: I1589dee4b7b7d0c9c8575ed3472226084dfce8bc
Reviewed-on: https://go-review.googlesource.com/23172
Reviewed-by: Rick Hudson
---
 src/runtime/mgcmark.go              |  8 ++++++-
 src/runtime/proc_test.go            |  8 +++++++
 src/runtime/testdata/testprog/gc.go | 35 +++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index dfddd8c6f6..cbdf2b8375 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -601,7 +601,13 @@ func gcFlushBgCredit(scanWork int64) {
 			gp.gcAssistBytes = 0
 			xgp := gp
 			gp = gp.schedlink.ptr()
-			ready(xgp, 0, true)
+			// It's important that we *not* put xgp in
+			// runnext. Otherwise, it's possible for user
+			// code to exploit the GC worker's high
+			// scheduler priority to get itself always run
+			// before other goroutines and always in the
+			// fresh quantum started by GC.
+			ready(xgp, 0, false)
 		} else {
 			// Partially satisfy this assist.
 			gp.gcAssistBytes += scanBytes
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 8994121071..22e4dca771 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -344,6 +344,14 @@ func TestGCFairness(t *testing.T) {
 	}
 }
 
+func TestGCFairness2(t *testing.T) {
+	output := runTestProg(t, "testprog", "GCFairness2")
+	want := "OK\n"
+	if output != want {
+		t.Fatalf("want %s, got %s\n", want, output)
+	}
+}
+
 func TestNumGoroutine(t *testing.T) {
 	output := runTestProg(t, "testprog", "NumGoroutine")
 	want := "1\n"
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index 0676e9a4ec..a0c1f82b56 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -8,11 +8,14 @@ import (
 	"fmt"
 	"os"
 	"runtime"
+	"runtime/debug"
+	"sync/atomic"
 	"time"
 )
 
 func init() {
 	register("GCFairness", GCFairness)
+	register("GCFairness2", GCFairness2)
 	register("GCSys", GCSys)
 }
 
@@ -72,3 +75,35 @@ func GCFairness() {
 	time.Sleep(10 * time.Millisecond)
 	fmt.Println("OK")
 }
+
+func GCFairness2() {
+	// Make sure user code can't exploit the GC's high priority
+	// scheduling to make scheduling of user code unfair. See
+	// issue #15706.
+	runtime.GOMAXPROCS(1)
+	debug.SetGCPercent(1)
+	var count [3]int64
+	var sink [3]interface{}
+	for i := range count {
+		go func(i int) {
+			for {
+				sink[i] = make([]byte, 1024)
+				atomic.AddInt64(&count[i], 1)
+			}
+		}(i)
+	}
+	// Note: If the unfairness is really bad, it may not even get
+	// past the sleep.
+	//
+	// If the scheduling rules change, this may not be enough time
+	// to let all goroutines run, but for now we cycle through
+	// them rapidly.
+	time.Sleep(30 * time.Millisecond)
+	for i := range count {
+		if atomic.LoadInt64(&count[i]) == 0 {
+			fmt.Printf("goroutine %d did not run\n", i)
+			return
+		}
+	}
+	fmt.Println("OK")
+}
-- 
2.50.0
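
The fix hinges on the third argument to ready, which selects whether the
readied goroutine goes into the P's runnext slot (and so runs before
anything already queued) or onto the tail of the local run queue. The
standalone sketch below models only that distinction; it is a simplified
illustration, and the runQueue type with its put/get methods is a
hypothetical stand-in, not the runtime's actual runqput/runqget code.

	// Toy model of a per-P run queue with a runnext-style fast slot.
	// Hypothetical sketch; not the runtime's real data structures.
	package main

	import "fmt"

	type runQueue struct {
		runNext string   // high-priority slot, checked before runq
		runq    []string // ordinary FIFO run queue
	}

	// put makes g runnable. next=true mimics ready(gp, 0, true) before
	// this patch; next=false mimics the patched ready(gp, 0, false).
	func (q *runQueue) put(g string, next bool) {
		if next {
			if q.runNext != "" {
				// Displace whatever was in the fast slot.
				q.runq = append(q.runq, q.runNext)
			}
			q.runNext = g
			return
		}
		q.runq = append(q.runq, g)
	}

	// get returns the next goroutine to run, preferring runNext.
	func (q *runQueue) get() string {
		if q.runNext != "" {
			g := q.runNext
			q.runNext = ""
			return g
		}
		if len(q.runq) == 0 {
			return ""
		}
		g := q.runq[0]
		q.runq = q.runq[1:]
		return g
	}

	func main() {
		// G2 and G3 are already runnable when the GC worker readies G1,
		// the goroutine whose assist debt it just paid off.
		q := &runQueue{runq: []string{"G2", "G3"}}
		q.put("G1", true)
		fmt.Println("next=true: ", q.get()) // G1 jumps the queue

		q = &runQueue{runq: []string{"G2", "G3"}}
		q.put("G1", false)
		fmt.Println("next=false:", q.get()) // G2 runs first
	}

With next=true the freshly readied assist goroutine wins every scheduling
decision and inherits the quantum the GC worker just started, which is the
piggy-backing the commit message describes; with next=false it queues
behind G2 and G3 and the other runnable goroutines get their turn.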