From: Ian Lance Taylor
Date: Fri, 1 Jun 2018 17:16:08 +0000 (-0700)
Subject: runtime: don't crash holding locks on racy timer access
X-Git-Tag: go1.11beta1~221
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d6c3b0a56d8c81c221b0adf69ae351f7cd467854;p=gostls13.git

runtime: don't crash holding locks on racy timer access

If we run into data corruption due to the program accessing timers in
a racy way, do a normal panic rather than a hard crash with "panic
holding locks". The hope is to make the problem less confusing for
users.

Fixes #25686

Change-Id: I863417adf21f7f8c088675b67a3acf49a0cdef41
Reviewed-on: https://go-review.googlesource.com/115815
Reviewed-by: Austin Clements
---

diff --git a/src/runtime/time.go b/src/runtime/time.go
index 4308cc0f0b..9de45f5e08 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -98,7 +98,10 @@ func timeSleep(ns int64) {
 	t.arg = gp
 	tb := t.assignBucket()
 	lock(&tb.lock)
-	tb.addtimerLocked(t)
+	if !tb.addtimerLocked(t) {
+		unlock(&tb.lock)
+		badTimer()
+	}
 	goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2)
 }
 
@@ -128,14 +131,19 @@ func goroutineReady(arg interface{}, seq uintptr) {
 func addtimer(t *timer) {
 	tb := t.assignBucket()
 	lock(&tb.lock)
-	tb.addtimerLocked(t)
+	ok := tb.addtimerLocked(t)
 	unlock(&tb.lock)
+	if !ok {
+		badTimer()
+	}
 }
 
 // Add a timer to the heap and start or kick timerproc if the new timer is
 // earlier than any of the others.
 // Timers are locked.
-func (tb *timersBucket) addtimerLocked(t *timer) {
+// Returns whether all is well: false if the data structure is corrupt
+// due to user-level races.
+func (tb *timersBucket) addtimerLocked(t *timer) bool {
 	// when must never be negative; otherwise timerproc will overflow
 	// during its delta calculation and never expire other runtime timers.
 	if t.when < 0 {
@@ -143,7 +151,9 @@ func (tb *timersBucket) addtimerLocked(t *timer) {
 	}
 	t.i = len(tb.t)
 	tb.t = append(tb.t, t)
-	siftupTimer(tb.t, t.i)
+	if !siftupTimer(tb.t, t.i) {
+		return false
+	}
 	if t.i == 0 {
 		// siftup moved to top: new earliest deadline.
 		if tb.sleeping {
@@ -159,6 +169,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) {
 		tb.created = true
 		go timerproc(tb)
 	}
+	return true
 }
 
 // Delete timer t from the heap.
@@ -191,11 +202,19 @@ func deltimer(t *timer) bool {
 	}
 	tb.t[last] = nil
 	tb.t = tb.t[:last]
+	ok := true
 	if i != last {
-		siftupTimer(tb.t, i)
-		siftdownTimer(tb.t, i)
+		if !siftupTimer(tb.t, i) {
+			ok = false
+		}
+		if !siftdownTimer(tb.t, i) {
+			ok = false
+		}
 	}
 	unlock(&tb.lock)
+	if !ok {
+		badTimer()
+	}
 	return true
 }
 
@@ -219,10 +238,13 @@ func timerproc(tb *timersBucket) {
 			if delta > 0 {
 				break
 			}
+			ok := true
 			if t.period > 0 {
 				// leave in heap but adjust next time to fire
 				t.when += t.period * (1 + -delta/t.period)
-				siftdownTimer(tb.t, 0)
+				if !siftdownTimer(tb.t, 0) {
+					ok = false
+				}
 			} else {
 				// remove from heap
 				last := len(tb.t) - 1
@@ -233,7 +255,9 @@ func timerproc(tb *timersBucket) {
 				tb.t[last] = nil
 				tb.t = tb.t[:last]
 				if last > 0 {
-					siftdownTimer(tb.t, 0)
+					if !siftdownTimer(tb.t, 0) {
+						ok = false
+					}
 				}
 				t.i = -1 // mark as removed
 			}
@@ -241,6 +265,9 @@ func timerproc(tb *timersBucket) {
 			arg := t.arg
 			seq := t.seq
 			unlock(&tb.lock)
+			if !ok {
+				badTimer()
+			}
 			if raceenabled {
 				raceacquire(unsafe.Pointer(t))
 			}
@@ -326,8 +353,20 @@ func timeSleepUntil() int64 {
 }
 
 // Heap maintenance algorithms.
-
-func siftupTimer(t []*timer, i int) {
+// These algorithms check for slice index errors manually.
+// Slice index errors can happen if the program accesses timers
+// in a racy way. We don't want to panic here, because
+// it will cause the program to crash with a mysterious
+// "panic holding locks" message. Instead, we panic while not
+// holding a lock.
+// The races can occur despite the bucket locks because assignBucket
+// itself is called without locks, so racy calls can cause a timer to
+// change buckets while these functions are running.
+
+func siftupTimer(t []*timer, i int) bool {
+	if i >= len(t) {
+		return false
+	}
 	when := t[i].when
 	tmp := t[i]
 	for i > 0 {
@@ -343,10 +382,14 @@ func siftupTimer(t []*timer, i int) {
 		t[i] = tmp
 		t[i].i = i
 	}
+	return true
 }
 
-func siftdownTimer(t []*timer, i int) {
+func siftdownTimer(t []*timer, i int) bool {
 	n := len(t)
+	if i >= n {
+		return false
+	}
 	when := t[i].when
 	tmp := t[i]
 	for {
@@ -382,6 +425,15 @@ func siftdownTimer(t []*timer, i int) {
 		t[i] = tmp
 		t[i].i = i
 	}
+	return true
+}
+
+// badTimer is called if the timer data structures have been corrupted,
+// presumably due to racy use by the program. We panic here rather than
+// panicking due to invalid slice access while holding locks.
+// See issue #25686.
+func badTimer() {
+	panic(errorString("racy use of timers"))
+}
 
 // Entry points for net, time to call nanotime.
diff --git a/src/time/time_test.go b/src/time/time_test.go
index 7778bf1f83..432a67dec3 100644
--- a/src/time/time_test.go
+++ b/src/time/time_test.go
@@ -9,11 +9,13 @@ import (
 	"encoding/gob"
 	"encoding/json"
 	"fmt"
+	"internal/race"
 	"math/big"
 	"math/rand"
 	"os"
 	"runtime"
 	"strings"
+	"sync"
 	"testing"
 	"testing/quick"
 	. "time"
@@ -1341,3 +1343,38 @@ func TestReadFileLimit(t *testing.T) {
 		t.Errorf("readFile(%q) error = %v; want error containing 'is too large'", zero, err)
 	}
 }
+
+// Issue 25686: hard crash on concurrent timer access.
+// This test deliberately invokes a race condition.
+// We are testing that we don't crash with "fatal error: panic holding locks".
+func TestConcurrentTimerReset(t *testing.T) {
+	if race.Enabled {
+		t.Skip("skipping test under race detector")
+	}
+
+	// We expect this code to panic rather than crash.
+	// Don't worry if it doesn't panic.
+	catch := func(i int) {
+		if e := recover(); e != nil {
+			t.Logf("panic in goroutine %d, as expected, with %q", i, e)
+		} else {
+			t.Logf("no panic in goroutine %d", i)
+		}
+	}
+
+	const goroutines = 8
+	const tries = 1000
+	var wg sync.WaitGroup
+	wg.Add(goroutines)
+	timer := NewTimer(Hour)
+	for i := 0; i < goroutines; i++ {
+		go func(i int) {
+			defer wg.Done()
+			defer catch(i)
+			for j := 0; j < tries; j++ {
+				timer.Reset(Hour + Duration(i*j))
+			}
+		}(i)
+	}
+	wg.Wait()
+}
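
The shape of the fix, restated outside the diff: every heap mutation now
reports corruption through a boolean, and the caller raises the panic only
after releasing the bucket lock, so the panic unwinds normally instead of
aborting the process with "panic holding locks". A minimal standalone sketch
of that unlock-then-panic pattern; the heap type, badState, and popMin below
are illustrative names, not runtime identifiers:

package main

import (
	"fmt"
	"sync"
)

// heap is a stand-in for timersBucket: a lock plus a slice that racy
// callers could corrupt.
type heap struct {
	mu sync.Mutex
	t  []int
}

// badState mirrors badTimer: it must run with no locks held, so the
// resulting panic can be recovered by the user instead of turning into
// a fatal "panic holding locks" crash.
func badState() {
	panic("racy use of heap")
}

// popMin removes the minimum element. Like deltimer after this CL, it
// notes corruption in a local ok while the lock is held and panics only
// after unlocking.
func (h *heap) popMin() int {
	h.mu.Lock()
	ok := true
	var v int
	if len(h.t) == 0 {
		// Corrupt state; don't index the slice while h.mu is held,
		// since that would panic under the lock.
		ok = false
	} else {
		v = h.t[0]
		h.t = h.t[1:]
	}
	h.mu.Unlock()
	if !ok {
		badState() // lock already released; safe to panic
	}
	return v
}

func main() {
	h := &heap{t: []int{1, 2}}
	fmt.Println(h.popMin(), h.popMin())
}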
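
For programs that now see the "racy use of timers" panic, the cure is on the
user side: serialize access to the shared Timer. A sketch of one way to do
that; the safeTimer wrapper is an illustrative name, not part of the time
package:

package main

import (
	"sync"
	"time"
)

// safeTimer serializes Reset calls so only one goroutine touches the
// underlying runtime timer at a time.
type safeTimer struct {
	mu sync.Mutex
	t  *time.Timer
}

func (s *safeTimer) Reset(d time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.t.Reset(d)
}

func main() {
	st := &safeTimer{t: time.NewTimer(time.Hour)}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// The same access pattern as TestConcurrentTimerReset,
			// made safe by the mutex.
			for j := 0; j < 1000; j++ {
				st.Reset(time.Hour + time.Duration(i*j))
			}
		}(i)
	}
	wg.Wait()
}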