From: Austin Clements Date: Mon, 26 Nov 2018 19:41:23 +0000 (-0500) Subject: runtime: check more work flushing races X-Git-Tag: go1.12beta1~213 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=438b9544a079576c539cdc040cbf337966a0b25d;p=gostls13.git runtime: check more work flushing races This adds several new checks to help debug #27993. It adds a mechanism for freezing write barriers and gcWork puts during the mark completion algorithm. This way, if we do detect mark completion, we can catch any puts that happened during the completion algorithm. Based on build dashboard failures, this seems to be the window of time when these are happening. This also double-checks that all work buffers are empty immediately upon entering mark termination (much earlier than the current check). This is unlikely to trigger based on the current failures, but is a good safety net. Change-Id: I03f56c48c4322069e28c50fbc3c15b2fee2130c2 Reviewed-on: https://go-review.googlesource.com/c/151797 Run-TryBot: Austin Clements Reviewed-by: Michael Knyszek --- diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index db589c3f8f..7747e5409c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1363,6 +1363,19 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 +// debugCachedWork enables extra checks for debugging premature mark +// termination. +// +// For debugging issue #27993. +const debugCachedWork = true + +// gcWorkPauseGen is for debugging the mark completion algorithm. +// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. +// Only used if debugCachedWork is true. +// +// For debugging issue #27993. +var gcWorkPauseGen uint32 = 1 + // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1408,6 +1421,14 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) + // For debugging, shrink the write barrier + // buffer so it flushes immediately. + // wbBuf.reset will keep it at this size as + // long as throwOnGCWork is set. + if debugCachedWork { + b := &_p_.wbBuf + b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) + } // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1418,11 +1439,23 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false + } else if debugCachedWork { + // For debugging, freeze the gcWork + // until we know whether we've reached + // completion or not. If we think + // we've reached completion, but + // there's a paused gcWork, then + // that's a bug. + _p_.gcw.pauseGen = gcWorkPauseGen } }) }) if gcMarkDoneFlushed != 0 { + if debugCachedWork { + // Release paused gcWorks. + atomic.Xadd(&gcWorkPauseGen, 1) + } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1431,7 +1464,12 @@ top: goto top } - throwOnGCWork = true + if debugCachedWork { + throwOnGCWork = true + // Release paused gcWorks. If there are any, they + // should now observe throwOnGCWork and panic. + atomic.Xadd(&gcWorkPauseGen, 1) + } // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore @@ -1449,6 +1487,30 @@ top: // below. The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. + if debugCachedWork { + // For debugging, double check that no work was added after we + // went around above and disable write barrier buffering. + for _, p := range allp { + gcw := &p.gcw + if !gcw.empty() { + printlock() + print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) + if gcw.wbuf1 == nil { + print(" wbuf1=") + } else { + print(" wbuf1.n=", gcw.wbuf1.nobj) + } + if gcw.wbuf2 == nil { + print(" wbuf2=") + } else { + print(" wbuf2.n=", gcw.wbuf2.nobj) + } + print("\n") + throw("throwOnGCWork") + } + } + } + // Disable assists and background workers. We must do // this before waking blocked assists. atomic.Store(&gcBlackenEnabled, 0) diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index da2129ee50..8a77ff55e4 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -93,6 +93,10 @@ type gcWork struct { // termination check. Specifically, this indicates that this // gcWork may have communicated work to another gcWork. flushedWork bool + + // pauseGen causes put operations to spin while pauseGen == + // gcWorkPauseGen if debugCachedWork is true. + pauseGen uint32 } // Most of the methods of gcWork are go:nowritebarrierrec because the @@ -111,13 +115,21 @@ func (w *gcWork) init() { w.wbuf2 = wbuf2 } +func (w *gcWork) checkPut() { + if debugCachedWork { + for atomic.Load(&gcWorkPauseGen) == w.pauseGen { + } + if throwOnGCWork { + throw("throwOnGCWork") + } + } +} + // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. //go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() flushed := false wbuf := w.wbuf1 @@ -153,9 +165,7 @@ func (w *gcWork) put(obj uintptr) { // otherwise it returns false and the caller needs to call put. //go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() wbuf := w.wbuf1 if wbuf == nil { @@ -178,9 +188,7 @@ func (w *gcWork) putBatch(obj []uintptr) { return } - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() flushed := false wbuf := w.wbuf1 @@ -303,16 +311,12 @@ func (w *gcWork) balance() { return } if wbuf := w.wbuf2; wbuf.nobj != 0 { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() putfull(wbuf) w.flushedWork = true w.wbuf2 = getempty() } else if wbuf := w.wbuf1; wbuf.nobj > 4 { - if throwOnGCWork { - throw("throwOnGCWork") - } + w.checkPut() w.wbuf1 = handoff(wbuf) w.flushedWork = true // handoff did putfull } else { diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go index c91cea254e..a698493a0a 100644 --- a/src/runtime/mwbbuf.go +++ b/src/runtime/mwbbuf.go @@ -79,7 +79,7 @@ const ( func (b *wbBuf) reset() { start := uintptr(unsafe.Pointer(&b.buf[0])) b.next = start - if writeBarrier.cgo { + if writeBarrier.cgo || (debugCachedWork && throwOnGCWork) { // Effectively disable the buffer by forcing a flush // on every barrier. b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))