// gcphase == _GCmark.
var gcBlackenEnabled uint32
+// gcBlackenPromptly indicates that optimizations that may
+// hide work from the global work queue should be disabled.
+//
+// If gcBlackenPromptly is true, per-P gcWork caches should
+// be flushed immediately and new objects should be allocated black.
+//
+// There is a tension between allocating objects white and
+// allocating them black. If white and the objects die before being
+// marked they can be collected during this GC cycle. On the other
+// hand allocating them black will reduce _GCmarktermination latency
+// since more work is done in the mark phase. This tension is resolved
+// by allocating white until the mark phase is approaching its end and
+// then allocating black for the remainder of the mark phase.
+var gcBlackenPromptly bool
+
const (
_GCoff = iota // GC not running, write barrier disabled
_GCstw // unused state
if _p_.gcBgMarkWorker == nil {
throw("gcControllerState.findRunnable: no background mark worker")
}
- if work.bgMarkDone != 0 {
+ if work.bgMark1.done != 0 && work.bgMark2.done != 0 {
// Background mark is done. Don't schedule background
// mark worker any more. (This is not just an
// optimization. Without this we can spin scheduling
return memstats.heap_live >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
}
+// bgMarkSignal synchronizes the GC coordinator and background mark workers.
+//
+// The coordinator blocks in wait; the first worker to call complete in a
+// cycle delivers the wakeup, and later calls are no-ops until clear resets
+// done.
+type bgMarkSignal struct {
+ // Workers race to cas to 1. Winner signals coordinator.
+ done uint32
+ // Coordinator to wake up.
+ // lock is held by complete while it inspects g and wake, and by wait
+ // while it checks wake and records g.
+ lock mutex
+ // g is the goroutine recorded by wait before parking, or nil if no
+ // goroutine has parked yet.
+ g *g
+ // wake records a signal delivered by complete before the coordinator
+ // parked, so that the next wait returns without parking.
+ wake bool
+}
+
+// wait blocks the calling goroutine until complete has delivered its
+// signal. If the signal already arrived (s.wake is set), wait returns
+// without parking. In both cases wake and g are reset before returning,
+// without holding s.lock.
+func (s *bgMarkSignal) wait() {
+ lock(&s.lock)
+ if s.wake {
+ // Wakeup already happened
+ unlock(&s.lock)
+ } else {
+ // Record this goroutine so complete can ready it, then park.
+ // NOTE(review): goparkunlock presumably releases s.lock as part
+ // of parking - confirm against its definition.
+ s.g = getg()
+ goparkunlock(&s.lock, "mark wait (idle)", traceEvGoBlock, 1)
+ }
+ // Reset for the next use of this signal.
+ s.wake = false
+ s.g = nil
+}
+
+// complete signals the completion of this phase of marking. This can
+// be called multiple times during a cycle; only the first call has
+// any effect.
+//
+// The caller that wins the cas on s.done either readies the goroutine
+// recorded in s.g or, if none is recorded yet, sets s.wake so that a
+// later wait returns without parking.
+func (s *bgMarkSignal) complete() {
+ if cas(&s.done, 0, 1) {
+ // This is the first worker to reach this completion point.
+ // Signal the main GC goroutine.
+ lock(&s.lock)
+ if s.g == nil {
+ // It hasn't parked yet.
+ // Leave a note for wait to find.
+ s.wake = true
+ } else {
+ // wait recorded its goroutine in s.g before parking;
+ // make that goroutine runnable again.
+ ready(s.g, 0)
+ }
+ unlock(&s.lock)
+ }
+}
+
+// clear resets done so that the next call to complete can signal again.
+// It is a plain (non-atomic) store and leaves lock, g and wake untouched.
+// NOTE(review): presumably only called while no worker can be racing in
+// complete - confirm against the caller.
+func (s *bgMarkSignal) clear() {
+ s.done = 0
+}
+
var work struct {
full uint64 // lock-free list of full blocks workbuf
empty uint64 // lock-free list of empty blocks workbuf
bgMarkReady note // signal background mark worker has started
bgMarkDone uint32 // cas to 1 when at a background mark completion point
-
// Background mark completion signaling
- bgMarkWake struct {
- lock mutex
- g *g
- wake bool
- }
+
+ // Coordination for the 2 parts of the mark phase.
+ bgMark1 bgMarkSignal
+ bgMark2 bgMarkSignal
// Copy of mheap.allspans for marker or sweeper.
spans []*mspan
}
// Wait for background mark completion.
- lock(&work.bgMarkWake.lock)
- if work.bgMarkWake.wake {
- // Wakeup already happened
- unlock(&work.bgMarkWake.lock)
+ work.bgMark1.wait()
+
+ // The global work list is empty, but there can still be work
+ // sitting in the per-P work caches and there can be more
+ // objects reachable from global roots since they don't have write
+ // barriers. Rescan some roots and flush work caches.
+ systemstack(func() {
+ // rescan global data and bss.
+ markroot(nil, _RootData)
+ markroot(nil, _RootBss)
+ forEachP(func(_p_ *p) {
+ _p_.gcw.dispose()
+ })
+ })
+
+ if atomicload64(&work.full) != 0 || atomicload64(&work.partial) != 0 {
+ if work.bgMark2.done != 0 {
+ throw("work.bgMark2.done != 0")
+ }
+ gcBlackenPromptly = true
+ // Wait for this more aggressive background mark to complete.
+ work.bgMark2.wait()
} else {
- work.bgMarkWake.g = getg()
- goparkunlock(&work.bgMarkWake.lock, "mark wait (idle)", traceEvGoBlock, 1)
+ work.bgMark2.done = 1
}
- work.bgMarkWake.wake = false
- work.bgMarkWake.g = nil
// Begin mark termination.
if debug.gctrace > 0 {
// World is stopped.
// Start marktermination which includes enabling the write barrier.
atomicstore(&gcBlackenEnabled, 0)
+ gcBlackenPromptly = false
setGCPhase(_GCmarktermination)
if debug.gctrace > 0 {
work.nproc = ^uint32(0)
work.nwait = ^uint32(0)
- // Background GC and assists race to set this to 1 on
- // completion so that this only gets one "done" signal.
- work.bgMarkDone = 0
-
+ // Reset background mark completion points.
+ work.bgMark1.clear()
+ work.bgMark2.clear()
gcController.bgMarkStartTime = nanotime()
}
startTime := nanotime()
- xadd(&work.nwait, -1)
+ decnwait := xadd(&work.nwait, -1)
+ if decnwait == work.nproc {
+ println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
+ throw("work.nwait was > work.nproc")
+ }
done := false
switch p.gcMarkWorkerMode {
gcDrainUntilPreempt(&p.gcw, gcBgCreditSlack)
// Was this the last worker and did we run out
// of work?
- done = xadd(&work.nwait, +1) == work.nproc && work.full == 0 && work.partial == 0
+ incnwait := xadd(&work.nwait, +1)
+ if incnwait > work.nproc {
+ println("runtime: p.gcMarkWorkerMode=", p.gcMarkWorkerMode,
+ "work.nwait=", incnwait, "work.nproc=", work.nproc)
+ throw("work.nwait > work.nproc")
+ }
+ done = incnwait == work.nproc && work.full == 0 && work.partial == 0
+ }
+ // If we are near the end of the mark phase dispose of p.gcw.
+ if gcBlackenPromptly {
+ p.gcw.dispose()
}
- // We're not in mark termination, so there's no need
- // to dispose p.gcw.
// If this worker reached a background mark completion
// point, signal the main GC goroutine.
if done {
- gcBgMarkDone()
+ if gcBlackenPromptly {
+ if work.bgMark1.done == 0 {
+ throw("completing mark 2, but bgMark1.done == 0")
+ }
+ work.bgMark2.complete()
+ } else {
+ work.bgMark1.complete()
+ }
}
duration := nanotime() - startTime
switch p.gcMarkWorkerMode {
case gcMarkWorkerDedicatedMode:
xaddint64(&gcController.dedicatedMarkTime, duration)
+ xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
case gcMarkWorkerFractionalMode:
xaddint64(&gcController.fractionalMarkTime, duration)
xaddint64(&gcController.fractionalMarkWorkersNeeded, 1)
}
}
-// gcBgMarkDone signals the completion of background marking. This can
-// be called multiple times during a cycle; only the first call has
-// any effect.
-func gcBgMarkDone() {
- if cas(&work.bgMarkDone, 0, 1) {
- // This is the first worker to reach completion.
- // Signal the main GC goroutine.
- lock(&work.bgMarkWake.lock)
- if work.bgMarkWake.g == nil {
- // It hasn't parked yet.
- work.bgMarkWake.wake = true
- } else {
- ready(work.bgMarkWake.g, 0)
- }
- unlock(&work.bgMarkWake.lock)
- }
-}
-
-// gcMarkWorkAvailable determines if mark work is readily available.
-// It is used by the scheduler to decide if this p run a mark work.
+// gcMarkWorkAvailable returns true if executing a mark worker
+// on p is potentially useful.
func gcMarkWorkAvailable(p *p) bool {
if !p.gcw.empty() {
return true
work.nwait = 0
work.ndone = 0
- work.nproc = 1 // For now do not do this in parallel.
+ work.nproc = 1
+ useOneP := uint32(1) // For now do not do this in parallel.
// ackgcphase is not needed since we are not scanning running goroutines.
- parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
+ parforsetup(work.markfor, useOneP, uint32(_RootCount+local_allglen), false, markroot)
parfordo(work.markfor)
lock(&allglock)
// Perform assist work
systemstack(func() {
+ if atomicload(&gcBlackenEnabled) == 0 {
+ // The gcBlackenEnabled check in malloc races with the
+ // store that clears it but an atomic check in every malloc
+ // would be a performance hit.
+ // Instead we recheck it here on the non-preemptable system
+ // stack to determine if we should perform an assist.
+ return
+ }
// Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can
// just measure start and end time.
startTime := nanotime()
- xadd(&work.nwait, -1)
+ decnwait := xadd(&work.nwait, -1)
+ if decnwait == work.nproc {
+ println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
+ throw("nwait > work.nprocs")
+ }
// drain own cached work first in the hopes that it
// will be more cache friendly.
gcDrainN(gcw, scanWork)
// Record that we did this much scan work.
gp.gcscanwork += gcw.scanWork - startScanWork
- // No need to dispose since we're not in mark termination.
-
+ // If we are near the end of the mark phase
+ // dispose of the gcw.
+ if gcBlackenPromptly {
+ gcw.dispose()
+ }
// If this is the last worker and we ran out of work,
// signal a completion point.
- if xadd(&work.nwait, +1) == work.nproc && work.full == 0 && work.partial == 0 {
+ incnwait := xadd(&work.nwait, +1)
+ if incnwait > work.nproc {
+ println("runtime: work.nwait=", incnwait,
+ "work.nproc=", work.nproc,
+ "gcBlackenPromptly=", gcBlackenPromptly)
+ throw("work.nwait > work.nproc")
+ }
+
+ if incnwait == work.nproc && work.full == 0 && work.partial == 0 {
// This has reached a background completion
// point.
- gcBgMarkDone()
+ if gcBlackenPromptly {
+ if work.bgMark1.done == 0 {
+ throw("completing mark 2, but bgMark1.done == 0")
+ }
+ work.bgMark2.complete()
+ } else {
+ work.bgMark1.complete()
+ }
}
-
duration := nanotime() - startTime
_p_ := gp.m.p.ptr()
_p_.gcAssistTime += duration
if obj, hbits, span := heapBitsForObject(b); obj != 0 {
gcw := &getg().m.p.ptr().gcw
greyobject(obj, 0, 0, hbits, span, gcw)
- if gcphase == _GCmarktermination {
+ if gcphase == _GCmarktermination || gcBlackenPromptly {
// Ps aren't allowed to cache work during mark
// termination.
gcw.dispose()
}
}
-// When in GCmarkterminate phase we allocate black.
+// If gcBlackenPromptly is true we are in the second mark phase, so we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
- if gcphase != _GCmarktermination {
- throw("marking new object while not in mark termination phase")
- }
- if useCheckmark { // The world should be stopped so this should not happen.
+ if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
-
heapBitsForAddr(obj).setMarked()
xadd64(&work.bytesMarked, int64(size))
}
return
}
if !b.inuse {
- println("runtime:logput fails log entry=", entry,
+ println("runtime: logput fails log entry=", entry,
"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
throw("logput: put not legal")
return b
}
- xadd(&work.nwait, +1)
+ incnwait := xadd(&work.nwait, +1)
+ if incnwait > work.nproc {
+ println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
+ throw("work.nwait > work.nproc")
+ }
for i := 0; ; i++ {
if work.full != 0 || work.partial != 0 {
- xadd(&work.nwait, -1)
+ decnwait := xadd(&work.nwait, -1)
+ if decnwait == work.nproc {
+ println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
+ throw("work.nwait > work.nproc")
+ }
b = (*workbuf)(lfstackpop(&work.full))
if b == nil {
b = (*workbuf)(lfstackpop(&work.partial))
b.checknonempty()
return b
}
- xadd(&work.nwait, +1)
+ incnwait := xadd(&work.nwait, +1)
+ if incnwait > work.nproc {
+ println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
+ throw("work.nwait > work.nproc")
+ }
}
if work.nwait == work.nproc {
return nil