const (
_GCoff = iota // GC not running; sweeping in background, write barrier disabled
- _GCscan // GC collecting roots into workbufs, write barrier ENABLED
- _GCmark // GC marking from workbufs, write barrier ENABLED
+ _GCmark // GC marking roots and workbufs, write barrier ENABLED
_GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
)
//go:nosplit
func setGCPhase(x uint32) {
atomicstore(&gcphase, x)
- writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || gcphase == _GCscan
+ writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination
}
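// Illustrative sketch, not part of this change: writeBarrierEnabled is
// the flag the compiler-emitted write barrier consults. A simplified
// Dijkstra-style insertion barrier could look like the following
// (writePointer and the shade signature are stand-in names, not the
// runtime's actual entry points):
func writePointer(slot *unsafe.Pointer, ptr unsafe.Pointer) {
	*slot = ptr
	if writeBarrierEnabled && ptr != nil {
		// Grey the new referent so a black object can never end
		// up pointing to an unmarked white one.
		shade(ptr)
	}
}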
// gcMarkWorkerMode represents the mode that a concurrent mark worker
// should operate in.
}
var work struct {
- full uint64 // lock-free list of full blocks workbuf
- empty uint64 // lock-free list of empty blocks workbuf
- pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
+ full uint64 // lock-free list of full blocks workbuf
+ empty uint64 // lock-free list of empty blocks workbuf
+ pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
+
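+ // These counters replace the old parfor-driven root scan:
+ // gcMarkRootPrepare sets markrootJobs, and markers claim jobs
+ // lock-free by atomically incrementing markrootNext (see gcDrain).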
+ markrootNext uint32 // next markroot job
+ markrootJobs uint32 // number of markroot jobs
+
nproc uint32
tstart int64
nwait uint32
func gc(mode gcMode) {
// Timing/utilization tracking
var stwprocs, maxprocs int32
- var tSweepTerm, tScan, tMark, tMarkTerm int64
+ var tSweepTerm, tMark, tMarkTerm int64
// debug.gctrace variables
var heap0, heap1, heap2, heapGoal uint64
heapGoal = gcController.heapGoal
systemstack(func() {
- // Enter scan phase and enable write barriers.
+ // Enter concurrent mark phase and enable
+ // write barriers.
//
// Because the world is stopped, all Ps will
// observe that write barriers are enabled by
// the time we start the world and begin
// scanning. Since allocations are blocked
// until assists can happen, we want to enable
// assists as early as possible.
- setGCPhase(_GCscan)
+ setGCPhase(_GCmark)
// markrootSpans uses work.spans, so make sure
// it is up to date.
gcCopySpans()
gcBgMarkPrepare() // Must happen before assist enable.
+ gcMarkRootPrepare()
// At this point all Ps have enabled the write
// barrier, thus maintaining the no white to
// black invariant. Enable mutator assists to
// put back-pressure on fast allocating
// mutators.
atomicstore(&gcBlackenEnabled, 1)
- // Concurrent scan.
+ // Concurrent mark.
startTheWorldWithSema()
now = nanotime()
pauseNS += now - pauseStart
- tScan = now
gcController.assistStartTime = now
- gcscan_m()
-
- // Enter mark phase.
- setGCPhase(_GCmark)
})
- // Concurrent mark.
- tMark = nanotime()
+ tMark = now
// Enable background mark workers and wait for
// background mark completion.
- gcController.bgMarkStartTime = nanotime()
+ gcController.bgMarkStartTime = now
work.bgMark1.clear()
work.bgMark1.wait()
+ gcMarkRootCheck()
+
// The global work list is empty, but there can still be work
// sitting in the per-P work caches and there can be more
// objects reachable from global roots since they don't have write
// barriers. Rescan some roots and flush work caches.
gcController.endCycle()
} else {
t := nanotime()
- tScan, tMark, tMarkTerm = t, t, t
+ tMark, tMarkTerm = t, t
heapGoal = heap0
}
memstats.pause_total_ns += uint64(pauseNS)
// Update work.totaltime.
- sweepTermCpu := int64(stwprocs) * (tScan - tSweepTerm)
- scanCpu := tMark - tScan
+ sweepTermCpu := int64(stwprocs) * (tMark - tSweepTerm)
// We report idle marking time below, but omit it from the
// overall utilization here since it's "free".
markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime
markTermCpu := int64(stwprocs) * (now - tMarkTerm)
- cycleCpu := sweepTermCpu + scanCpu + markCpu + markTermCpu
+ cycleCpu := sweepTermCpu + markCpu + markTermCpu
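+ // Example arithmetic: with stwprocs = 4 and 2ms between tSweepTerm and
+ // tMark, sweepTermCpu charges 8ms. There is no separate scan term any
+ // more: root scanning now runs inside concurrent mark, so its CPU time
+ // is already counted in markCpu via the assist and worker totals.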
work.totaltime += cycleCpu
// Compute overall GC CPU utilization.
tInstallWB := tMark
installWBCpu := int64(0)
+ // Scan phase is no longer used.
+ tScan := tInstallWB
+ scanCpu := int64(0)
+
+ // TODO: Clean up the gctrace format.
+
var sbuf [24]byte
printlock()
print("gc ", memstats.numgc,
if atomicload64(&work.full) != 0 {
return true // global work available
}
+ if work.markrootNext < work.markrootJobs {
+ return true // root scan work available
+ }
return false
}
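// Counting root jobs as available work keeps idle Ps from parking while
// markrootNext < markrootJobs: a woken worker can always make progress
// by claiming a root job even when work.full is empty.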
gcFlushGCWork()
// Queue root marking jobs.
- nRoots := gcMarkRootPrepare()
+ gcMarkRootPrepare()
work.nwait = 0
work.ndone = 0
traceGCScanStart()
}
- parforsetup(work.markfor, work.nproc, uint32(nRoots), false, markroot)
if work.nproc > 1 {
noteclear(&work.alldone)
helpgc(int32(work.nproc))
}
gchelperstart()
- parfordo(work.markfor)
var gcw gcWork
gcDrain(&gcw, gcDrainBlock)
gcw.dispose()
+ gcMarkRootCheck()
if work.full != 0 {
throw("work.full != 0")
}
traceGCScanStart()
}
- // parallel mark for over GC roots
- parfordo(work.markfor)
- if gcphase != _GCscan {
+ // Parallel mark over GC roots and heap
+ if gcphase == _GCmarktermination {
var gcw gcWork
gcDrain(&gcw, gcDrainBlock) // blocks in getfull
gcw.dispose()
rootBlockSpans = 8 * 1024 // 64MB worth of spans
)
-// gcMarkRootPrepare initializes scanning-related state and returns
-// the number of roots.
+// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
+// some miscellany) and initializes scanning-related state.
//
// The caller must have called gcCopySpans().
//
//go:nowritebarrier
-func gcMarkRootPrepare() int {
+func gcMarkRootPrepare() {
// Compute how many data and BSS root blocks there are.
nBlocks := func(bytes uintptr) int {
return int((bytes + rootBlockBytes - 1) / rootBlockBytes)
// allglen isn't changing, so we'll scan all Gs.
work.nStackRoots = int(atomicloaduintptr(&allglen))
- return fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots
+ work.markrootNext = 0
+ work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
}
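// Worked example (hypothetical sizes, assuming rootBlockBytes is 256KiB
// as defined above): a 1MiB data segment contributes nBlocks = 4 data
// root jobs, and a process with 500 goroutines contributes 500 stack
// root jobs, so markrootJobs = fixedRootCount + 4 + nBSSRoots +
// nSpanRoots + 500. Job indices handed out from markrootNext map
// one-to-one onto these roots in markroot.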
-// Scan all of the stacks, greying (or graying if in America) the referents
-// but not blackening them since the mark write barrier isn't installed.
-//go:nowritebarrier
-func gcscan_m() {
- _g_ := getg()
-
- // Grab the g that called us and potentially allow rescheduling.
- // This allows it to be scanned like other goroutines.
- mastergp := _g_.m.curg
- casgstatus(mastergp, _Grunning, _Gwaiting)
- mastergp.waitreason = "garbage collection scan"
-
- // Span sweeping has been done by finishsweep_m.
- // Long term we will want to make this goroutine runnable
- // by placing it onto a scanenqueue state and then calling
- // runtime·restartg(mastergp) to make it Grunnable.
- // At the bottom we will want to return this p back to the scheduler.
-
- nroots := gcMarkRootPrepare()
-
- work.ndone = 0
- useOneP := uint32(1) // For now do not do this in parallel.
- // ackgcphase is not needed since we are not scanning running goroutines.
- parforsetup(work.markfor, useOneP, uint32(nroots), false, markroot)
- parfordo(work.markfor)
+// gcMarkRootCheck checks that all roots have been scanned. It is
+// purely for debugging.
+func gcMarkRootCheck() {
+ if work.markrootNext < work.markrootJobs {
+ print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
+ throw("left over markroot jobs")
+ }
lock(&allglock)
// Check that gc work is done.
}
}
unlock(&allglock)
-
- casgstatus(mastergp, _Gwaiting, _Grunning)
- // Let the g that called us continue to run.
}
// ptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}
+// markroot scans the i'th root.
+//
+// Preemption must be disabled (because this uses a gcWork).
+//
//go:nowritebarrier
func markroot(desc *parfor, i uint32) {
// TODO: Consider using getg().m.p.ptr().gcw.
}
case i == fixedRootFlushCaches:
- if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
+ if gcphase == _GCmarktermination { // Do not flush mcaches during concurrent phase.
flushallmcaches()
}
shrinkstack(gp)
}
- scang(gp)
+ if gcphase != _GCmarktermination && gp.startpc == gcBgMarkWorkerPC {
+ // GC background workers may be
+ // non-preemptible, so we may deadlock if we
+ // try to scan them during a concurrent phase.
+ // They also have tiny stacks, so just ignore
+ // them until mark termination.
+ gp.gcscandone = true
+ break
+ }
+
+ // scang must be done on the system stack in case
+ // we're trying to scan our own stack.
+ systemstack(func() {
+ // If this is a self-scan, put the user G in
+ // _Gwaiting to prevent self-deadlock. It may
+ // already be in _Gwaiting if this is mark
+ // termination.
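+ // Without the transition, scang(gp) below would wait
+ // for gp to stop running; since gp is the goroutine
+ // doing the scan, it would wait on itself forever.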
+ userG := getg().m.curg
+ selfScan := gp == userG && readgstatus(userG) == _Grunning
+ if selfScan {
+ casgstatus(userG, _Grunning, _Gwaiting)
+ userG.waitreason = "garbage collection scan"
+ }
+
+ // TODO: scang blocks until gp's stack has
+ // been scanned, which may take a while for
+ // running goroutines. Consider doing this in
+ // two phases where the first is non-blocking:
+ // we scan the stacks we can and ask running
+ // goroutines to scan themselves; and the
+ // second blocks.
+ scang(gp)
+
+ if selfScan {
+ casgstatus(userG, _Gwaiting, _Grunning)
+ }
+ })
}
gcw.dispose()
sp = gp.sched.sp
}
switch gcphase {
- case _GCscan:
+ case _GCmark:
// Install stack barriers during stack scan.
barrierOffset = uintptr(firstStackBarrierOffset)
nextBarrier = sp + barrierOffset
} else {
// Only re-scan up to the lowest un-hit
// barrier. Any frames above this have not
- // executed since the _GCscan scan of gp and
+ // executed since the concurrent scan of gp and
// any writes through up-pointers to above
// this barrier had write barriers.
nextBarrier = gp.stkbar[gp.stkbarPos].savedLRPtr
// We skip installing a barrier on the bottom-most
// frame because on LR machines this LR is not
// on the stack.
- if gcphase == _GCscan && n != 0 {
+ if gcphase == _GCmark && n != 0 {
if gcInstallStackBarrier(gp, frame) {
barrierOffset *= 2
nextBarrier = sp + barrierOffset
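// Doubling the offset spaces barriers exponentially (e.g.
// 1KiB, 2KiB, 4KiB, ... above sp), so an n-byte stack gets
// only O(log n) barriers while each re-scan segment stays
// bounded by the distance to the next barrier.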
gcDrainBlock gcDrainFlags = 0
)
-// gcDrain scans objects in work buffers, blackening grey objects
-// until all work buffers have been drained.
+// gcDrain scans roots and objects in work buffers, blackening grey
+// objects until all roots and work buffers have been drained.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain also returns if
// g.preempt is set. Otherwise, this will block until all dedicated
throw("gcDrain phase incorrect")
}
+ gp := getg()
blocking := flags&gcDrainUntilPreempt == 0
flushBgCredit := flags&gcDrainFlushBgCredit != 0
+ // Drain root marking jobs.
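+ // Each claim is unique: xadd returns the incremented value, so
+ // job = xadd(...) - 1 hands every worker a distinct index. Once
+ // markrootNext passes markrootJobs, all roots have been claimed
+ // and workers fall through to heap marking.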
+ if work.markrootNext < work.markrootJobs {
+ for blocking || !gp.preempt {
+ job := xadd(&work.markrootNext, +1) - 1
+ if job >= work.markrootJobs {
+ break
+ }
+ // TODO: Pass in gcw.
+ markroot(nil, job)
+ }
+ }
+
initScanWork := gcw.scanWork
- gp := getg()
+ // Drain heap marking jobs.
for blocking || !gp.preempt {
// If another proc wants a pointer, give it some.
if work.nwait > 0 && work.full == 0 {