// base address for all 0-byte allocations
var zerobase uintptr
+// Determine whether to initiate a GC.
+// The current primitive heuristic starts a new concurrent GC once the heap
+// has grown to 4/7 (a little over half) of the next_gc goal established by
+// the last GC cycle.
+// If the GC is already working there is no need to trigger another one.
+// This should establish a feedback loop: if the GC does not have
+// sufficient time to complete, then more memory will be requested from
+// the OS, increasing the heap size and thus allowing future GCs more
+// time to complete.
+// The reads of memstats.heap_alloc and memstats.next_gc are benign races:
+// a false negative simply does not start a GC, and a false positive
+// starts a GC needlessly. Neither affects correctness.
+func shouldtriggergc() bool {
+ return memstats.heap_alloc+memstats.heap_alloc*3/4 >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
+}
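To make the threshold above concrete: the comparison fires once heap_alloc reaches 4/7 (about 57%) of next_gc. A minimal standalone sketch of the same arithmetic, using made-up values in place of the runtime-internal memstats fields:

```go
package main

import "fmt"

func main() {
	// Hypothetical values standing in for memstats.heap_alloc and
	// memstats.next_gc (the real fields are internal to the runtime).
	heapAlloc := uint64(4 << 20) // 4 MB currently allocated
	nextGC := uint64(7 << 20)    // 7 MB goal left over from the last cycle

	// Same test as shouldtriggergc, minus the bggc.working check:
	// heap_alloc + heap_alloc*3/4 >= next_gc, i.e. heap_alloc >= 4/7 of next_gc.
	trigger := heapAlloc+heapAlloc*3/4 >= nextGC
	fmt.Println(trigger) // true: 4 MB + 3 MB >= 7 MB
}
```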
+
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
+ shouldhelpgc := false
if size == 0 {
return unsafe.Pointer(&zerobase)
}
systemstack(func() {
mCache_Refill(c, tinySizeClass)
})
+ shouldhelpgc = true
s = c.alloc[tinySizeClass]
v = s.freelist
}
systemstack(func() {
mCache_Refill(c, int32(sizeclass))
})
+ shouldhelpgc = true
s = c.alloc[sizeclass]
v = s.freelist
}
c.local_cachealloc += intptr(size)
} else {
var s *mspan
+ shouldhelpgc = true
systemstack(func() {
s = largeAlloc(size, uint32(flags))
})
}
}
- if memstats.heap_alloc >= memstats.next_gc/2 {
+ if shouldtriggergc() {
gogc(0)
+ } else if shouldhelpgc && atomicloaduint(&bggc.working) == 1 {
+ // bggc.lock is not taken since the race on bggc.working is benign:
+ // at worst we do not call gchelpwork.
+ // The gchelpwork is delayed until this epilogue so that it does not
+ // interfere with the inner workings of malloc, such as the mcache
+ // refills that might happen while doing the gchelpwork.
+ systemstack(gchelpwork)
}
return x
releasem(mp)
mp = nil
- semacquire(&worldsema, false)
-
- if force == 0 && memstats.heap_alloc < memstats.next_gc {
- // typically threads which lost the race to grab
- // worldsema exit here when gc is done.
- semrelease(&worldsema)
- return
+ if force == 0 {
+ lock(&bggc.lock)
+ if !bggc.started {
+ bggc.working = 1
+ bggc.started = true
+ go backgroundgc()
+ } else if bggc.working == 0 {
+ bggc.working = 1
+ ready(bggc.g)
+ }
+ unlock(&bggc.lock)
+ } else {
+ gcwork(force)
}
+}
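The force == 0 branch is a start-once / wake-if-idle pattern: the first trigger creates the background goroutine, and later triggers only ready it once it has parked after finishing a cycle. A rough user-level analogy of that shape, offered as a sketch only: gcCycle is a hypothetical stand-in for gcwork(0), and sync.Mutex plus a buffered channel stand in for bggc.lock, ready, and goparkunlock:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

var (
	mu      sync.Mutex
	started bool                     // analogous to bggc.started
	working bool                     // analogous to bggc.working
	wake    = make(chan struct{}, 1) // stands in for ready/goparkunlock
)

// gcCycle is a hypothetical stand-in for gcwork(0).
func gcCycle() {
	fmt.Println("collecting")
	time.Sleep(10 * time.Millisecond)
}

func trigger() {
	mu.Lock()
	if !started {
		started, working = true, true
		go func() {
			for {
				gcCycle()
				mu.Lock()
				working = false
				mu.Unlock()
				<-wake // "park" until the next trigger
			}
		}()
	} else if !working {
		working = true
		wake <- struct{}{} // "ready" the parked worker
	}
	mu.Unlock()
}

func main() {
	trigger() // starts the worker and runs the first cycle
	time.Sleep(50 * time.Millisecond)
	trigger() // worker is idle, so this wakes it for another cycle
	time.Sleep(50 * time.Millisecond)
}
```

As in gogc, the lock only protects the started/working bookkeeping; the cycle itself runs outside it.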
+
+func gcwork(force int32) {
+
+ semacquire(&worldsema, false)
// Pick up the remaining unswept/not being swept spans concurrently
for gosweepone() != ^uintptr(0) {
// Ok, we're doing it! Stop everybody else
- startTime := nanotime()
- mp = acquirem()
+ mp := acquirem()
mp.gcing = 1
releasem(mp)
gctimer.count++
if force == 0 {
gctimer.cycle.sweepterm = nanotime()
}
+ // Pick up the remaining unswept/not being swept spans before we STW
+ for gosweepone() != ^uintptr(0) {
+ sweep.nbgsweep++
+ }
systemstack(stoptheworld)
systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
if force == 0 { // Do as much work concurrently as possible
systemstack(gcscan_m)
gctimer.cycle.installmarkwb = nanotime()
systemstack(stoptheworld)
- gcinstallmarkwb()
+ systemstack(gcinstallmarkwb)
systemstack(starttheworld)
gctimer.cycle.mark = nanotime()
systemstack(gcmark_m)
systemstack(gcinstalloffwb_m)
}
+ startTime := nanotime()
if mp != acquirem() {
throw("gogc: rescheduled")
}
eagersweep := force >= 2
for i := 0; i < n; i++ {
if i > 0 {
+ // refresh start time if doing a second GC
startTime = nanotime()
}
// switch to g0, call gc, then switch back
// gctimes records the time in nanoseconds of each phase of the concurrent GC.
type gctimes struct {
sweepterm int64 // stw
- scan int64 // stw
- installmarkwb int64
+ scan int64
+ installmarkwb int64 // stw
mark int64
markterm int64 // stw
sweep int64
var gctimer gcchronograph
-// GCstarttimes initializes the gc timess. All previous timess are lost.
+// GCstarttimes initializes the gc times. All previous times are lost.
func GCstarttimes(verbose int64) {
gctimer = gcchronograph{verbose: verbose}
}
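Assuming GCstarttimes and GCprinttimes remain exported from package runtime as defined in this change, a caller could exercise the timers roughly as below (illustrative only; since forced runtime.GC() calls are ignored by the reporting in GCprinttimes, the sketch relies on allocation to let a concurrent cycle trigger):

```go
package main

import "runtime"

func main() {
	runtime.GCstarttimes(1) // any non-zero verbose value enables the reporting

	// Allocate enough to let at least one concurrent GC cycle start and finish.
	var keep [][]byte
	for i := 0; i < 1000; i++ {
		keep = append(keep, make([]byte, 1<<20))
		if len(keep) > 64 {
			keep = keep[:0]
		}
	}
	_ = keep

	runtime.GCprinttimes() // per-phase times for the most recent concurrent cycle
}
```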
// the information from the most recent Concurrent GC cycle. Calls from the
// application to runtime.GC() are ignored.
func GCprinttimes() {
+ if gctimer.verbose == 0 {
+ println("GC timers not enabled")
+ return
+ }
+
// Explicitly put times on the heap so printPhase can use it.
times := new(gctimes)
*times = calctimes()
_DebugGCPtrs = false // if true, print trace of every pointer load during GC
_ConcurrentSweep = true
- _WorkbufSize = 4 * 1024
+ _WorkbufSize = 4 * 256
_FinBlockSize = 4 * 1024
_RootData = 0
_RootBss = 1
var nbadblock int32
type workdata struct {
- full uint64 // lock-free list of full blocks
- empty uint64 // lock-free list of empty blocks
- partial uint64 // lock-free list of partially filled blocks
+ full uint64 // lock-free list of full workbufs
+ empty uint64 // lock-free list of empty workbufs
+ partial uint64 // lock-free list of partially filled workbufs
pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
nproc uint32
tstart int64
// base and extent.
b := b0
n := n0
+
+ // ptrmask can have 2 possible values:
+ // 1. nil - obtain pointer mask from GC bitmap.
+ // 2. pointer to a compact mask (for stacks and data).
+
wbuf := getpartialorempty()
if b != 0 {
wbuf = scanobject(b, n, ptrmask, wbuf)
return
}
}
- if gcphase == _GCscan {
- throw("scanblock: In GCscan phase but no b passed in.")
- }
- keepworking := b == 0
+ drainallwbufs := b == 0
+ drainworkbuf(wbuf, drainallwbufs)
+}
+// Scan objects in wbuf until wbuf is empty.
+// If drainallwbufs is true, also find all other available workbufs and repeat the process.
+//go:nowritebarrier
+func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
if gcphase != _GCmark && gcphase != _GCmarktermination {
println("gcphase", gcphase)
throw("scanblock phase")
}
- // ptrmask can have 2 possible values:
- // 1. nil - obtain pointer mask from GC bitmap.
- // 2. pointer to a compact mask (for stacks and data).
for {
if wbuf.nobj == 0 {
- if !keepworking {
+ if !drainallwbufs {
putempty(wbuf)
return
}
// PREFETCH(wbuf->obj[wbuf->nobj - 3];
// }
wbuf.nobj--
- b = wbuf.obj[wbuf.nobj]
+ b := wbuf.obj[wbuf.nobj]
+ wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
+ }
+}
+
+// Scan at most count objects in the wbuf; the wbuf is then returned to
+// the empty or partial list so other workers can pick up any remaining work.
+//go:nowritebarrier
+func drainobjects(wbuf *workbuf, count uintptr) {
+ for i := uintptr(0); i < count; i++ {
+ if wbuf.nobj == 0 {
+ putempty(wbuf)
+ return
+ }
+
+ // This might be a good place to add prefetch code...
+ // if(wbuf->nobj > 4) {
+ // PREFETCH(wbuf->obj[wbuf->nobj - 3];
+ // }
+ wbuf.nobj--
+ b := wbuf.obj[wbuf.nobj]
wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
}
+ putpartial(wbuf)
+ return
}
//go:nowritebarrier
}
}
+// trygetfull tries to get a full or partially filled workbuf.
+// If one is not immediately available, it returns nil.
+//go:nowritebarrier
+func trygetfull() *workbuf {
+ wbuf := (*workbuf)(lfstackpop(&work.full))
+ if wbuf == nil {
+ wbuf = (*workbuf)(lfstackpop(&work.partial))
+ }
+ return wbuf
+}
+
// Get a full work buffer off the work.full or a partially
// filled one off the work.partial list. If nothing is available
// wait until all the other gc helpers have finished and then
}
}
+// gchelpwork does a small bounded amount of gc work. The purpose is to
+// shorten the time (as measured by allocations) spent doing a concurrent GC.
+// The number of mutator calls is roughly proportional to the number of
+// allocations made by that mutator. This slows down allocation while
+// speeding up the GC.
+//go:nowritebarrier
+func gchelpwork() {
+ switch gcphase {
+ default:
+ throw("gcphasework in bad gcphase")
+ case _GCoff, _GCquiesce, _GCstw:
+ // No work.
+ case _GCsweep:
+ // We could help by calling sweepone to sweep a single span.
+ // _ = sweepone()
+ case _GCscan:
+ // scan the stack, mark the objects, put pointers in work buffers
+ // hanging off the P where this is being run.
+ // scanstack(gp)
+ case _GCmark:
+ // Get a full work buffer and empty it.
+ var wbuf *workbuf
+ wbuf = trygetfull()
+ if wbuf != nil {
+ drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain up to one buffer's worth of objects
+ }
+ case _GCmarktermination:
+ // We should never be here since the world is stopped.
+ // All available mark work will be emptied before returning.
+ throw("gcphasework in bad gcphase")
+ }
+}
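A toy model of the proportionality claim above: each slow-path allocation performs at most one bounded unit of assist (one workbuf of objects via drainobjects), so the total help a mutator gives scales with its number of slow-path allocations rather than growing without bound. The budget constant below is illustrative only; the real bound is len(wbuf.obj):

```go
package main

import "fmt"

// assistBudget stands in for len(wbuf.obj): the most objects a single
// gchelpwork call will scan via drainobjects.
const assistBudget = 128

func main() {
	slowPathAllocs := 1000 // cache refills / large allocs made by one mutator
	assistWork := 0
	for i := 0; i < slowPathAllocs; i++ {
		// ...allocation work would happen here...
		assistWork += assistBudget // one bounded assist per slow-path allocation
	}
	fmt.Println("objects scanned on behalf of the GC:", assistWork) // 128000
}
```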
+
// The gp has been moved to a GC safepoint. GC phase specific
// work is done here.
//go:nowritebarrier
var sweep sweepdata
+// State of the background concurrent GC goroutine.
+var bggc struct {
+ lock mutex
+ g *g // the background GC goroutine
+ working uint // nonzero while a GC cycle is in progress
+ started bool // the goroutine has been created
+}
+
// sweeps one span
// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
//go:nowritebarrier
}
}
+// backgroundgc runs in a goroutine and does the concurrent GC work;
+// bggc holds its state.
+func backgroundgc() {
+ bggc.g = getg()
+ bggc.g.issystem = true
+ for {
+ gcwork(0)
+ lock(&bggc.lock)
+ bggc.working = 0
+ goparkunlock(&bggc.lock, "Concurrent GC wait")
+ }
+}
+
func bgsweep() {
sweep.g = getg()
getg().issystem = true
}
runtime.ReadMemStats(memstats)
sys1 := memstats.Sys
- if sys1-sys > chunk*50 {
+ if sys1-sys > chunk*500 {
println("allocated 1000 chunks of", chunk, "and used ", sys1-sys, "memory")
panic("init1")
}