// Are things on the free lists black or white? How does the sweep phase work?
// Concurrent sweep.
+//
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
// and concurrently in a background goroutine (this helps programs that are not CPU bound).
-// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
-// and so next_gc calculation is tricky and happens as follows.
-// At the end of the stop-the-world phase next_gc is conservatively set based on total
-// heap size; all spans are marked as "needs sweeping".
-// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
-// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
-// closer to the target value. However, this is not enough to avoid over-allocating memory.
-// Consider that a goroutine wants to allocate a new span for a large object and
-// there are no free swept spans, but there are small-object unswept spans.
-// If the goroutine naively allocates a new span, it can surpass the yet-unknown
-// target next_gc value. In order to prevent such cases (1) when a goroutine needs
-// to allocate a new small-object span, it sweeps small-object spans for the same
-// object size until it frees at least one object; (2) when a goroutine needs to
-// allocate large-object span from heap, it sweeps spans until it frees at least
-// that many pages into heap. Together these two measures ensure that we don't surpass
-// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
-// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
-// but there can still be other one-page unswept spans which could be combined into a
-// two-page span.
+// At the end of STW mark termination all spans are marked as "needs sweeping".
+//
+// The background sweeper goroutine simply sweeps spans one-by-one.
+//
+// To avoid requesting more OS memory while there are unswept spans, when a
+// goroutine needs another span, it first attempts to reclaim that much memory
+// by sweeping. When a goroutine needs to allocate a new small-object span, it
+// sweeps small-object spans for the same object size until it frees at least
+// one object. When a goroutine needs to allocate a large-object span from the
+// heap, it sweeps spans until it frees at least that many pages into the
+// heap. There is one case where this may not suffice: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a
+// new two-page span, but there can still be other one-page unswept spans
+// which could be combined into a two-page span.
+//
// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
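// Illustrative sketch (not part of the runtime): a simplified model of the
// "sweep before growing the heap" rule described above. The names toySpan and
// reclaimForAlloc are hypothetical and exist only for illustration; real
// sweeping also returns objects to mcentral free lists and checks sweepgen,
// which is omitted here.
type toySpan struct {
	npages uintptr // size of the span in pages
	live   uintptr // pages still occupied by reachable objects
}

// reclaimForAlloc sweeps unswept spans until at least want pages have been
// returned to the heap, or until no unswept spans remain. Only if it comes up
// short would the caller fall back to requesting more memory from the OS.
func reclaimForAlloc(unswept []toySpan, want uintptr) (reclaimed uintptr, rest []toySpan) {
	for len(unswept) > 0 && reclaimed < want {
		s := unswept[0]
		unswept = unswept[1:]
		reclaimed += s.npages - s.live // pages freed by sweeping this span
	}
	return reclaimed, unswept
}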
// have sufficient time to complete, then more memory will be
// requested from the OS, increasing the heap size and thus allowing
// future GCs more time to complete.
-// memstat.heap_alloc and memstat.next_gc reads have benign races
+// The memstats.heap_live read has a benign race.
// A false negative simply does not start a GC, a false positive
// will start a GC needlessly. Neither has correctness issues.
func shouldtriggergc() bool {
- return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_alloc)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
+ return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_live)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
}
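// Worked example (illustrative only): the condition above is equivalent to
// heap_live >= next_gc*(triggerratio-1)/triggerratio, i.e. a GC is requested
// once the live-plus-recently-allocated estimate comes within a fixed
// fraction of the goal. The triggerratio value below is assumed for
// illustration and may differ from the runtime's constant; the bggc.working
// check is omitted.
func triggerExample() bool {
	const triggerratio = 8                        // assumed value, for illustration only
	var nextGC, heapLive int64 = 8 << 20, 7 << 20 // goal 8 MB, estimate 7 MB
	// 8*(8MB-7MB) = 8MB <= 8MB, so a GC would be requested at this point.
	return triggerratio*(nextGC-heapLive) <= nextGC
}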
var work struct {
if debug.gctrace > 0 {
stwprocs, maxprocs = gcprocs(), gomaxprocs
tSweepTerm = nanotime()
- heap0 = memstats.heap_alloc
+ heap0 = memstats.heap_live
}
if trace.enabled {
gcphase = _GCmarktermination
if debug.gctrace > 0 {
- heap1 = memstats.heap_alloc
+ heap1 = memstats.heap_live
}
startTime := nanotime()
shrinkfinish()
cachestats()
- // next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
- // conservatively set next_gc to high value assuming that everything is live
- // concurrent/lazy sweep will reduce this number while discovering new garbage
- memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
+
+ // compute next_gc
+ memstats.heap_live = work.bytesMarked
+ memstats.next_gc = memstats.heap_live + memstats.heap_live*uint64(gcpercent)/100
if memstats.next_gc < heapminimum {
memstats.next_gc = heapminimum
}
if trace.enabled {
+ traceHeapAlloc()
traceNextGC()
}
}
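// Worked example (illustrative only, mirroring the computation above): with
// gcpercent = 100 (the GOGC default) and 4 MB of marked (reachable) memory,
// the next cycle is triggered from a goal of 8 MB; a goal below heapminimum
// is clamped up. The function and parameter names are hypothetical.
func nextGCExample(bytesMarked, heapminimum uint64, gcpercent int32) uint64 {
	heapLive := bytesMarked // heap_live is reset to the bytes found reachable by mark
	nextGC := heapLive + heapLive*uint64(gcpercent)/100
	if nextGC < heapminimum {
		nextGC = heapminimum
	}
	return nextGC // e.g. nextGCExample(4<<20, 4<<20, 100) == 8<<20
}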
c.local_nlargefree++
c.local_largefree += size
- reduction := int64(size) * int64(gcpercent+100) / 100
- if int64(memstats.next_gc)-reduction > int64(heapminimum) {
- xadd64(&memstats.next_gc, -reduction)
- } else {
- atomicstore64(&memstats.next_gc, heapminimum)
- }
res = true
} else {
// Free small object.
}
if nfree > 0 {
c.local_nsmallfree[cl] += uintptr(nfree)
- c.local_cachealloc -= intptr(uintptr(nfree) * size)
- reduction := int64(nfree) * int64(size) * int64(gcpercent+100) / 100
- if int64(memstats.next_gc)-reduction > int64(heapminimum) {
- xadd64(&memstats.next_gc, -reduction)
- } else {
- atomicstore64(&memstats.next_gc, heapminimum)
- }
res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
// MCentral_FreeSpan updates sweepgen
}
}
// transfer stats from cache to global
- memstats.heap_alloc += uint64(_g_.m.mcache.local_cachealloc)
+ memstats.heap_live += uint64(_g_.m.mcache.local_cachealloc)
_g_.m.mcache.local_cachealloc = 0
memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
_g_.m.mcache.local_tinyallocs = 0
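// Illustrative sketch (not the runtime's types): heap_live is only advanced
// when a per-P cache's local allocation counter is flushed, as in the fragment
// above, so between flushes it undercounts by whatever is still buffered in
// the caches. toyCache and flushCacheStats are hypothetical names.
type toyCache struct{ localCachealloc uintptr }

func flushCacheStats(heapLive *uint64, c *toyCache) {
	*heapLive += uint64(c.localCachealloc) // fold the buffered bytes into the global estimate
	c.localCachealloc = 0                  // reset the per-cache counter
}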
// update stats, sweep lists
if large {
memstats.heap_objects++
- memstats.heap_alloc += uint64(npage << _PageShift)
+ memstats.heap_live += uint64(npage << _PageShift)
// Swept spans are at the end of lists.
if s.npages < uintptr(len(h.free)) {
mSpanList_InsertBack(&h.busy[s.npages], s)
systemstack(func() {
mp := getg().m
lock(&h.lock)
- memstats.heap_alloc += uint64(mp.mcache.local_cachealloc)
+ memstats.heap_live += uint64(mp.mcache.local_cachealloc)
mp.mcache.local_cachealloc = 0
memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
mp.mcache.local_tinyallocs = 0
if acct != 0 {
- memstats.heap_alloc -= uint64(s.npages << _PageShift)
memstats.heap_objects--
}
mHeap_FreeSpanLocked(h, s, true, true, 0)