//
// scavengePercent represents the portion of mutator time we're willing
// to spend on scavenging in percent.
- //
- // scavengePageLatency is a worst-case estimate (order-of-magnitude) of
- // the time it takes to scavenge one (regular-sized) page of memory.
- // scavengeHugePageLatency is the same but for huge pages.
- //
- // scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
- // and represents the average time between scavenging one page that we're
- // aiming for. scavengeHugePagePeriod is the same but for huge pages.
- // These constants are core to the scavenge pacing algorithm.
- scavengePercent = 1 // 1%
- scavengePageLatency = 10e3 // 10µs
- scavengeHugePageLatency = 10e3 // 10µs
- scavengePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
- scavengeHugePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
+ scavengePercent = 1 // 1%
// retainExtraPercent represents the amount of memory over the heap goal
// that the scavenger should keep as a buffer space for the allocator.
// information about the heap yet) so this is fine, and avoids a fault
// or garbage data later.
if memstats.last_next_gc == 0 {
- mheap_.scavengeBytesPerNS = 0
+ mheap_.scavengeGoal = ^uint64(0)
return
}
// Compute our scavenging goal.
// physical page.
retainedNow := heapRetained()
- // If we're already below our goal or there's less the one physical page
- // worth of work to do, publish the goal in case it changed then disable
+ // If we're already below our goal, or within one page of our goal, then disable
// the background scavenger. We disable the background scavenger if there's
- // less than one physical page of work to do to avoid a potential divide-by-zero
- // in the calculations below (totalTime will be zero), and it's not worth
- // turning on the scavenger for less than one page of work.
+ // less than one physical page of work to do because it's not worth it.
if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeBytesPerNS = 0
+ mheap_.scavengeGoal = ^uint64(0)
return
}
-
- // Now we start to compute the total amount of work necessary and the total
- // amount of time we're willing to give the scavenger to complete this work.
- // This will involve calculating how much of the work consists of huge pages
- // and how much consists of regular pages since the former can let us scavenge
- // more memory in the same time.
- totalWork := retainedNow - retainedGoal
-
- // On systems without huge page support, all work is regular work.
- regularWork := totalWork
- hugeTime := uint64(0)
-
- // On systems where we have huge pages, we want to do as much of the
- // scavenging work as possible on huge pages, because the costs are the
- // same per page, but we can give back more more memory in a shorter
- // period of time.
- if physHugePageSize != 0 {
- // Start by computing the amount of free memory we have in huge pages
- // in total. Trivially, this is all the huge page work we need to do.
- hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift
-
- // ...but it could turn out that there's more huge work to do than
- // total work, so cap it at total work. This might happen for very large
- // heaps where the additional factor of retainExtraPercent can make it so
- // that there are free chunks of memory larger than a huge page that we don't want
- // to scavenge.
- if hugeWork >= totalWork {
- hugePages := totalWork >> physHugePageShift
- hugeWork = hugePages << physHugePageShift
- }
- // Everything that's not huge work is regular work. At this point we
- // know huge work so we can calculate how much time that will take
- // based on scavengePageRate (which applies to pages of any size).
- regularWork = totalWork - hugeWork
- hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
- }
- // Finally, we can compute how much time it'll take to do the regular work
- // and the total time to do all the work.
- regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
- totalTime := hugeTime + regularTime
-
- now := nanotime()
-
- // Update all the pacing parameters in mheap with scavenge.lock held,
- // so that scavenge.gen is kept in sync with the updated values.
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeRetainedBasis = retainedNow
- mheap_.scavengeTimeBasis = now
- mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
- mheap_.scavengeGen++ // increase scavenge generation
+ mheap_.scavengeGoal = retainedGoal
}
// Sleep/wait state of the background scavenger.
g *g
parked bool
timer *timer
-
- // Generation counter.
- //
- // It represents the last generation count (as defined by
- // mheap_.scavengeGen) checked by the scavenger and is updated
- // each time the scavenger checks whether it is on-pace.
- //
- // Skew between this field and mheap_.scavengeGen is used to
- // determine whether a new update is available.
- //
- // Protected by mheap_.lock.
- gen uint64
}
// wakeScavenger unparks the scavenger if necessary. It must be called
// The scavenger may be woken up earlier by a pacing change, and it may not go
// to sleep at all if there's a pending pacing change.
//
-// Returns false if awoken early (i.e. true means a complete sleep).
-func scavengeSleep(ns int64) bool {
+// Returns the amount of time actually slept.
+func scavengeSleep(ns int64) int64 {
lock(&scavenge.lock)
- // First check if there's a pending update.
- // If there is one, don't bother sleeping.
- var hasUpdate bool
- systemstack(func() {
- lock(&mheap_.lock)
- hasUpdate = mheap_.scavengeGen != scavenge.gen
- unlock(&mheap_.lock)
- })
- if hasUpdate {
- unlock(&scavenge.lock)
- return false
- }
-
// Set the timer.
//
// This must happen here instead of inside gopark
// because we can't close over any variables without
// failing escape analysis.
- now := nanotime()
- resetTimer(scavenge.timer, now+ns)
+ start := nanotime()
+ resetTimer(scavenge.timer, start+ns)
// Mark ourself as asleep and go to sleep.
scavenge.parked = true
goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
- // Return true if we completed the full sleep.
- return (nanotime() - now) >= ns
+ // Return how long we actually slept for.
+ return nanotime() - start
}
// Background scavenger.
c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- // Parameters for sleeping.
- //
- // If we end up doing more work than we need, we should avoid spinning
- // until we have more work to do: instead, we know exactly how much time
- // until more work will need to be done, so we sleep.
- //
- // We should avoid sleeping for less than minSleepNS because Gosched()
- // overheads among other things will work out better in that case.
+ // Exponentially-weighted moving average of the fraction of time this
+ // goroutine spends scavenging (that is, percent of a single CPU).
+ // It represents a measure of scheduling overheads which might extend
+ // the sleep or the critical time beyond what's expected. Assume no
+ // overhead to begin with.
//
- // There's no reason to set a maximum on sleep time because we'll always
- // get woken up earlier if there's any kind of update that could change
- // the scavenger's pacing.
- //
- // retryDelayNS tracks how much to sleep next time we fail to do any
- // useful work.
- const minSleepNS = int64(100 * 1000) // 100 µs
-
- retryDelayNS := minSleepNS
+ // TODO(mknyszek): Consider making this based on total CPU time of the
+ // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
+ // feasible now because the scavenger acquires the heap lock over the
+ // scavenging operation, which means scavenging effectively blocks
+ // allocators and isn't scalable. However, given a scalable allocator,
+ // it makes sense to also make the scavenger scale with it; if you're
+ // allocating more frequently, then presumably you're also generating
+ // more work for the scavenger.
+ const idealFraction = scavengePercent / 100.0
+ scavengeEWMA := float64(idealFraction)
for {
released := uintptr(0)
- park := false
- ttnext := int64(0)
+
+ // Time in scavenging critical section.
+ crit := int64(0)
// Run on the system stack since we grab the heap lock,
// and a stack growth with the heap lock means a deadlock.
systemstack(func() {
lock(&mheap_.lock)
- // Update the last generation count that the scavenger has handled.
- scavenge.gen = mheap_.scavengeGen
-
// If background scavenging is disabled or if there's no work to do just park.
- retained := heapRetained()
- if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
+ retained, goal := heapRetained(), mheap_.scavengeGoal
+ if retained <= goal {
unlock(&mheap_.lock)
- park = true
return
}
- // Calculate how big we want the retained heap to be
- // at this point in time.
- //
- // The formula is for that of a line, y = b - mx
- // We want y (want),
- // m = scavengeBytesPerNS (> 0)
- // x = time between scavengeTimeBasis and now
- // b = scavengeRetainedBasis
- rate := mheap_.scavengeBytesPerNS
- tdist := nanotime() - mheap_.scavengeTimeBasis
- rdist := uint64(rate * float64(tdist))
- want := mheap_.scavengeRetainedBasis - rdist
-
- // If we're above the line, scavenge to get below the
- // line.
- if retained > want {
- released = mheap_.scavengeLocked(uintptr(retained - want))
- }
- unlock(&mheap_.lock)
+ // Scavenge one page, and measure the amount of time spent scavenging.
+ start := nanotime()
+ released = mheap_.scavengeLocked(physPageSize)
+ crit = nanotime() - start
- // If we over-scavenged a bit, calculate how much time it'll
- // take at the current rate for us to make that up. We definitely
- // won't have any work to do until at least that amount of time
- // passes.
- if released > uintptr(retained-want) {
- extra := released - uintptr(retained-want)
- ttnext = int64(float64(extra) / rate)
- }
+ unlock(&mheap_.lock)
})
- if park {
- lock(&scavenge.lock)
- scavenge.parked = true
- goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- continue
- }
-
if debug.gctrace > 0 {
if released > 0 {
- print("scvg: ", released>>20, " MB released\n")
+ print("scvg: ", released>>10, " KB released\n")
}
print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
}
if released == 0 {
- // If we were unable to release anything this may be because there's
- // no free memory available to scavenge. Go to sleep and try again.
- if scavengeSleep(retryDelayNS) {
- // If we successfully slept through the delay, back off exponentially.
- retryDelayNS *= 2
- }
+ lock(&scavenge.lock)
+ scavenge.parked = true
+ goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
continue
}
- retryDelayNS = minSleepNS
- if ttnext > 0 && ttnext > minSleepNS {
- // If there's an appreciable amount of time until the next scavenging
- // goal, just sleep. We'll get woken up if anything changes and this
- // way we avoid spinning.
- scavengeSleep(ttnext)
- continue
+ // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
+ // put their machine to sleep) in the critical section, bound the time we use to
+ // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
+ const maxCrit = 10e6
+ if crit > maxCrit {
+ crit = maxCrit
+ }
+
+ // Compute the amount of time to sleep, assuming we want to use at most
+ // scavengePercent of CPU time. Take into account scheduling overheads
+ // that may extend the length of our sleep by multiplying by how far
+ // off we are from the ideal ratio. For example, if we're sleeping too
+ // much, then scavengeEWMA < idealFraction, so we'll adjust the sleep time
+ // down.
+ adjust := scavengeEWMA / idealFraction
+ sleepTime := int64(adjust * float64(crit) / (scavengePercent / 100.0))
+
+ // Go to sleep.
+ slept := scavengeSleep(sleepTime)
+
+ // Compute the new ratio.
+ fraction := float64(crit) / float64(crit+slept)
+
+ // Set a lower bound on the fraction.
+ // Due to OS-related anomalies we may "sleep" for an inordinate amount
+ // of time. Let's avoid letting the ratio get out of hand by bounding
+ // the sleep time we use in our EWMA.
+ //
+ // The bound is written with floating-point operands on purpose:
+ // 1 / 1000 is untyped integer constant division and evaluates to 0,
+ // which would turn the check below into a no-op.
+ const minFraction = 1.0 / 1000.0
+ if fraction < minFraction {
+ fraction = minFraction
}
- // Give something else a chance to run, no locks are held.
- Gosched()
+ // Update scavengeEWMA by merging in the new crit/slept ratio.
+ const alpha = 0.5
+ scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
}
}