runtime: make the scavenger self-paced

author Michael Anthony Knyszek <mknyszek@google.com>

Thu, 17 Oct 2019 15:35:54 +0000 (15:35 +0000)

committer Michael Knyszek <mknyszek@google.com>

Thu, 7 Nov 2019 20:12:18 +0000 (20:12 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Thu, 17 Oct 2019 15:35:54 +0000 (15:35 +0000)
committer Michael Knyszek <mknyszek@google.com>
Thu, 7 Nov 2019 20:12:18 +0000 (20:12 +0000)
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go

index aeab2d60e000a65d7f3471024a3f4d7c8b7098b8..3320ee536a97e1b3f6c4b27e9356839d7855845d 100644 (file)
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -65,20 +65,7 @@ const (
         //
         // scavengePercent represents the portion of mutator time we're willing
         // to spend on scavenging in percent.
-       //
-       // scavengePageLatency is a worst-case estimate (order-of-magnitude) of
-       // the time it takes to scavenge one (regular-sized) page of memory.
-       // scavengeHugePageLatency is the same but for huge pages.
-       //
-       // scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
-       // and represents the average time between scavenging one page that we're
-       // aiming for. scavengeHugePagePeriod is the same but for huge pages.
-       // These constants are core to the scavenge pacing algorithm.
-       scavengePercent         = 1    // 1%
-       scavengePageLatency     = 10e3 // 10µs
-       scavengeHugePageLatency = 10e3 // 10µs
-       scavengePagePeriod      = scavengePageLatency / (scavengePercent / 100.0)
-       scavengeHugePagePeriod  = scavengePageLatency / (scavengePercent / 100.0)
+       scavengePercent = 1 // 1%
  
         // retainExtraPercent represents the amount of memory over the heap goal
         // that the scavenger should keep as a buffer space for the allocator.
@@ -113,7 +100,7 @@ func gcPaceScavenger() {
         // information about the heap yet) so this is fine, and avoids a fault
         // or garbage data later.
         if memstats.last_next_gc == 0 {
-               mheap_.scavengeBytesPerNS = 0
+               mheap_.scavengeGoal = ^uint64(0)
                 return
         }
         // Compute our scavenging goal.
@@ -141,67 +128,14 @@ func gcPaceScavenger() {
         // physical page.
         retainedNow := heapRetained()
  
-       // If we're already below our goal or there's less the one physical page
-       // worth of work to do, publish the goal in case it changed then disable
+       // If we're already below our goal, or within one page of our goal, then disable
         // the background scavenger. We disable the background scavenger if there's
-       // less than one physical page of work to do to avoid a potential divide-by-zero
-       // in the calculations below (totalTime will be zero), and it's not worth
-       // turning on the scavenger for less than one page of work.
+       // less than one physical page of work to do because it's not worth it.
         if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
-               mheap_.scavengeRetainedGoal = retainedGoal
-               mheap_.scavengeBytesPerNS = 0
+               mheap_.scavengeGoal = ^uint64(0)
                 return
         }
-
-       // Now we start to compute the total amount of work necessary and the total
-       // amount of time we're willing to give the scavenger to complete this work.
-       // This will involve calculating how much of the work consists of huge pages
-       // and how much consists of regular pages since the former can let us scavenge
-       // more memory in the same time.
-       totalWork := retainedNow - retainedGoal
-
-       // On systems without huge page support, all work is regular work.
-       regularWork := totalWork
-       hugeTime := uint64(0)
-
-       // On systems where we have huge pages, we want to do as much of the
-       // scavenging work as possible on huge pages, because the costs are the
-       // same per page, but we can give back more more memory in a shorter
-       // period of time.
-       if physHugePageSize != 0 {
-               // Start by computing the amount of free memory we have in huge pages
-               // in total. Trivially, this is all the huge page work we need to do.
-               hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift
-
-               // ...but it could turn out that there's more huge work to do than
-               // total work, so cap it at total work. This might happen for very large
-               // heaps where the additional factor of retainExtraPercent can make it so
-               // that there are free chunks of memory larger than a huge page that we don't want
-               // to scavenge.
-               if hugeWork >= totalWork {
-                       hugePages := totalWork >> physHugePageShift
-                       hugeWork = hugePages << physHugePageShift
-               }
-               // Everything that's not huge work is regular work. At this point we
-               // know huge work so we can calculate how much time that will take
-               // based on scavengePageRate (which applies to pages of any size).
-               regularWork = totalWork - hugeWork
-               hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
-       }
-       // Finally, we can compute how much time it'll take to do the regular work
-       // and the total time to do all the work.
-       regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
-       totalTime := hugeTime + regularTime
-
-       now := nanotime()
-
-       // Update all the pacing parameters in mheap with scavenge.lock held,
-       // so that scavenge.gen is kept in sync with the updated values.
-       mheap_.scavengeRetainedGoal = retainedGoal
-       mheap_.scavengeRetainedBasis = retainedNow
-       mheap_.scavengeTimeBasis = now
-       mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
-       mheap_.scavengeGen++ // increase scavenge generation
+       mheap_.scavengeGoal = retainedGoal
  }
  
  // Sleep/wait state of the background scavenger.
@@ -210,18 +144,6 @@ var scavenge struct {
         g      *g
         parked bool
         timer  *timer
-
-       // Generation counter.
-       //
-       // It represents the last generation count (as defined by
-       // mheap_.scavengeGen) checked by the scavenger and is updated
-       // each time the scavenger checks whether it is on-pace.
-       //
-       // Skew between this field and mheap_.scavengeGen is used to
-       // determine whether a new update is available.
-       //
-       // Protected by mheap_.lock.
-       gen uint64
  }
  
  // wakeScavenger unparks the scavenger if necessary. It must be called
@@ -254,37 +176,24 @@ func wakeScavenger() {
  // The scavenger may be woken up earlier by a pacing change, and it may not go
  // to sleep at all if there's a pending pacing change.
  //
-// Returns false if awoken early (i.e. true means a complete sleep).
-func scavengeSleep(ns int64) bool {
+// Returns the amount of time actually slept.
+func scavengeSleep(ns int64) int64 {
         lock(&scavenge.lock)
  
-       // First check if there's a pending update.
-       // If there is one, don't bother sleeping.
-       var hasUpdate bool
-       systemstack(func() {
-               lock(&mheap_.lock)
-               hasUpdate = mheap_.scavengeGen != scavenge.gen
-               unlock(&mheap_.lock)
-       })
-       if hasUpdate {
-               unlock(&scavenge.lock)
-               return false
-       }
-
         // Set the timer.
         //
         // This must happen here instead of inside gopark
         // because we can't close over any variables without
         // failing escape analysis.
-       now := nanotime()
-       resetTimer(scavenge.timer, now+ns)
+       start := nanotime()
+       resetTimer(scavenge.timer, start+ns)
  
         // Mark ourself as asleep and go to sleep.
         scavenge.parked = true
         goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
  
-       // Return true if we completed the full sleep.
-       return (nanotime() - now) >= ns
+       // Return how long we actually slept for.
+       return nanotime() - start
  }
  
  // Background scavenger.
@@ -306,111 +215,98 @@ func bgscavenge(c chan int) {
         c <- 1
         goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
  
-       // Parameters for sleeping.
-       //
-       // If we end up doing more work than we need, we should avoid spinning
-       // until we have more work to do: instead, we know exactly how much time
-       // until more work will need to be done, so we sleep.
-       //
-       // We should avoid sleeping for less than minSleepNS because Gosched()
-       // overheads among other things will work out better in that case.
+       // Exponentially-weighted moving average of the fraction of time this
+       // goroutine spends scavenging (that is, percent of a single CPU).
+       // It represents a measure of scheduling overheads which might extend
+       // the sleep or the critical time beyond what's expected. Assume no
+       // overhead to begin with.
         //
-       // There's no reason to set a maximum on sleep time because we'll always
-       // get woken up earlier if there's any kind of update that could change
-       // the scavenger's pacing.
-       //
-       // retryDelayNS tracks how much to sleep next time we fail to do any
-       // useful work.
-       const minSleepNS = int64(100 * 1000) // 100 µs
-
-       retryDelayNS := minSleepNS
+       // TODO(mknyszek): Consider making this based on total CPU time of the
+       // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
+       // feasible now because the scavenger acquires the heap lock over the
+       // scavenging operation, which means scavenging effectively blocks
+       // allocators and isn't scalable. However, given a scalable allocator,
+       // it makes sense to also make the scavenger scale with it; if you're
+       // allocating more frequently, then presumably you're also generating
+       // more work for the scavenger.
+       const idealFraction = scavengePercent / 100.0
+       scavengeEWMA := float64(idealFraction)
  
         for {
                 released := uintptr(0)
-               park := false
-               ttnext := int64(0)
+
+               // Time in scavenging critical section.
+               crit := int64(0)
  
                 // Run on the system stack since we grab the heap lock,
                 // and a stack growth with the heap lock means a deadlock.
                 systemstack(func() {
                         lock(&mheap_.lock)
  
-                       // Update the last generation count that the scavenger has handled.
-                       scavenge.gen = mheap_.scavengeGen
-
                         // If background scavenging is disabled or if there's no work to do just park.
-                       retained := heapRetained()
-                       if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
+                       retained, goal := heapRetained(), mheap_.scavengeGoal
+                       if retained <= goal {
                                 unlock(&mheap_.lock)
-                               park = true
                                 return
                         }
  
-                       // Calculate how big we want the retained heap to be
-                       // at this point in time.
-                       //
-                       // The formula is for that of a line, y = b - mx
-                       // We want y (want),
-                       //   m = scavengeBytesPerNS (> 0)
-                       //   x = time between scavengeTimeBasis and now
-                       //   b = scavengeRetainedBasis
-                       rate := mheap_.scavengeBytesPerNS
-                       tdist := nanotime() - mheap_.scavengeTimeBasis
-                       rdist := uint64(rate * float64(tdist))
-                       want := mheap_.scavengeRetainedBasis - rdist
-
-                       // If we're above the line, scavenge to get below the
-                       // line.
-                       if retained > want {
-                               released = mheap_.scavengeLocked(uintptr(retained - want))
-                       }
-                       unlock(&mheap_.lock)
+                       // Scavenge one page, and measure the amount of time spent scavenging.
+                       start := nanotime()
+                       released = mheap_.scavengeLocked(physPageSize)
+                       crit = nanotime() - start
  
-                       // If we over-scavenged a bit, calculate how much time it'll
-                       // take at the current rate for us to make that up. We definitely
-                       // won't have any work to do until at least that amount of time
-                       // passes.
-                       if released > uintptr(retained-want) {
-                               extra := released - uintptr(retained-want)
-                               ttnext = int64(float64(extra) / rate)
-                       }
+                       unlock(&mheap_.lock)
                 })
  
-               if park {
-                       lock(&scavenge.lock)
-                       scavenge.parked = true
-                       goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
-                       continue
-               }
-
                 if debug.gctrace > 0 {
                         if released > 0 {
-                               print("scvg: ", released>>20, " MB released\n")
+                               print("scvg: ", released>>10, " KB released\n")
                         }
                         print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
                 }
  
                 if released == 0 {
-                       // If we were unable to release anything this may be because there's
-                       // no free memory available to scavenge. Go to sleep and try again.
-                       if scavengeSleep(retryDelayNS) {
-                               // If we successfully slept through the delay, back off exponentially.
-                               retryDelayNS *= 2
-                       }
+                       lock(&scavenge.lock)
+                       scavenge.parked = true
+                       goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
                         continue
                 }
-               retryDelayNS = minSleepNS
  
-               if ttnext > 0 && ttnext > minSleepNS {
-                       // If there's an appreciable amount of time until the next scavenging
-                       // goal, just sleep. We'll get woken up if anything changes and this
-                       // way we avoid spinning.
-                       scavengeSleep(ttnext)
-                       continue
+               // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
+               // put their machine to sleep) in the critical section, bound the time we use to
+               // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
+               const maxCrit = 10e6
+               if crit > maxCrit {
+                       crit = maxCrit
+               }
+
+               // Compute the amount of time to sleep, assuming we want to use at most
+               // scavengePercent of CPU time. Take into account scheduling overheads
+               // that may extend the length of our sleep by multiplying by how far
+               // off we are from the ideal ratio. For example, if we're sleeping too
+               // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time
+               // down.
+               adjust := scavengeEWMA / idealFraction
+               sleepTime := int64(adjust * float64(crit) / (scavengePercent / 100.0))
+
+               // Go to sleep.
+               slept := scavengeSleep(sleepTime)
+
+               // Compute the new ratio.
+               fraction := float64(crit) / float64(crit+slept)
+
+               // Set a lower bound on the fraction.
+               // Due to OS-related anomalies we may "sleep" for an inordinate amount
+               // of time. Let's avoid letting the ratio get out of hand by bounding
+               // the sleep time we use in our EWMA.
+               const minFraction = 1 / 1000
+               if fraction < minFraction {
+                       fraction = minFraction
                 }
  
-               // Give something else a chance to run, no locks are held.
-               Gosched()
+               // Update scavengeEWMA by merging in the new crit/slept ratio.
+               const alpha = 0.5
+               scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
         }
  }
  
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index c09ef0f57ccdd9efb12d48b8f168e0f036fd76e8..dfa4b4bfc6d56976449f7e167157d34a5442097b 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -89,25 +89,10 @@ type mheap struct {
         // TODO(austin): pagesInUse should be a uintptr, but the 386
         // compiler can't 8-byte align fields.
  
-       // Scavenger pacing parameters
-       //
-       // The two basis parameters and the scavenge ratio parallel the proportional
-       // sweeping implementation, the primary differences being that:
-       //  * Scavenging concerns itself with RSS, estimated as heapRetained()
-       //  * Rather than pacing the scavenger to the GC, it is paced to a
-       //    time-based rate computed in gcPaceScavenger.
-       //
-       // scavengeRetainedGoal represents our goal RSS.
-       //
-       // All fields must be accessed with lock.
-       //
-       // TODO(mknyszek): Consider abstracting the basis fields and the scavenge ratio
-       // into its own type so that this logic may be shared with proportional sweeping.
-       scavengeTimeBasis     int64
-       scavengeRetainedBasis uint64
-       scavengeBytesPerNS    float64
-       scavengeRetainedGoal  uint64
-       scavengeGen           uint64 // incremented on each pacing update
+       // scavengeGoal is the amount of total retained heap memory (measured by
+       // heapRetained) that the runtime will try to maintain by returning memory
+       // to the OS.
+       scavengeGoal uint64
  
         // Page reclaimer state
  
@@ -1561,17 +1546,17 @@ func (h *mheap) scavengeLocked(nbytes uintptr) uintptr {
         return released
  }
  
-// scavengeIfNeededLocked calls scavengeLocked if we're currently above the
-// scavenge goal in order to prevent the mutator from out-running the
-// the scavenger.
+// scavengeIfNeededLocked scavenges memory assuming that size bytes of memory
+// will become unscavenged soon. It only scavenges enough to bring heapRetained
+// back down to the scavengeGoal.
  //
  // h must be locked.
  func (h *mheap) scavengeIfNeededLocked(size uintptr) {
-       if r := heapRetained(); r+uint64(size) > h.scavengeRetainedGoal {
+       if r := heapRetained(); r+uint64(size) > h.scavengeGoal {
                 todo := uint64(size)
                 // If we're only going to go a little bit over, just request what
                 // we actually need done.
-               if overage := r + uint64(size) - h.scavengeRetainedGoal; overage < todo {
+               if overage := r + uint64(size) - h.scavengeGoal; overage < todo {
                         todo = overage
                 }
                 h.scavengeLocked(uintptr(todo))
author	Michael Anthony Knyszek <mknyszek@google.com>
	Thu, 17 Oct 2019 15:35:54 +0000 (15:35 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Thu, 7 Nov 2019 20:12:18 +0000 (20:12 +0000)
src/runtime/mgcscavenge.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history