runtime: use a controller to control the scavenge rate

author Michael Anthony Knyszek <mknyszek@google.com>

Mon, 4 Oct 2021 21:44:06 +0000 (21:44 +0000)

committer Michael Knyszek <mknyszek@google.com>

Fri, 5 Nov 2021 17:46:59 +0000 (17:46 +0000)
author Michael Anthony Knyszek <mknyszek@google.com>
Mon, 4 Oct 2021 21:44:06 +0000 (21:44 +0000)
committer Michael Knyszek <mknyszek@google.com>
Fri, 5 Nov 2021 17:46:59 +0000 (17:46 +0000)
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go

index 230e78b000d544e38cf461d67493882eee5e37c6..5b699cb298381e8eb63a8c779790b3236153a19c 100644 (file)
--- a/src/runtime/mgcpacer.go
+++ b/src/runtime/mgcpacer.go
@@ -349,9 +349,6 @@ func (c *gcControllerState) init(gcPercent int32) {
                         kp: 0.9,
                         ti: 4.0,
  
-                       // An update is done once per GC cycle.
-                       period: 1,
-
                         // Set a high reset time in GC cycles.
                         // This is inversely proportional to the rate at which we
                         // accumulate error from clipping. By making this very high
@@ -677,8 +674,9 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) floa
                         (float64(scanWork) * (1 - utilization))
  
                 // Update cons/mark controller.
+               // Period for this is 1 GC cycle.
                 oldConsMark := c.consMark
-               c.consMark = c.consMarkController.next(c.consMark, currentConsMark)
+               c.consMark = c.consMarkController.next(c.consMark, currentConsMark, 1.0)
  
                 if debug.gcpacertrace > 0 {
                         printlock()
@@ -1259,10 +1257,7 @@ func readGOGC() int32 {
  type piController struct {
         kp float64 // Proportional constant.
         ti float64 // Integral time constant.
-       tt float64 // Reset time in GC cyles.
-
-       // Period in GC cycles between updates.
-       period float64
+       tt float64 // Reset time.
  
         min, max float64 // Output boundaries.
  
@@ -1271,7 +1266,7 @@ type piController struct {
         errIntegral float64 // Integral of the error from t=0 to now.
  }
  
-func (c *piController) next(input, setpoint float64) float64 {
+func (c *piController) next(input, setpoint, period float64) float64 {
         // Compute the raw output value.
         prop := c.kp * (setpoint - input)
         rawOutput := prop + c.errIntegral
@@ -1286,7 +1281,7 @@ func (c *piController) next(input, setpoint float64) float64 {
  
         // Update the controller's state.
         if c.ti != 0 && c.tt != 0 {
-               c.errIntegral += (c.kp*c.period/c.ti)*(setpoint-input) + (c.period/c.tt)*(output-rawOutput)
+               c.errIntegral += (c.kp*period/c.ti)*(setpoint-input) + (period/c.tt)*(output-rawOutput)
         }
         return output
  }
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go

index 72ec81e5e3a0ab9cf00b56c2c36d0c9d66fb0170..a2a88e94d24b9b0bd8dae958578cc7ed9b7f91fb 100644 (file)
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -270,35 +270,85 @@ func bgscavenge(c chan int) {
         c <- 1
         goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
  
-       // Exponentially-weighted moving average of the fraction of time this
-       // goroutine spends scavenging (that is, percent of a single CPU).
-       // It represents a measure of scheduling overheads which might extend
-       // the sleep or the critical time beyond what's expected. Assume no
-       // overhead to begin with.
+       // idealFraction is the ideal % of overall application CPU time that we
+       // spend scavenging.
         //
-       // TODO(mknyszek): Consider making this based on total CPU time of the
-       // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
-       // feasible now because the scavenger acquires the heap lock over the
-       // scavenging operation, which means scavenging effectively blocks
-       // allocators and isn't scalable. However, given a scalable allocator,
-       // it makes sense to also make the scavenger scale with it; if you're
-       // allocating more frequently, then presumably you're also generating
-       // more work for the scavenger.
-       const idealFraction = scavengePercent / 100.0
-       scavengeEWMA := float64(idealFraction)
-
+       // TODO(mknyszek): Currently this is percent of one CPU (hence the division
+       // by gomaxprocs), but ideally this should be 1% of overall CPU time.
+       // Given a scalable memory allocator, it makes sense that the scavenger
+       // should scale with it; if you're allocating more frequently, then presumably
+       // you're also generating more work from the scavenger.
+       idealFraction := float64(scavengePercent) / 100.0 / float64(gomaxprocs)
+
+       // Input: fraction of CPU time used.
+       // Setpoint: idealFraction.
+       // Output: ratio of critical time to sleep time (determines sleep time).
+       //
+       // The output of this controller is somewhat indirect to what we actually
+       // want to achieve: how much time to sleep for. The reason for this definition
+       // is to ensure that the controller's outputs have a direct relationship with
+       // its inputs (as opposed to an inverse relationship), making it somewhat
+       // easier to reason about for tuning purposes.
+       critSleepController := piController{
+               // Tuned loosely via Ziegler-Nichols process.
+               kp: 0.3375,
+               ti: 3.2e6,
+               tt: 1e9, // 1 second reset time.
+
+               // These ranges seem wide, but we want to give the controller plenty of
+               // room to hunt for the optimal value.
+               min: 0.001,  // 1:1000
+               max: 1000.0, // 1000:1
+       }
+       // It doesn't really matter what value we start at, but we can't be zero, because
+       // that'll cause divide-by-zero issues.
+       critSleepRatio := 0.001
         for {
                 released := uintptr(0)
                 crit := float64(0)
  
-               // If background scavenging is disabled or if there's no work to do just park.
-               retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
-               if retained > goal {
-                       // Scavenge one page, and measure the amount of time spent scavenging.
+               // Spend at least 1 ms scavenging, otherwise the corresponding
+               // sleep time to maintain our desired utilization is too low to
+               // be reliable.
+               const minCritTime = 1e6
+               for crit < minCritTime {
+                       // If background scavenging is disabled or if there's no work to do just park.
+                       retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
+                       if retained <= goal {
+                               break
+                       }
+
+                       // scavengeQuantum is the amount of memory we try to scavenge
+                       // in one go. A smaller value means the scavenger is more responsive
+                       // to the scheduler in case of e.g. preemption. A larger value means
+                       // that the overheads of scavenging are better amortized, so better
+                       // scavenging throughput.
+                       //
+                       // The current value is chosen assuming a cost of ~10µs/physical page
+                       // (this is somewhat pessimistic), which implies a worst-case latency of
+                       // about 160µs for 4 KiB physical pages. The current value is biased
+                       // toward latency over throughput.
+                       const scavengeQuantum = 64 << 10
+
+                       // Accumulate the amount of time spent scavenging.
                         start := nanotime()
-                       released = mheap_.pages.scavenge(physPageSize)
+                       released = mheap_.pages.scavenge(scavengeQuantum)
                         atomic.Xadduintptr(&mheap_.pages.scav.released, released)
-                       crit = float64(nanotime() - start)
+                       end := nanotime()
+
+                       // On some platforms we may see end >= start if the time it takes to scavenge
+                       // memory is less than the minimum granularity of its clock (e.g. Windows) or
+                       // due to clock bugs.
+                       //
+                       // In this case, just assume scavenging takes 10 µs per regular physical page
+                       // (determined empirically), and conservatively ignore the impact of huge pages
+                       // on timing.
+                       const approxCritNSPerPhysicalPage = 10e3
+                       if end <= start {
+                               crit += approxCritNSPerPhysicalPage * float64(released/physPageSize)
+                       } else {
+                               crit += float64(end - start)
+                       }
                 }
  
                 if released == 0 {
@@ -316,18 +366,13 @@ func bgscavenge(c chan int) {
                         throw("released less than one physical page of memory")
                 }
  
-               // On some platforms we may see crit as zero if the time it takes to scavenge
-               // memory is less than the minimum granularity of its clock (e.g. Windows).
-               // In this case, just assume scavenging takes 10 µs per regular physical page
-               // (determined empirically), and conservatively ignore the impact of huge pages
-               // on timing.
-               //
-               // We shouldn't ever see a crit value less than zero unless there's a bug of
-               // some kind, either on our side or in the platform we're running on, but be
-               // defensive in that case as well.
-               const approxCritNSPerPhysicalPage = 10e3
-               if crit <= 0 {
-                       crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
+               if crit < minCritTime {
+                       // This means there wasn't enough work to actually fill up minCritTime.
+                       // That's fine; we shouldn't try to do anything with this information
+                       // because it's going result in a short enough sleep request that things
+                       // will get messy. Just assume we did at least this much work.
+                       // All this means is that we'll sleep longer than we otherwise would have.
+                       crit = minCritTime
                 }
  
                 // Multiply the critical time by 1 + the ratio of the costs of using
@@ -338,41 +383,19 @@ func bgscavenge(c chan int) {
                 // because of the additional overheads of using scavenged memory.
                 crit *= 1 + scavengeCostRatio
  
-               // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
-               // put their machine to sleep) in the critical section, bound the time we use to
-               // calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
-               const maxCrit = 10e6
-               if crit > maxCrit {
-                       crit = maxCrit
-               }
+               // Go to sleep for our current sleepNS.
+               slept := scavengeSleep(int64(crit / critSleepRatio))
  
-               // Compute the amount of time to sleep, assuming we want to use at most
-               // scavengePercent of CPU time. Take into account scheduling overheads
-               // that may extend the length of our sleep by multiplying by how far
-               // off we are from the ideal ratio. For example, if we're sleeping too
-               // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time
-               // down.
-               adjust := scavengeEWMA / idealFraction
-               sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
-
-               // Go to sleep.
-               slept := scavengeSleep(sleepTime)
-
-               // Compute the new ratio.
-               fraction := crit / (crit + float64(slept))
-
-               // Set a lower bound on the fraction.
-               // Due to OS-related anomalies we may "sleep" for an inordinate amount
-               // of time. Let's avoid letting the ratio get out of hand by bounding
-               // the sleep time we use in our EWMA.
-               const minFraction = 1.0 / 1000.0
-               if fraction < minFraction {
-                       fraction = minFraction
-               }
-
-               // Update scavengeEWMA by merging in the new crit/slept ratio.
-               const alpha = 0.5
-               scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
+               // Calculate the CPU time spent.
+               //
+               // This may be slightly inaccurate with respect to GOMAXPROCS, but we're
+               // recomputing this often enough relative to GOMAXPROCS changes in general
+               // (it only changes when the world is stopped, and not during a GC) that
+               // that small inaccuracy is in the noise.
+               cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
+
+               // Update the critSleepRatio, adjusting until we reach our ideal fraction.
+               critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
         }
  }
author	Michael Anthony Knyszek <mknyszek@google.com>
	Mon, 4 Oct 2021 21:44:06 +0000 (21:44 +0000)
committer	Michael Knyszek <mknyszek@google.com>
	Fri, 5 Nov 2021 17:46:59 +0000 (17:46 +0000)
src/runtime/mgcpacer.go		patch \| blob \| history
src/runtime/mgcscavenge.go		patch \| blob \| history