sweep.nbgsweep = 0
sweep.npausesweep = 0
+ // If gcSweep didn't do it, finish the current heap profiling
+ // cycle and start a new heap profiling cycle. We do this
+ // before starting the world so events don't leak into the
+ // wrong cycle.
+ needProfCycle := _ConcurrentSweep && work.mode != gcForceBlockMode
+ if needProfCycle {
+ mProf_NextCycle()
+ }
+
systemstack(startTheWorldWithSema)
- // Update heap profile stats if gcSweep didn't do it. This is
- // relatively expensive, so we don't want to do it while the
- // world is stopped, but it needs to happen ASAP after
- // starting the world to prevent too many allocations from the
- // next cycle leaking in. It must happen before releasing
- // worldsema since there are applications that do a
- // runtime.GC() to update the heap profile and then
- // immediately collect the profile.
- if _ConcurrentSweep && work.mode != gcForceBlockMode {
- mProf_GC()
+ // Flush the heap profile so we can start a new cycle next GC.
+ // This is relatively expensive, so we don't do it with the
+ // world stopped.
+ if needProfCycle {
+ mProf_Flush()
}
// Free stack spans. This must be done between GC cycles.
systemstack(freeStackSpans)
// Sweep all spans eagerly.
for sweepone() != ^uintptr(0) {
sweep.npausesweep++
}
- // Do an additional mProf_GC, because all 'free' events are now real as well.
- mProf_GC()
- mProf_GC()
+ // All "free" events are now real, so flush everything
+ // into the published profile.
+ mProf_NextCycle()
+ mProf_Flush()
+ mProf_NextCycle()
+ mProf_Flush()
return
}
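// Editorial sketch (not part of this change): why the blocking-sweep
// path above needs two mProf_NextCycle/mProf_Flush pairs. If the
// cycle counter is c when the eager sweep finishes, the sweep's frees
// were accounted to cycle c+1 and the cycle's allocs to cycle c+2, so:
//
//	mProf_NextCycle() // counter -> c+1
//	mProf_Flush()     // publishes cycle c+1: the sweep's frees
//	mProf_NextCycle() // counter -> c+2
//	mProf_Flush()     // publishes cycle c+2: the cycle's allocs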
//
// alloc → ▲ ← free
// ┠┅┅┅┅┅┅┅┅┅┅┅P
- // r_a → p_a → allocs
- // p_f → frees
+ // C+2 → C+1 → C
//
// alloc → ▲ ← free
// ┠┅┅┅┅┅┅┅┅┅┅┅P
- // r_a → p_a → alloc
- // p_f → frees
+ // C+2 → C+1 → C
//
// Since we can't publish a consistent snapshot until all of
// the sweep frees are accounted for, we wait until the next
// mark termination ("MT" above) to publish the previous mark
- // termination's snapshot ("P" above). To do this, information
- // is delayed through "recent" and "prev" stages ("r_*" and
- // "p_*" above). Specifically:
+ // termination's snapshot ("P" above). To do this, allocation
+ // and free events are accounted to *future* heap profile
+ // cycles ("C+n" above) and we only publish a cycle once all
+ // of the events from that cycle are guaranteed to be done. Specifically:
//
- // Mallocs are accounted in recent stats.
- // Explicit frees are accounted in recent stats.
- // GC frees are accounted in prev stats.
- // After GC prev stats are added to final stats and
- // recent stats are moved into prev stats.
+ // Mallocs are accounted to cycle C+2.
+ // Explicit frees are accounted to cycle C+2.
+ // GC frees (done during sweeping) are accounted to cycle C+1.
+ //
+ // After mark termination, we increment the global heap
+ // profile cycle counter and accumulate the stats from cycle C
+ // into the active profile.
// active is the currently published profile. A profiling
// cycle can be accumulated into active once it's complete.
active memRecordCycle
- // changes between next-to-last GC and last GC
- prev memRecordCycle
-
- // changes since last GC
- recent memRecordCycle
+ // future records the profile events we're counting for cycles
+ // that have not yet been published. This is a ring buffer
+ // indexed by the global heap profile cycle C and stores
+ // cycles C, C+1, and C+2. Unlike active, these counts are
+ // only for a single cycle; they are not cumulative across
+ // cycles.
+ //
+ // We store cycle C here because there's a window between when
+ // C becomes the active cycle and when we've flushed it to
+ // active.
+ future [3]memRecordCycle
}
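// A minimal sketch (hypothetical helper, not in this change) of how
// the three ring slots map to roles for a given value c of the global
// cycle counter: mallocs land in the "allocs" slot, sweep frees in
// the "frees" slot, and flushing folds the "flush" slot into active.
func cycleSlots(c uint32) (flush, frees, allocs uint32) {
	n := uint32(len(memRecord{}.future)) // == 3
	return c % n, (c + 1) % n, (c + 2) % n
}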
// memRecordCycle
xbuckets *bucket // mutex profile buckets
buckhash *[179999]*bucket
bucketmem uintptr
+
+ mProf struct {
+ // All fields in mProf are protected by proflock.
+
+ // cycle is the global heap profile cycle. This wraps
+ // at mProfCycleWrap.
+ cycle uint32
+ // flushed indicates that future[cycle] in all buckets
+ // has been flushed to the active profile.
+ flushed bool
+ }
)
+const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
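// Editorial note: mProfCycleWrap is 3 * 2^25, a multiple of
// len(memRecord{}.future), so the ring index (cycle+k)%3 is unchanged
// when cycle wraps back to 0. Relying on plain uint32 overflow would
// corrupt the indexing, since 2^32 is not divisible by 3.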
+
// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket(typ bucketType, nstk int) *bucket {
size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
// (type-specific record sizing elided)
b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
b.typ = typ
b.nstk = uintptr(nstk)
return b
}
-func mprof_GC() {
- for b := mbuckets; b != nil; b = b.allnext {
- mp := b.mp()
-
- mp.active.add(&mp.prev)
- mp.prev = mp.recent
- mp.recent = memRecordCycle{}
- }
+// mProf_NextCycle publishes the next heap profile cycle and creates a
+// fresh heap profile cycle. This operation is fast and can be done
+// during STW. The caller must call mProf_Flush before calling
+// mProf_NextCycle again.
+//
+// This is called by mark termination during STW so allocations and
+// frees after the world is started again count towards a new heap
+// profiling cycle.
+func mProf_NextCycle() {
+ lock(&proflock)
+ // We explicitly wrap mProf.cycle rather than depending on
+ // uint wraparound because the memRecord.future ring does not
+ // itself wrap at a power of two.
+ mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
+ mProf.flushed = false
+ unlock(&proflock)
}
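// Editorial note: the "Flush before the next NextCycle" requirement
// above falls out of the ring having only three slots. Two advances
// without a flush would send new allocs to future[(c+4)%3], which is
// future[(c+1)%3], the slot still holding the unpublished frees that
// were recorded while the counter was c.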
-// Record that a gc just happened: all the 'recent' statistics are now real.
-func mProf_GC() {
+// mProf_Flush flushes the events from the current heap profiling
+// cycle into the active profile. After this it is safe to start a new
+// heap profiling cycle with mProf_NextCycle.
+//
+// This is called by GC after mark termination starts the world. In
+// contrast with mProf_NextCycle, this is somewhat expensive, but safe
+// to do concurrently.
+func mProf_Flush() {
lock(&proflock)
- mprof_GC()
+ if !mProf.flushed {
+ mProf_FlushLocked()
+ mProf.flushed = true
+ }
unlock(&proflock)
}
+func mProf_FlushLocked() {
+ c := mProf.cycle
+ for b := mbuckets; b != nil; b = b.allnext {
+ mp := b.mp()
+
+ // Flush cycle C into the published profile and clear
+ // it for reuse.
+ mpc := &mp.future[c%uint32(len(mp.future))]
+ mp.active.add(mpc)
+ *mpc = memRecordCycle{}
+ }
+}
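// Editorial note: re-flushing a cycle is harmless in effect, since
// mProf_FlushLocked zeroes each future slot after folding it into
// active, so a second pass only adds zeros. The flushed flag merely
// avoids repeating the O(buckets) walk; that is why MemProfile can
// call mProf_FlushLocked directly without consulting it.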
+
// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
var stk [maxStack]uintptr
nstk := callers(4, stk[:])
lock(&proflock)
b := stkbucket(memProfile, size, stk[:nstk], true)
+ c := mProf.cycle
mp := b.mp()
- mp.recent.allocs++
- mp.recent.alloc_bytes += size
+ mpc := &mp.future[(c+2)%uint32(len(mp.future))]
+ mpc.allocs++
+ mpc.alloc_bytes += size
unlock(&proflock)
// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
// Called when freeing a profiled block.
func mProf_Free(b *bucket, size uintptr) {
lock(&proflock)
+ c := mProf.cycle
mp := b.mp()
- mp.prev.frees++
- mp.prev.free_bytes += size
+ mpc := &mp.future[(c+1)%uint32(len(mp.future))]
+ mpc.frees++
+ mpc.free_bytes += size
unlock(&proflock)
}
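// Editorial sketch: the c+2/c+1 split also orders each object's
// events. A malloc recorded at counter c is published with cycle c+2;
// a sweep free of that object happens at some counter c' >= c+1 and
// is published with cycle c'+1 >= c+2, so a free can never reach the
// active profile before its matching alloc.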
// of calling MemProfile directly.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
lock(&proflock)
+ // If we're between mProf_NextCycle and mProf_Flush, take care
+ // of flushing to the active profile so we only have to look
+ // at the active profile below.
+ mProf_FlushLocked()
clear := true
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
// Absolutely no data, suggesting that a garbage collection
// has not yet happened. In order to allow profiling when
// garbage collection is disabled from the beginning of execution,
- // accumulate stats as if a GC just happened, and recount buckets.
- mprof_GC()
- mprof_GC()
+ // accumulate all of the cycles and recount buckets.
n = 0
for b := mbuckets; b != nil; b = b.allnext {
mp := b.mp()
+ for c := range mp.future {
+ mp.active.add(&mp.future[c])
+ mp.future[c] = memRecordCycle{}
+ }
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
n++
}