runtime: increase GC concurrency.

author Rick Hudson <rlh@golang.org>

Tue, 6 Jan 2015 19:58:49 +0000 (14:58 -0500)

committer Rick Hudson <rlh@golang.org>

Thu, 8 Jan 2015 20:34:56 +0000 (20:34 +0000)
author Rick Hudson <rlh@golang.org>
Tue, 6 Jan 2015 19:58:49 +0000 (14:58 -0500)
committer Rick Hudson <rlh@golang.org>
Thu, 8 Jan 2015 20:34:56 +0000 (20:34 +0000)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go

index 5cf45828229df3be64ba80b3577dd2cdf1b7e28d..bc14d2222d399b6aeb0f62f91dc0eccdbf6392f1 100644 (file)
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -39,10 +39,27 @@ type pageID uintptr
  // base address for all 0-byte allocations
  var zerobase uintptr
  
+// Determine whether to initiate a GC.
+// Currently the primitive heuristic we use will start a new
+// concurrent GC when approximately half the available space
+// made available by the last GC cycle has been used.
+// If the GC is already working no need to trigger another one.
+// This should establish a feedback loop where if the GC does not
+// have sufficient time to complete then more memory will be
+// requested from the OS increasing heap size thus allow future
+// GCs more time to complete.
+// memstat.heap_alloc and memstat.next_gc reads have benign races
+// A false negative simple does not start a GC, a false positive
+// will start a GC needlessly. Neither have correctness issues.
+func shouldtriggergc() bool {
+       return memstats.heap_alloc+memstats.heap_alloc*3/4 >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
+}
+
  // Allocate an object of size bytes.
  // Small objects are allocated from the per-P cache's free lists.
  // Large objects (> 32 kB) are allocated straight from the heap.
  func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
+       shouldhelpgc := false
         if size == 0 {
                 return unsafe.Pointer(&zerobase)
         }
@@ -144,6 +161,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                                 systemstack(func() {
                                         mCache_Refill(c, tinySizeClass)
                                 })
+                               shouldhelpgc = true
                                 s = c.alloc[tinySizeClass]
                                 v = s.freelist
                         }
@@ -174,6 +192,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                                 systemstack(func() {
                                         mCache_Refill(c, int32(sizeclass))
                                 })
+                               shouldhelpgc = true
                                 s = c.alloc[sizeclass]
                                 v = s.freelist
                         }
@@ -191,6 +210,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                 c.local_cachealloc += intptr(size)
         } else {
                 var s *mspan
+               shouldhelpgc = true
                 systemstack(func() {
                         s = largeAlloc(size, uint32(flags))
                 })
@@ -345,8 +365,15 @@ marked:
                 }
         }
  
-       if memstats.heap_alloc >= memstats.next_gc/2 {
+       if shouldtriggergc() {
                 gogc(0)
+       } else if shouldhelpgc && atomicloaduint(&bggc.working) == 1 {
+               // bggc.lock not taken since race on bggc.working is benign.
+               // At worse we don't call gchelpwork.
+               // Delay the gchelpwork until the epilogue so that it doesn't
+               // interfere with the inner working of malloc such as
+               // mcache refills that might happen while doing the gchelpwork
+               systemstack(gchelpwork)
         }
  
         return x
@@ -466,14 +493,25 @@ func gogc(force int32) {
         releasem(mp)
         mp = nil
  
-       semacquire(&worldsema, false)
-
-       if force == 0 && memstats.heap_alloc < memstats.next_gc {
-               // typically threads which lost the race to grab
-               // worldsema exit here when gc is done.
-               semrelease(&worldsema)
-               return
+       if force == 0 {
+               lock(&bggc.lock)
+               if !bggc.started {
+                       bggc.working = 1
+                       bggc.started = true
+                       go backgroundgc()
+               } else if bggc.working == 0 {
+                       bggc.working = 1
+                       ready(bggc.g)
+               }
+               unlock(&bggc.lock)
+       } else {
+               gcwork(force)
         }
+}
+
+func gcwork(force int32) {
+
+       semacquire(&worldsema, false)
  
         // Pick up the remaining unswept/not being swept spans concurrently
         for gosweepone() != ^uintptr(0) {
@@ -482,14 +520,17 @@ func gogc(force int32) {
  
         // Ok, we're doing it!  Stop everybody else
  
-       startTime := nanotime()
-       mp = acquirem()
+       mp := acquirem()
         mp.gcing = 1
         releasem(mp)
         gctimer.count++
         if force == 0 {
                 gctimer.cycle.sweepterm = nanotime()
         }
+       // Pick up the remaining unswept/not being swept spans before we STW
+       for gosweepone() != ^uintptr(0) {
+               sweep.nbgsweep++
+       }
         systemstack(stoptheworld)
         systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
         if force == 0 {            // Do as much work concurrently as possible
@@ -500,7 +541,7 @@ func gogc(force int32) {
                 systemstack(gcscan_m)
                 gctimer.cycle.installmarkwb = nanotime()
                 systemstack(stoptheworld)
-               gcinstallmarkwb()
+               systemstack(gcinstallmarkwb)
                 systemstack(starttheworld)
                 gctimer.cycle.mark = nanotime()
                 systemstack(gcmark_m)
@@ -509,6 +550,7 @@ func gogc(force int32) {
                 systemstack(gcinstalloffwb_m)
         }
  
+       startTime := nanotime()
         if mp != acquirem() {
                 throw("gogc: rescheduled")
         }
@@ -527,6 +569,7 @@ func gogc(force int32) {
         eagersweep := force >= 2
         for i := 0; i < n; i++ {
                 if i > 0 {
+                       // refresh start time if doing a second GC
                         startTime = nanotime()
                 }
                 // switch to g0, call gc, then switch back
@@ -579,8 +622,8 @@ func GCcheckmarkdisable() {
  // gctimes records the time in nanoseconds of each phase of the concurrent GC.
  type gctimes struct {
         sweepterm     int64 // stw
-       scan          int64 // stw
-       installmarkwb int64
+       scan          int64
+       installmarkwb int64 // stw
         mark          int64
         markterm      int64 // stw
         sweep         int64
@@ -601,7 +644,7 @@ type gcchronograph struct {
  
  var gctimer gcchronograph
  
-// GCstarttimes initializes the gc timess. All previous timess are lost.
+// GCstarttimes initializes the gc times. All previous times are lost.
  func GCstarttimes(verbose int64) {
         gctimer = gcchronograph{verbose: verbose}
  }
@@ -655,6 +698,11 @@ func calctimes() gctimes {
  // the information from the most recent Concurent GC cycle. Calls from the
  // application to runtime.GC() are ignored.
  func GCprinttimes() {
+       if gctimer.verbose == 0 {
+               println("GC timers not enabled")
+               return
+       }
+
         // Explicitly put times on the heap so printPhase can use it.
         times := new(gctimes)
         *times = calctimes()
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go

index 35edd8aa30e3d14ba424f1a2b38dfacf8a76097e..4d0900a41ce9172975bec8d4406d1f9f337ca397 100644 (file)
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -123,7 +123,7 @@ const (
         _DebugGCPtrs     = false // if true, print trace of every pointer load during GC
         _ConcurrentSweep = true
  
-       _WorkbufSize     = 4 * 1024
+       _WorkbufSize     = 4 * 256
         _FinBlockSize    = 4 * 1024
         _RootData        = 0
         _RootBss         = 1
@@ -191,9 +191,9 @@ var badblock [1024]uintptr
  var nbadblock int32
  
  type workdata struct {
-       full    uint64                // lock-free list of full blocks
-       empty   uint64                // lock-free list of empty blocks
-       partial uint64                // lock-free list of partially filled blocks
+       full    uint64                // lock-free list of full blocks workbuf
+       empty   uint64                // lock-free list of empty blocks workbuf
+       partial uint64                // lock-free list of partially filled blocks workbuf
         pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
         nproc   uint32
         tstart  int64
@@ -587,6 +587,11 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
         // base and extent.
         b := b0
         n := n0
+
+       // ptrmask can have 2 possible values:
+       // 1. nil - obtain pointer mask from GC bitmap.
+       // 2. pointer to a compact mask (for stacks and data).
+
         wbuf := getpartialorempty()
         if b != 0 {
                 wbuf = scanobject(b, n, ptrmask, wbuf)
@@ -600,23 +605,23 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
                         return
                 }
         }
-       if gcphase == _GCscan {
-               throw("scanblock: In GCscan phase but no b passed in.")
-       }
  
-       keepworking := b == 0
+       drainallwbufs := b == 0
+       drainworkbuf(wbuf, drainallwbufs)
+}
  
+// Scan objects in wbuf until wbuf is empty.
+// If drainallwbufs is true find all other available workbufs and repeat the process.
+//go:nowritebarrier
+func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
         if gcphase != _GCmark && gcphase != _GCmarktermination {
                 println("gcphase", gcphase)
                 throw("scanblock phase")
         }
  
-       // ptrmask can have 2 possible values:
-       // 1. nil - obtain pointer mask from GC bitmap.
-       // 2. pointer to a compact mask (for stacks and data).
         for {
                 if wbuf.nobj == 0 {
-                       if !keepworking {
+                       if !drainallwbufs {
                                 putempty(wbuf)
                                 return
                         }
@@ -641,9 +646,30 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
                 //         PREFETCH(wbuf->obj[wbuf->nobj - 3];
                 //  }
                 wbuf.nobj--
-               b = wbuf.obj[wbuf.nobj]
+               b := wbuf.obj[wbuf.nobj]
+               wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
+       }
+}
+
+// Scan at most count objects in the wbuf.
+//go:nowritebarrier
+func drainobjects(wbuf *workbuf, count uintptr) {
+       for i := uintptr(0); i < count; i++ {
+               if wbuf.nobj == 0 {
+                       putempty(wbuf)
+                       return
+               }
+
+               // This might be a good place to add prefetch code...
+               // if(wbuf->nobj > 4) {
+               //         PREFETCH(wbuf->obj[wbuf->nobj - 3];
+               //  }
+               wbuf.nobj--
+               b := wbuf.obj[wbuf.nobj]
                 wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
         }
+       putpartial(wbuf)
+       return
  }
  
  //go:nowritebarrier
@@ -809,6 +835,17 @@ func putpartial(b *workbuf) {
         }
  }
  
+// trygetfull tries to get a full or partially empty workbuffer.
+// if one is not immediately available return nil
+//go:nowritebarrier
+func trygetfull() *workbuf {
+       wbuf := (*workbuf)(lfstackpop(&work.full))
+       if wbuf == nil {
+               wbuf = (*workbuf)(lfstackpop(&work.partial))
+       }
+       return wbuf
+}
+
  // Get a full work buffer off the work.full or a partially
  // filled one off the work.partial list. If nothing is available
  // wait until all the other gc helpers have finished and then
@@ -1090,6 +1127,38 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
         }
  }
  
+// gchelpwork does a small bounded amount of gc work. The purpose is to
+// shorten the time (as measured by allocations) spent doing a concurrent GC.
+// The number of mutator calls is roughly propotional to the number of allocations
+// made by that mutator. This slows down the allocation while speeding up the GC.
+//go:nowritebarrier
+func gchelpwork() {
+       switch gcphase {
+       default:
+               throw("gcphasework in bad gcphase")
+       case _GCoff, _GCquiesce, _GCstw:
+               // No work.
+       case _GCsweep:
+               // We could help by calling sweepone to sweep a single span.
+               // _ = sweepone()
+       case _GCscan:
+               // scan the stack, mark the objects, put pointers in work buffers
+               // hanging off the P where this is being run.
+               // scanstack(gp)
+       case _GCmark:
+               // Get a full work buffer and empty it.
+               var wbuf *workbuf
+               wbuf = trygetfull()
+               if wbuf != nil {
+                       drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain upto one buffer's worth of objects
+               }
+       case _GCmarktermination:
+               // We should never be here since the world is stopped.
+               // All available mark work will be emptied before returning.
+               throw("gcphasework in bad gcphase")
+       }
+}
+
  // The gp has been moved to a GC safepoint. GC phase specific
  // work is done here.
  //go:nowritebarrier
@@ -1425,6 +1494,14 @@ type sweepdata struct {
  
  var sweep sweepdata
  
+// State of the background concurrent GC goroutine.
+var bggc struct {
+       lock    mutex
+       g       *g
+       working uint
+       started bool
+}
+
  // sweeps one span
  // returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
  //go:nowritebarrier
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go

index b9718cbd18e46437149d606a6f7d4789bd69892a..625c8740f7707f7d1b67018d5d231878569a1813 100644 (file)
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -78,6 +78,19 @@ func clearpools() {
         }
  }
  
+// backgroundgc is running in a goroutine and does the concurrent GC work.
+// bggc holds the state of the backgroundgc.
+func backgroundgc() {
+       bggc.g = getg()
+       bggc.g.issystem = true
+       for {
+               gcwork(0)
+               lock(&bggc.lock)
+               bggc.working = 0
+               goparkunlock(&bggc.lock, "Concurrent GC wait")
+       }
+}
+
  func bgsweep() {
         sweep.g = getg()
         getg().issystem = true
diff --git a/test/init1.go b/test/init1.go

index f6eda6edfea011941eb5f38d53c5d6d394a0eadb..83e9149f4cadea86d0bd3086ad9688625caed7f2 100644 (file)
--- a/test/init1.go
+++ b/test/init1.go
@@ -31,7 +31,7 @@ func init() {
         }
         runtime.ReadMemStats(memstats)
         sys1 := memstats.Sys
-       if sys1-sys > chunk*50 {
+       if sys1-sys > chunk*500 {
                 println("allocated 1000 chunks of", chunk, "and used ", sys1-sys, "memory")
                 panic("init1")
         }
author	Rick Hudson <rlh@golang.org>
	Tue, 6 Jan 2015 19:58:49 +0000 (14:58 -0500)
committer	Rick Hudson <rlh@golang.org>
	Thu, 8 Jan 2015 20:34:56 +0000 (20:34 +0000)
src/runtime/malloc.go		patch \| blob \| history
src/runtime/mgc.go		patch \| blob \| history
src/runtime/mgc0.go		patch \| blob \| history
test/init1.go		patch \| blob \| history