]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: faster parallel GC
authorDmitriy Vyukov <dvyukov@google.com>
Thu, 21 Mar 2013 08:48:02 +0000 (12:48 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Thu, 21 Mar 2013 08:48:02 +0000 (12:48 +0400)
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkTree2-8               97100768     71417553  -26.45%
garbage.BenchmarkTree2LastPause-8     970931485    714103692  -26.45%
garbage.BenchmarkTree2Pause-8         469127802    345029253  -26.45%
garbage.BenchmarkParser-8            2880950854   2715456901   -5.74%
garbage.BenchmarkParserLastPause-8    137047399    103336476  -24.60%
garbage.BenchmarkParserPause-8         80686028     58922680  -26.97%

R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant
CC=golang-dev
https://golang.org/cl/7816044

src/pkg/runtime/mgc0.c
src/pkg/runtime/proc.c

index a79c22ef955d448079611dcbd5f43a61383371d0..dd268fcda0f92cf57c51abd6768f550b089136bf 100644 (file)
@@ -140,6 +140,7 @@ static Workbuf* getempty(Workbuf*);
 static Workbuf* getfull(Workbuf*);
 static void    putempty(Workbuf*);
 static Workbuf* handoff(Workbuf*);
+static void    gchelperstart(void);
 
 static struct {
        uint64  full;  // lock-free list of full blocks
@@ -287,11 +288,12 @@ struct BufferList
 {
        PtrTarget ptrtarget[IntermediateBufferCapacity];
        Obj obj[IntermediateBufferCapacity];
-       BufferList *next;
+       uint32 busy;
+       byte pad[CacheLineSize];
 };
-static BufferList *bufferList;
+#pragma dataflag 16  // no pointers
+static BufferList bufferList[MaxGcproc];
 
-static Lock lock;
 static Type *itabtype;
 
 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
@@ -598,23 +600,11 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
 
        // Allocate ptrbuf
        {
-               runtime·lock(&lock);
-
-               if(bufferList == nil) {
-                       bufferList = runtime·SysAlloc(sizeof(*bufferList));
-                       if(bufferList == nil)
-                               runtime·throw("runtime: cannot allocate memory");
-                       bufferList->next = nil;
-               }
-               scanbuffers = bufferList;
-               bufferList = bufferList->next;
-
+               scanbuffers = &bufferList[m->helpgc];
                ptrbuf = &scanbuffers->ptrtarget[0];
                ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
                objbuf = &scanbuffers->obj[0];
                objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
-
-               runtime·unlock(&lock);
        }
 
        ptrbufpos = ptrbuf;
@@ -1056,11 +1046,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
                nobj--;
        }
 
-endscan:
-       runtime·lock(&lock);
-       scanbuffers->next = bufferList;
-       bufferList = scanbuffers;
-       runtime·unlock(&lock);
+endscan:;
 }
 
 // debug_scanblock is the debug copy of scanblock.
@@ -1688,6 +1674,8 @@ runtime·memorydump(void)
 void
 runtime·gchelper(void)
 {
+       gchelperstart();
+
        // parallel mark for over gc roots
        runtime·parfordo(work.markfor);
 
@@ -1701,6 +1689,7 @@ runtime·gchelper(void)
        }
 
        runtime·parfordo(work.sweepfor);
+       bufferList[m->helpgc].busy = 0;
        if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
                runtime·notewakeup(&work.alldone);
 }
@@ -1892,6 +1881,7 @@ gc(struct gc_args *args)
 
        t1 = runtime·nanotime();
 
+       gchelperstart();
        runtime·parfordo(work.markfor);
        scanblock(nil, nil, 0, true);
 
@@ -1903,6 +1893,7 @@ gc(struct gc_args *args)
        t2 = runtime·nanotime();
 
        runtime·parfordo(work.sweepfor);
+       bufferList[m->helpgc].busy = 0;
        t3 = runtime·nanotime();
 
        if(work.nproc > 1)
@@ -2043,6 +2034,15 @@ runtime∕debug·setGCPercent(intgo in, intgo out)
        FLUSH(&out);
 }
 
+static void
+gchelperstart(void)
+{
+       if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
+               runtime·throw("gchelperstart: bad m->helpgc");
+       if(runtime·xchg(&bufferList[m->helpgc].busy, 1))
+               runtime·throw("gchelperstart: already busy");
+}
+
 static void
 runfinq(void)
 {
index a6ef83ba73d3773038af009d02b7ca1387dd92ef..8d05730e4317b9624d3e1c34bc35b77c6d067988 100644 (file)
@@ -332,7 +332,7 @@ runtime·helpgc(int32 nproc)
                mp = mget();
                if(mp == nil)
                        runtime·throw("runtime·gcprocs inconsistency");
-               mp->helpgc = 1;
+               mp->helpgc = n;
                mp->mcache = runtime·allp[pos]->mcache;
                pos++;
                runtime·notewakeup(&mp->park);
@@ -386,7 +386,7 @@ runtime·stoptheworld(void)
 static void
 mhelpgc(void)
 {
-       m->helpgc = 1;
+       m->helpgc = -1;
 }
 
 void
@@ -485,7 +485,7 @@ runtime·mstart(void)
                m->mstartfn();
 
        if(m->helpgc) {
-               m->helpgc = false;
+               m->helpgc = 0;
                stopm();
        } else if(m != &runtime·m0) {
                acquirep(m->nextp);
@@ -794,8 +794,8 @@ retry:
        runtime·notesleep(&m->park);
        runtime·noteclear(&m->park);
        if(m->helpgc) {
-               m->helpgc = 0;
                runtime·gchelper();
+               m->helpgc = 0;
                m->mcache = nil;
                goto retry;
        }