]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: refactor helpgc functionality in preparation for parallel GC
authorDmitriy Vyukov <dvyukov@google.com>
Tue, 15 May 2012 15:10:16 +0000 (19:10 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Tue, 15 May 2012 15:10:16 +0000 (19:10 +0400)
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

src/pkg/runtime/malloc.h
src/pkg/runtime/mgc0.c
src/pkg/runtime/mprof.goc
src/pkg/runtime/proc.c
src/pkg/runtime/runtime.h

index c9ac7c00c9c458af0a2932b312d08709e4a2f5e5..bc186981cd3307dc22ed983cee48507418431201 100644 (file)
@@ -414,7 +414,8 @@ enum
 void   runtime·MProf_Malloc(void*, uintptr);
 void   runtime·MProf_Free(void*, uintptr);
 void   runtime·MProf_GC(void);
-int32  runtime·helpgc(bool*);
+int32  runtime·gcprocs(void);
+void   runtime·helpgc(int32 nproc);
 void   runtime·gchelper(void);
 
 bool   runtime·getfinalizer(void *p, bool del, void (**fn)(void*), int32 *nret);
index 3c0d4e80d6c420f549cdbd7c3be94af0e02dddaa..9ceeeea05d70ecdfcff16caeeeededb20a634155 100644 (file)
@@ -366,7 +366,6 @@ debug_scanblock(byte *b, int64 n)
                if(s == nil)
                        continue;
 
-
                p =  (byte*)((uintptr)s->start<<PageShift);
                if(s->sizeclass == 0) {
                        obj = p;
@@ -692,7 +691,7 @@ handlespecial(byte *p, uintptr size)
        int32 nret;
        FinBlock *block;
        Finalizer *f;
-       
+
        if(!runtime·getfinalizer(p, true, &fn, &nret)) {
                runtime·setblockspecial(p, false);
                runtime·MProf_Free(p, size);
@@ -717,7 +716,7 @@ handlespecial(byte *p, uintptr size)
        f->fn = fn;
        f->nret = nret;
        f->arg = p;
-       runtime·unlock(&finlock); 
+       runtime·unlock(&finlock);
        return true;
 }
 
@@ -925,7 +924,6 @@ runtime·gc(int32 force)
        int64 t0, t1, t2, t3;
        uint64 heap0, heap1, obj0, obj1;
        byte *p;
-       bool extra;
        GCStats stats;
 
        // The gc is turned off (via enablegc) until
@@ -966,18 +964,21 @@ runtime·gc(int32 force)
        m->gcing = 1;
        runtime·stoptheworld();
 
-       cachestats(nil);
-       heap0 = mstats.heap_alloc;
-       obj0 = mstats.nmalloc - mstats.nfree;
+       heap0 = 0;
+       obj0 = 0;
+       if(gctrace) {
+               cachestats(nil);
+               heap0 = mstats.heap_alloc;
+               obj0 = mstats.nmalloc - mstats.nfree;
+       }
 
        runtime·lock(&work.markgate);
        runtime·lock(&work.sweepgate);
 
-       extra = false;
-       work.nproc = 1;
-       if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) {
+       work.nproc = runtime·gcprocs();
+       if(work.nproc > 1) {
                runtime·noteclear(&work.alldone);
-               work.nproc += runtime·helpgc(&extra);
+               runtime·helpgc(work.nproc);
        }
        work.nwait = 0;
        work.ndone = 0;
@@ -1036,15 +1037,7 @@ runtime·gc(int32 force)
        
        runtime·MProf_GC();
        runtime·semrelease(&runtime·worldsema);
-
-       // If we could have used another helper proc, start one now,
-       // in the hope that it will be available next time.
-       // It would have been even better to start it before the collection,
-       // but doing so requires allocating memory, so it's tricky to
-       // coordinate.  This lazy approach works out in practice:
-       // we don't mind if the first couple gc rounds don't have quite
-       // the maximum number of procs.
-       runtime·starttheworld(extra);
+       runtime·starttheworld();
 
        // give the queued finalizers, if any, a chance to run  
        if(finq != nil) 
@@ -1068,7 +1061,7 @@ runtime·ReadMemStats(MStats *stats)
        *stats = mstats;
        m->gcing = 0;
        runtime·semrelease(&runtime·worldsema);
-       runtime·starttheworld(false);
+       runtime·starttheworld();
 }
 
 static void
index 0bbce85836935cd6154e3068ec800edd95f771ec..dac897e3bbfa92716807a7a41fc90e815ecb8f8d 100644 (file)
@@ -355,7 +355,7 @@ func Stack(b Slice, all bool) (n int32) {
        if(all) {
                m->gcing = 0;
                runtime·semrelease(&runtime·worldsema);
-               runtime·starttheworld(false);
+               runtime·starttheworld();
        }
 }
 
@@ -398,7 +398,7 @@ func GoroutineProfile(b Slice) (n int32, ok bool) {
        
                m->gcing = 0;
                runtime·semrelease(&runtime·worldsema);
-               runtime·starttheworld(false);
+               runtime·starttheworld();
        }
 }
 
index 12dc910f047842cfc4a1ee68c35ab19aab1f86ce..81decd6bf85387ea4186919da808c1fbc23b5be3 100644 (file)
@@ -646,35 +646,38 @@ top:
 }
 
 int32
-runtime·helpgc(bool *extra)
+runtime·gcprocs(void)
 {
-       M *mp;
-       int32 n, max;
-
-       // Figure out how many CPUs to use.
+       int32 n;
+       
+       // Figure out how many CPUs to use during GC.
        // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-       max = runtime·gomaxprocs;
-       if(max > runtime·ncpu)
-               max = runtime·ncpu;
-       if(max > MaxGcproc)
-               max = MaxGcproc;
+       n = runtime·gomaxprocs;
+       if(n > runtime·ncpu)
+               n = runtime·ncpu;
+       if(n > MaxGcproc)
+               n = MaxGcproc;
+       if(n > runtime·sched.mwait+1) // one M is currently running
+               n = runtime·sched.mwait+1;
+       return n;
+}
 
-       // We're going to use one CPU no matter what.
-       // Figure out the max number of additional CPUs.
-       max--;
+void
+runtime·helpgc(int32 nproc)
+{
+       M *mp;
+       int32 n;
 
        runtime·lock(&runtime·sched);
-       n = 0;
-       while(n < max && (mp = mget(nil)) != nil) {
-               n++;
+       for(n = 1; n < nproc; n++) { // one M is currently running
+               mp = mget(nil);
+               if(mp == nil)
+                       runtime·throw("runtime·gcprocs inconsistency");
                mp->helpgc = 1;
                mp->waitnextg = 0;
                runtime·notewakeup(&mp->havenextg);
        }
        runtime·unlock(&runtime·sched);
-       if(extra)
-               *extra = n != max;
-       return n;
 }
 
 void
@@ -714,18 +717,30 @@ runtime·stoptheworld(void)
 }
 
 void
-runtime·starttheworld(bool extra)
+runtime·starttheworld(void)
 {
        M *m;
+       int32 max;
+       
+       // Figure out how many CPUs GC could possibly use.
+       max = runtime·gomaxprocs;
+       if(max > runtime·ncpu)
+               max = runtime·ncpu;
+       if(max > MaxGcproc)
+               max = MaxGcproc;
 
        schedlock();
        runtime·gcwaiting = 0;
        setmcpumax(runtime·gomaxprocs);
        matchmg();
-       if(extra && canaddmcpu()) {
-               // Start a new m that will (we hope) be idle
-               // and so available to help when the next
-               // garbage collection happens.
+       if(runtime·gcprocs() < max && canaddmcpu()) {
+               // If GC could have used another helper proc, start one now,
+               // in the hope that it will be available next time.
+               // It would have been even better to start it before the collection,
+               // but doing so requires allocating memory, so it's tricky to
+               // coordinate.  This lazy approach works out in practice:
+               // we don't mind if the first couple gc rounds don't have quite
+               // the maximum number of procs.
                // canaddmcpu above did mcpu++
                // (necessary, because m will be doing various
                // initialization work so is definitely running),
index 15f5fa31c85fbff8f6fd52fe28e594960db96fc4..1f7819197aecbbdf53ea0a1b72ad577ffeec30e5 100644 (file)
@@ -636,7 +636,7 @@ int64       runtime·cputicks(void);
 #pragma        varargck        type    "S"     String
 
 void   runtime·stoptheworld(void);
-void   runtime·starttheworld(bool);
+void   runtime·starttheworld(void);
 extern uint32 runtime·worldsema;
 
 /*