]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: make GC stats per-M
authorDmitriy Vyukov <dvyukov@google.com>
Thu, 5 Apr 2012 16:48:28 +0000 (20:48 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Thu, 5 Apr 2012 16:48:28 +0000 (20:48 +0400)
This is factored out part of:
https://golang.org/cl/5279048/
(Parallel GC)

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkParser              3999106750   3975026500   -0.60%
garbage.BenchmarkParser-2            3720553750   3719196500   -0.04%
garbage.BenchmarkParser-4            3502857000   3474980500   -0.80%
garbage.BenchmarkParser-8            3375448000   3341310500   -1.01%
garbage.BenchmarkParserLastPause      329401000    324097000   -1.61%
garbage.BenchmarkParserLastPause-2    208953000    214222000   +2.52%
garbage.BenchmarkParserLastPause-4    110933000    111656000   +0.65%
garbage.BenchmarkParserLastPause-8     71969000     78230000   +8.70%
garbage.BenchmarkParserPause          230808842    197237400  -14.55%
garbage.BenchmarkParserPause-2        123674365    125197595   +1.23%
garbage.BenchmarkParserPause-4         80518525     85710333   +6.45%
garbage.BenchmarkParserPause-8         58310243     56940512   -2.35%
garbage.BenchmarkTree2                 31471700     31289400   -0.58%
garbage.BenchmarkTree2-2               21536800     21086300   -2.09%
garbage.BenchmarkTree2-4               11074700     10880000   -1.76%
garbage.BenchmarkTree2-8                7568600      7351400   -2.87%
garbage.BenchmarkTree2LastPause       314664000    312840000   -0.58%
garbage.BenchmarkTree2LastPause-2     215319000    210815000   -2.09%
garbage.BenchmarkTree2LastPause-4     110698000    108751000   -1.76%
garbage.BenchmarkTree2LastPause-8      75635000     73463000   -2.87%
garbage.BenchmarkTree2Pause           174280857    173147571   -0.65%
garbage.BenchmarkTree2Pause-2         131332714    129665761   -1.27%
garbage.BenchmarkTree2Pause-4          93803095     93422904   -0.41%
garbage.BenchmarkTree2Pause-8          86242333     85146761   -1.27%

R=rsc
CC=golang-dev
https://golang.org/cl/5987045

src/pkg/runtime/mgc0.c
src/pkg/runtime/runtime.h

index e043864c19e49d0a2c6948c5a546a7e7bb4daf50..be8eb883585b7431b6846d739ce7df50bc716be7 100644 (file)
@@ -67,9 +67,6 @@ enum {
 //
 uint32 runtime·worldsema = 1;
 
-// TODO: Make these per-M.
-static uint64 nhandoff;
-
 static int32 gctrace;
 
 typedef struct Workbuf Workbuf;
@@ -529,12 +526,16 @@ getfull(Workbuf *b)
                }
                if(work.nwait == work.nproc)
                        return nil;
-               if(i < 10)
+               if(i < 10) {
+                       m->gcstats.nprocyield++;
                        runtime·procyield(20);
-               else if(i < 20)
+               } else if(i < 20) {
+                       m->gcstats.nosyield++;
                        runtime·osyield();
-               else
+               } else {
+                       m->gcstats.nsleep++;
                        runtime·usleep(100);
+               }
        }
 }
 
@@ -550,7 +551,8 @@ handoff(Workbuf *b)
        b->nobj -= n;
        b1->nobj = n;
        runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
-       nhandoff += n;
+       m->gcstats.nhandoff++;
+       m->gcstats.nhandoffcnt += n;
 
        // Put b on full list - let first half of b get stolen.
        runtime·lock(&work.fmu);
@@ -852,20 +854,30 @@ stealcache(void)
 }
 
 static void
-cachestats(void)
+cachestats(GCStats *stats)
 {
        M *m;
        MCache *c;
        int32 i;
        uint64 stacks_inuse;
        uint64 stacks_sys;
+       uint64 *src, *dst;
 
+       if(stats)
+               runtime·memclr((byte*)stats, sizeof(*stats));
        stacks_inuse = 0;
        stacks_sys = 0;
        for(m=runtime·allm; m; m=m->alllink) {
                runtime·purgecachedstats(m);
                stacks_inuse += m->stackalloc->inuse;
                stacks_sys += m->stackalloc->sys;
+               if(stats) {
+                       src = (uint64*)&m->gcstats;
+                       dst = (uint64*)stats;
+                       for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
+                               dst[i] += src[i];
+                       runtime·memclr((byte*)&m->gcstats, sizeof(m->gcstats));
+               }
                c = m->mcache;
                for(i=0; i<nelem(c->local_by_size); i++) {
                        mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
@@ -885,6 +897,7 @@ runtime·gc(int32 force)
        uint64 heap0, heap1, obj0, obj1;
        byte *p;
        bool extra;
+       GCStats stats;
 
        // The gc is turned off (via enablegc) until
        // the bootstrap has completed.
@@ -920,12 +933,11 @@ runtime·gc(int32 force)
        }
 
        t0 = runtime·nanotime();
-       nhandoff = 0;
 
        m->gcing = 1;
        runtime·stoptheworld();
 
-       cachestats();
+       cachestats(nil);
        heap0 = mstats.heap_alloc;
        obj0 = mstats.nmalloc - mstats.nfree;
 
@@ -955,13 +967,13 @@ runtime·gc(int32 force)
        t2 = runtime·nanotime();
 
        stealcache();
-       cachestats();
+       cachestats(&stats);
 
        mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
        m->gcing = 0;
 
-       m->locks++;     // disable gc during the mallocs in newproc
        if(finq != nil) {
+               m->locks++;     // disable gc during the mallocs in newproc
                // kick off or wake up goroutine to run queued finalizers
                if(fing == nil)
                        fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc);
@@ -969,10 +981,9 @@ runtime·gc(int32 force)
                        fingwait = 0;
                        runtime·ready(fing);
                }
+               m->locks--;
        }
-       m->locks--;
 
-       cachestats();
        heap1 = mstats.heap_alloc;
        obj1 = mstats.nmalloc - mstats.nfree;
 
@@ -985,11 +996,13 @@ runtime·gc(int32 force)
                runtime·printf("pause %D\n", t3-t0);
 
        if(gctrace) {
-               runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n",
+               runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
+                               " %D(%D) handoff, %D/%D/%D yields\n",
                        mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
                        heap0>>20, heap1>>20, obj0, obj1,
                        mstats.nmalloc, mstats.nfree,
-                       nhandoff);
+                       stats.nhandoff, stats.nhandoffcnt,
+                       stats.nprocyield, stats.nosyield, stats.nsleep);
        }
        
        runtime·MProf_GC();
@@ -1022,7 +1035,7 @@ runtime·ReadMemStats(MStats *stats)
        runtime·semacquire(&runtime·worldsema);
        m->gcing = 1;
        runtime·stoptheworld();
-       cachestats();
+       cachestats(nil);
        *stats = mstats;
        m->gcing = 0;
        runtime·semrelease(&runtime·worldsema);
index 177de6c05fe3e3431648aa6b11b84c60a8bc92ec..20355e0c7be515ffe4649a8cc2cc71d5810b0d43 100644 (file)
@@ -71,6 +71,7 @@ typedef       struct  Complex128      Complex128;
 typedef        struct  WinCall         WinCall;
 typedef        struct  Timers          Timers;
 typedef        struct  Timer           Timer;
+typedef struct GCStats         GCStats;
 
 /*
  * per-cpu declaration.
@@ -166,6 +167,16 @@ struct     Gobuf
        byte*   pc;
        G*      g;
 };
+struct GCStats
+{
+       // the struct must consist of only uint64's,
+       // because it is casted to uint64[].
+       uint64  nhandoff;
+       uint64  nhandoffcnt;
+       uint64  nprocyield;
+       uint64  nosyield;
+       uint64  nsleep;
+};
 struct G
 {
        byte*   stackguard;     // cannot move - also known to linker, libmach, runtime/cgo
@@ -243,6 +254,7 @@ struct      M
        uintptr waitsema;       // semaphore for parking on locks
        uint32  waitsemacount;
        uint32  waitsemalock;
+       GCStats gcstats;
 
 #ifdef GOOS_windows
        void*   thread;         // thread handle