From: Dmitriy Vyukov Date: Thu, 5 Apr 2012 16:48:28 +0000 (+0400) Subject: runtime: make GC stats per-M X-Git-Tag: go1.1rc2~3433 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d839a809b22a6e7b1b434917bdc48caac32507e8;p=gostls13.git runtime: make GC stats per-M This is factored out part of: https://golang.org/cl/5279048/ (Parallel GC) benchmark old ns/op new ns/op delta garbage.BenchmarkParser 3999106750 3975026500 -0.60% garbage.BenchmarkParser-2 3720553750 3719196500 -0.04% garbage.BenchmarkParser-4 3502857000 3474980500 -0.80% garbage.BenchmarkParser-8 3375448000 3341310500 -1.01% garbage.BenchmarkParserLastPause 329401000 324097000 -1.61% garbage.BenchmarkParserLastPause-2 208953000 214222000 +2.52% garbage.BenchmarkParserLastPause-4 110933000 111656000 +0.65% garbage.BenchmarkParserLastPause-8 71969000 78230000 +8.70% garbage.BenchmarkParserPause 230808842 197237400 -14.55% garbage.BenchmarkParserPause-2 123674365 125197595 +1.23% garbage.BenchmarkParserPause-4 80518525 85710333 +6.45% garbage.BenchmarkParserPause-8 58310243 56940512 -2.35% garbage.BenchmarkTree2 31471700 31289400 -0.58% garbage.BenchmarkTree2-2 21536800 21086300 -2.09% garbage.BenchmarkTree2-4 11074700 10880000 -1.76% garbage.BenchmarkTree2-8 7568600 7351400 -2.87% garbage.BenchmarkTree2LastPause 314664000 312840000 -0.58% garbage.BenchmarkTree2LastPause-2 215319000 210815000 -2.09% garbage.BenchmarkTree2LastPause-4 110698000 108751000 -1.76% garbage.BenchmarkTree2LastPause-8 75635000 73463000 -2.87% garbage.BenchmarkTree2Pause 174280857 173147571 -0.65% garbage.BenchmarkTree2Pause-2 131332714 129665761 -1.27% garbage.BenchmarkTree2Pause-4 93803095 93422904 -0.41% garbage.BenchmarkTree2Pause-8 86242333 85146761 -1.27% R=rsc CC=golang-dev https://golang.org/cl/5987045 --- diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index e043864c19..be8eb88358 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -67,9 +67,6 @@ enum { // uint32 runtime·worldsema = 1; -// TODO: Make these per-M. -static uint64 nhandoff; - static int32 gctrace; typedef struct Workbuf Workbuf; @@ -529,12 +526,16 @@ getfull(Workbuf *b) } if(work.nwait == work.nproc) return nil; - if(i < 10) + if(i < 10) { + m->gcstats.nprocyield++; runtime·procyield(20); - else if(i < 20) + } else if(i < 20) { + m->gcstats.nosyield++; runtime·osyield(); - else + } else { + m->gcstats.nsleep++; runtime·usleep(100); + } } } @@ -550,7 +551,8 @@ handoff(Workbuf *b) b->nobj -= n; b1->nobj = n; runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]); - nhandoff += n; + m->gcstats.nhandoff++; + m->gcstats.nhandoffcnt += n; // Put b on full list - let first half of b get stolen. runtime·lock(&work.fmu); @@ -852,20 +854,30 @@ stealcache(void) } static void -cachestats(void) +cachestats(GCStats *stats) { M *m; MCache *c; int32 i; uint64 stacks_inuse; uint64 stacks_sys; + uint64 *src, *dst; + if(stats) + runtime·memclr((byte*)stats, sizeof(*stats)); stacks_inuse = 0; stacks_sys = 0; for(m=runtime·allm; m; m=m->alllink) { runtime·purgecachedstats(m); stacks_inuse += m->stackalloc->inuse; stacks_sys += m->stackalloc->sys; + if(stats) { + src = (uint64*)&m->gcstats; + dst = (uint64*)stats; + for(i=0; igcstats, sizeof(m->gcstats)); + } c = m->mcache; for(i=0; ilocal_by_size); i++) { mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; @@ -885,6 +897,7 @@ runtime·gc(int32 force) uint64 heap0, heap1, obj0, obj1; byte *p; bool extra; + GCStats stats; // The gc is turned off (via enablegc) until // the bootstrap has completed. @@ -920,12 +933,11 @@ runtime·gc(int32 force) } t0 = runtime·nanotime(); - nhandoff = 0; m->gcing = 1; runtime·stoptheworld(); - cachestats(); + cachestats(nil); heap0 = mstats.heap_alloc; obj0 = mstats.nmalloc - mstats.nfree; @@ -955,13 +967,13 @@ runtime·gc(int32 force) t2 = runtime·nanotime(); stealcache(); - cachestats(); + cachestats(&stats); mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; m->gcing = 0; - m->locks++; // disable gc during the mallocs in newproc if(finq != nil) { + m->locks++; // disable gc during the mallocs in newproc // kick off or wake up goroutine to run queued finalizers if(fing == nil) fing = runtime·newproc1((byte*)runfinq, nil, 0, 0, runtime·gc); @@ -969,10 +981,9 @@ runtime·gc(int32 force) fingwait = 0; runtime·ready(fing); } + m->locks--; } - m->locks--; - cachestats(); heap1 = mstats.heap_alloc; obj1 = mstats.nmalloc - mstats.nfree; @@ -985,11 +996,13 @@ runtime·gc(int32 force) runtime·printf("pause %D\n", t3-t0); if(gctrace) { - runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D handoff\n", + runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects," + " %D(%D) handoff, %D/%D/%D yields\n", mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000, heap0>>20, heap1>>20, obj0, obj1, mstats.nmalloc, mstats.nfree, - nhandoff); + stats.nhandoff, stats.nhandoffcnt, + stats.nprocyield, stats.nosyield, stats.nsleep); } runtime·MProf_GC(); @@ -1022,7 +1035,7 @@ runtime·ReadMemStats(MStats *stats) runtime·semacquire(&runtime·worldsema); m->gcing = 1; runtime·stoptheworld(); - cachestats(); + cachestats(nil); *stats = mstats; m->gcing = 0; runtime·semrelease(&runtime·worldsema); diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 177de6c05f..20355e0c7b 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -71,6 +71,7 @@ typedef struct Complex128 Complex128; typedef struct WinCall WinCall; typedef struct Timers Timers; typedef struct Timer Timer; +typedef struct GCStats GCStats; /* * per-cpu declaration. @@ -166,6 +167,16 @@ struct Gobuf byte* pc; G* g; }; +struct GCStats +{ + // the struct must consist of only uint64's, + // because it is casted to uint64[]. + uint64 nhandoff; + uint64 nhandoffcnt; + uint64 nprocyield; + uint64 nosyield; + uint64 nsleep; +}; struct G { byte* stackguard; // cannot move - also known to linker, libmach, runtime/cgo @@ -243,6 +254,7 @@ struct M uintptr waitsema; // semaphore for parking on locks uint32 waitsemacount; uint32 waitsemalock; + GCStats gcstats; #ifdef GOOS_windows void* thread; // thread handle