From: Dmitriy Vyukov Date: Thu, 12 Apr 2012 08:01:24 +0000 (+0400) Subject: runtime: speedup GC sweep phase (batch free) X-Git-Tag: go1.1rc2~3390 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=4945fc8e40eef046501f613135b4f18cf2777d29;p=gostls13.git runtime: speedup GC sweep phase (batch free) benchmark old ns/op new ns/op delta garbage.BenchmarkParser 4370050250 3779668750 -13.51% garbage.BenchmarkParser-2 3713087000 3628771500 -2.27% garbage.BenchmarkParser-4 3519755250 3406349750 -3.22% garbage.BenchmarkParser-8 3386627750 3319144000 -1.99% garbage.BenchmarkTree 493585529 408102411 -17.32% garbage.BenchmarkTree-2 500487176 402285176 -19.62% garbage.BenchmarkTree-4 473238882 361484058 -23.61% garbage.BenchmarkTree-8 486977823 368334823 -24.36% garbage.BenchmarkTree2 31446600 31203200 -0.77% garbage.BenchmarkTree2-2 21469000 21077900 -1.82% garbage.BenchmarkTree2-4 11007600 10899100 -0.99% garbage.BenchmarkTree2-8 7692400 7032600 -8.58% garbage.BenchmarkParserPause 241863263 163249450 -32.50% garbage.BenchmarkParserPause-2 120135418 112981575 -5.95% garbage.BenchmarkParserPause-4 83411552 64580700 -22.58% garbage.BenchmarkParserPause-8 51870697 42207244 -18.63% garbage.BenchmarkTreePause 20940474 13147011 -37.22% garbage.BenchmarkTreePause-2 20115124 11146715 -44.59% garbage.BenchmarkTreePause-4 17217584 7486327 -56.52% garbage.BenchmarkTreePause-8 18258845 7400871 -59.47% garbage.BenchmarkTree2Pause 174067190 172674190 -0.80% garbage.BenchmarkTree2Pause-2 131175809 130615761 -0.43% garbage.BenchmarkTree2Pause-4 95406666 93972047 -1.50% garbage.BenchmarkTree2Pause-8 86056095 85334952 -0.84% garbage.BenchmarkParserLastPause 329932000 324790000 -1.56% garbage.BenchmarkParserLastPause-2 209383000 210456000 +0.51% garbage.BenchmarkParserLastPause-4 113981000 112921000 -0.93% garbage.BenchmarkParserLastPause-8 77967000 76625000 -1.72% garbage.BenchmarkTreeLastPause 29752000 18444000 -38.01% garbage.BenchmarkTreeLastPause-2 24274000 14766000 -39.17% garbage.BenchmarkTreeLastPause-4 19565000 8726000 -55.40% garbage.BenchmarkTreeLastPause-8 21956000 10530000 -52.04% garbage.BenchmarkTree2LastPause 314411000 311945000 -0.78% garbage.BenchmarkTree2LastPause-2 214641000 210836000 -1.77% garbage.BenchmarkTree2LastPause-4 110024000 108943000 -0.98% garbage.BenchmarkTree2LastPause-8 76873000 70263000 -8.60% R=golang-dev, rsc CC=golang-dev https://golang.org/cl/5991049 --- diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index 52997bac6c..66919c911e 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -125,7 +125,7 @@ enum // 2, 3, and 4 are all plausible maximums depending // on the hardware details of the machine. The garbage // collector scales well to 4 cpus. - MaxGcproc = 4, + MaxGcproc = 16, }; // A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) @@ -341,6 +341,7 @@ struct MCentral void runtime·MCentral_Init(MCentral *c, int32 sizeclass); int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first); void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first); +void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); // Main malloc heap. // The heap itself is the "free[]" and "large" arrays, diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c index ff0c2d11ad..6fc95aec7b 100644 --- a/src/pkg/runtime/mcentral.c +++ b/src/pkg/runtime/mcentral.c @@ -88,9 +88,6 @@ MCentral_Alloc(MCentral *c) } // Free n objects back into the central free list. -// Return the number of objects allocated. -// The objects are linked together by their first words. -// On return, *pstart points at the first object and *pend at the last. void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *start) { @@ -148,6 +145,42 @@ MCentral_Free(MCentral *c, void *v) } } +// Free n objects from a span s back into the central free list c. +// Called from GC. +void +runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end) +{ + int32 size; + + runtime·lock(c); + + // Move to nonempty if necessary. + if(s->freelist == nil) { + runtime·MSpanList_Remove(s); + runtime·MSpanList_Insert(&c->nonempty, s); + } + + // Add the objects back to s's free list. + end->next = s->freelist; + s->freelist = start; + s->ref -= n; + c->nfree += n; + + // If s is completely freed, return it to the heap. + if(s->ref == 0) { + size = runtime·class_to_size[c->sizeclass]; + runtime·MSpanList_Remove(s); + *(uintptr*)(s->start<freelist = nil; + c->nfree -= (s->npages << PageShift) / size; + runtime·unlock(c); + runtime·unmarkspan((byte*)(s->start<npages<start << PageShift); @@ -774,6 +776,9 @@ sweepspan(MSpan *s) npages = runtime·class_to_allocnpages[cl]; n = (npages << PageShift) / size; } + nfree = 0; + start = end = nil; + c = m->mcache; // Sweep through n objects of given size starting at p. // This thread owns the span now, so it can manipulate @@ -810,21 +815,33 @@ sweepspan(MSpan *s) // Mark freed; restore block boundary bit. *bitp = (*bitp & ~(bitMask<mcache; if(s->sizeclass == 0) { // Free large span. runtime·unmarkspan(p, 1<local_alloc -= size; + c->local_nfree++; } else { // Free small object. if(size > sizeof(uintptr)) ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" - c->local_by_size[s->sizeclass].nfree++; - runtime·MCache_Free(c, p, s->sizeclass, size); + if(nfree) + end->next = (MLink*)p; + else + start = (MLink*)p; + end = (MLink*)p; + nfree++; } - c->local_alloc -= size; - c->local_nfree++; + } + + if(nfree) { + c->local_by_size[s->sizeclass].nfree += nfree; + c->local_alloc -= size * nfree; + c->local_nfree += nfree; + c->local_cachealloc -= nfree * size; + c->local_objects -= nfree; + runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, start, end); } }