]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: faster GC scan
authorDmitriy Vyukov <dvyukov@google.com>
Wed, 8 Oct 2014 09:51:12 +0000 (13:51 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Wed, 8 Oct 2014 09:51:12 +0000 (13:51 +0400)
The change contains 3 spot optimizations to scan loop:
1. Don't use byte vars, use uintptr's instead.
This seems to alleviate some codegen issue,
and alone accounts to a half of speedup.
2. Remove bitmap cache. Currently we cache only 1 byte,
so caching is not particularly effective anyway.
Removal of the cache simplifies code and positively affects regalloc.
3. Replace BitsMultiword switch with if and
do debug checks only in Debug mode.
I've benchmarked changes separately and ensured that
each of them provides speedup on top of the previous one.
This change as a whole fixes the unintentional regressions
of scan loop that were introduced during development cycle.
Fixes #8625.
Fixes #8565.

On go.benchmarks/garbage benchmark:
GOMAXPROCS=1
time: -3.13%
cputime: -3.22%
gc-pause-one: -15.71%
gc-pause-total: -15.71%

GOMAXPROCS=32
time: -1.96%
cputime: -4.43%
gc-pause-one: -6.22%
gc-pause-total: -6.22%

LGTM=khr, rsc
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/153990043

src/runtime/mgc0.c

index 5876ea5c3e2ebb55379472156e06c3190039f208..e369e5425c9ed0d1062cb7d98699f4e09012f66b 100644 (file)
@@ -179,9 +179,8 @@ have_cgo_allocate(void)
 static void
 scanblock(byte *b, uintptr n, byte *ptrmask)
 {
-       byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached;
-       uintptr i, j, nobj, size, idx, x, off, scanbufpos;
-       intptr ncached;
+       byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
+       uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
        Workbuf *wbuf;
        Iface *iface;
        Eface *eface;
@@ -203,8 +202,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                scanbuf[i] = nil;
 
        ptrbitp = nil;
-       cached = 0;
-       ncached = 0;
 
        // ptrmask can have 2 possible values:
        // 1. nil - obtain pointer mask from GC bitmap.
@@ -259,10 +256,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                if(ptrmask == nil) {
                        off = (uintptr*)b - (uintptr*)arena_start;
                        ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
-                       shift = (off % wordsPerBitmapByte) * gcBits;
-                       cached = *ptrbitp >> shift;
-                       cached &= ~bitBoundary;
-                       ncached = (8 - shift)/gcBits;
                }
                for(i = 0; i < n; i += PtrSize) {
                        obj = nil;
@@ -273,15 +266,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                        runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
                                        break;
                                // Consult GC bitmap.
-                               if(ncached <= 0) {
-                                       // Refill cache.
-                                       cached = *--ptrbitp;
-                                       ncached = 2;
+                               bits = *ptrbitp;
+                               if((((uintptr)b+i)%(PtrSize*wordsPerBitmapByte)) != 0) {
+                                       ptrbitp--;
+                                       bits >>= gcBits;
                                }
-                               bits = cached;
-                               cached >>= gcBits;
-                               ncached--;
-                               if((bits&bitBoundary) != 0)
+                               if((bits&bitBoundary) != 0 && i != 0)
                                        break; // reached beginning of the next object
                                bits = (bits>>2)&BitsMask;
                                if(bits == BitsDead)
@@ -289,7 +279,7 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        } else // dense mask (stack or data)
                                bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
 
-                       if(bits == BitsScalar || bits == BitsDead)
+                       if(bits <= BitsScalar) // BitsScalar || BitsDead
                                continue;
                        if(bits == BitsPointer) {
                                obj = *(byte**)(b+i);
@@ -298,43 +288,39 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        }
 
                        // With those three out of the way, must be multi-word.
-                       if(bits != BitsMultiWord)
+                       if(Debug && bits != BitsMultiWord)
                                runtime·throw("unexpected garbage collection bits");
                        // Find the next pair of bits.
                        if(ptrmask == nil) {
-                               if(ncached <= 0) {
-                                       // Refill cache.
-                                       cached = *--ptrbitp;
-                                       ncached = 2;
+                               bits = *ptrbitp;
+                               if((((uintptr)b+i)%(PtrSize*wordsPerBitmapByte)) == 0) {
+                                       ptrbitp--;
+                                       bits >>= gcBits;
                                }
-                               bits = (cached>>2)&BitsMask;
+                               bits = (bits>>2)&BitsMask;
                        } else
                                bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
 
-                       switch(bits) {
-                       default:
+                       if(Debug && bits != BitsIface && bits != BitsEface)
                                runtime·throw("unexpected garbage collection bits");
-                       case BitsIface:
+
+                       if(bits == BitsIface) {
                                iface = (Iface*)(b+i);
                                if(iface->tab != nil) {
                                        typ = iface->tab->type;
                                        if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
                                                obj = iface->data;
                                }
-                               break;
-                       case BitsEface:
+                       } else {
                                eface = (Eface*)(b+i);
                                typ = eface->type;
                                if(typ != nil) {
                                        if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
                                                obj = eface->data;
                                }
-                               break;
                        }
 
                        i += PtrSize;
-                       cached >>= gcBits;
-                       ncached--;
 
                        obj0 = obj;
                markobj: