From: Keith Randall <khr@golang.org>
Date: Thu, 19 Dec 2013 01:13:59 +0000 (-0800)
Subject: runtime: mark objects in free lists as allocated and unscannable.
X-Git-Tag: go1.3beta1~1147
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2d20c0d6257f5dfc57bb8a78f4e2824803bd6d0e;p=gostls13.git

runtime: mark objects in free lists as allocated and unscannable.

On the plus side, we don't need to change the bits when mallocing
pointerless objects.  On the other hand, we need to mark objects in the
free lists during GC.  But the free lists are small at GC time, so it
should be a net win.

benchmark                    old ns/op    new ns/op    delta
BenchmarkMalloc8                    40           33  -17.65%
BenchmarkMalloc16                   45           38  -15.72%
BenchmarkMallocTypeInfo8            58           59   +0.85%
BenchmarkMallocTypeInfo16           63           64   +1.10%

R=golang-dev, rsc, dvyukov
CC=cshapiro, golang-dev
https://golang.org/cl/41040043
---

diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc
index b81fc398f0..eb044384b5 100644
--- a/src/pkg/runtime/malloc.goc
+++ b/src/pkg/runtime/malloc.goc
@@ -97,8 +97,10 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
 			runtime·markspan(v, 0, 0, true);
 	}
 
-	if(!(flag & FlagNoGC))
-		runtime·markallocated(v, size, (flag&FlagNoScan) != 0);
+	if(flag & FlagNoGC)
+		runtime·marknogc(v);
+	else if(!(flag & FlagNoScan))
+		runtime·markscan(v);
 
 	if(DebugTypeAtBlockEnd)
 		*(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ;
diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h
index 378dcb7338..705b20199d 100644
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -449,7 +449,8 @@ void*	runtime·mallocgc(uintptr size, uintptr typ, uint32 flag);
 void*	runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
 int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
 void	runtime·gc(int32 force);
-void	runtime·markallocated(void *v, uintptr n, bool noptr);
+void	runtime·markscan(void *v);
+void	runtime·marknogc(void *v);
 void	runtime·checkallocated(void *v, uintptr n);
 void	runtime·markfreed(void *v, uintptr n);
 void	runtime·checkfreed(void *v, uintptr n);
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index f329787044..6fc88bf10b 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -81,7 +81,7 @@ clearpools(void)
 // The bits in the word are packed together by type first, then by
 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
-// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
+// then the 16 bitScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
 // This layout makes it easier to iterate over the bits of a given type.
 //
 // The bitmap starts at mheap.arena_start and extends *backward* from
@@ ... @@ clearpools(void)
 // there.  On a 64-bit system the off'th word in the arena is tracked by
 // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
 // the only difference is that the divisor is 8.)
 //
 // To pull out the bits corresponding to a given pointer p, we use:
 //
 //	off = p - (uintptr*)mheap.arena_start;  // word offset
 //	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
 //	shift = off % wordsPerBitmapWord
 //	bits = *b >> shift;
 //	/* then test bits & bitAllocated, bits & bitMarked, etc. */
 //
-#define bitAllocated		((uintptr)1<<(bitShift*0))
-#define bitNoScan		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
+#define bitAllocated		((uintptr)1<<(bitShift*0))	/* block start; eligible for garbage collection */
+#define bitScan			((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
 #define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
 #define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
-#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */
+#define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set - mark for FlagNoGC objects */
 
-#define bitMask			(bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+#define bitMask			(bitAllocated | bitScan | bitMarked | bitSpecial)
 
 // Holding worldsema grants an M the right to try to stop the world.
 // The procedure is:
@@ -534,7 +534,7 @@ flushptrbuf(Scanbuf *sbuf)
 		}
 
 		// If object has no pointers, don't need to scan further.
-		if((bits & bitNoScan) != 0)
+		if((bits & bitScan) == 0)
 			continue;
 
 		// Ask span about size class.
@@ -1187,7 +1187,7 @@ debug_scanblock(byte *b, uintptr n)
 			runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
 
 		// If object has no pointers, don't need to scan further.
-		if((bits & bitNoScan) != 0)
+		if((bits & bitScan) == 0)
 			continue;
 
 		debug_scanblock(obj, size);
@@ -1676,6 +1676,28 @@ addroots(void)
 			addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
 }
 
+static void
+addfreelists(void)
+{
+	int32 i;
+	P *p, **pp;
+	MCache *c;
+	MLink *m;
+
+	// Mark objects in the MCache of each P so we don't collect them.
+	for(pp=runtime·allp; p=*pp; pp++) {
+		c = p->mcache;
+		if(c==nil)
+			continue;
+		for(i = 0; i < NumSizeClasses; i++) {
+			for(m = c->list[i].list; m != nil; m = m->next) {
+				markonly(m);
+			}
+		}
+	}
+	// Note: the sweeper will mark objects in each span's freelist.
+}
+
 static bool
 handlespecial(byte *p, uintptr size)
 {
@@ -1722,7 +1744,7 @@ static void
 sweepspan(ParFor *desc, uint32 idx)
 {
 	int32 cl, n, npages;
-	uintptr size;
+	uintptr size, off, *bitp, shift;
 	byte *p;
 	MCache *c;
 	byte *arena_start;
@@ -1732,6 +1754,7 @@ sweepspan(ParFor *desc, uint32 idx)
 	byte compression;
 	uintptr type_data_inc;
 	MSpan *s;
+	MLink *x;
 
 	USED(&desc);
 	s = runtime·mheap.allspans[idx];
@@ -1751,6 +1774,17 @@ sweepspan(ParFor *desc, uint32 idx)
 	nfree = 0;
 	end = &head;
 	c = m->mcache;
+
+	// mark any free objects in this span so we don't collect them
+	for(x = s->freelist; x != nil; x = x->next) {
+		// This is markonly(x) but faster because we don't need
+		// atomic access and we're guaranteed to be pointing at
+		// the head of a valid object.
+		off = (uintptr*)x - (uintptr*)runtime·mheap.arena_start;
+		bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+		shift = off % wordsPerBitmapWord;
+		*bitp |= bitMarked<<shift;
+	}
 
 	type_data = (byte*)s->types.data;
 	type_data_inc = sizeof(uintptr);
@@ -1794,8 +1828,8 @@ sweepspan(ParFor *desc, uint32 idx)
 			continue;
 		}
 
-		// Mark freed; restore block boundary bit.
-		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+		// Mark freed; leave the block allocated and unscannable.
+		*bitp = (*bitp & ~(bitMask<<shift)) | (bitAllocated<<shift);
 
 		if(cl == 0) {
 			// Free large span.
@@ ... @@ gc(struct gc_args *args)
 	work.debugmarkdone = 0;
 	work.nproc = runtime·gcprocs();
 	addroots();
+	addfreelists();
 	runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
 	runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
 	if(work.nproc > 1) {
@@ -2439,18 +2474,35 @@ runfinq(void)
 	}
 }
 
-// mark the block at v of size n as allocated.
-// If noscan is true, mark it as not needing scanning.
 void
-runtime·markallocated(void *v, uintptr n, bool noscan)
+runtime·marknogc(void *v)
 {
 	uintptr *b, obits, bits, off, shift;
 
-	if(0)
-		runtime·printf("markallocated %p+%p\n", v, n);
+	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+	shift = off % wordsPerBitmapWord;
 
-	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
-		runtime·throw("markallocated: bad pointer");
+	for(;;) {
+		obits = *b;
+		if((obits>>shift & bitMask) != bitAllocated)
+			runtime·throw("bad initial state for marknogc");
+		bits = (obits & ~(bitAllocated<<shift)) | bitBlockBoundary<<shift;
+		if(runtime·gomaxprocs == 1) {
+			*b = bits;
+			break;
+		} else {
+			// more than one goroutine is potentially running: use atomic op
+			if(runtime·casp((void**)b, (void*)obits, (void*)bits))
+				break;
+		}
+	}
+}
+
+void
+runtime·markscan(void *v)
+{
+	uintptr *b, obits, bits, off, shift;
 
 	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
 	b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
@@ ... @@ runtime·markallocated(void *v, uintptr n, bool noscan)
 
 	for(;;) {
 		obits = *b;
-		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
-		if(noscan)
-			bits |= bitNoScan<<shift;
+		if((obits>>shift & bitMask) != bitAllocated)
+			runtime·throw("bad initial state for markscan");
+		bits = obits | bitScan<<shift;
 		if(runtime·gomaxprocs == 1) {
 			*b = bits;
 			break;
@@ ... @@
 void
 runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
 {
-	uintptr *b, off, shift;
+	uintptr *b, off, shift, i;
 	byte *p;
 
 	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
 		runtime·throw("markspan: bad pointer");
 
+	if(runtime·checking) {
+		// bits should be all zero at the start
+		off = (byte*)v + size - runtime·mheap.arena_start;
+		b = (uintptr*)(runtime·mheap.arena_start - off/wordsPerBitmapWord);
+		for(i = 0; i < size/PtrSize/wordsPerBitmapWord; i++) {
+			if(b[i] != 0)
+				runtime·throw("markspan: span bits not zero");
+		}
+	}
+
 	p = v;
 	if(leftover)	// mark a boundary just past end of last block too
 		n++;
@@ -2548,7 +2613,7 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
 		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset
 		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
 		shift = off % wordsPerBitmapWord;
-		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+		*b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
 	}
 }
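
The patch leans on a single addressing idiom, repeated in marknogc,
markscan, and sweepspan: take the object's word offset from arena_start,
step *backward* into the bitmap to the 64-bit word describing it, and
shift to its 4-bit group.  The standalone C sketch below is illustrative
only -- the toy arena, the constants, and main() are assumptions for the
demo, not runtime code, and it assumes a 64-bit build -- but it follows
the same arithmetic and shows the convention this commit establishes:
bitAllocated alone means "allocated and unscannable" (the state of
free-list objects), while bitAllocated|bitScan means the object's words
must be scanned for pointers.

	#include <stdint.h>
	#include <stdio.h>

	/* One 64-bit bitmap word describes 16 heap words, 4 bits per word,
	   packed by type (allocated, scan, marked, special), echoing the
	   layout comment at the top of mgc0.c.  Assumes 8-byte heap words. */
	enum { bitShift = 16, wordsPerBitmapWord = 16 };

	#define bitAllocated ((uint64_t)1<<(bitShift*0))
	#define bitScan      ((uint64_t)1<<(bitShift*1))
	#define bitMarked    ((uint64_t)1<<(bitShift*2))

	int main(void) {
		/* Toy arena, purely hypothetical: the bitmap fills mem[0..31]
		   and grows backward from arena_start; heap words grow forward. */
		static uint64_t mem[64];
		uint64_t *arena_start = &mem[32];
		uint64_t *v = arena_start + 21;            /* some heap word */

		uint64_t off = v - arena_start;            /* word offset (21) */
		uint64_t *b = arena_start - off/wordsPerBitmapWord - 1;
		uint64_t shift = off % wordsPerBitmapWord;

		/* markscan's end state: allocated and scannable.  A free-list
		   object would carry bitAllocated alone: allocated, unscannable. */
		*b |= (bitAllocated|bitScan) << shift;

		uint64_t bits = *b >> shift;               /* as in the mgc0.c comment */
		printf("off=%llu shift=%llu allocated=%d scan=%d marked=%d\n",
		       (unsigned long long)off, (unsigned long long)shift,
		       (int)((bits & bitAllocated) != 0),
		       (int)((bits & bitScan) != 0),
		       (int)((bits & bitMarked) != 0));
		return 0;
	}

Compiled and run, this prints "off=21 shift=5 allocated=1 scan=1
marked=0".  Dropping bitScan from the or-mask yields exactly the
allocated-and-unscannable state that markspan now gives every fresh
block and that sweepspan restores for freed ones, which is why mallocgc
no longer has to touch the bitmap for pointerless (FlagNoScan) objects.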