runtime: keep objects in free lists marked as allocated.
author    Dmitriy Vyukov <dvyukov@google.com>
          Wed, 13 Aug 2014 16:42:55 +0000 (20:42 +0400)
committer Dmitriy Vyukov <dvyukov@google.com>
          Wed, 13 Aug 2014 16:42:55 +0000 (20:42 +0400)
Restore https://golang.org/cl/41040043 after GC rewrite.
Original description:
On the plus side, we don't need to change the bits on malloc and free.
On the downside, we need to mark objects in the free lists during GC.
But the free lists are small at GC time, so it should be a net win.
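
Concretely: the old bitmap encoded one of four mutually exclusive states in the
two low bits of each entry, so malloc and free had to rewrite them on every
transition. The new encoding (see the mgc0.h hunk at the end) treats the same
two bits as independent flags and simply leaves a freed object's entry alone;
the GC compensates by marking free-list entries before sweeping. A compilable
sketch of the two layouts (the old/new name prefixes are ours, the diff uses
the bare names):

/* Old scheme: the low two bits of each 4-bit bitmap entry hold one of
 * four exclusive states, so malloc and free must rewrite them. */
enum {
	oldBitMiddle    = 0, /* middle of an object */
	oldBitBoundary  = 1, /* boundary of a non-allocated object */
	oldBitAllocated = 2, /* boundary of an allocated object */
	oldBitMarked    = 3, /* boundary of an allocated, marked object */
};

/* New scheme: the same two bits are independent flags.  Free objects
 * keep bitBoundary set and look allocated; the GC compensates by
 * setting bitMarked on every free-list entry before sweeping. */
enum {
	newBitBoundary = 1, /* boundary of an object */
	newBitMarked   = 2, /* object is marked */
};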

benchmark             old ns/op     new ns/op     delta
BenchmarkMalloc8      21.9          20.4          -6.85%
BenchmarkMalloc16     31.1          29.6          -4.82%

LGTM=khr
R=khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/122280043

src/pkg/runtime/heapdump.c
src/pkg/runtime/malloc.go
src/pkg/runtime/mgc0.c
src/pkg/runtime/mgc0.h

src/pkg/runtime/heapdump.c
index b002feb1c2870d0f618dad68c0b4ff6fc89b6bab..e5032783a8faddc83ce40a8a2a3f147b7d18e412 100644
@@ -525,11 +525,17 @@ dumproots(void)
        runtime·iterate_finq(finq_callback);
 }
 
+// Bit vector of free marks.
+// Needs to be as big as the largest number of objects per span.
+#pragma dataflag NOPTR
+static byte free[PageSize/8];
+
 static void
 dumpobjs(void)
 {
-       uintptr i, j, size, n, off, shift, *bitp, bits;
+       uintptr i, j, size, n;
        MSpan *s;
+       MLink *l;
        byte *p;
 
        for(i = 0; i < runtime·mheap.nspan; i++) {
@@ -539,13 +545,15 @@ dumpobjs(void)
                p = (byte*)(s->start << PageShift);
                size = s->elemsize;
                n = (s->npages << PageShift) / size;
+               if(n > nelem(free))
+                       runtime·throw("free array doesn't have enough entries");
+               for(l = s->freelist; l != nil; l = l->next)
+                       free[((byte*)l - p) / size] = true;
                for(j = 0; j < n; j++, p += size) {
-                       off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
-                       bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
-                       shift = (off % wordsPerBitmapWord) * gcBits;
-                       bits = (*bitp >> shift) & bitMask;
-                       if(bits != bitAllocated)
-                               continue;
+                       if(free[j]) {
+                               free[j] = false;
+                               continue;
+                       }
                        dumpobj(p, size, makeheapobjbv(p, size));
                }
        }
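
Since the bitmap no longer says which objects are free, dumpobjs reconstructs
that per span: walk the free list into a span-local flag array, then dump only
the unflagged slots. A standalone sketch of the pattern with simplified types
(Link and visitlive are illustrative, not the runtime's; the diff clears its
array lazily while scanning, we clear it up front):

#include <stdbool.h>
#include <stddef.h>

typedef struct Link Link;
struct Link { Link *next; };

/*
 * Walk a span's free list into a span-local flag array, then visit
 * only the slots that were not on it.  p is the span base, size the
 * element size, n the element count; freemarks must hold >= n flags.
 */
static void
visitlive(char *p, size_t size, size_t n, Link *freelist, bool *freemarks,
          void (*visit)(void *obj, size_t size))
{
	size_t j;
	Link *l;

	for(j = 0; j < n; j++)
		freemarks[j] = false;
	for(l = freelist; l != NULL; l = l->next)
		freemarks[((char*)l - p) / size] = true;
	for(j = 0; j < n; j++)
		if(!freemarks[j])
			visit(p + j*size, size);
}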
src/pkg/runtime/malloc.go
index 71c0a4ecd7dfb207c5e5d7261d53d00d8623ca2f..76c06f314b14ad3c86e8db384daf88b8873e7c4d 100644
@@ -41,11 +41,9 @@ const (
        bitsDead           = 0
        bitsPointer        = 2
 
-       bitMiddle    = 0
-       bitBoundary  = 1
-       bitAllocated = 2
-       bitMarked    = 3
-       bitMask      = bitMiddle | bitBoundary | bitAllocated | bitMarked
+       bitBoundary = 1
+       bitMarked   = 2
+       bitMask     = bitBoundary | bitMarked
 )
 
 // All zero-sized allocations return a pointer to this byte.
@@ -185,110 +183,108 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                size = uintptr(s.elemsize)
        }
 
+       if flags&flagNoScan != 0 {
+               // All objects are pre-marked as noscan.
+               goto marked
+       }
+
        // From here till marked label marking the object as allocated
        // and storing type info in the GC bitmap.
-       arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
-       off := (uintptr(x) - arena_start) / ptrSize
-       xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-       shift := (off % wordsPerBitmapWord) * gcBits
-       if debugMalloc && (((*xbits)>>shift)&bitMask) != bitBoundary {
-               gothrow("bad bits in markallocated")
-       }
+       {
+               arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
+               off := (uintptr(x) - arena_start) / ptrSize
+               xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+               shift := (off % wordsPerBitmapWord) * gcBits
+               if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
+                       println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
+                       gothrow("bad bits in markallocated")
+               }
 
-       var ti, te uintptr
-       var ptrmask *uint8
-       if flags&flagNoScan != 0 {
-               // bitsDead in the first quadruple means don't scan.
+               var ti, te uintptr
+               var ptrmask *uint8
                if size == ptrSize {
-                       *xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated + (bitsDead << 2)) << shift)
-               } else {
-                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-                       *xbitsb = bitAllocated + (bitsDead << 2)
+                       // It's one word and it has pointers, it must be a pointer.
+                       *xbits |= (bitsPointer << 2) << shift
+                       goto marked
                }
-               goto marked
-       }
-       if size == ptrSize {
-               // It's one word and it has pointers, it must be a pointer.
-               *xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated | (bitsPointer << 2)) << shift)
-               goto marked
-       }
-       if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize {
-               if typ.kind&kindGCProg != 0 {
-                       nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
-                       masksize := nptr
-                       if masksize%2 != 0 {
-                               masksize *= 2 // repeated
+               if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize {
+                       if typ.kind&kindGCProg != 0 {
+                               nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+                               masksize := nptr
+                               if masksize%2 != 0 {
+                                       masksize *= 2 // repeated
+                               }
+                               masksize = masksize * pointersPerByte / 8 // 4 bits per word
+                               masksize++                                // unroll flag in the beginning
+                               if masksize > maxGCMask && typ.gc[1] != 0 {
+                                       // If the mask is too large, unroll the program directly
+                                       // into the GC bitmap. It's 7 times slower than copying
+                                       // from the pre-unrolled mask, but saves 1/16 of type size
+                                       // memory for the mask.
+                                       mp.ptrarg[0] = x
+                                       mp.ptrarg[1] = unsafe.Pointer(typ)
+                                       mp.scalararg[0] = uint(size)
+                                       mp.scalararg[1] = uint(size0)
+                                       onM(&unrollgcproginplace_m)
+                                       goto marked
+                               }
+                               ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
+                               // Check whether the program is already unrolled.
+                               if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
+                                       mp.ptrarg[0] = unsafe.Pointer(typ)
+                                       onM(&unrollgcprog_m)
+                               }
+                               ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+                       } else {
+                               ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
                        }
-                       masksize = masksize * pointersPerByte / 8 // 4 bits per word
-                       masksize++                                // unroll flag in the beginning
-                       if masksize > maxGCMask && typ.gc[1] != 0 {
-                               // If the mask is too large, unroll the program directly
-                               // into the GC bitmap. It's 7 times slower than copying
-                               // from the pre-unrolled mask, but saves 1/16 of type size
-                               // memory for the mask.
-                               mp.ptrarg[0] = x
-                               mp.ptrarg[1] = unsafe.Pointer(typ)
-                               mp.scalararg[0] = uint(size)
-                               mp.scalararg[1] = uint(size0)
-                               onM(&unrollgcproginplace_m)
+                       if size == 2*ptrSize {
+                               xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+                               *xbitsb = *ptrmask | bitBoundary
                                goto marked
                        }
-                       ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
-                       // Check whether the program is already unrolled.
-                       if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
-                               mp.ptrarg[0] = unsafe.Pointer(typ)
-                               onM(&unrollgcprog_m)
+                       te = uintptr(typ.size) / ptrSize
+                       // If the type occupies odd number of words, its mask is repeated.
+                       if te%2 == 0 {
+                               te /= 2
                        }
-                       ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
-               } else {
-                       ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
                }
                if size == 2*ptrSize {
                        xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-                       *xbitsb = *ptrmask | bitAllocated
+                       *xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitBoundary
                        goto marked
                }
-               te = uintptr(typ.size) / ptrSize
-               // If the type occupies odd number of words, its mask is repeated.
-               if te%2 == 0 {
-                       te /= 2
-               }
-       }
-       if size == 2*ptrSize {
-               xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-               *xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitAllocated
-               goto marked
-       }
-       // Copy pointer bitmask into the bitmap.
-       for i := uintptr(0); i < size0; i += 2 * ptrSize {
-               v := uint8((bitsPointer << 2) | (bitsPointer << 6))
-               if ptrmask != nil {
-                       v = *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
-                       ti++
-                       if ti == te {
-                               ti = 0
+               // Copy pointer bitmask into the bitmap.
+               for i := uintptr(0); i < size0; i += 2 * ptrSize {
+                       v := uint8((bitsPointer << 2) | (bitsPointer << 6))
+                       if ptrmask != nil {
+                               v = *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
+                               ti++
+                               if ti == te {
+                                       ti = 0
+                               }
                        }
+                       if i == 0 {
+                               v |= bitBoundary
+                       }
+                       if i+ptrSize == size0 {
+                               v &^= uint8(bitPtrMask << 4)
+                       }
+
+                       off := (uintptr(x) + i - arena_start) / ptrSize
+                       xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+                       shift := (off % wordsPerBitmapWord) * gcBits
+                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+                       *xbitsb = v
                }
-               if i == 0 {
-                       v |= bitAllocated
-               }
-               if i+ptrSize == size0 {
-                       v &= ^uint8(bitPtrMask << 4)
+               if size0%(2*ptrSize) == 0 && size0 < size {
+                       // Mark the word after last object's word as bitsDead.
+                       off := (uintptr(x) + size0 - arena_start) / ptrSize
+                       xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
+                       shift := (off % wordsPerBitmapWord) * gcBits
+                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
+                       *xbitsb = bitsDead << 2
                }
-
-               off := (uintptr(x) + i - arena_start) / ptrSize
-               xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-               shift := (off % wordsPerBitmapWord) * gcBits
-               xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-               *xbitsb = v
-       }
-       if size0%(2*ptrSize) == 0 && size0 < size {
-               // Mark the word after last object's word as bitsDead.
-               off := (uintptr(x) + size0 - arena_start) / ptrSize
-               xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-               shift := (off % wordsPerBitmapWord) * gcBits
-               xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-               *xbitsb = bitsDead << 2
        }
 marked:
        mp.mallocing = 0
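
Every bitmap access in this function repeats one piece of address arithmetic:
the bitmap grows downward from arena_start, and, per the comment in mgc0.h
below, heap word off is described in the off/wordsPerBitmapWord+1'th word
before it. A hedged C sketch of that lookup (bitmapaddr is our name; the
runtime inlines this everywhere):

#include <stdint.h>

enum {
	PtrSize            = sizeof(uintptr_t), /* 8 on 64-bit systems */
	gcBits             = 4,                 /* bitmap bits per heap word */
	wordsPerBitmapWord = 8*PtrSize/gcBits,  /* 16 on 64-bit, 8 on 32-bit */
};

/*
 * Locate the gcBits-wide bitmap entry for the heap word at p.  The
 * bitmap grows down from arena_start: word number off is described in
 * the off/wordsPerBitmapWord+1'th uintptr before arena_start.
 */
static void
bitmapaddr(uintptr_t p, uintptr_t arena_start, uintptr_t **bitp,
           uintptr_t *shift)
{
	uintptr_t off;

	off = (p - arena_start) / PtrSize;
	*bitp = (uintptr_t*)arena_start - off/wordsPerBitmapWord - 1;
	*shift = (off % wordsPerBitmapWord) * gcBits;
}

Given bitp and shift, (*bitp>>shift)&bitMask reads the flag bits and
(*bitp>>(shift+2))&BitsMask the type bits, which is the load pattern repeated
throughout the hunks above and below.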
src/pkg/runtime/mgc0.c
index d7e5d89f013594e32f940a2dfc2de8ec164d0d41..05b84386be10881d8b7161f81851f7fc764ccf21 100644
@@ -318,7 +318,7 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                bits = cached;
                                cached >>= gcBits;
                                ncached--;
-                               if(i != 0 && (bits&bitMask) != bitMiddle)
+                               if(i != 0 && (bits&bitBoundary) != 0)
                                        break; // reached beginning of the next object
                                bits = (bits>>2)&BitsMask;
                                if(bits == BitsDead)
@@ -403,13 +403,13 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        shift = (off % wordsPerBitmapWord) * gcBits;
                        xbits = *bitp;
                        bits = (xbits >> shift) & bitMask;
-                       if(bits == bitMiddle) {
+                       if((bits&bitBoundary) == 0) {
                                // Not a beginning of a block, check if we have block boundary in xbits.
                                while(shift > 0) {
                                        obj -= PtrSize;
                                        shift -= gcBits;
                                        bits = (xbits >> shift) & bitMask;
-                                       if(bits != bitMiddle)
+                                       if((bits&bitBoundary) != 0)
                                                goto havebits;
                                }
                                // Otherwise consult span table to find the block beginning.
@@ -426,7 +426,8 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                        p = p+idx*size;
                                }
                                if(p == obj) {
-                                       runtime·printf("runtime: failed to find block beginning for %p s->limit=%p\n", p, s->limit);
+                                       runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
+                                               p, s->start*PageSize, s->limit);
                                        runtime·throw("failed to find block beginning");
                                }
                                obj = p;
@@ -436,8 +437,8 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                havebits:
                        // Now we have bits, bitp, and shift correct for
                        // obj pointing at the base of the object.
-                       // Only care about allocated and not marked.
-                       if(bits != bitAllocated)
+                       // Only care about not marked objects.
+                       if((bits&bitMarked) != 0)
                                continue;
                        if(work.nproc == 1)
                                *bitp |= bitMarked<<shift;
@@ -445,12 +446,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                for(;;) {
                                        xbits = *bitp;
                                        bits = (xbits>>shift) & bitMask;
-                                       if(bits != bitAllocated)
+                                       if((bits&bitMarked) != 0)
                                                break;
                                        if(runtime·casp((void**)bitp, (void*)xbits, (void*)(xbits|(bitMarked<<shift))))
                                                break;
                                }
-                               if(bits != bitAllocated)
+                               if((bits&bitMarked) != 0)
                                        continue;
                        }
                        if(((xbits>>(shift+2))&BitsMask) == BitsDead)
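
With bitMarked reduced to a single flag, marking under work.nproc > 1 is just
an atomic OR, built above from a casp retry loop. An equivalent C11 sketch
(the runtime predates <stdatomic.h> and uses its own casp; trymark is our
name):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

enum { bitMarked = 2 };

/*
 * Set bitMarked<<shift in *bitp.  Returns false if another collector
 * thread marked the object first, mirroring scanblock's casp loop.
 */
static bool
trymark(_Atomic uintptr_t *bitp, uintptr_t shift)
{
	uintptr_t xbits = atomic_load(bitp);

	for(;;) {
		if(xbits & ((uintptr_t)bitMarked<<shift))
			return false; /* already marked */
		if(atomic_compare_exchange_weak(bitp, &xbits,
		    xbits | ((uintptr_t)bitMarked<<shift)))
			return true;
		/* xbits was refreshed by the failed CAS; retry */
	}
}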
@@ -892,7 +893,7 @@ runtime·MSpan_Sweep(MSpan *s)
        byte *p;
        MCache *c;
        byte *arena_start;
-       MLink head, *end;
+       MLink head, *end, *link;
        Special *special, **specialp, *y;
        bool res, sweepgenset;
 
@@ -922,6 +923,14 @@ runtime·MSpan_Sweep(MSpan *s)
        c = g->m->mcache;
        sweepgenset = false;
 
+       // Mark any free objects in this span so we don't collect them.
+       for(link = s->freelist; link != nil; link = link->next) {
+               off = (uintptr*)link - (uintptr*)arena_start;
+               bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+               shift = (off % wordsPerBitmapWord) * gcBits;
+               *bitp |= bitMarked<<shift;
+       }
+
        // Unlink & free special records for any objects we're about to free.
        specialp = &s->specials;
        special = *specialp;
@@ -932,7 +941,7 @@ runtime·MSpan_Sweep(MSpan *s)
                bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
                shift = (off % wordsPerBitmapWord) * gcBits;
                bits = (*bitp>>shift) & bitMask;
-               if(bits == bitAllocated) {
+               if((bits&bitMarked) == 0) {
                        // Find the exact byte for which the special was setup
                        // (as opposed to object beginning).
                        p = (byte*)(s->start << PageShift) + special->offset;
@@ -946,10 +955,6 @@ runtime·MSpan_Sweep(MSpan *s)
                        }
                } else {
                        // object is still live: keep special record
-                       if(bits != bitMarked) {
-                               runtime·printf("runtime: bad bits for special object %p: %d\n", p, (int32)bits);
-                               runtime·throw("runtime: bad bits for special object");
-                       }
                        specialp = &special->next;
                        special = *specialp;
                }
@@ -966,20 +971,17 @@ runtime·MSpan_Sweep(MSpan *s)
                xbits = *bitp;
                bits = (xbits>>shift) & bitMask;
 
-               // Non-allocated object, ignore.
-               if(bits == bitBoundary)
-                       continue;
                // Allocated and marked object, reset bits to allocated.
-               if(bits == bitMarked) {
-                       *bitp = (xbits & ~(bitMarked<<shift)) | (bitAllocated<<shift);
+               if((bits&bitMarked) != 0) {
+                       *bitp &= ~(bitMarked<<shift);
                        continue;
                }
                // At this point we know that we are looking at garbage object
                // that needs to be collected.
                if(runtime·debug.allocfreetrace)
                        runtime·tracefree(p, size);
-               // Reset to boundary.
-               *bitp = (xbits & ~(bitAllocated<<shift)) | (bitBoundary<<shift);
+               // Reset to allocated+noscan.
+               *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
                if(cl == 0) {
                        // Free large span.
                        runtime·unmarkspan(p, s->npages<<PageShift);
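
The garbage path above packs two updates into one store: clear bitMarked
together with the stale type bits, then write BitsDead into the type field,
leaving the slot looking allocated-but-noscan, which is exactly the state a
later noscan malloc expects pre-marked. A worked example with concrete values
(constants copied from this change; the nibble contents are arbitrary):

#include <assert.h>
#include <stdint.h>

enum {
	bitBoundary = 1, /* flag bits, as in mgc0.h after this change */
	bitMarked   = 2,
	BitsDead    = 0, /* type-bits value meaning "no pointers, don't scan" */
	BitsPointer = 2,
	BitsMask    = 3,
};

int
main(void)
{
	/* One 4-bit bitmap entry at shift: a marked pointer word. */
	uintptr_t shift = 8;
	uintptr_t xbits = (uintptr_t)(bitBoundary | bitMarked | (BitsPointer<<2)) << shift;

	/* Sweep's reset for garbage: clear bitMarked and the type bits,
	 * then write BitsDead, leaving "allocated + noscan". */
	uintptr_t reset = (xbits & ~(((uintptr_t)(bitMarked | (BitsMask<<2))) << shift))
	                | ((uintptr_t)BitsDead << (shift+2));

	assert(((reset>>shift) & 0xf) == bitBoundary); /* only the boundary bit survives */
	return 0;
}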
@@ -1857,13 +1859,13 @@ runtime·unrollgcproginplace_m(void)
        off = (uintptr*)v - (uintptr*)arena_start;
        b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
        shift = (off % wordsPerBitmapWord) * gcBits;
-       *b |= bitAllocated<<shift;
+       *b |= bitBoundary<<shift;
        // Mark word after last as BitsDead.
        if(size0 < size) {
                off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
                b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
                shift = (off % wordsPerBitmapWord) * gcBits;
-               *b &= ~(bitPtrMask<<shift) | (BitsDead<<(shift+2));
+               *b &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
        }
 }
 
@@ -1931,7 +1933,7 @@ runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
                        b0 = b;
                        x = 0;
                }
-               x |= bitBoundary<<shift;
+               x |= (bitBoundary<<shift) | ((uintptr)BitsDead<<(shift+2));
        }
        *b0 = x;
 }
@@ -1958,8 +1960,7 @@ runtime·unmarkspan(void *v, uintptr n)
        // one span, so no other goroutines are changing these
        // bitmap words.
        n /= wordsPerBitmapWord;
-       while(n-- > 0)
-               *b-- = 0;
+       runtime·memclr((byte*)(b - n + 1), n*PtrSize);
 }
 
 void
src/pkg/runtime/mgc0.h
index 3b1c5ba8cf68f6c5c87076601ee9da79c1dea608..893819c128e0759479d0093beb9d67c846115c16 100644
@@ -70,10 +70,8 @@ enum {
 // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
 // the only difference is that the divisor is 8.)
 
-#define bitMiddle      ((uintptr)0) // middle of an object
-#define bitBoundary    ((uintptr)1) // boundary on a non-allocated object
-#define bitAllocated   ((uintptr)2) // boundary on an allocated object
-#define bitMarked      ((uintptr)3) // boundary on an allocated and marked object
+#define bitBoundary    ((uintptr)1) // boundary of an object
+#define bitMarked      ((uintptr)2) // marked object
 
-#define bitMask                ((uintptr)bitMiddle|bitBoundary|bitAllocated|bitMarked)
+#define bitMask                ((uintptr)bitBoundary|bitMarked)
 #define bitPtrMask     ((uintptr)BitsMask<<2)
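
Taken together, each heap word is described by one gcBits = 4 nibble: bits 0-1
hold the two flags defined here, bits 2-3 the type field (bitsDead,
bitsPointer, ...) that bitPtrMask selects. A compile-time sanity sketch of
that layout (C11; BitsMask = 3 is assumed from the surrounding headers):

#include <assert.h>

#define bitBoundary ((unsigned)1)           /* bit 0: object boundary */
#define bitMarked   ((unsigned)2)           /* bit 1: marked by GC */
#define BitsMask    ((unsigned)3)           /* assumed; two type bits */
#define bitPtrMask  ((unsigned)BitsMask<<2) /* bits 2-3: type info */

/* The flag bits and the type field occupy disjoint halves of a nibble. */
static_assert(((bitBoundary|bitMarked) & bitPtrMask) == 0, "bitmap layout");
static_assert((bitBoundary|bitMarked|bitPtrMask) == 0xf, "fills one nibble");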