]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: make the GC bitmap a byte array
authorDmitriy Vyukov <dvyukov@google.com>
Tue, 19 Aug 2014 13:38:00 +0000 (17:38 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Tue, 19 Aug 2014 13:38:00 +0000 (17:38 +0400)
Half the code in the garbage collector accesses the bitmap
as an array of bytes instead of as an array of uintptrs.
This is tricky to do correctly in a portable fashion,
it breaks on big-endian systems.
Make the bitmap a byte array.
Simplifies markallocated, scanblock and span sweep along the way,
as we don't need to recalculate bitmap position for each word.

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/125250043

src/pkg/runtime/asm_386.s
src/pkg/runtime/asm_amd64.s
src/pkg/runtime/atomic_arm.c
src/pkg/runtime/heapdump.c
src/pkg/runtime/malloc.go
src/pkg/runtime/mgc0.c
src/pkg/runtime/mgc0.h
src/pkg/runtime/runtime.h

index 0607bc802171eb3d0f9fb2e1fad9f214797f1f57..16e3f3136a5c17b1f40fc9b0a1a085192dd1e67f 100644 (file)
@@ -619,6 +619,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
        XADDL   AX, (SP)
        RET
 
+// void        runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-8
+       MOVL    ptr+0(FP), AX
+       MOVB    val+4(FP), BX
+       LOCK
+       ORB     BX, (AX)
+       RET
+
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller
index d94df0bf8dd98bb37e489d9b01fc4b0fbf79affd..6446b5d832c32606a7ba7e940ebc2284a32f65fd 100644 (file)
@@ -702,6 +702,14 @@ TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
        XCHGQ   AX, 0(BX)
        RET
 
+// void        runtime·atomicor8(byte volatile*, byte);
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-16
+       MOVQ    ptr+0(FP), AX
+       MOVB    val+8(FP), BX
+       LOCK
+       ORB     BX, (AX)
+       RET
+
 // void jmpdefer(fn, sp);
 // called from deferreturn.
 // 1. pop the caller
index d914475c7f40424148dcaebca92a54e011dea121..537bf18331ca14202c5e6476cc99a8f50950ac1a 100644 (file)
@@ -167,3 +167,19 @@ runtime·atomicstore64(uint64 volatile *addr, uint64 v)
        *addr = v;
        runtime·unlock(LOCK(addr));
 }
+
+#pragma textflag NOSPLIT
+void
+runtime·atomicor8(byte volatile *addr, byte v)
+{
+       uint32 *addr32, old, word, shift;
+
+       // Align down to 4 bytes and use 32-bit CAS.
+       addr32 = (uint32*)((uintptr)addr & ~3);
+       word = ((uint32)v) << (((uintptr)addr & 3) * 8);
+       for(;;) {
+               old = *addr32;
+               if(runtime·cas(addr32, old, old|word))
+                       break;
+       }
+}
index babb32fe5ad44016864e05a5fe8c967fe18a5243..63d80b8d0e1042ef70b0fc7ac0454710fb6e6ff3 100644 (file)
@@ -820,7 +820,8 @@ dumpbvtypes(BitVector *bv, byte *base)
 static BitVector
 makeheapobjbv(byte *p, uintptr size)
 {
-       uintptr off, shift, *bitp, bits, nptr, i;
+       uintptr off, nptr, i;
+       byte shift, *bitp, bits;
        bool mw;
 
        // Extend the temp buffer if necessary.
@@ -838,13 +839,13 @@ makeheapobjbv(byte *p, uintptr size)
        mw = false;
        for(i = 0; i < nptr; i++) {
                off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
-               bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
-               bits = (*bitp >> (shift + 2)) & 3;
+               bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+               shift = (off % wordsPerBitmapByte) * gcBits;
+               bits = (*bitp >> (shift + 2)) & BitsMask;
                if(!mw && bits == BitsDead)
                        break;  // end of heap object
                mw = !mw && bits == BitsMultiWord;
-               tmpbuf[i*BitsPerPointer/8] &= ~(3<<((i*BitsPerPointer)%8));
+               tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
                tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
        }
        return (BitVector){i*BitsPerPointer, (uint32*)tmpbuf};
index 152b3b6b68ae13b477e823c17ed26ea9f29d50ac..8ee460755fd92d894fa225e9c93841c833a0e330 100644 (file)
@@ -22,8 +22,8 @@ const (
        pageSize  = 1 << pageShift
        pageMask  = pageSize - 1
 
-       wordsPerBitmapWord = ptrSize * 8 / 4
        gcBits             = 4
+       wordsPerBitmapByte = 8 / gcBits
        bitsPerPointer     = 2
        bitsMask           = 1<<bitsPerPointer - 1
        pointersPerByte    = 8 / bitsPerPointer
@@ -211,8 +211,8 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
        {
                arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
                off := (uintptr(x) - arena_start) / ptrSize
-               xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-               shift := (off % wordsPerBitmapWord) * gcBits
+               xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+               shift := (off % wordsPerBitmapByte) * gcBits
                if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
                        println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
                        gothrow("bad bits in markallocated")
@@ -260,8 +260,7 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                        ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
                }
                if size == 2*ptrSize {
-                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-                       *xbitsb = *ptrmask | bitBoundary
+                       *xbits = *ptrmask | bitBoundary
                        goto marked
                }
                te = uintptr(typ.size) / ptrSize
@@ -283,19 +282,12 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                                v &^= uint8(bitPtrMask << 4)
                        }
 
-                       off := (uintptr(x) + i - arena_start) / ptrSize
-                       xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-                       shift := (off % wordsPerBitmapWord) * gcBits
-                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-                       *xbitsb = v
+                       *xbits = v
+                       xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
                }
                if size0%(2*ptrSize) == 0 && size0 < size {
                        // Mark the word after last object's word as bitsDead.
-                       off := (uintptr(x) + size0 - arena_start) / ptrSize
-                       xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-                       shift := (off % wordsPerBitmapWord) * gcBits
-                       xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-                       *xbitsb = bitsDead << 2
+                       *xbits = bitsDead << 2
                }
        }
 marked:
index 60a6181fc94cb68f1c60f1923cd843184530e02d..14743c2838eba71041ce4776c2c7419666032e2d 100644 (file)
@@ -212,8 +212,8 @@ static struct {
 static void
 scanblock(byte *b, uintptr n, byte *ptrmask)
 {
-       byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], bits8;
-       uintptr i, nobj, size, idx, *bitp, bits, xbits, shift, x, off, cached, scanbufpos;
+       byte *obj, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached;
+       uintptr i, nobj, size, idx, x, off, scanbufpos;
        intptr ncached;
        Workbuf *wbuf;
        String *str;
@@ -237,6 +237,10 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
        for(i = 0; i < nelem(scanbuf); i++)
                scanbuf[i] = nil;
 
+       ptrbitp = nil;
+       cached = 0;
+       ncached = 0;
+
        // ptrmask can have 3 possible values:
        // 1. nil - obtain pointer mask from GC bitmap.
        // 2. ScanConservatively - don't use any mask, scan conservatively.
@@ -295,8 +299,15 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        }
                        ptrmask = ScanConservatively;
                }
-               cached = 0;
-               ncached = 0;
+               // Find bits of the beginning of the object.
+               if(ptrmask == nil) {
+                       off = (uintptr*)b - (uintptr*)arena_start;
+                       ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
+                       shift = (off % wordsPerBitmapByte) * gcBits;
+                       cached = *ptrbitp >> shift;
+                       cached &= ~bitBoundary;
+                       ncached = (8 - shift)/gcBits;
+               }
                for(i = 0; i < n; i += PtrSize) {
                        obj = nil;
                        // Find bits for this word.
@@ -308,16 +319,13 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                // Consult GC bitmap.
                                if(ncached <= 0) {
                                        // Refill cache.
-                                       off = (uintptr*)(b+i) - (uintptr*)arena_start;
-                                       bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-                                       shift = (off % wordsPerBitmapWord) * gcBits;
-                                       cached = *bitp >> shift;
-                                       ncached = (PtrSize*8 - shift)/gcBits;
+                                       cached = *--ptrbitp;
+                                       ncached = 2;
                                }
                                bits = cached;
                                cached >>= gcBits;
                                ncached--;
-                               if(i != 0 && (bits&bitBoundary) != 0)
+                               if((bits&bitBoundary) != 0)
                                        break; // reached beginning of the next object
                                bits = (bits>>2)&BitsMask;
                                if(bits == BitsDead)
@@ -336,11 +344,9 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        // Find the next pair of bits.
                        if(ptrmask == nil) {
                                if(ncached <= 0) {
-                                       off = (uintptr*)(b+i+PtrSize) - (uintptr*)arena_start;
-                                       bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-                                       shift = (off % wordsPerBitmapWord) * gcBits;
-                                       cached = *bitp >> shift;
-                                       ncached = (PtrSize*8 - shift)/gcBits;
+                                       // Refill cache.
+                                       cached = *--ptrbitp;
+                                       ncached = 2;
                                }
                                bits = (cached>>2)&BitsMask;
                        } else
@@ -383,8 +389,14 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
 
                        if(bits == BitsSlice) {
                                i += 2*PtrSize;
-                               cached >>= 2*gcBits;
-                               ncached -= 2;
+                               if(ncached == 2)
+                                       ncached = 0;
+                               else if(ptrmask == nil) {
+                                       // Refill cache and consume one quadruple.
+                                       cached = *--ptrbitp;
+                                       cached >>= gcBits;
+                                       ncached = 1;
+                               }
                        } else {
                                i += PtrSize;
                                cached >>= gcBits;
@@ -398,20 +410,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                continue;
                        // Mark the object.
                        off = (uintptr*)obj - (uintptr*)arena_start;
-                       bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-                       shift = (off % wordsPerBitmapWord) * gcBits;
+                       bitp = arena_start - off/wordsPerBitmapByte - 1;
+                       shift = (off % wordsPerBitmapByte) * gcBits;
                        xbits = *bitp;
                        bits = (xbits >> shift) & bitMask;
                        if((bits&bitBoundary) == 0) {
-                               // Not a beginning of a block, check if we have block boundary in xbits.
-                               while(shift > 0) {
-                                       obj -= PtrSize;
-                                       shift -= gcBits;
-                                       bits = (xbits >> shift) & bitMask;
-                                       if((bits&bitBoundary) != 0)
-                                               goto havebits;
-                               }
-                               // Otherwise consult span table to find the block beginning.
+                               // Not a beginning of a block, consult span table to find the block beginning.
                                k = (uintptr)obj>>PageShift;
                                x = k;
                                x -= (uintptr)arena_start>>PageShift;
@@ -433,7 +437,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                                goto markobj;
                        }
 
-               havebits:
                        // Now we have bits, bitp, and shift correct for
                        // obj pointing at the base of the object.
                        // Only care about not marked objects.
@@ -449,22 +452,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
                        // For 8-byte objects we use non-atomic store, if the other
                        // quadruple is already marked. Otherwise we resort to CAS
                        // loop for marking.
-                       bits8 = xbits>>(shift&~7);
-                       if((bits8&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
+                       if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
                                work.nproc == 1)
-                               ((uint8*)bitp)[shift/8] = bits8 | (bitMarked<<(shift&7));
-                       else {
-                               for(;;) {
-                                       if(runtime·casp((void**)bitp, (void*)xbits, (void*)(xbits|(bitMarked<<shift))))
-                                               break;
-                                       xbits = *bitp;
-                                       bits = (xbits>>shift) & bitMask;
-                                       if((bits&bitMarked) != 0)
-                                               break;
-                               }
-                               if((bits&bitMarked) != 0)
-                                       continue;
-                       }
+                               *bitp = xbits | (bitMarked<<shift);
+                       else
+                               runtime·atomicor8(bitp, bitMarked<<shift);
+
                        if(((xbits>>(shift+2))&BitsMask) == BitsDead)
                                continue;  // noscan object
 
@@ -901,9 +894,9 @@ bool
 runtime·MSpan_Sweep(MSpan *s, bool preserve)
 {
        int32 cl, n, npages, nfree;
-       uintptr size, off, *bitp, shift, xbits, bits;
+       uintptr size, off, step;
        uint32 sweepgen;
-       byte *p;
+       byte *p, *bitp, shift, xbits, bits;
        MCache *c;
        byte *arena_start;
        MLink head, *end, *link;
@@ -939,8 +932,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
        // Mark any free objects in this span so we don't collect them.
        for(link = s->freelist; link != nil; link = link->next) {
                off = (uintptr*)link - (uintptr*)arena_start;
-               bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
+               bitp = arena_start - off/wordsPerBitmapByte - 1;
+               shift = (off % wordsPerBitmapByte) * gcBits;
                *bitp |= bitMarked<<shift;
        }
 
@@ -951,8 +944,8 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
                // A finalizer can be set for an inner byte of an object, find object beginning.
                p = (byte*)(s->start << PageShift) + special->offset/size*size;
                off = (uintptr*)p - (uintptr*)arena_start;
-               bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
+               bitp = arena_start - off/wordsPerBitmapByte - 1;
+               shift = (off % wordsPerBitmapByte) * gcBits;
                bits = (*bitp>>shift) & bitMask;
                if((bits&bitMarked) == 0) {
                        // Find the exact byte for which the special was setup
@@ -977,10 +970,27 @@ runtime·MSpan_Sweep(MSpan *s, bool preserve)
        // This thread owns the span now, so it can manipulate
        // the block bitmap without atomic operations.
        p = (byte*)(s->start << PageShift);
+       // Find bits for the beginning of the span.
+       off = (uintptr*)p - (uintptr*)arena_start;
+       bitp = arena_start - off/wordsPerBitmapByte - 1;
+       shift = 0;
+       step = size/(PtrSize*wordsPerBitmapByte);
+       // Rewind to the previous quadruple as we move to the next
+       // in the beginning of the loop.
+       bitp += step;
+       if(step == 0) {
+               // 8-byte objects.
+               bitp++;
+               shift = gcBits;
+       }
        for(; n > 0; n--, p += size) {
-               off = (uintptr*)p - (uintptr*)arena_start;
-               bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
+               bitp -= step;
+               if(step == 0) {
+                       if(shift != 0)
+                               bitp--;
+                       shift = gcBits - shift;
+               }
+
                xbits = *bitp;
                bits = (xbits>>shift) & bitMask;
 
@@ -1759,8 +1769,8 @@ runtime·wakefing(void)
 static byte*
 unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
 {
-       uintptr *b, off, shift, pos, siz, i;
-       byte *arena_start, *prog1, v;
+       uintptr pos, siz, i, off;
+       byte *arena_start, *prog1, v, *bitp, shift;
 
        arena_start = runtime·mheap.arena_start;
        pos = *ppos;
@@ -1777,11 +1787,11 @@ unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
                                if(inplace) {
                                        // Store directly into GC bitmap.
                                        off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
-                                       b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-                                       shift = (off % wordsPerBitmapWord) * gcBits;
-                                       if((shift%8)==0)
-                                               ((byte*)b)[shift/8] = 0;
-                                       ((byte*)b)[shift/8] |= v<<((shift%8)+2);
+                                       bitp = arena_start - off/wordsPerBitmapByte - 1;
+                                       shift = (off % wordsPerBitmapByte) * gcBits;
+                                       if(shift==0)
+                                               *bitp = 0;
+                                       *bitp |= v<<(shift+2);
                                        pos += PtrSize;
                                } else if(sparse) {
                                        // 4-bits per word
@@ -1847,8 +1857,8 @@ unrollglobgcprog(byte *prog, uintptr size)
 void
 runtime·unrollgcproginplace_m(void)
 {
-       uintptr size, size0, *b, off, shift, pos;
-       byte *arena_start, *prog;
+       uintptr size, size0, pos, off;
+       byte *arena_start, *prog, *bitp, shift;
        Type *typ;
        void *v;
 
@@ -1866,15 +1876,15 @@ runtime·unrollgcproginplace_m(void)
        // Mark first word as bitAllocated.
        arena_start = runtime·mheap.arena_start;
        off = (uintptr*)v - (uintptr*)arena_start;
-       b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-       shift = (off % wordsPerBitmapWord) * gcBits;
-       *b |= bitBoundary<<shift;
+       bitp = arena_start - off/wordsPerBitmapByte - 1;
+       shift = (off % wordsPerBitmapByte) * gcBits;
+       *bitp |= bitBoundary<<shift;
        // Mark word after last as BitsDead.
        if(size0 < size) {
                off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
-               b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
-               *b &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
+               bitp = arena_start - off/wordsPerBitmapByte - 1;
+               shift = (off % wordsPerBitmapByte) * gcBits;
+               *bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
        }
 }
 
@@ -1916,60 +1926,67 @@ runtime·unrollgcprog_m(void)
 void
 runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
 {
-       uintptr *b, *b0, off, shift, x;
-       byte *p;
+       uintptr i, off, step;
+       byte *b;
 
        if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
                runtime·throw("markspan: bad pointer");
 
-       p = v;
-       if(leftover)    // mark a boundary just past end of last block too
-               n++;
-
-       b0 = nil;
-       x = 0;
-       for(; n-- > 0; p += size) {
-               // Okay to use non-atomic ops here, because we control
-               // the entire span, and each bitmap word has bits for only
-               // one span, so no other goroutines are changing these
-               // bitmap words.
-               off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset
-               b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
-               shift = (off % wordsPerBitmapWord) * gcBits;
-               if(b0 != b) {
-                       if(b0 != nil)
-                               *b0 = x;
-                       b0 = b;
-                       x = 0;
-               }
-               x |= (bitBoundary<<shift) | ((uintptr)BitsDead<<(shift+2));
+       // Find bits of the beginning of the span.
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+       if((off%wordsPerBitmapByte) != 0)
+               runtime·throw("markspan: unaligned length");
+
+       // Okay to use non-atomic ops here, because we control
+       // the entire span, and each bitmap byte has bits for only
+       // one span, so no other goroutines are changing these bitmap words.
+
+       if(size == PtrSize) {
+               // Possible only on 64-bits (minimal size class is 8 bytes).
+               // Poor man's memset(0x11).
+               if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
+                       runtime·throw("markspan: bad bits");
+               if((n%(wordsPerBitmapByte*PtrSize)) != 0)
+                       runtime·throw("markspan: unaligned length");
+               b = b - n/wordsPerBitmapByte + 1;       // find first byte
+               if(((uintptr)b%PtrSize) != 0)
+                       runtime·throw("markspan: unaligned pointer");
+               for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
+                       *(uintptr*)b = (uintptr)0x1111111111111111ULL;  // bitBoundary+BitsDead
+               return;
        }
-       *b0 = x;
+
+       if(leftover)
+               n++;    // mark a boundary just past end of last block too
+       step = size/(PtrSize*wordsPerBitmapByte);
+       for(i = 0; i != n; i++, b -= step)
+               *b = bitBoundary|(BitsDead<<2);
 }
 
 // unmark the span of memory at v of length n bytes.
 void
 runtime·unmarkspan(void *v, uintptr n)
 {
-       uintptr *p, *b, off;
+       uintptr off;
+       byte *b;
 
        if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
                runtime·throw("markspan: bad pointer");
 
-       p = v;
-       off = p - (uintptr*)runtime·mheap.arena_start;  // word offset
-       if((off % wordsPerBitmapWord) != 0)
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+       if((off % (PtrSize*wordsPerBitmapByte)) != 0)
                runtime·throw("markspan: unaligned pointer");
-       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
        n /= PtrSize;
-       if(n%wordsPerBitmapWord != 0)
+       if(n%(PtrSize*wordsPerBitmapByte) != 0)
                runtime·throw("unmarkspan: unaligned length");
        // Okay to use non-atomic ops here, because we control
        // the entire span, and each bitmap word has bits for only
        // one span, so no other goroutines are changing these
        // bitmap words.
-       n /= wordsPerBitmapWord;
-       runtime·memclr((byte*)(b - n + 1), n*PtrSize);
+       n /= wordsPerBitmapByte;
+       runtime·memclr(b - n + 1, n);
 }
 
 void
@@ -1983,7 +2000,7 @@ runtime·MHeap_MapBits(MHeap *h)
        };
        uintptr n;
 
-       n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
+       n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
        n = ROUND(n, bitmapChunk);
        n = ROUND(n, PhysPageSize);
        if(h->bitmap_mapped >= n)
@@ -2011,8 +2028,8 @@ void
 runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
 {
        Stkframe frame;
-       uintptr i, n, off, bits, shift, *b;
-       byte *base;
+       uintptr i, n, off;
+       byte *base, bits, shift, *b;
 
        *mask = nil;
        *len = 0;
@@ -2047,8 +2064,8 @@ runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
                *mask = runtime·mallocgc(*len, nil, 0);
                for(i = 0; i < n; i += PtrSize) {
                        off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
-                       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
-                       shift = (off % wordsPerBitmapWord) * gcBits;
+                       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
+                       shift = (off % wordsPerBitmapByte) * gcBits;
                        bits = (*b >> (shift+2))&BitsMask;
                        (*mask)[i/PtrSize] = bits;
                }
index 893819c128e0759479d0093beb9d67c846115c16..7449398b9ff3266ae385a2cb61583459271bf82f 100644 (file)
@@ -8,8 +8,8 @@ enum {
        ScanStackByFrames = 1,
 
        // Four bits per word (see #defines below).
-       wordsPerBitmapWord = sizeof(void*)*8/4,
        gcBits = 4,
+       wordsPerBitmapByte = 8/gcBits,
 
        // GC type info programs.
        // The programs allow to store type info required for GC in a compact form.
index 0dc60b286b30fe594631c393431466045685976e..867da3f46af849ecfe9bcdf50c89398f0b7a297f 100644 (file)
@@ -913,6 +913,7 @@ void        runtime·atomicstore64(uint64 volatile*, uint64);
 uint64 runtime·atomicload64(uint64 volatile*);
 void*  runtime·atomicloadp(void* volatile*);
 void   runtime·atomicstorep(void* volatile*, void*);
+void   runtime·atomicor8(byte volatile*, byte);
 
 void   runtime·setg(G*);
 void   runtime·newextram(void);