runtime: faster allocator, garbage collector
author     Russ Cox <rsc@golang.org>
           Thu, 3 Feb 2011 04:03:47 +0000 (23:03 -0500)
committer  Russ Cox <rsc@golang.org>
           Thu, 3 Feb 2011 04:03:47 +0000 (23:03 -0500)
GC is still single-threaded.
Multiple threads will happen in another CL.

Garbage collection pauses are typically
about half as long as they were before this CL.

R=brainman, iant, r
CC=golang-dev
https://golang.org/cl/3975046

13 files changed:
src/pkg/runtime/debug.go
src/pkg/runtime/iface.c
src/pkg/runtime/malloc.goc
src/pkg/runtime/malloc.h
src/pkg/runtime/mcentral.c
src/pkg/runtime/mfinal.c
src/pkg/runtime/mgc0.c
src/pkg/runtime/mheap.c
src/pkg/runtime/mprof.goc
src/pkg/runtime/msize.c
src/pkg/runtime/slice.c
src/pkg/runtime/string.goc
src/pkg/runtime/windows/mem.c

diff --git a/src/pkg/runtime/debug.go b/src/pkg/runtime/debug.go
index d09db1be6aeafa480a1f1dd70f64e32418d9a24e..5117e1a551e25b41eea807155c4a3e9d9c79d264 100644
@@ -69,7 +69,8 @@ type MemStatsType struct {
 
        // Per-size allocation statistics.
        // Not locked during update; approximate.
-       BySize [67]struct {
+       // 61 is NumSizeClasses in the C code.
+       BySize [61]struct {
                Size    uint32
                Mallocs uint64
                Frees   uint64
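
The Go-side array length must track NumSizeClasses in malloc.h below; the old 67 had drifted out of sync with the C code. A sketch of the kind of startup cross-check that catches such drift, using the sizeof_C_MStats that malloc.goc exports (the helper itself is hypothetical, not part of this CL):

	extern int32 runtime·sizeof_C_MStats;	// really defined in malloc.goc

	static void
	checkmstats(uintptr gosize)	// hypothetical; gosize = size of the Go MemStatsType
	{
		// If the Go mirror of MStats (including BySize[NumSizeClasses])
		// disagrees with the C layout, fail loudly at startup rather
		// than silently corrupting statistics.
		if(runtime·sizeof_C_MStats != gosize)
			runtime·throw("MemStatsType out of sync with C MStats");
	}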
diff --git a/src/pkg/runtime/iface.c b/src/pkg/runtime/iface.c
index aa36df68eee3f4e9ac5ae819fab9f0b22a2418fb..3dec45e2b8ebc1e5f9210d73d283a19f01a30700 100644
@@ -702,7 +702,7 @@ unsafe·New(Eface typ, void *ret)
        t = (Type*)((Eface*)typ.data-1);
 
        if(t->kind&KindNoPointers)
-               ret = runtime·mallocgc(t->size, RefNoPointers, 1, 1);
+               ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1);
        else
                ret = runtime·mal(t->size);
        FLUSH(&ret);
@@ -722,7 +722,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret)
        
        size = n*t->size;
        if(t->kind&KindNoPointers)
-               ret = runtime·mallocgc(size, RefNoPointers, 1, 1);
+               ret = runtime·mallocgc(size, FlagNoPointers, 1, 1);
        else
                ret = runtime·mal(size);
        FLUSH(&ret);
diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc
index cc28b943df5c87613741a3ae3c5d31b6c1bb4877..8899b01195dce900f4006c31095afce6a2b0b6c6 100644
@@ -36,14 +36,13 @@ fastrand1(void)
 // Small objects are allocated from the per-thread cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
 void*
-runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
+runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
 {
        int32 sizeclass, rate;
        MCache *c;
        uintptr npages;
        MSpan *s;
        void *v;
-       uint32 *ref;
 
        if(runtime·gcwaiting && g != m->g0 && m->locks == 0)
                runtime·gosched();
@@ -65,12 +64,6 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
                mstats.alloc += size;
                mstats.total_alloc += size;
                mstats.by_size[sizeclass].nmalloc++;
-
-               if(!runtime·mlookup(v, nil, nil, nil, &ref)) {
-                       runtime·printf("malloc %D; runtime·mlookup failed\n", (uint64)size);
-                       runtime·throw("malloc runtime·mlookup");
-               }
-               *ref = RefNone | refflag;
        } else {
                // TODO(rsc): Report tracebacks for very large allocations.
 
@@ -87,13 +80,14 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
                v = (void*)(s->start << PageShift);
 
                // setup for mark sweep
-               s->gcref0 = RefNone | refflag;
-               ref = &s->gcref0;
+               runtime·markspan(v, 0, 0, true);
        }
+       if(!(flag & FlagNoGC))
+               runtime·markallocated(v, size, (flag&FlagNoPointers) != 0);
 
        m->mallocing = 0;
 
-       if(!(refflag & RefNoProfiling) && (rate = runtime·MemProfileRate) > 0) {
+       if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) {
                if(size >= rate)
                        goto profile;
                if(m->mcache->next_sample > size)
@@ -104,7 +98,7 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
                                rate = 0x3fffffff;
                        m->mcache->next_sample = fastrand1() % (2*rate);
                profile:
-                       *ref |= RefProfiled;
+                       runtime·setblockspecial(v);
                        runtime·MProf_Malloc(v, size);
                }
        }
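
For reference, a toy standalone model of the sampling policy above (assumed rate and allocation size; not runtime code): drawing next_sample uniformly from [0, 2*rate) makes the expected distance between samples equal to rate, so allocations are profiled roughly once per MemProfileRate bytes.

	#include <stdio.h>
	#include <stdlib.h>

	int main(void) {
		long rate = 4096;		// stand-in for runtime·MemProfileRate
		long next = rand() % (2*rate);
		long total = 0, samples = 0;
		for(int i = 0; i < 1000000; i++) {
			long size = 64;		// pretend every allocation is 64 bytes
			total += size;
			if(next > size) {
				next -= size;	// not sampled; move closer to the next sample
			} else {
				samples++;	// sampled: record and redraw the gap
				next = rand() % (2*rate);
			}
		}
		printf("bytes per sample: %ld (rate %ld)\n", total/samples, rate);
		return 0;
	}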
@@ -124,33 +118,35 @@ runtime·malloc(uintptr size)
 void
 runtime·free(void *v)
 {
-       int32 sizeclass, size;
+       int32 sizeclass;
        MSpan *s;
        MCache *c;
-       uint32 prof, *ref;
+       uint32 prof;
+       uintptr size;
 
        if(v == nil)
                return;
+       
+       // If you change this also change mgc0.c:/^sweepspan,
+       // which has a copy of the guts of free.
 
        if(m->mallocing)
                runtime·throw("malloc/free - deadlock");
        m->mallocing = 1;
 
-       if(!runtime·mlookup(v, nil, nil, &s, &ref)) {
+       if(!runtime·mlookup(v, nil, nil, &s)) {
                runtime·printf("free %p: not an allocated block\n", v);
                runtime·throw("free runtime·mlookup");
        }
-       prof = *ref & RefProfiled;
-       *ref = RefFree;
+       prof = runtime·blockspecial(v);
 
        // Find size class for v.
        sizeclass = s->sizeclass;
        if(sizeclass == 0) {
                // Large object.
-               if(prof)
-                       runtime·MProf_Free(v, s->npages<<PageShift);
-               mstats.alloc -= s->npages<<PageShift;
-               runtime·memclr(v, s->npages<<PageShift);
+               size = s->npages<<PageShift;
+               *(uintptr*)(s->start<<PageShift) = 1;   // mark as "needs to be zeroed"
+               runtime·unmarkspan(v, 1<<PageShift);
                runtime·MHeap_Free(&runtime·mheap, s, 1);
        } else {
                // Small object.
@@ -158,19 +154,20 @@ runtime·free(void *v)
                size = runtime·class_to_size[sizeclass];
                if(size > sizeof(uintptr))
                        ((uintptr*)v)[1] = 1;   // mark as "needs to be zeroed"
-               if(prof)
-                       runtime·MProf_Free(v, size);
-               mstats.alloc -= size;
                mstats.by_size[sizeclass].nfree++;
                runtime·MCache_Free(c, v, sizeclass, size);
        }
+       runtime·markfreed(v, size);
+       mstats.alloc -= size;
+       if(prof)
+               runtime·MProf_Free(v, size);
        m->mallocing = 0;
 }
 
 int32
-runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
+runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
 {
-       uintptr n, nobj, i;
+       uintptr n, i;
        byte *p;
        MSpan *s;
 
@@ -179,12 +176,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
        if(sp)
                *sp = s;
        if(s == nil) {
+               runtime·checkfreed(v, 1);
                if(base)
                        *base = nil;
                if(size)
                        *size = 0;
-               if(ref)
-                       *ref = 0;
                return 0;
        }
 
@@ -195,14 +191,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
                        *base = p;
                if(size)
                        *size = s->npages<<PageShift;
-               if(ref)
-                       *ref = &s->gcref0;
                return 1;
        }
 
-       if((byte*)v >= (byte*)s->gcref) {
-               // pointers into the gc ref counts
-               // do not count as pointers.
+       if((byte*)v >= (byte*)s->limit) {
+               // pointers past the last block do not count as pointers.
                return 0;
        }
 
@@ -213,21 +206,6 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
        if(size)
                *size = n;
 
-       // good for error checking, but expensive
-       if(0) {
-               nobj = (s->npages << PageShift) / (n + RefcountOverhead);
-               if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) {
-                       runtime·printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n",
-                               s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages);
-                       runtime·printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n",
-                               s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift,
-                               (uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift));
-                       runtime·throw("bad gcref");
-               }
-       }
-       if(ref)
-               *ref = &s->gcref[i];
-
        return 1;
 }
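
With the uint32 **ref parameter gone, callers that only need the base, size, or span pass nil for the rest. A usage sketch of the new signature (mirroring the call sites in free above and SetFinalizer below; the error message here is illustrative):

	byte *base;
	uintptr size;
	MSpan *s;

	if(!runtime·mlookup(v, &base, &size, &s)) {
		// v does not point into any allocated block.
		runtime·throw("unexpected: not an allocated block");
	}
	// base..base+size is the block containing v; s is its span.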
 
@@ -246,14 +224,20 @@ runtime·allocmcache(void)
 
 int32 runtime·sizeof_C_MStats = sizeof(MStats);
 
+#define MaxArena32 (2U<<30)
+
 void
 runtime·mallocinit(void)
 {
        byte *p;
-       uintptr arena_size;
+       uintptr arena_size, bitmap_size;
+       extern byte end[];
 
        runtime·InitSizes();
 
+       // Set up the allocation arena, a contiguous area of memory where
+       // allocated data will be found.  The arena begins with a bitmap large
+       // enough to hold 4 bits per allocated word.
        if(sizeof(void*) == 8) {
                // On a 64-bit machine, allocate from a single contiguous reservation.
                // 16 GB should be big enough for now.
@@ -273,19 +257,44 @@ runtime·mallocinit(void)
                // odds of the conservative garbage collector not collecting memory
                // because some non-pointer block of memory had a bit pattern
                // that matched a memory address.
+               //
+               // Actually we reserve 17 GB (because the bitmap ends up being 1 GB)
+               // but it hardly matters: fc is not valid UTF-8 either, and we have to
+               // allocate 15 GB before we get that far.
                arena_size = 16LL<<30;
-               p = runtime·SysReserve((void*)(0x00f8ULL<<32), arena_size);
+               bitmap_size = arena_size / (sizeof(void*)*8/4);
+               p = runtime·SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size);
                if(p == nil)
                        runtime·throw("runtime: cannot reserve arena virtual address space");
-               runtime·mheap.arena_start = p;
-               runtime·mheap.arena_used = p;
-               runtime·mheap.arena_end = p + arena_size;
        } else {
-               // On a 32-bit machine, we'll take what we can get for each allocation
-               // and maintain arena_start and arena_end as min, max we've seen.
-               runtime·mheap.arena_start = (byte*)0xffffffff;
-               runtime·mheap.arena_end = 0;
+               // On a 32-bit machine, we can't typically get away
+               // with a giant virtual address space reservation.
+               // Instead we map the memory information bitmap
+               // immediately after the data segment, large enough
+               // to handle another 2GB of mappings (256 MB),
+               // along with a reservation for another 512 MB of memory.
+               // When that gets used up, we'll start asking the kernel
+               // for any memory anywhere and hope it's in the 2GB
+               // following the bitmap (presumably the executable begins
+               // near the bottom of memory, so we'll have to use up
+               // most of memory before the kernel resorts to giving out
+               // memory before the beginning of the text segment).
+               //
+               // Alternatively we could reserve 512 MB bitmap, enough
+               // for 4GB of mappings, and then accept any memory the
+               // kernel threw at us, but normally that's a waste of 512 MB
+               // of address space, which is probably too much in a 32-bit world.
+               bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
+               arena_size = 512<<20;
+
+               p = (void*)(((uintptr)end + 64*1024 - 1) & ~(64*1024-1));
+               if(runtime·SysReserve(p, bitmap_size + arena_size) != p)
+                       runtime·throw("runtime: cannot reserve memory bitmap virtual address space");
        }
+       runtime·mheap.bitmap = p;
+       runtime·mheap.arena_start = p + bitmap_size;
+       runtime·mheap.arena_used = runtime·mheap.arena_start;
+       runtime·mheap.arena_end = runtime·mheap.arena_start + arena_size;
 
        // Initialize the rest of the allocator.        
        runtime·MHeap_Init(&runtime·mheap, runtime·SysAlloc);
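
The bitmap sizing arithmetic works out as follows (a standalone check using the constants quoted above): 4 bitmap bits per heap word means the bitmap is arena_size / (sizeof(void*)*8/4) bytes, i.e. 1 GB for the 16 GB 64-bit arena and 256 MB for the 2 GB of 32-bit mappings.

	#include <stdio.h>
	#include <stdint.h>

	int main(void) {
		// 64-bit: 8-byte words, 16 GB arena reservation.
		uint64_t arena64 = 16ULL<<30;
		uint64_t bitmap64 = arena64 / (8*8/4);	// 1 GB
		// 32-bit: 4-byte words, MaxArena32 = 2 GB of mappings.
		uint64_t arena32 = 2ULL<<30;
		uint64_t bitmap32 = arena32 / (4*8/4);	// 256 MB
		printf("64-bit bitmap: %llu MB, 32-bit bitmap: %llu MB\n",
			(unsigned long long)(bitmap64>>20),
			(unsigned long long)(bitmap32>>20));
		return 0;
	}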
@@ -299,26 +308,41 @@ void*
 runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
 {
        byte *p;
-       
-       if(sizeof(void*) == 8) {
+
+       if(n <= h->arena_end - h->arena_used) {
                // Keep taking from our reservation.
-               if(h->arena_end - h->arena_used < n)
-                       return nil;
                p = h->arena_used;
                runtime·SysMap(p, n);
                h->arena_used += n;
+               runtime·MHeap_MapBits(h);
                return p;
-       } else {
-               // Take what we can get from the OS.
-               p = runtime·SysAlloc(n);
-               if(p == nil)
-                       return nil;
-               if(p+n > h->arena_used)
-                       h->arena_used = p+n;
-               if(p > h->arena_end)
-                       h->arena_end = p;
-               return p;               
        }
+       
+       // On 64-bit, our reservation is all we have.
+       if(sizeof(void*) == 8)
+               return nil;
+
+       // On 32-bit, once the reservation is gone we can
+       // try to get memory at a location chosen by the OS
+       // and hope that it is in the range we allocated bitmap for.
+       p = runtime·SysAlloc(n);
+       if(p == nil)
+               return nil;
+
+       if(p < h->arena_start || p+n - h->arena_start >= MaxArena32) {
+               runtime·printf("runtime: memory allocated by OS not in usable range");
+               runtime·SysFree(p, n);
+               return nil;
+       }
+
+       if(p+n > h->arena_used) {
+               h->arena_used = p+n;
+               if(h->arena_used > h->arena_end)
+                       h->arena_end = h->arena_used;
+               runtime·MHeap_MapBits(h);
+       }
+       
+       return p;
 }
 
 // Runtime stubs.
@@ -353,7 +377,6 @@ void*
 runtime·stackalloc(uint32 n)
 {
        void *v;
-       uint32 *ref;
 
        if(m->mallocing || m->gcing || n == FixedStack) {
                runtime·lock(&stacks);
@@ -369,11 +392,7 @@ runtime·stackalloc(uint32 n)
                runtime·unlock(&stacks);
                return v;
        }
-       v = runtime·mallocgc(n, RefNoProfiling, 0, 0);
-       if(!runtime·mlookup(v, nil, nil, nil, &ref))
-               runtime·throw("stackalloc runtime·mlookup");
-       *ref = RefStack;
-       return v;
+       return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0);
 }
 
 void
@@ -399,7 +418,7 @@ func Free(p *byte) {
 }
 
 func Lookup(p *byte) (base *byte, size uintptr) {
-       runtime·mlookup(p, &base, &size, nil, nil);
+       runtime·mlookup(p, &base, &size, nil);
 }
 
 func GC() {
@@ -422,7 +441,7 @@ func SetFinalizer(obj Eface, finalizer Eface) {
                runtime·printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string);
                goto throw;
        }
-       if(!runtime·mlookup(obj.data, &base, &size, nil, nil) || obj.data != base) {
+       if(!runtime·mlookup(obj.data, &base, &size, nil) || obj.data != base) {
                runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
                goto throw;
        }
diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h
index e2472e8d23823dba3f9486f2feeab8da4577e522..4e2794570deb8611a310f56102fde87bef3473be 100644
@@ -97,8 +97,14 @@ typedef      uintptr PageID;         // address >> PageShift
 
 enum
 {
+       // Computed constant.  The definition of MaxSmallSize and the
+       // algorithm in msize.c produce some number of different allocation
+       // size classes.  NumSizeClasses is that number.  It's needed here
+       // because there are static arrays of this length; when msize runs its
+       // size choosing algorithm it double-checks that NumSizeClasses agrees.
+       NumSizeClasses = 61,
+
        // Tunable constants.
-       NumSizeClasses = 67,            // Number of size classes (must match msize.c)
        MaxSmallSize = 32<<10,
 
        FixAllocChunk = 128<<10,        // Chunk size for FixAlloc
@@ -290,10 +296,7 @@ struct MSpan
        uint32  ref;            // number of allocated objects in this span
        uint32  sizeclass;      // size class
        uint32  state;          // MSpanInUse etc
-       union {
-               uint32  *gcref; // sizeclass > 0
-               uint32  gcref0; // sizeclass == 0
-       };
+       byte    *limit; // end of data in span
 };
 
 void   runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
@@ -336,6 +339,7 @@ struct MHeap
 
        // range of addresses we might see in the heap
        byte *bitmap;
+       uintptr bitmap_mapped;
        byte *arena_start;
        byte *arena_used;
        byte *arena_end;
@@ -359,26 +363,29 @@ MSpan*    runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct
 void   runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
 MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
 MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
-void   runtime·MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj);
+void   runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
 void*  runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
+void   runtime·MHeap_MapBits(MHeap *h);
 
 void*  runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
-int32  runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref);
+int32  runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
 void   runtime·gc(int32 force);
+void   runtime·markallocated(void *v, uintptr n, bool noptr);
+void   runtime·checkallocated(void *v, uintptr n);
+void   runtime·markfreed(void *v, uintptr n);
+void   runtime·checkfreed(void *v, uintptr n);
+int32  runtime·checking;
+void   runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
+void   runtime·unmarkspan(void *v, uintptr size);
+bool   runtime·blockspecial(void*);
+void   runtime·setblockspecial(void*);
 
 enum
 {
-       RefcountOverhead = 4,   // one uint32 per object
-
-       RefFree = 0,    // must be zero
-       RefStack,               // stack segment - don't free and don't scan for pointers
-       RefNone,                // no references
-       RefSome,                // some references
-       RefNoPointers = 0x80000000U,    // flag - no pointers here
-       RefHasFinalizer = 0x40000000U,  // flag - has finalizer
-       RefProfiled = 0x20000000U,      // flag - is in profiling table
-       RefNoProfiling = 0x10000000U,   // flag - must not profile
-       RefFlags = 0xFFFF0000U,
+       // flags to malloc
+       FlagNoPointers = 1<<0,  // no pointers here
+       FlagNoProfiling = 1<<1, // must not profile
+       FlagNoGC = 1<<2,        // must not free or scan for pointers
 };
 
 void   runtime·MProf_Malloc(void*, uintptr);
diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c
index f1ad119d3a473fbf8c6916272aef22e57e5cb8dd..29b03b58f8899eb3304864c8cb4d06d6b1a58eae 100644
@@ -113,8 +113,7 @@ static void
 MCentral_Free(MCentral *c, void *v)
 {
        MSpan *s;
-       PageID page;
-       MLink *p, *next;
+       MLink *p;
        int32 size;
 
        // Find span for v.
@@ -138,16 +137,8 @@ MCentral_Free(MCentral *c, void *v)
        if(--s->ref == 0) {
                size = runtime·class_to_size[c->sizeclass];
                runtime·MSpanList_Remove(s);
-               // The second word of each freed block indicates
-               // whether it needs to be zeroed.  The first word
-               // is the link pointer and must always be cleared.
-               for(p=s->freelist; p; p=next) {
-                       next = p->next;
-                       if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0)
-                               runtime·memclr((byte*)p, size);
-                       else
-                               p->next = nil;
-               }
+               runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+               *(uintptr*)(s->start<<PageShift) = 1;  // needs zeroing
                s->freelist = nil;
                c->nfree -= (s->npages << PageShift) / size;
                runtime·unlock(c);
@@ -157,7 +148,7 @@ MCentral_Free(MCentral *c, void *v)
 }
 
 void
-runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
+runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
 {
        int32 size;
        int32 npages;
@@ -166,7 +157,7 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32
        size = runtime·class_to_size[sizeclass];
        *npagesp = npages;
        *sizep = size;
-       *nobj = (npages << PageShift) / (size + RefcountOverhead);
+       *nobj = (npages << PageShift) / size;
 }
 
 // Fetch a new span from the heap and
@@ -174,7 +165,8 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32
 static bool
 MCentral_Grow(MCentral *c)
 {
-       int32 i, n, npages, size;
+       int32 i, n, npages;
+       uintptr size;
        MLink **tailp, *v;
        byte *p;
        MSpan *s;
@@ -191,7 +183,7 @@ MCentral_Grow(MCentral *c)
        // Carve span into sequence of blocks.
        tailp = &s->freelist;
        p = (byte*)(s->start << PageShift);
-       s->gcref = (uint32*)(p + size*n);
+       s->limit = p + size*n;
        for(i=0; i<n; i++) {
                v = (MLink*)p;
                *tailp = v;
@@ -199,6 +191,7 @@ MCentral_Grow(MCentral *c)
                p += size;
        }
        *tailp = nil;
+       runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
 
        runtime·lock(c);
        c->nfree += n;
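
The leftover flag passed to markspan deserves a worked example (hypothetical size class; PageShift is 12 in this runtime): a one-page span carved into 48-byte blocks holds n = 4096/48 = 85 blocks using 4080 bytes, so size*n < npages<<PageShift and markspan records one extra block boundary just past the last block.

	#include <stdio.h>

	int main(void) {
		unsigned long PageShift = 12, npages = 1;
		unsigned long size = 48;			// hypothetical size class
		unsigned long bytes = npages << PageShift;	// 4096
		unsigned long n = bytes / size;			// 85 blocks
		printf("n=%lu used=%lu leftover=%d\n", n, n*size, n*size < bytes);
		return 0;
	}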
diff --git a/src/pkg/runtime/mfinal.c b/src/pkg/runtime/mfinal.c
index f73561b3cd9fb2a9d75101bb0bf62c6f606706f8..03ee777c0be9cc07907792c92f551f9a9b6ff807 100644
@@ -5,6 +5,7 @@
 #include "runtime.h"
 #include "malloc.h"
 
+// TODO(rsc): Why not just use mheap.Lock?
 static Lock finlock;
 
 // Finalizer hash table.  Direct hash, linear scan, at most 3/4 full.
@@ -101,24 +102,21 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
        }
 
        runtime·lock(&finlock);
-       if(!runtime·mlookup(p, &base, nil, nil, &ref) || p != base) {
+       if(!runtime·mlookup(p, &base, nil, nil) || p != base) {
                runtime·unlock(&finlock);
                runtime·throw("addfinalizer on invalid pointer");
        }
        if(f == nil) {
-               if(*ref & RefHasFinalizer) {
-                       lookfintab(&fintab, p, 1);
-                       *ref &= ~RefHasFinalizer;
-               }
+               lookfintab(&fintab, p, 1);
                runtime·unlock(&finlock);
                return;
        }
 
-       if(*ref & RefHasFinalizer) {
+       if(lookfintab(&fintab, p, 0)) {
                runtime·unlock(&finlock);
                runtime·throw("double finalizer");
        }
-       *ref |= RefHasFinalizer;
+       runtime·setblockspecial(p);
 
        if(fintab.nkey >= fintab.max/2+fintab.max/4) {
                // keep table at most 3/4 full:
@@ -134,7 +132,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
                        newtab.max *= 3;
                }
 
-               newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1);
+               newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);
                newtab.val = runtime·mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
 
                for(i=0; i<fintab.max; i++) {
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index af1c721e8b205b4458d860c5f65eb0529062c727..232c6cdcd5b052f6fe72ae0fea3e25c3bd35aceb 100644
@@ -2,28 +2,66 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Garbage collector -- step 0.
-//
-// Stop the world, mark and sweep garbage collector.
-// NOT INTENDED FOR PRODUCTION USE.
-//
-// A mark and sweep collector provides a way to exercise
-// and test the memory allocator and the stack walking machinery
-// without also needing to get reference counting
-// exactly right.
+// Garbage collector.
 
 #include "runtime.h"
 #include "malloc.h"
 
 enum {
-       Debug = 0
+       Debug = 0,
+       UseCas = 1,
+       PtrSize = sizeof(void*),
+       
+       // Four bits per word (see #defines below).
+       wordsPerBitmapWord = sizeof(void*)*8/4,
+       bitShift = sizeof(void*)*8/4,
 };
 
-typedef struct BlockList BlockList;
-struct BlockList
+// Bits in per-word bitmap.
+// #defines because enum might not be able to hold the values.
+//
+// Each word in the bitmap describes wordsPerBitmapWord words
+// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
+// so on a 64-bit system there is one bitmap word per 16 heap words.
+// The bits in the word are packed together by type first, then by
+// heap location, so each 64-bit bitmap word consists of, from top to bottom,
+// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
+// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
+// This layout makes it easier to iterate over the bits of a given type.
+//
+// The bitmap starts at mheap.arena_start and extends *backward* from
+// there.  On a 64-bit system the off'th word in the arena is tracked by
+// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
+// the only difference is that the divisor is 8.)
+//
+// To pull out the bits corresponding to a given pointer p, we use:
+//
+//     off = p - (uintptr*)mheap.arena_start;  // word offset
+//     b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
+//     shift = off % wordsPerBitmapWord
+//     bits = *b >> shift;
+//     /* then test bits & bitAllocated, bits & bitMarked, etc. */
+//
+#define bitAllocated           ((uintptr)1<<(bitShift*0))
+#define bitNoPointers          ((uintptr)1<<(bitShift*1))      /* when bitAllocated is set */
+#define bitMarked              ((uintptr)1<<(bitShift*2))      /* when bitAllocated is set */
+#define bitSpecial             ((uintptr)1<<(bitShift*3))      /* when bitAllocated is set - has finalizer or being profiled */
+#define bitBlockBoundary       ((uintptr)1<<(bitShift*1))      /* when bitAllocated is NOT set */
+
+#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
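
Restating the lookup recipe from the comment above as a helper (a sketch in the runtime's style; this CL inlines the computation rather than defining such a function):

	static uintptr
	bitsfor(void *p)	// hypothetical helper
	{
		uintptr off, shift, *b;

		off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;	// word offset
		b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		return *b >> shift;	// caller tests bitAllocated, bitMarked, etc.
	}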
+
+static uint64 nlookup;
+static uint64 nsizelookup;
+static uint64 naddrlookup;
+static uint64 nhandoff;
+static int32 gctrace;
+
+typedef struct Workbuf Workbuf;
+struct Workbuf
 {
-       byte *obj;
-       uintptr size;
+       Workbuf *next;
+       uintptr nw;
+       byte *w[2048-2];
 };
 
 extern byte data[];
@@ -33,72 +71,258 @@ extern byte end[];
 static G *fing;
 static Finalizer *finq;
 static int32 fingwait;
-static BlockList *bl, *ebl;
+static uint32 nfullwait;
 
 static void runfinq(void);
-
-enum {
-       PtrSize = sizeof(void*)
-};
-
+static bool bitlookup(void*, uintptr**, uintptr*, int32*);
+static Workbuf* getempty(Workbuf*);
+static Workbuf* getfull(Workbuf*);
+
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left.  Instead of using an explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body.  Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
 static void
 scanblock(byte *b, int64 n)
 {
-       int32 off;
-       void *obj;
-       uintptr size;
-       uint32 *refp, ref;
+       byte *obj, *arena_start, *p;
        void **vp;
-       int64 i;
-       BlockList *w;
-
-       w = bl;
-       w->obj = b;
-       w->size = n;
-       w++;
+       uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+       MSpan *s;
+       PageID k;
+       void **bw, **w, **ew;
+       Workbuf *wbuf;
 
-       while(w > bl) {
-               w--;
-               b = w->obj;
-               n = w->size;
+       // Memory arena parameters.
+       arena_start = runtime·mheap.arena_start;
+       
+       wbuf = nil;  // current work buffer
+       ew = nil;  // end of work buffer
+       bw = nil;  // beginning of work buffer
+       w = nil;  // current pointer into work buffer
+
+       // Align b to a word boundary.
+       off = (uintptr)b & (PtrSize-1);
+       if(off != 0) {
+               b += PtrSize - off;
+               n -= PtrSize - off;
+       }
 
+       for(;;) {
+               // Each iteration scans the block b of length n, queueing pointers in
+               // the work buffer.
                if(Debug > 1)
                        runtime·printf("scanblock %p %D\n", b, n);
-               off = (uint32)(uintptr)b & (PtrSize-1);
-               if(off) {
-                       b += PtrSize - off;
-                       n -= PtrSize - off;
-               }
-       
+
                vp = (void**)b;
                n /= PtrSize;
                for(i=0; i<n; i++) {
-                       obj = vp[i];
-                       if(obj == nil)
+                       obj = (byte*)vp[i];
+                       
+                       // Words outside the arena cannot be pointers.
+                       if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used)
                                continue;
-                       if(runtime·mheap.arena_start <= (byte*)obj && (byte*)obj < runtime·mheap.arena_end) {
-                               if(runtime·mlookup(obj, &obj, &size, nil, &refp)) {
-                                       ref = *refp;
-                                       switch(ref & ~RefFlags) {
-                                       case RefNone:
-                                               if(Debug > 1)
-                                                       runtime·printf("found at %p: ", &vp[i]);
-                                               *refp = RefSome | (ref & RefFlags);
-                                               if(!(ref & RefNoPointers)) {
-                                                       if(w >= ebl)
-                                                               runtime·throw("scanblock: garbage collection stack overflow");
-                                                       w->obj = obj;
-                                                       w->size = size;
-                                                       w++;
-                                               }
-                                               break;
-                                       }
+                       
+                       // obj may be a pointer to a live object.
+                       // Try to find the beginning of the object.
+                       
+                       // Round down to word boundary.
+                       obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+                       // Find bits for this word.
+                       off = (uintptr*)obj - (uintptr*)arena_start;
+                       bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+                       shift = off % wordsPerBitmapWord;
+                       xbits = *bitp;
+                       bits = xbits >> shift;
+
+                       // Pointing at the beginning of a block?
+                       if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+                               goto found;
+
+                       // Pointing just past the beginning?
+                       // Scan backward a little to find a block boundary.
+                       for(j=shift; j-->0; ) {
+                               if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
+                                       obj = (byte*)obj - (shift-j)*PtrSize;
+                                       shift = j;
+                                       bits = xbits>>shift;
+                                       goto found;
                                }
                        }
+
+                       // Otherwise consult span table to find beginning.
+                       // (Manually inlined copy of MHeap_LookupMaybe.)
+                       nlookup++;
+                       naddrlookup++;
+                       k = (uintptr)obj>>PageShift;
+                       x = k;
+                       if(sizeof(void*) == 8)
+                               x -= (uintptr)arena_start>>PageShift;
+                       s = runtime·mheap.map[x];
+                       if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+                               continue;
+                       p =  (byte*)((uintptr)s->start<<PageShift);
+                       if(s->sizeclass == 0) {
+                               obj = p;
+                       } else {
+                               if((byte*)obj >= (byte*)s->limit)
+                                       continue;
+                               size = runtime·class_to_size[s->sizeclass];
+                               int32 i = ((byte*)obj - p)/size;
+                               obj = p+i*size;
+                       }
+
+                       // Now that we know the object header, reload bits.
+                       off = (uintptr*)obj - (uintptr*)arena_start;
+                       bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+                       shift = off % wordsPerBitmapWord;
+                       xbits = *bitp;
+                       bits = xbits >> shift;
+
+               found:
+                       // Now we have bits, bitp, and shift correct for
+                       // obj pointing at the base of the object.
+                       // If not allocated or already marked, done.
+                       if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+                               continue;
+                       *bitp |= bitMarked<<shift;
+
+                       // If object has no pointers, don't need to scan further.
+                       if((bits & bitNoPointers) != 0)
+                               continue;
+
+                       // If buffer is full, get a new one.
+                       if(w >= ew) {
+                               wbuf = getempty(wbuf);
+                               bw = wbuf->w;
+                               w = bw;
+                               ew = bw + nelem(wbuf->w);
+                       }
+                       *w++ = obj;
+               }
+               
+               // Done scanning [b, b+n).  Prepare for the next iteration of
+               // the loop by setting b and n to the parameters for the next block.
+
+               // Fetch b from the work buffers.
+               if(w <= bw) {
+                       // Emptied our buffer: refill.
+                       wbuf = getfull(wbuf);
+                       if(wbuf == nil)
+                               break;
+                       bw = wbuf->w;
+                       ew = wbuf->w + nelem(wbuf->w);
+                       w = bw+wbuf->nw;
                }
+               b = *--w;
+       
+               // Figure out n = size of b.  Start by loading bits for b.
+               off = (uintptr*)b - (uintptr*)arena_start;
+               bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
+               shift = off % wordsPerBitmapWord;
+               xbits = *bitp;
+               bits = xbits >> shift;
+               
+               // Might be small; look for nearby block boundary.
+               // A block boundary is marked by either bitBlockBoundary
+               // or bitAllocated being set (see notes near their definition).
+               enum {
+                       boundary = bitBlockBoundary|bitAllocated
+               };
+               // Look for a block boundary both after and before b
+               // in the same bitmap word.
+               //
+               // A block boundary j words after b is indicated by
+               //      bits>>j & boundary
+               // assuming shift+j < bitShift.  (If shift+j >= bitShift then
+               // we'll be bleeding other bit types like bitMarked into our test.)
+               // Instead of inserting the conditional shift+j < bitShift into the loop,
+               // we can let j range from 1 to bitShift as long as we first
+               // apply a mask to keep only the bits corresponding
+               // to shift+j < bitShift aka j < bitShift-shift.
+               bits &= (boundary<<(bitShift-shift)) - boundary;
+               
+               // A block boundary j words before b is indicated by
+               //      xbits>>(shift-j) & boundary
+               // (assuming shift >= j).  There is no cleverness here to
+               // avoid the test, because when j gets too large the shift
+               // turns negative, which is undefined in C.
+
+               for(j=1; j<bitShift; j++) {
+                       if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
+                               n = j*PtrSize;
+                               goto scan;
+                       }
+               }
+               
+               // Fall back to asking span about size class.
+               // (Manually inlined copy of MHeap_Lookup.)
+               nlookup++;
+               nsizelookup++;
+               x = (uintptr)b>>PageShift;
+               if(sizeof(void*) == 8)
+                       x -= (uintptr)arena_start>>PageShift;
+               s = runtime·mheap.map[x];
+               if(s->sizeclass == 0)
+                       n = s->npages<<PageShift;
+               else
+                       n = runtime·class_to_size[s->sizeclass];
+       scan:;
+       }
+}
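
The mask line above, bits &= (boundary<<(bitShift-shift)) - boundary, is compact enough to deserve a check. Since boundary has single bits at positions 0 and bitShift, the subtraction equals boundary*(2^(bitShift-shift) - 1), i.e. it keeps exactly the positions j < bitShift-shift in both the bitAllocated and bitBlockBoundary fields. A standalone verification (64-bit constants assumed):

	#include <stdio.h>
	#include <stdint.h>

	int main(void) {
		uint64_t bitShift = 16;				// 64-bit layout
		uint64_t boundary = 1ULL | 1ULL<<bitShift;	// bitBlockBoundary|bitAllocated
		for(uint64_t shift = 0; shift < bitShift; shift++) {
			uint64_t mask = (boundary<<(bitShift-shift)) - boundary;
			uint64_t want = 0;
			for(uint64_t j = 0; j < bitShift-shift; j++)
				want |= boundary<<j;	// positions with shift+j < bitShift
			if(mask != want)
				printf("mismatch at shift %llu\n", (unsigned long long)shift);
		}
		printf("mask identity verified\n");
		return 0;
	}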
+
+static struct {
+       Workbuf *full;
+       Workbuf *empty;
+       byte    *chunk;
+       uintptr nchunk;
+} work;
+
+// Get an empty work buffer off the work.empty list,
+// allocating new buffers as needed.
+static Workbuf*
+getempty(Workbuf *b)
+{
+       if(b != nil) {
+               b->nw = nelem(b->w);
+               b->next = work.full;
+               work.full = b;
+       }
+       b = work.empty;
+       if(b != nil) {
+               work.empty = b->next;
+               return b;
+       }
+       
+       if(work.nchunk < sizeof *b) {
+               work.nchunk = 1<<20;
+               work.chunk = runtime·SysAlloc(work.nchunk);
+       }
+       b = (Workbuf*)work.chunk;
+       work.chunk += sizeof *b;
+       work.nchunk -= sizeof *b;
+       return b;
+}
+
+// Get a full work buffer off the work.full list, or return nil.
+static Workbuf*
+getfull(Workbuf *b)
+{
+       if(b != nil) {
+               b->nw = 0;
+               b->next = work.empty;
+               work.empty = b;
        }
+       b = work.full;
+       if(b != nil)
+               work.full = b->next;
+       return b;
 }
 
+// Scanstack calls scanblock on each of gp's stack segments.
 static void
 scanstack(G *gp)
 {
@@ -119,46 +343,26 @@ scanstack(G *gp)
        }
 }
 
+// Markfin calls scanblock on the blocks that have finalizers:
+// the things pointed at cannot be freed until the finalizers have run.
 static void
 markfin(void *v)
 {
        uintptr size;
-       uint32 *refp;
 
        size = 0;
-       refp = nil;
-       if(!runtime·mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
+       if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))
                runtime·throw("mark - finalizer inconsistency");
-       
+
        // do not mark the finalizer block itself.  just mark the things it points at.
        scanblock(v, size);
 }
 
+// Mark scans the data and bss segments, all goroutine stacks, and the
+// finalizer table, marking every reachable block.
 static void
 mark(void)
 {
        G *gp;
-       uintptr blsize, nobj;
-
-       // Figure out how big an object stack we need.
-       // Get a new one if we need more than we have
-       // or we need significantly less than we have.
-       nobj = mstats.heap_objects;
-       if(nobj > ebl - bl || nobj < (ebl-bl)/4) {
-               if(bl != nil)
-                       runtime·SysFree(bl, (byte*)ebl - (byte*)bl);
-               
-               // While we're allocated a new object stack,
-               // add 20% headroom and also round up to
-               // the nearest page boundary, since mmap
-               // will anyway.
-               nobj = nobj * 12/10;
-               blsize = nobj * sizeof *bl;
-               blsize = (blsize + 4095) & ~4095;
-               nobj = blsize / sizeof *bl;
-               bl = runtime·SysAlloc(blsize);
-               ebl = bl + nobj;
-       }
 
        // mark data+bss.
        // skip runtime·mheap itself, which has no interesting pointers
@@ -192,97 +396,85 @@ mark(void)
        runtime·walkfintab(markfin);
 }
 
-// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome
+// Sweep frees or calls finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
 static void
-sweepspan(MSpan *s)
+sweep(void)
 {
-       int32 n, npages, size;
+       MSpan *s;
+       int32 cl, n, npages;
+       uintptr size;
        byte *p;
-       uint32 ref, *gcrefp, *gcrefep;
        MCache *c;
        Finalizer *f;
 
-       p = (byte*)(s->start << PageShift);
-       if(s->sizeclass == 0) {
-               // Large block.
-               ref = s->gcref0;
-               switch(ref & ~(RefFlags^RefHasFinalizer)) {
-               case RefNone:
-                       // Free large object.
-                       mstats.alloc -= s->npages<<PageShift;
-                       mstats.nfree++;
-                       runtime·memclr(p, s->npages<<PageShift);
-                       if(ref & RefProfiled)
-                               runtime·MProf_Free(p, s->npages<<PageShift);
-                       s->gcref0 = RefFree;
-                       runtime·MHeap_Free(&runtime·mheap, s, 1);
-                       break;
-               case RefNone|RefHasFinalizer:
-                       f = runtime·getfinalizer(p, 1);
-                       if(f == nil)
-                               runtime·throw("finalizer inconsistency");
-                       f->arg = p;
-                       f->next = finq;
-                       finq = f;
-                       ref &= ~RefHasFinalizer;
-                       // fall through
-               case RefSome:
-               case RefSome|RefHasFinalizer:
-                       s->gcref0 = RefNone | (ref&RefFlags);
-                       break;
+       for(s = runtime·mheap.allspans; s != nil; s = s->allnext) {
+               if(s->state != MSpanInUse)
+                       continue;
+
+               p = (byte*)(s->start << PageShift);
+               cl = s->sizeclass;
+               if(cl == 0) {
+                       size = s->npages<<PageShift;
+                       n = 1;
+               } else {
+                       // Chunk full of small blocks.
+                       size = runtime·class_to_size[cl];
+                       npages = runtime·class_to_allocnpages[cl];
+                       n = (npages << PageShift) / size;
                }
-               return;
-       }
+       
+               // sweep through n objects of given size starting at p.
+               for(; n > 0; n--, p += size) {
+                       uintptr off, *bitp, shift, bits;
 
-       // Chunk full of small blocks.
-       runtime·MGetSizeClassInfo(s->sizeclass, &size, &npages, &n);
-       gcrefp = s->gcref;
-       gcrefep = s->gcref + n;
-       for(; gcrefp < gcrefep; gcrefp++, p += size) {
-               ref = *gcrefp;
-               if(ref < RefNone)       // RefFree or RefStack
-                       continue;
-               switch(ref & ~(RefFlags^RefHasFinalizer)) {
-               case RefNone:
-                       // Free small object.
-                       if(ref & RefProfiled)
+                       off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
+                       bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+                       shift = off % wordsPerBitmapWord;
+                       bits = *bitp>>shift;
+
+                       if((bits & bitAllocated) == 0)
+                               continue;
+
+                       if((bits & bitMarked) != 0) {
+                               *bitp &= ~(bitMarked<<shift);
+                               continue;
+                       }
+
+                       if((bits & bitSpecial) != 0) {
+                               // Special means it has a finalizer or is being profiled.
+                               f = runtime·getfinalizer(p, 1);
+                               if(f != nil) {
+                                       f->arg = p;
+                                       f->next = finq;
+                                       finq = f;
+                                       continue;
+                               }
                                runtime·MProf_Free(p, size);
-                       *gcrefp = RefFree;
-                       c = m->mcache;
-                       if(size > sizeof(uintptr))
-                               ((uintptr*)p)[1] = 1;   // mark as "needs to be zeroed"
+                       }
+
+                       // Mark freed; restore block boundary bit.
+                       *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+
+                       if(s->sizeclass == 0) {
+                               // Free large span.
+                               runtime·unmarkspan(p, 1<<PageShift);
+                               *(uintptr*)p = 1;       // needs zeroing
+                               runtime·MHeap_Free(&runtime·mheap, s, 1);
+                       } else {
+                               // Free small object.
+                               c = m->mcache;
+                               if(size > sizeof(uintptr))
+                                       ((uintptr*)p)[1] = 1;   // mark as "needs to be zeroed"
+                               mstats.by_size[s->sizeclass].nfree++;
+                               runtime·MCache_Free(c, p, s->sizeclass, size);
+                       }
                        mstats.alloc -= size;
                        mstats.nfree++;
-                       mstats.by_size[s->sizeclass].nfree++;
-                       runtime·MCache_Free(c, p, s->sizeclass, size);
-                       break;
-               case RefNone|RefHasFinalizer:
-                       f = runtime·getfinalizer(p, 1);
-                       if(f == nil)
-                               runtime·throw("finalizer inconsistency");
-                       f->arg = p;
-                       f->next = finq;
-                       finq = f;
-                       ref &= ~RefHasFinalizer;
-                       // fall through
-               case RefSome:
-               case RefSome|RefHasFinalizer:
-                       *gcrefp = RefNone | (ref&RefFlags);
-                       break;
                }
        }
 }
 
-static void
-sweep(void)
-{
-       MSpan *s;
-
-       for(s = runtime·mheap.allspans; s != nil; s = s->allnext)
-               if(s->state == MSpanInUse)
-                       sweepspan(s);
-}
-
 // Semaphore, not Lock, so that the goroutine
 // reschedules when there is contention rather
 // than spinning.
@@ -326,7 +518,8 @@ cachestats(void)
 void
 runtime·gc(int32 force)
 {
-       int64 t0, t1;
+       int64 t0, t1, t2, t3;
+       uint64 heap0, heap1, obj0, obj1;
        byte *p;
        Finalizer *fp;
 
@@ -349,23 +542,41 @@ runtime·gc(int32 force)
                        gcpercent = -1;
                else
                        gcpercent = runtime·atoi(p);
+               
+               p = runtime·getenv("GOGCTRACE");
+               if(p != nil)
+                       gctrace = runtime·atoi(p);
        }
        if(gcpercent < 0)
                return;
 
        runtime·semacquire(&gcsema);
+       if(!force && mstats.heap_alloc < mstats.next_gc) {
+               runtime·semrelease(&gcsema);
+               return;
+       }
+
        t0 = runtime·nanotime();
+       nlookup = 0;
+       nsizelookup = 0;
+       naddrlookup = 0;
+
        m->gcing = 1;
        runtime·stoptheworld();
        if(runtime·mheap.Lock.key != 0)
                runtime·throw("runtime·mheap locked during gc");
-       if(force || mstats.heap_alloc >= mstats.next_gc) {
-               cachestats();
-               mark();
-               sweep();
-               stealcache();
-               mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
-       }
+
+       cachestats();
+       heap0 = mstats.heap_alloc;
+       obj0 = mstats.nmalloc - mstats.nfree;
+
+       mark();
+       t1 = runtime·nanotime();
+       sweep();
+       t2 = runtime·nanotime();
+       stealcache();
+
+       mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
        m->gcing = 0;
 
        m->locks++;     // disable gc during the mallocs in newproc
@@ -381,18 +592,34 @@ runtime·gc(int32 force)
        }
        m->locks--;
 
-       t1 = runtime·nanotime();
+       cachestats();
+       heap1 = mstats.heap_alloc;
+       obj1 = mstats.nmalloc - mstats.nfree;
+
+       t3 = runtime·nanotime();
+       mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
+       mstats.pause_total_ns += t3 - t0;
        mstats.numgc++;
-       mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0;
-       mstats.pause_total_ns += t1 - t0;
        if(mstats.debuggc)
-               runtime·printf("pause %D\n", t1-t0);
+               runtime·printf("pause %D\n", t3-t0);
+       
+       if(gctrace) {
+               runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n",
+                       mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
+                       heap0>>20, heap1>>20, obj0, obj1,
+                       mstats.nmalloc, mstats.nfree,
+                       nlookup, nsizelookup, naddrlookup);
+       }
+
        runtime·semrelease(&gcsema);
        runtime·starttheworld();
        
        // give the queued finalizers, if any, a chance to run
        if(fp != nil)
                runtime·gosched();
+       
+       if(gctrace > 1 && !force)
+               runtime·gc(1);
 }
 
 static void
@@ -430,3 +657,157 @@ runfinq(void)
                runtime·gc(1); // trigger another gc to clean up the finalized objects, if possible
        }
 }
+
+// mark the block at v of size n as allocated.
+// If noptr is true, mark it as having no pointers.
+void
+runtime·markallocated(void *v, uintptr n, bool noptr)
+{
+       uintptr *b, bits, off, shift;
+
+       if(0)
+               runtime·printf("markallocated %p+%p\n", v, n);
+
+       if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+               runtime·throw("markallocated: bad pointer");
+
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       shift = off % wordsPerBitmapWord;
+
+       bits = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
+       if(noptr)
+               bits |= bitNoPointers<<shift;
+       *b = bits;
+}
+
+// mark the block at v of size n as freed.
+void
+runtime·markfreed(void *v, uintptr n)
+{
+       uintptr *b, off, shift;
+
+       if(0)
+               runtime·printf("markallocated %p+%p\n", v, n);
+
+       if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+               runtime·throw("markallocated: bad pointer");
+
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       shift = off % wordsPerBitmapWord;
+
+       *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+}
+
+// check that the block at v of size n is marked freed.
+void
+runtime·checkfreed(void *v, uintptr n)
+{
+       uintptr *b, bits, off, shift;
+
+       if(!runtime·checking)
+               return;
+
+       if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+               return; // not allocated, so okay
+
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       shift = off % wordsPerBitmapWord;
+
+       bits = *b>>shift;
+       if((bits & bitAllocated) != 0) {
+               runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
+                       v, n, off, bits & bitMask);
+               runtime·throw("checkfreed: not freed");
+       }
+}
+
+// mark the span of memory at v as having n blocks of the given size.
+// if leftover is true, there is left over space at the end of the span.
+void
+runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
+{
+       uintptr *b, off, shift;
+       byte *p;
+
+       if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+               runtime·throw("markspan: bad pointer");
+
+       p = v;
+       if(leftover)    // mark a boundary just past end of last block too
+               n++;
+       for(; n-- > 0; p += size) {
+               off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;  // word offset
+               b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+               shift = off % wordsPerBitmapWord;
+               *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
+       }
+}
+
+// unmark the span of memory at v of length n bytes.
+void
+runtime·unmarkspan(void *v, uintptr n)
+{
+       uintptr *p, *b, off;
+
+       if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
+               runtime·throw("markspan: bad pointer");
+
+       p = v;
+       off = p - (uintptr*)runtime·mheap.arena_start;  // word offset
+       if(off % wordsPerBitmapWord != 0)
+               runtime·throw("markspan: unaligned pointer");
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       n /= PtrSize;
+       if(n%wordsPerBitmapWord != 0)
+               runtime·throw("unmarkspan: unaligned length");
+       n /= wordsPerBitmapWord;
+       while(n-- > 0)
+               *b-- = 0;
+}
+
+bool
+runtime·blockspecial(void *v)
+{
+       uintptr *b, off, shift;
+
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       shift = off % wordsPerBitmapWord;
+
+       return (*b & (bitSpecial<<shift)) != 0;
+}
+
+void
+runtime·setblockspecial(void *v)
+{
+       uintptr *b, off, shift;
+
+       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
+       b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+       shift = off % wordsPerBitmapWord;
+
+       *b |= bitSpecial<<shift;
+}
+
+void
+runtime·MHeap_MapBits(MHeap *h)
+{
+       // Caller has added extra mappings to the arena.
+       // Add extra mappings of bitmap words as needed.
+       // We allocate extra bitmap pieces in chunks of bitmapChunk.
+       enum {
+               bitmapChunk = 8192
+       };
+       uintptr n;
+       
+       n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
+       n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
+       if(h->bitmap_mapped >= n)
+               return;
+
+       runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
+       h->bitmap_mapped = n;
+}
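
The rounding in MHeap_MapBits, worked through with assumed numbers (64-bit, 3 MB of arena in use): the bitmap needs (arena_used - arena_start)/wordsPerBitmapWord bytes, rounded up to a multiple of bitmapChunk, and is mapped growing backward from arena_start.

	#include <stdio.h>

	int main(void) {
		unsigned long long used = 3ULL<<20;		// assumed arena_used - arena_start
		unsigned long long wordsPerBitmapWord = 16;	// 64-bit: 4 bits per word
		unsigned long long bitmapChunk = 8192;
		unsigned long long n = used / wordsPerBitmapWord;	// bitmap bytes needed
		n = (n + bitmapChunk - 1) & ~(bitmapChunk - 1);		// round up to a chunk
		printf("map %llu kB of bitmap ending at arena_start\n", n>>10);
		return 0;
	}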
diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c
index 0c9ac0a09689cc94607bdf84c13a814d55e0da0d..8061b7cf88838a5ac03f0394813e98c324035cba 100644
@@ -180,7 +180,9 @@ MHeap_Grow(MHeap *h, uintptr npage)
        // Allocate a multiple of 64kB (16 pages).
        npage = (npage+15)&~15;
        ask = npage<<PageShift;
-       if(ask < HeapAllocChunk)
+       if(ask > h->arena_end - h->arena_used)
+               return false;
+       if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used)
                ask = HeapAllocChunk;
 
        v = runtime·MHeap_SysAlloc(h, ask);
@@ -194,11 +196,6 @@ MHeap_Grow(MHeap *h, uintptr npage)
        }
        mstats.heap_sys += ask;
 
-       if((byte*)v < h->arena_start || h->arena_start == nil)
-               h->arena_start = v;
-       if((byte*)v+ask > h->arena_end)
-               h->arena_end = (byte*)v+ask;
-
        // Create a fake "in use" span and free it, so that the
        // right coalescing happens.
        s = runtime·FixAlloc_Alloc(&h->spanalloc);
@@ -370,10 +367,14 @@ runtime·MSpanList_IsEmpty(MSpan *list)
 void
 runtime·MSpanList_Insert(MSpan *list, MSpan *span)
 {
-       if(span->next != nil || span->prev != nil)
+       if(span->next != nil || span->prev != nil) {
+               runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
                runtime·throw("MSpanList_Insert");
+       }
        span->next = list->next;
        span->prev = list;
        span->next->prev = span;
        span->prev->next = span;
 }
+
+
diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc
index f4581e98d55547972883ab19a56cdb6950efa675..aae3d183fef9a1e4b10af8e3186948bdb2cbe25b 100644
@@ -65,7 +65,7 @@ stkbucket(uintptr *stk, int32 nstk)
                   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
                        return b;
 
-       b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
+       b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
        bucketmem += sizeof *b + nstk*sizeof stk[0];
        runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
        b->hash = h;
@@ -132,7 +132,7 @@ setaddrbucket(uintptr addr, Bucket *b)
                if(ah->addr == (addr>>20))
                        goto found;
 
-       ah = runtime·mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
+       ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
        addrmem += sizeof *ah;
        ah->next = addrhash[h];
        ah->addr = addr>>20;
@@ -140,7 +140,7 @@ setaddrbucket(uintptr addr, Bucket *b)
 
 found:
        if((e = addrfree) == nil) {
-               e = runtime·mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
+               e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
                addrmem += 64*sizeof *e;
                for(i=0; i+1<64; i++)
                        e[i].next = &e[i+1];
diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c
index ec85eb37317d5cd500c18141bb126c7550cc0787..770ef38ceffba74a2fb03a7916b0253360cd276d 100644
@@ -57,7 +57,7 @@ runtime·SizeToClass(int32 size)
 void
 runtime·InitSizes(void)
 {
-       int32 align, sizeclass, size, osize, nextsize, n;
+       int32 align, sizeclass, size, nextsize, n;
        uint32 i;
        uintptr allocsize, npages;
 
@@ -81,8 +81,7 @@ runtime·InitSizes(void)
                // the leftover is less than 1/8 of the total,
                // so wasted space is at most 12.5%.
                allocsize = PageSize;
-               osize = size + RefcountOverhead;
-               while(allocsize%osize > (allocsize/8))
+               while(allocsize%size > allocsize/8)
                        allocsize += PageSize;
                npages = allocsize >> PageShift;
 
@@ -93,7 +92,7 @@ runtime·InitSizes(void)
                // different sizes.
                if(sizeclass > 1
                && npages == runtime·class_to_allocnpages[sizeclass-1]
-               && allocsize/osize == allocsize/(runtime·class_to_size[sizeclass-1]+RefcountOverhead)) {
+               && allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
                        runtime·class_to_size[sizeclass-1] = size;
                        continue;
                }
diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c
index 05107547922cbca253f43a6951048c154bc5d607..1fee923e43be41d9aa3022dc83cce960156087a3 100644
@@ -41,7 +41,7 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret)
        ret->cap = cap;
 
        if((t->elem->kind&KindNoPointers))
-               ret->array = runtime·mallocgc(size, RefNoPointers, 1, 1);
+               ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1);
        else
                ret->array = runtime·mal(size);
 }
diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc
index 916559eb2d88c1148c110a6badc6b7e1b08b0b15..b72aa937c33f2a69fa7d09bfe2e94a4591695c26 100644
@@ -225,7 +225,7 @@ func slicebytetostring(b Slice) (s String) {
 }
 
 func stringtoslicebyte(s String) (b Slice) {
-       b.array = runtime·mallocgc(s.len, RefNoPointers, 1, 1);
+       b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);
        b.len = s.len;
        b.cap = s.len;
        runtime·mcpy(b.array, s.str, s.len);
@@ -268,7 +268,7 @@ func stringtosliceint(s String) (b Slice) {
                n++;
        }
 
-       b.array = runtime·mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1);
+       b.array = runtime·mallocgc(n*sizeof(r[0]), FlagNoPointers, 1, 1);
        b.len = n;
        b.cap = n;
        p = s.str;
diff --git a/src/pkg/runtime/windows/mem.c b/src/pkg/runtime/windows/mem.c
index 19d11ce8d3e76b262da833a811ff3978455a8bc1..c523195a46ae8af585c6132dc43ed3e53fd7f562 100644
@@ -48,7 +48,7 @@ runtime·SysFree(void *v, uintptr n)
 void*
 runtime·SysReserve(void *v, uintptr n)
 {
-       return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, 0);
+       return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
 }
 
 void
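
For context, the reserve-then-commit pattern this SysReserve change supports, as a hypothetical standalone Win32 sketch (not the runtime's code; the assumption is that later commits use the same PAGE_EXECUTE_READWRITE protection as the reservation):

	#include <windows.h>
	#include <stdio.h>

	int main(void) {
		SIZE_T n = 1<<20;
		// Reserve address space without backing pages (cf. SysReserve above).
		void *p = VirtualAlloc(NULL, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
		if(p == NULL)
			return 1;
		// Later, commit a piece of the reservation as the heap grows.
		if(VirtualAlloc(p, 4096, MEM_COMMIT, PAGE_EXECUTE_READWRITE) == NULL)
			return 1;
		printf("reserved %p, committed first page\n", p);
		return 0;
	}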