Cypherpunks repositories - gostls13.git/commitdiff
[dev.cc] runtime: convert memory allocator and garbage collector to Go
author Russ Cox <rsc@golang.org>
Tue, 11 Nov 2014 22:05:02 +0000 (17:05 -0500)
committer Russ Cox <rsc@golang.org>
Tue, 11 Nov 2014 22:05:02 +0000 (17:05 -0500)
The conversion was done with an automated tool and then
modified only as necessary to make it compile and run.

[This CL is part of the removal of C code from package runtime.
See golang.org/s/dev.cc for an overview.]

LGTM=r
R=r
CC=austin, dvyukov, golang-codereviews, iant, khr
https://golang.org/cl/167540043
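
The conversion pattern is visible throughout the diff: each C function in the deleted files reappears as a Go function of the same name, with C types mapped to their runtime Go equivalents (byte* becomes unsafe.Pointer, enums become const blocks, runtime·throw becomes gothrow). As a small standalone check of one translated comment — heapdump writes integers "in a varint format parseable by encoding/binary" — the sketch below re-encodes a value the same way dumpint does and reads it back with binary.Uvarint; the helper name putDumpInt is ours and is not part of this CL.

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	// putDumpInt encodes v the same way the runtime's dumpint does:
	// low 7 bits per byte, high bit set on every byte except the last.
	func putDumpInt(buf []byte, v uint64) int {
		n := 0
		for v >= 0x80 {
			buf[n] = byte(v | 0x80)
			n++
			v >>= 7
		}
		buf[n] = byte(v)
		return n + 1
	}

	func main() {
		var buf [10]byte
		n := putDumpInt(buf[:], 1<<40+3)
		v, _ := binary.Uvarint(buf[:n]) // encoding/binary reads it back directly
		fmt.Println(v)                  // 1099511627779
	}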

30 files changed:
src/runtime/chan.go
src/runtime/heapdump.c [deleted file]
src/runtime/heapdump.go [new file with mode: 0644]
src/runtime/malloc.c [deleted file]
src/runtime/malloc.go
src/runtime/malloc.h [deleted file]
src/runtime/malloc1.go [new file with mode: 0644]
src/runtime/malloc2.go [new file with mode: 0644]
src/runtime/mcache.c [deleted file]
src/runtime/mcache.go [new file with mode: 0644]
src/runtime/mcentral.c [deleted file]
src/runtime/mcentral.go [new file with mode: 0644]
src/runtime/mem.go
src/runtime/mem_darwin.c [deleted file]
src/runtime/mem_darwin.go [new file with mode: 0644]
src/runtime/mem_linux.c [deleted file]
src/runtime/mem_linux.go [new file with mode: 0644]
src/runtime/mfixalloc.c [deleted file]
src/runtime/mfixalloc.go [new file with mode: 0644]
src/runtime/mgc.go [new file with mode: 0644]
src/runtime/mgc0.c [deleted file]
src/runtime/mgc0.go
src/runtime/mgc1.go [moved from src/runtime/mgc0.h with 73% similarity]
src/runtime/mheap.c [deleted file]
src/runtime/mheap.go [new file with mode: 0644]
src/runtime/mprof.go
src/runtime/msize.c [deleted file]
src/runtime/msize.go [new file with mode: 0644]
src/runtime/slice.go
src/runtime/string.go

diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 004970182686da6f5a214abd0909a9a6fe6b8d5a..bb0110f94cf1b5a5c6939b9d860e7455a8477bdb 100644 (file)
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -26,7 +26,7 @@ func makechan(t *chantype, size int64) *hchan {
        if hchanSize%maxAlign != 0 || elem.align > maxAlign {
                gothrow("makechan: bad alignment")
        }
-       if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (maxmem-hchanSize)/uintptr(elem.size)) {
+       if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/uintptr(elem.size)) {
                panic("makechan: size out of range")
        }
 
diff --git a/src/runtime/heapdump.c b/src/runtime/heapdump.c
deleted file mode 100644 (file)
index eddbc1d..0000000
--- a/src/runtime/heapdump.c
+++ /dev/null
@@ -1,864 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implementation of runtime/debug.WriteHeapDump.  Writes all
-// objects in the heap plus additional info (roots, threads,
-// finalizers, etc.) to a file.
-
-// The format of the dumped file is described at
-// http://code.google.com/p/go-wiki/wiki/heapdump14
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "mgc0.h"
-#include "type.h"
-#include "typekind.h"
-#include "funcdata.h"
-#include "zaexperiment.h"
-#include "textflag.h"
-
-extern byte runtime·data[];
-extern byte runtime·edata[];
-extern byte runtime·bss[];
-extern byte runtime·ebss[];
-
-enum {
-       FieldKindEol = 0,
-       FieldKindPtr = 1,
-       FieldKindIface = 2,
-       FieldKindEface = 3,
-
-       TagEOF = 0,
-       TagObject = 1,
-       TagOtherRoot = 2,
-       TagType = 3,
-       TagGoRoutine = 4,
-       TagStackFrame = 5,
-       TagParams = 6,
-       TagFinalizer = 7,
-       TagItab = 8,
-       TagOSThread = 9,
-       TagMemStats = 10,
-       TagQueuedFinalizer = 11,
-       TagData = 12,
-       TagBss = 13,
-       TagDefer = 14,
-       TagPanic = 15,
-       TagMemProf = 16,
-       TagAllocSample = 17,
-};
-
-static uintptr* playgcprog(uintptr offset, uintptr *prog, void (*callback)(void*,uintptr,uintptr), void *arg);
-static void dumpfields(BitVector bv);
-static void dumpbvtypes(BitVector *bv, byte *base);
-static BitVector makeheapobjbv(byte *p, uintptr size);
-
-// fd to write the dump to.
-static uintptr dumpfd;
-
-#pragma dataflag NOPTR /* tmpbuf not a heap pointer at least */
-static byte    *tmpbuf;
-static uintptr tmpbufsize;
-
-// buffer of pending write data
-enum {
-       BufSize = 4096,
-};
-#pragma dataflag NOPTR
-static byte buf[BufSize];
-static uintptr nbuf;
-
-static void
-write(byte *data, uintptr len)
-{
-       if(len + nbuf <= BufSize) {
-               runtime·memmove(buf + nbuf, data, len);
-               nbuf += len;
-               return;
-       }
-       runtime·write(dumpfd, buf, nbuf);
-       if(len >= BufSize) {
-               runtime·write(dumpfd, data, len);
-               nbuf = 0;
-       } else {
-               runtime·memmove(buf, data, len);
-               nbuf = len;
-       }
-}
-
-static void
-flush(void)
-{
-       runtime·write(dumpfd, buf, nbuf);
-       nbuf = 0;
-}
-
-// Cache of types that have been serialized already.
-// We use a type's hash field to pick a bucket.
-// Inside a bucket, we keep a list of types that
-// have been serialized so far, most recently used first.
-// Note: when a bucket overflows we may end up
-// serializing a type more than once.  That's ok.
-enum {
-       TypeCacheBuckets = 256, // must be a power of 2
-       TypeCacheAssoc = 4,
-};
-typedef struct TypeCacheBucket TypeCacheBucket;
-struct TypeCacheBucket {
-       Type *t[TypeCacheAssoc];
-};
-#pragma dataflag NOPTR /* only initialized and used while world is stopped */
-static TypeCacheBucket typecache[TypeCacheBuckets];
-
-// dump a uint64 in a varint format parseable by encoding/binary
-static void
-dumpint(uint64 v)
-{
-       byte buf[10];
-       int32 n;
-       n = 0;
-       while(v >= 0x80) {
-               buf[n++] = v | 0x80;
-               v >>= 7;
-       }
-       buf[n++] = v;
-       write(buf, n);
-}
-
-static void
-dumpbool(bool b)
-{
-       dumpint(b ? 1 : 0);
-}
-
-// dump varint uint64 length followed by memory contents
-static void
-dumpmemrange(byte *data, uintptr len)
-{
-       dumpint(len);
-       write(data, len);
-}
-
-static void
-dumpstr(String s)
-{
-       dumpmemrange(s.str, s.len);
-}
-
-static void
-dumpcstr(int8 *c)
-{
-       dumpmemrange((byte*)c, runtime·findnull((byte*)c));
-}
-
-// dump information for a type
-static void
-dumptype(Type *t)
-{
-       TypeCacheBucket *b;
-       int32 i, j;
-
-       if(t == nil) {
-               return;
-       }
-
-       // If we've definitely serialized the type before,
-       // no need to do it again.
-       b = &typecache[t->hash & (TypeCacheBuckets-1)];
-       if(t == b->t[0]) return;
-       for(i = 1; i < TypeCacheAssoc; i++) {
-               if(t == b->t[i]) {
-                       // Move-to-front
-                       for(j = i; j > 0; j--) {
-                               b->t[j] = b->t[j-1];
-                       }
-                       b->t[0] = t;
-                       return;
-               }
-       }
-       // Might not have been dumped yet.  Dump it and
-       // remember we did so.
-       for(j = TypeCacheAssoc-1; j > 0; j--) {
-               b->t[j] = b->t[j-1];
-       }
-       b->t[0] = t;
-       
-       // dump the type
-       dumpint(TagType);
-       dumpint((uintptr)t);
-       dumpint(t->size);
-       if(t->x == nil || t->x->pkgPath == nil || t->x->name == nil) {
-               dumpstr(*t->string);
-       } else {
-               dumpint(t->x->pkgPath->len + 1 + t->x->name->len);
-               write(t->x->pkgPath->str, t->x->pkgPath->len);
-               write((byte*)".", 1);
-               write(t->x->name->str, t->x->name->len);
-       }
-       dumpbool((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0);
-}
-
-// dump an object
-static void
-dumpobj(byte *obj, uintptr size, BitVector bv)
-{
-       dumpbvtypes(&bv, obj);
-       dumpint(TagObject);
-       dumpint((uintptr)obj);
-       dumpmemrange(obj, size);
-       dumpfields(bv);
-}
-
-static void
-dumpotherroot(int8 *description, byte *to)
-{
-       dumpint(TagOtherRoot);
-       dumpcstr(description);
-       dumpint((uintptr)to);
-}
-
-static void
-dumpfinalizer(byte *obj, FuncVal *fn, Type* fint, PtrType *ot)
-{
-       dumpint(TagFinalizer);
-       dumpint((uintptr)obj);
-       dumpint((uintptr)fn);
-       dumpint((uintptr)fn->fn);
-       dumpint((uintptr)fint);
-       dumpint((uintptr)ot);
-}
-
-typedef struct ChildInfo ChildInfo;
-struct ChildInfo {
-       // Information passed up from the callee frame about
-       // the layout of the outargs region.
-       uintptr argoff;     // where the arguments start in the frame
-       uintptr arglen;     // size of args region
-       BitVector args;    // if args.n >= 0, pointer map of args region
-
-       byte *sp;           // callee sp
-       uintptr depth;      // depth in call stack (0 == most recent)
-};
-
-// dump kinds & offsets of interesting fields in bv
-static void
-dumpbv(BitVector *bv, uintptr offset)
-{
-       uintptr i;
-
-       for(i = 0; i < bv->n; i += BitsPerPointer) {
-               switch(bv->bytedata[i/8] >> i%8 & 3) {
-               case BitsDead:
-                       // BitsDead has already been processed in makeheapobjbv.
-                       // We should only see it in stack maps, in which case we should continue processing.
-                       break;
-               case BitsScalar:
-                       break;
-               case BitsPointer:
-                       dumpint(FieldKindPtr);
-                       dumpint(offset + i / BitsPerPointer * PtrSize);
-                       break;
-               case BitsMultiWord:
-                       switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
-                       default:
-                               runtime·throw("unexpected garbage collection bits");
-                       case BitsIface:
-                               dumpint(FieldKindIface);
-                               dumpint(offset + i / BitsPerPointer * PtrSize);
-                               i += BitsPerPointer;
-                               break;
-                       case BitsEface:
-                               dumpint(FieldKindEface);
-                               dumpint(offset + i / BitsPerPointer * PtrSize);
-                               i += BitsPerPointer;
-                               break;
-                       }
-               }
-       }
-}
-
-static bool
-dumpframe(Stkframe *s, void *arg)
-{
-       Func *f;
-       ChildInfo *child;
-       uintptr pc, off, size;
-       int32 pcdata;
-       StackMap *stackmap;
-       int8 *name;
-       BitVector bv;
-
-       child = (ChildInfo*)arg;
-       f = s->fn;
-
-       // Figure out what we can about our stack map
-       pc = s->pc;
-       if(pc != f->entry)
-               pc--;
-       pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, pc);
-       if(pcdata == -1) {
-               // We do not have a valid pcdata value but there might be a
-               // stackmap for this function.  It is likely that we are looking
-               // at the function prologue, assume so and hope for the best.
-               pcdata = 0;
-       }
-       stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-
-       // Dump any types we will need to resolve Efaces.
-       if(child->args.n >= 0)
-               dumpbvtypes(&child->args, (byte*)s->sp + child->argoff);
-       if(stackmap != nil && stackmap->n > 0) {
-               bv = runtime·stackmapdata(stackmap, pcdata);
-               dumpbvtypes(&bv, (byte*)(s->varp - bv.n / BitsPerPointer * PtrSize));
-       } else {
-               bv.n = -1;
-       }
-
-       // Dump main body of stack frame.
-       dumpint(TagStackFrame);
-       dumpint(s->sp); // lowest address in frame
-       dumpint(child->depth); // # of frames deep on the stack
-       dumpint((uintptr)child->sp); // sp of child, or 0 if bottom of stack
-       dumpmemrange((byte*)s->sp, s->fp - s->sp);  // frame contents
-       dumpint(f->entry);
-       dumpint(s->pc);
-       dumpint(s->continpc);
-       name = runtime·funcname(f);
-       if(name == nil)
-               name = "unknown function";
-       dumpcstr(name);
-
-       // Dump fields in the outargs section
-       if(child->args.n >= 0) {
-               dumpbv(&child->args, child->argoff);
-       } else {
-               // conservative - everything might be a pointer
-               for(off = child->argoff; off < child->argoff + child->arglen; off += PtrSize) {
-                       dumpint(FieldKindPtr);
-                       dumpint(off);
-               }
-       }
-
-       // Dump fields in the local vars section
-       if(stackmap == nil) {
-               // No locals information, dump everything.
-               for(off = child->arglen; off < s->varp - s->sp; off += PtrSize) {
-                       dumpint(FieldKindPtr);
-                       dumpint(off);
-               }
-       } else if(stackmap->n < 0) {
-               // Locals size information, dump just the locals.
-               size = -stackmap->n;
-               for(off = s->varp - size - s->sp; off <  s->varp - s->sp; off += PtrSize) {
-                       dumpint(FieldKindPtr);
-                       dumpint(off);
-               }
-       } else if(stackmap->n > 0) {
-               // Locals bitmap information, scan just the pointers in
-               // locals.
-               dumpbv(&bv, s->varp - bv.n / BitsPerPointer * PtrSize - s->sp);
-       }
-       dumpint(FieldKindEol);
-
-       // Record arg info for parent.
-       child->argoff = s->argp - s->fp;
-       child->arglen = s->arglen;
-       child->sp = (byte*)s->sp;
-       child->depth++;
-       stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
-       if(stackmap != nil)
-               child->args = runtime·stackmapdata(stackmap, pcdata);
-       else
-               child->args.n = -1;
-       return true;
-}
-
-static void
-dumpgoroutine(G *gp)
-{
-       uintptr sp, pc, lr;
-       ChildInfo child;
-       Defer *d;
-       Panic *p;
-       bool (*fn)(Stkframe*, void*);
-
-       if(gp->syscallsp != (uintptr)nil) {
-               sp = gp->syscallsp;
-               pc = gp->syscallpc;
-               lr = 0;
-       } else {
-               sp = gp->sched.sp;
-               pc = gp->sched.pc;
-               lr = gp->sched.lr;
-       }
-
-       dumpint(TagGoRoutine);
-       dumpint((uintptr)gp);
-       dumpint((uintptr)sp);
-       dumpint(gp->goid);
-       dumpint(gp->gopc);
-       dumpint(runtime·readgstatus(gp));
-       dumpbool(gp->issystem);
-       dumpbool(false);  // isbackground
-       dumpint(gp->waitsince);
-       dumpstr(gp->waitreason);
-       dumpint((uintptr)gp->sched.ctxt);
-       dumpint((uintptr)gp->m);
-       dumpint((uintptr)gp->defer);
-       dumpint((uintptr)gp->panic);
-
-       // dump stack
-       child.args.n = -1;
-       child.arglen = 0;
-       child.sp = nil;
-       child.depth = 0;
-       fn = dumpframe;
-       runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, &fn, &child, 0);
-
-       // dump defer & panic records
-       for(d = gp->defer; d != nil; d = d->link) {
-               dumpint(TagDefer);
-               dumpint((uintptr)d);
-               dumpint((uintptr)gp);
-               dumpint((uintptr)d->argp);
-               dumpint((uintptr)d->pc);
-               dumpint((uintptr)d->fn);
-               dumpint((uintptr)d->fn->fn);
-               dumpint((uintptr)d->link);
-       }
-       for (p = gp->panic; p != nil; p = p->link) {
-               dumpint(TagPanic);
-               dumpint((uintptr)p);
-               dumpint((uintptr)gp);
-               dumpint((uintptr)p->arg.type);
-               dumpint((uintptr)p->arg.data);
-               dumpint(0); // was p->defer, no longer recorded
-               dumpint((uintptr)p->link);
-       }
-}
-
-static void
-dumpgs(void)
-{
-       G *gp;
-       uint32 i;
-       uint32 status;
-
-       // goroutines & stacks
-       for(i = 0; i < runtime·allglen; i++) {
-               gp = runtime·allg[i];
-               status = runtime·readgstatus(gp); // The world is stopped so gp will not be in a scan state.
-               switch(status){
-               default:
-                       runtime·printf("runtime: unexpected G.status %d\n", status);
-                       runtime·throw("dumpgs in STW - bad status");
-               case Gdead:
-                       break;
-               case Grunnable:
-               case Gsyscall:
-               case Gwaiting:
-                       dumpgoroutine(gp);
-                       break;
-               }
-       }
-}
-
-static void
-finq_callback(FuncVal *fn, byte *obj, uintptr nret, Type *fint, PtrType *ot)
-{
-       dumpint(TagQueuedFinalizer);
-       dumpint((uintptr)obj);
-       dumpint((uintptr)fn);
-       dumpint((uintptr)fn->fn);
-       dumpint((uintptr)fint);
-       dumpint((uintptr)ot);
-       USED(&nret);
-}
-
-
-static void
-dumproots(void)
-{
-       MSpan *s, **allspans;
-       uint32 spanidx;
-       Special *sp;
-       SpecialFinalizer *spf;
-       byte *p;
-
-       // data segment
-       dumpbvtypes(&runtime·gcdatamask, runtime·data);
-       dumpint(TagData);
-       dumpint((uintptr)runtime·data);
-       dumpmemrange(runtime·data, runtime·edata - runtime·data);
-       dumpfields(runtime·gcdatamask);
-
-       // bss segment
-       dumpbvtypes(&runtime·gcbssmask, runtime·bss);
-       dumpint(TagBss);
-       dumpint((uintptr)runtime·bss);
-       dumpmemrange(runtime·bss, runtime·ebss - runtime·bss);
-       dumpfields(runtime·gcbssmask);
-
-       // MSpan.types
-       allspans = runtime·mheap.allspans;
-       for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
-               s = allspans[spanidx];
-               if(s->state == MSpanInUse) {
-                       // Finalizers
-                       for(sp = s->specials; sp != nil; sp = sp->next) {
-                               if(sp->kind != KindSpecialFinalizer)
-                                       continue;
-                               spf = (SpecialFinalizer*)sp;
-                               p = (byte*)((s->start << PageShift) + spf->special.offset);
-                               dumpfinalizer(p, spf->fn, spf->fint, spf->ot);
-                       }
-               }
-       }
-
-       // Finalizer queue
-       runtime·iterate_finq(finq_callback);
-}
-
-// Bit vector of free marks.   
-// Needs to be as big as the largest number of objects per span.       
-#pragma dataflag NOPTR
-static byte free[PageSize/8];  
-
-static void
-dumpobjs(void)
-{
-       uintptr i, j, size, n;
-       MSpan *s;
-       MLink *l;
-       byte *p;
-
-       for(i = 0; i < runtime·mheap.nspan; i++) {
-               s = runtime·mheap.allspans[i];
-               if(s->state != MSpanInUse)
-                       continue;
-               p = (byte*)(s->start << PageShift);
-               size = s->elemsize;
-               n = (s->npages << PageShift) / size;
-               if(n > nelem(free))     
-                       runtime·throw("free array doesn't have enough entries");       
-               for(l = s->freelist; l != nil; l = l->next)
-                       free[((byte*)l - p) / size] = true;     
-               for(j = 0; j < n; j++, p += size) {
-                       if(free[j]) {   
-                               free[j] = false;        
-                               continue;       
-                       }
-                       dumpobj(p, size, makeheapobjbv(p, size));
-               }
-       }
-}
-
-static void
-dumpparams(void)
-{
-       byte *x;
-
-       dumpint(TagParams);
-       x = (byte*)1;
-       if(*(byte*)&x == 1)
-               dumpbool(false); // little-endian ptrs
-       else
-               dumpbool(true); // big-endian ptrs
-       dumpint(PtrSize);
-       dumpint((uintptr)runtime·mheap.arena_start);
-       dumpint((uintptr)runtime·mheap.arena_used);
-       dumpint(thechar);
-       dumpcstr(GOEXPERIMENT);
-       dumpint(runtime·ncpu);
-}
-
-static void
-itab_callback(Itab *tab)
-{
-       Type *t;
-
-       t = tab->type;
-       // Dump a map from itab* to the type of its data field.
-       // We want this map so we can deduce types of interface referents.
-       if((t->kind & KindDirectIface) == 0) {
-               // indirect - data slot is a pointer to t.
-               dumptype(t->ptrto);
-               dumpint(TagItab);
-               dumpint((uintptr)tab);
-               dumpint((uintptr)t->ptrto);
-       } else if((t->kind & KindNoPointers) == 0) {
-               // t is pointer-like - data slot is a t.
-               dumptype(t);
-               dumpint(TagItab);
-               dumpint((uintptr)tab);
-               dumpint((uintptr)t);
-       } else {
-               // Data slot is a scalar.  Dump type just for fun.
-               // With pointer-only interfaces, this shouldn't happen.
-               dumptype(t);
-               dumpint(TagItab);
-               dumpint((uintptr)tab);
-               dumpint((uintptr)t);
-       }
-}
-
-static void
-dumpitabs(void)
-{
-       void (*fn)(Itab*);
-       
-       fn = itab_callback;
-       runtime·iterate_itabs(&fn);
-}
-
-static void
-dumpms(void)
-{
-       M *mp;
-
-       for(mp = runtime·allm; mp != nil; mp = mp->alllink) {
-               dumpint(TagOSThread);
-               dumpint((uintptr)mp);
-               dumpint(mp->id);
-               dumpint(mp->procid);
-       }
-}
-
-static void
-dumpmemstats(void)
-{
-       int32 i;
-
-       dumpint(TagMemStats);
-       dumpint(mstats.alloc);
-       dumpint(mstats.total_alloc);
-       dumpint(mstats.sys);
-       dumpint(mstats.nlookup);
-       dumpint(mstats.nmalloc);
-       dumpint(mstats.nfree);
-       dumpint(mstats.heap_alloc);
-       dumpint(mstats.heap_sys);
-       dumpint(mstats.heap_idle);
-       dumpint(mstats.heap_inuse);
-       dumpint(mstats.heap_released);
-       dumpint(mstats.heap_objects);
-       dumpint(mstats.stacks_inuse);
-       dumpint(mstats.stacks_sys);
-       dumpint(mstats.mspan_inuse);
-       dumpint(mstats.mspan_sys);
-       dumpint(mstats.mcache_inuse);
-       dumpint(mstats.mcache_sys);
-       dumpint(mstats.buckhash_sys);
-       dumpint(mstats.gc_sys);
-       dumpint(mstats.other_sys);
-       dumpint(mstats.next_gc);
-       dumpint(mstats.last_gc);
-       dumpint(mstats.pause_total_ns);
-       for(i = 0; i < 256; i++)
-               dumpint(mstats.pause_ns[i]);
-       dumpint(mstats.numgc);
-}
-
-static void
-dumpmemprof_callback(Bucket *b, uintptr nstk, uintptr *stk, uintptr size, uintptr allocs, uintptr frees)
-{
-       uintptr i, pc;
-       Func *f;
-       byte buf[20];
-       String file;
-       int32 line;
-
-       dumpint(TagMemProf);
-       dumpint((uintptr)b);
-       dumpint(size);
-       dumpint(nstk);
-       for(i = 0; i < nstk; i++) {
-               pc = stk[i];
-               f = runtime·findfunc(pc);
-               if(f == nil) {
-                       runtime·snprintf(buf, sizeof(buf), "%X", (uint64)pc);
-                       dumpcstr((int8*)buf);
-                       dumpcstr("?");
-                       dumpint(0);
-               } else {
-                       dumpcstr(runtime·funcname(f));
-                       // TODO: Why do we need to back up to a call instruction here?
-                       // Maybe profiler should do this.
-                       if(i > 0 && pc > f->entry) {
-                               if(thechar == '6' || thechar == '8')
-                                       pc--;
-                               else
-                                       pc -= 4; // arm, etc
-                       }
-                       line = runtime·funcline(f, pc, &file);
-                       dumpstr(file);
-                       dumpint(line);
-               }
-       }
-       dumpint(allocs);
-       dumpint(frees);
-}
-
-static void
-dumpmemprof(void)
-{
-       MSpan *s, **allspans;
-       uint32 spanidx;
-       Special *sp;
-       SpecialProfile *spp;
-       byte *p;
-       void (*fn)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr);
-       
-       fn = dumpmemprof_callback;
-       runtime·iterate_memprof(&fn);
-
-       allspans = runtime·mheap.allspans;
-       for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
-               s = allspans[spanidx];
-               if(s->state != MSpanInUse)
-                       continue;
-               for(sp = s->specials; sp != nil; sp = sp->next) {
-                       if(sp->kind != KindSpecialProfile)
-                               continue;
-                       spp = (SpecialProfile*)sp;
-                       p = (byte*)((s->start << PageShift) + spp->special.offset);
-                       dumpint(TagAllocSample);
-                       dumpint((uintptr)p);
-                       dumpint((uintptr)spp->b);
-               }
-       }
-}
-
-static void
-mdump(void)
-{
-       byte *hdr;
-       uintptr i;
-       MSpan *s;
-
-       // make sure we're done sweeping
-       for(i = 0; i < runtime·mheap.nspan; i++) {
-               s = runtime·mheap.allspans[i];
-               if(s->state == MSpanInUse)
-                       runtime·MSpan_EnsureSwept(s);
-       }
-
-       runtime·memclr((byte*)&typecache[0], sizeof(typecache));
-       hdr = (byte*)"go1.4 heap dump\n";
-       write(hdr, runtime·findnull(hdr));
-       dumpparams();
-       dumpitabs();
-       dumpobjs();
-       dumpgs();
-       dumpms();
-       dumproots();
-       dumpmemstats();
-       dumpmemprof();
-       dumpint(TagEOF);
-       flush();
-}
-
-void
-runtime·writeheapdump_m(void)
-{
-       uintptr fd;
-       
-       fd = g->m->scalararg[0];
-       g->m->scalararg[0] = 0;
-
-       runtime·casgstatus(g->m->curg, Grunning, Gwaiting);
-       g->waitreason = runtime·gostringnocopy((byte*)"dumping heap");
-
-       // Update stats so we can dump them.
-       // As a side effect, flushes all the MCaches so the MSpan.freelist
-       // lists contain all the free objects.
-       runtime·updatememstats(nil);
-
-       // Set dump file.
-       dumpfd = fd;
-
-       // Call dump routine.
-       mdump();
-
-       // Reset dump file.
-       dumpfd = 0;
-       if(tmpbuf != nil) {
-               runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
-               tmpbuf = nil;
-               tmpbufsize = 0;
-       }
-
-       runtime·casgstatus(g->m->curg, Gwaiting, Grunning);
-}
-
-// dumpint() the kind & offset of each field in an object.
-static void
-dumpfields(BitVector bv)
-{
-       dumpbv(&bv, 0);
-       dumpint(FieldKindEol);
-}
-
-// The heap dump reader needs to be able to disambiguate
-// Eface entries.  So it needs to know every type that might
-// appear in such an entry.  The following routine accomplishes that.
-
-// Dump all the types that appear in the type field of
-// any Eface described by this bit vector.
-static void
-dumpbvtypes(BitVector *bv, byte *base)
-{
-       uintptr i;
-
-       for(i = 0; i < bv->n; i += BitsPerPointer) {
-               if((bv->bytedata[i/8] >> i%8 & 3) != BitsMultiWord)
-                       continue;
-               switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
-               default:
-                       runtime·throw("unexpected garbage collection bits");
-               case BitsIface:
-                       i += BitsPerPointer;
-                       break;
-               case BitsEface:
-                       dumptype(*(Type**)(base + i / BitsPerPointer * PtrSize));
-                       i += BitsPerPointer;
-                       break;
-               }
-       }
-}
-
-static BitVector
-makeheapobjbv(byte *p, uintptr size)
-{
-       uintptr off, nptr, i;
-       byte shift, *bitp, bits;
-       bool mw;
-
-       // Extend the temp buffer if necessary.
-       nptr = size/PtrSize;
-       if(tmpbufsize < nptr*BitsPerPointer/8+1) {
-               if(tmpbuf != nil)
-                       runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
-               tmpbufsize = nptr*BitsPerPointer/8+1;
-               tmpbuf = runtime·sysAlloc(tmpbufsize, &mstats.other_sys);
-               if(tmpbuf == nil)
-                       runtime·throw("heapdump: out of memory");
-       }
-
-       // Copy and compact the bitmap.
-       mw = false;
-       for(i = 0; i < nptr; i++) {
-               off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
-               bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-               shift = (off % wordsPerBitmapByte) * gcBits;
-               bits = (*bitp >> (shift + 2)) & BitsMask;
-               if(!mw && bits == BitsDead)
-                       break;  // end of heap object
-               mw = !mw && bits == BitsMultiWord;
-               tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
-               tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
-       }
-       return (BitVector){i*BitsPerPointer, tmpbuf};
-}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
new file mode 100644 (file)
index 0000000..01e70a3
--- /dev/null
+++ b/src/runtime/heapdump.go
@@ -0,0 +1,733 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of runtime/debug.WriteHeapDump.  Writes all
+// objects in the heap plus additional info (roots, threads,
+// finalizers, etc.) to a file.
+
+// The format of the dumped file is described at
+// http://code.google.com/p/go-wiki/wiki/heapdump14
+
+package runtime
+
+import "unsafe"
+
+const (
+       fieldKindEol       = 0
+       fieldKindPtr       = 1
+       fieldKindIface     = 2
+       fieldKindEface     = 3
+       tagEOF             = 0
+       tagObject          = 1
+       tagOtherRoot       = 2
+       tagType            = 3
+       tagGoroutine       = 4
+       tagStackFrame      = 5
+       tagParams          = 6
+       tagFinalizer       = 7
+       tagItab            = 8
+       tagOSThread        = 9
+       tagMemStats        = 10
+       tagQueuedFinalizer = 11
+       tagData            = 12
+       tagBSS             = 13
+       tagDefer           = 14
+       tagPanic           = 15
+       tagMemProf         = 16
+       tagAllocSample     = 17
+)
+
+var dumpfd uintptr // fd to write the dump to.
+var tmpbuf []byte
+
+// buffer of pending write data
+const (
+       bufSize = 4096
+)
+
+var buf [bufSize]byte
+var nbuf uintptr
+
+func dwrite(data unsafe.Pointer, len uintptr) {
+       if len == 0 {
+               return
+       }
+       if nbuf+len <= bufSize {
+               copy(buf[nbuf:], (*[bufSize]byte)(data)[:len])
+               nbuf += len
+               return
+       }
+
+       write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+       if len >= bufSize {
+               write(dumpfd, data, int32(len))
+               nbuf = 0
+       } else {
+               copy(buf[:], (*[bufSize]byte)(data)[:len])
+               nbuf = len
+       }
+}
+
+func dwritebyte(b byte) {
+       dwrite(unsafe.Pointer(&b), 1)
+}
+
+func flush() {
+       write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+       nbuf = 0
+}
+
+// Cache of types that have been serialized already.
+// We use a type's hash field to pick a bucket.
+// Inside a bucket, we keep a list of types that
+// have been serialized so far, most recently used first.
+// Note: when a bucket overflows we may end up
+// serializing a type more than once.  That's ok.
+const (
+       typeCacheBuckets = 256
+       typeCacheAssoc   = 4
+)
+
+type typeCacheBucket struct {
+       t [typeCacheAssoc]*_type
+}
+
+var typecache [typeCacheBuckets]typeCacheBucket
+
+// dump a uint64 in a varint format parseable by encoding/binary
+func dumpint(v uint64) {
+       var buf [10]byte
+       var n int
+       for v >= 0x80 {
+               buf[n] = byte(v | 0x80)
+               n++
+               v >>= 7
+       }
+       buf[n] = byte(v)
+       n++
+       dwrite(unsafe.Pointer(&buf), uintptr(n))
+}
+
+func dumpbool(b bool) {
+       if b {
+               dumpint(1)
+       } else {
+               dumpint(0)
+       }
+}
+
+// dump varint uint64 length followed by memory contents
+func dumpmemrange(data unsafe.Pointer, len uintptr) {
+       dumpint(uint64(len))
+       dwrite(data, len)
+}
+
+func dumpslice(b []byte) {
+       dumpint(uint64(len(b)))
+       if len(b) > 0 {
+               dwrite(unsafe.Pointer(&b[0]), uintptr(len(b)))
+       }
+}
+
+func dumpstr(s string) {
+       sp := (*stringStruct)(unsafe.Pointer(&s))
+       dumpmemrange(sp.str, uintptr(sp.len))
+}
+
+// dump information for a type
+func dumptype(t *_type) {
+       if t == nil {
+               return
+       }
+
+       // If we've definitely serialized the type before,
+       // no need to do it again.
+       b := &typecache[t.hash&(typeCacheBuckets-1)]
+       if t == b.t[0] {
+               return
+       }
+       for i := 1; i < typeCacheAssoc; i++ {
+               if t == b.t[i] {
+                       // Move-to-front
+                       for j := i; j > 0; j-- {
+                               b.t[j] = b.t[j-1]
+                       }
+                       b.t[0] = t
+                       return
+               }
+       }
+
+       // Might not have been dumped yet.  Dump it and
+       // remember we did so.
+       for j := typeCacheAssoc - 1; j > 0; j-- {
+               b.t[j] = b.t[j-1]
+       }
+       b.t[0] = t
+
+       // dump the type
+       dumpint(tagType)
+       dumpint(uint64(uintptr(unsafe.Pointer(t))))
+       dumpint(uint64(t.size))
+       if t.x == nil || t.x.pkgpath == nil || t.x.name == nil {
+               dumpstr(*t._string)
+       } else {
+               pkgpath := (*stringStruct)(unsafe.Pointer(&t.x.pkgpath))
+               name := (*stringStruct)(unsafe.Pointer(&t.x.name))
+               dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
+               dwrite(pkgpath.str, uintptr(pkgpath.len))
+               dwritebyte('.')
+               dwrite(name.str, uintptr(name.len))
+       }
+       dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0)
+}
+
+// dump an object
+func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
+       dumpbvtypes(&bv, obj)
+       dumpint(tagObject)
+       dumpint(uint64(uintptr(obj)))
+       dumpmemrange(obj, size)
+       dumpfields(bv)
+}
+
+func dumpotherroot(description string, to unsafe.Pointer) {
+       dumpint(tagOtherRoot)
+       dumpstr(description)
+       dumpint(uint64(uintptr(to)))
+}
+
+func dumpfinalizer(obj unsafe.Pointer, fn *funcval, fint *_type, ot *ptrtype) {
+       dumpint(tagFinalizer)
+       dumpint(uint64(uintptr(obj)))
+       dumpint(uint64(uintptr(unsafe.Pointer(fn))))
+       dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
+       dumpint(uint64(uintptr(unsafe.Pointer(fint))))
+       dumpint(uint64(uintptr(unsafe.Pointer(ot))))
+}
+
+type childInfo struct {
+       // Information passed up from the callee frame about
+       // the layout of the outargs region.
+       argoff uintptr   // where the arguments start in the frame
+       arglen uintptr   // size of args region
+       args   bitvector // if args.n >= 0, pointer map of args region
+       sp     *uint8    // callee sp
+       depth  uintptr   // depth in call stack (0 == most recent)
+}
+
+// dump kinds & offsets of interesting fields in bv
+func dumpbv(cbv *bitvector, offset uintptr) {
+       bv := gobv(*cbv)
+       for i := uintptr(0); i < uintptr(bv.n); i += bitsPerPointer {
+               switch bv.bytedata[i/8] >> (i % 8) & 3 {
+               default:
+                       gothrow("unexpected pointer bits")
+               case _BitsDead:
+                       // BitsDead has already been processed in makeheapobjbv.
+                       // We should only see it in stack maps, in which case we should continue processing.
+               case _BitsScalar:
+                       // ok
+               case _BitsPointer:
+                       dumpint(fieldKindPtr)
+                       dumpint(uint64(offset + i/_BitsPerPointer*ptrSize))
+               }
+       }
+}
+
+func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
+       child := (*childInfo)(arg)
+       f := s.fn
+
+       // Figure out what we can about our stack map
+       pc := s.pc
+       if pc != f.entry {
+               pc--
+       }
+       pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, pc)
+       if pcdata == -1 {
+               // We do not have a valid pcdata value but there might be a
+               // stackmap for this function.  It is likely that we are looking
+               // at the function prologue, assume so and hope for the best.
+               pcdata = 0
+       }
+       stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+
+       // Dump any types we will need to resolve Efaces.
+       if child.args.n >= 0 {
+               dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff))
+       }
+       var bv bitvector
+       if stkmap != nil && stkmap.n > 0 {
+               bv = stackmapdata(stkmap, pcdata)
+               dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n/_BitsPerPointer*ptrSize)))
+       } else {
+               bv.n = -1
+       }
+
+       // Dump main body of stack frame.
+       dumpint(tagStackFrame)
+       dumpint(uint64(s.sp))                              // lowest address in frame
+       dumpint(uint64(child.depth))                       // # of frames deep on the stack
+       dumpint(uint64(uintptr(unsafe.Pointer(child.sp)))) // sp of child, or 0 if bottom of stack
+       dumpmemrange(unsafe.Pointer(s.sp), s.fp-s.sp)      // frame contents
+       dumpint(uint64(f.entry))
+       dumpint(uint64(s.pc))
+       dumpint(uint64(s.continpc))
+       name := gofuncname(f)
+       if name == "" {
+               name = "unknown function"
+       }
+       dumpstr(name)
+
+       // Dump fields in the outargs section
+       if child.args.n >= 0 {
+               dumpbv(&child.args, child.argoff)
+       } else {
+               // conservative - everything might be a pointer
+               for off := child.argoff; off < child.argoff+child.arglen; off += ptrSize {
+                       dumpint(fieldKindPtr)
+                       dumpint(uint64(off))
+               }
+       }
+
+       // Dump fields in the local vars section
+       if stkmap == nil {
+               // No locals information, dump everything.
+               for off := child.arglen; off < s.varp-s.sp; off += ptrSize {
+                       dumpint(fieldKindPtr)
+                       dumpint(uint64(off))
+               }
+       } else if stkmap.n < 0 {
+               // Locals size information, dump just the locals.
+               size := uintptr(-stkmap.n)
+               for off := s.varp - size - s.sp; off < s.varp-s.sp; off += ptrSize {
+                       dumpint(fieldKindPtr)
+                       dumpint(uint64(off))
+               }
+       } else if stkmap.n > 0 {
+               // Locals bitmap information, scan just the pointers in
+               // locals.
+               dumpbv(&bv, s.varp-uintptr(bv.n)/_BitsPerPointer*ptrSize-s.sp)
+       }
+       dumpint(fieldKindEol)
+
+       // Record arg info for parent.
+       child.argoff = s.argp - s.fp
+       child.arglen = s.arglen
+       child.sp = (*uint8)(unsafe.Pointer(s.sp))
+       child.depth++
+       stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+       if stkmap != nil {
+               child.args = stackmapdata(stkmap, pcdata)
+       } else {
+               child.args.n = -1
+       }
+       return true
+}
+
+func dumpgoroutine(gp *g) {
+       var sp, pc, lr uintptr
+       if gp.syscallsp != 0 {
+               sp = gp.syscallsp
+               pc = gp.syscallpc
+               lr = 0
+       } else {
+               sp = gp.sched.sp
+               pc = gp.sched.pc
+               lr = gp.sched.lr
+       }
+
+       dumpint(tagGoroutine)
+       dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+       dumpint(uint64(sp))
+       dumpint(uint64(gp.goid))
+       dumpint(uint64(gp.gopc))
+       dumpint(uint64(readgstatus(gp)))
+       dumpbool(gp.issystem)
+       dumpbool(false) // isbackground
+       dumpint(uint64(gp.waitsince))
+       dumpstr(gp.waitreason)
+       dumpint(uint64(uintptr(gp.sched.ctxt)))
+       dumpint(uint64(uintptr(unsafe.Pointer(gp.m))))
+       dumpint(uint64(uintptr(unsafe.Pointer(gp._defer))))
+       dumpint(uint64(uintptr(unsafe.Pointer(gp._panic))))
+
+       // dump stack
+       var child childInfo
+       child.args.n = -1
+       child.arglen = 0
+       child.sp = nil
+       child.depth = 0
+       gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, dumpframe, noescape(unsafe.Pointer(&child)), 0)
+
+       // dump defer & panic records
+       for d := gp._defer; d != nil; d = d.link {
+               dumpint(tagDefer)
+               dumpint(uint64(uintptr(unsafe.Pointer(d))))
+               dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+               dumpint(uint64(d.argp))
+               dumpint(uint64(d.pc))
+               dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
+               dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
+               dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
+       }
+       for p := gp._panic; p != nil; p = p.link {
+               dumpint(tagPanic)
+               dumpint(uint64(uintptr(unsafe.Pointer(p))))
+               dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+               eface := (*eface)(unsafe.Pointer(&p.arg))
+               dumpint(uint64(uintptr(unsafe.Pointer(eface._type))))
+               dumpint(uint64(uintptr(unsafe.Pointer(eface.data))))
+               dumpint(0) // was p->defer, no longer recorded
+               dumpint(uint64(uintptr(unsafe.Pointer(p.link))))
+       }
+}
+
+func dumpgs() {
+       // goroutines & stacks
+       for i := 0; uintptr(i) < allglen; i++ {
+               gp := allgs[i]
+               status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
+               switch status {
+               default:
+                       print("runtime: unexpected G.status ", hex(status), "\n")
+                       gothrow("dumpgs in STW - bad status")
+               case _Gdead:
+                       // ok
+               case _Grunnable,
+                       _Gsyscall,
+                       _Gwaiting:
+                       dumpgoroutine(gp)
+               }
+       }
+}
+
+func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
+       dumpint(tagQueuedFinalizer)
+       dumpint(uint64(uintptr(obj)))
+       dumpint(uint64(uintptr(unsafe.Pointer(fn))))
+       dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
+       dumpint(uint64(uintptr(unsafe.Pointer(fint))))
+       dumpint(uint64(uintptr(unsafe.Pointer(ot))))
+}
+
+func dumproots() {
+       // data segment
+       dumpbvtypes(&gcdatamask, unsafe.Pointer(&data))
+       dumpint(tagData)
+       dumpint(uint64(uintptr(unsafe.Pointer(&data))))
+       dumpmemrange(unsafe.Pointer(&data), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
+       dumpfields(gcdatamask)
+
+       // bss segment
+       dumpbvtypes(&gcbssmask, unsafe.Pointer(&bss))
+       dumpint(tagBSS)
+       dumpint(uint64(uintptr(unsafe.Pointer(&bss))))
+       dumpmemrange(unsafe.Pointer(&bss), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
+       dumpfields(gcbssmask)
+
+       // MSpan.types
+       allspans := h_allspans
+       for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
+               s := allspans[spanidx]
+               if s.state == _MSpanInUse {
+                       // Finalizers
+                       for sp := s.specials; sp != nil; sp = sp.next {
+                               if sp.kind != _KindSpecialFinalizer {
+                                       continue
+                               }
+                               spf := (*specialfinalizer)(unsafe.Pointer(sp))
+                               p := unsafe.Pointer((uintptr(s.start) << _PageShift) + uintptr(spf.special.offset))
+                               dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
+                       }
+               }
+       }
+
+       // Finalizer queue
+       iterate_finq(finq_callback)
+}
+
+// Bit vector of free marks.
+// Needs to be as big as the largest number of objects per span.
+var freemark [_PageSize / 8]bool
+
+func dumpobjs() {
+       for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
+               s := h_allspans[i]
+               if s.state != _MSpanInUse {
+                       continue
+               }
+               p := uintptr(s.start << _PageShift)
+               size := s.elemsize
+               n := (s.npages << _PageShift) / size
+               if n > uintptr(len(freemark)) {
+                       gothrow("freemark array doesn't have enough entries")
+               }
+               for l := s.freelist; l != nil; l = l.next {
+                       freemark[(uintptr(unsafe.Pointer(l))-p)/size] = true
+               }
+               for j := uintptr(0); j < n; j, p = j+1, p+size {
+                       if freemark[j] {
+                               freemark[j] = false
+                               continue
+                       }
+                       dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size))
+               }
+       }
+}
+
+func dumpparams() {
+       dumpint(tagParams)
+       x := uintptr(1)
+       if *(*byte)(unsafe.Pointer(&x)) == 1 {
+               dumpbool(false) // little-endian ptrs
+       } else {
+               dumpbool(true) // big-endian ptrs
+       }
+       dumpint(ptrSize)
+       dumpint(uint64(mheap_.arena_start))
+       dumpint(uint64(mheap_.arena_used))
+       dumpint(thechar)
+       dumpstr(goexperiment)
+       dumpint(uint64(ncpu))
+}
+
+func itab_callback(tab *itab) {
+       t := tab._type
+       // Dump a map from itab* to the type of its data field.
+       // We want this map so we can deduce types of interface referents.
+       if t.kind&kindDirectIface == 0 {
+               // indirect - data slot is a pointer to t.
+               dumptype(t.ptrto)
+               dumpint(tagItab)
+               dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+               dumpint(uint64(uintptr(unsafe.Pointer(t.ptrto))))
+       } else if t.kind&kindNoPointers == 0 {
+               // t is pointer-like - data slot is a t.
+               dumptype(t)
+               dumpint(tagItab)
+               dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+               dumpint(uint64(uintptr(unsafe.Pointer(t))))
+       } else {
+               // Data slot is a scalar.  Dump type just for fun.
+               // With pointer-only interfaces, this shouldn't happen.
+               dumptype(t)
+               dumpint(tagItab)
+               dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+               dumpint(uint64(uintptr(unsafe.Pointer(t))))
+       }
+}
+
+func dumpitabs() {
+       iterate_itabs(itab_callback)
+}
+
+func dumpms() {
+       for mp := allm; mp != nil; mp = mp.alllink {
+               dumpint(tagOSThread)
+               dumpint(uint64(uintptr(unsafe.Pointer(mp))))
+               dumpint(uint64(mp.id))
+               dumpint(mp.procid)
+       }
+}
+
+func dumpmemstats() {
+       dumpint(tagMemStats)
+       dumpint(memstats.alloc)
+       dumpint(memstats.total_alloc)
+       dumpint(memstats.sys)
+       dumpint(memstats.nlookup)
+       dumpint(memstats.nmalloc)
+       dumpint(memstats.nfree)
+       dumpint(memstats.heap_alloc)
+       dumpint(memstats.heap_sys)
+       dumpint(memstats.heap_idle)
+       dumpint(memstats.heap_inuse)
+       dumpint(memstats.heap_released)
+       dumpint(memstats.heap_objects)
+       dumpint(memstats.stacks_inuse)
+       dumpint(memstats.stacks_sys)
+       dumpint(memstats.mspan_inuse)
+       dumpint(memstats.mspan_sys)
+       dumpint(memstats.mcache_inuse)
+       dumpint(memstats.mcache_sys)
+       dumpint(memstats.buckhash_sys)
+       dumpint(memstats.gc_sys)
+       dumpint(memstats.other_sys)
+       dumpint(memstats.next_gc)
+       dumpint(memstats.last_gc)
+       dumpint(memstats.pause_total_ns)
+       for i := 0; i < 256; i++ {
+               dumpint(memstats.pause_ns[i])
+       }
+       dumpint(uint64(memstats.numgc))
+}
+
+func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
+       stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
+       dumpint(tagMemProf)
+       dumpint(uint64(uintptr(unsafe.Pointer(b))))
+       dumpint(uint64(size))
+       dumpint(uint64(nstk))
+       for i := uintptr(0); i < nstk; i++ {
+               pc := stk[i]
+               f := findfunc(pc)
+               if f == nil {
+                       var buf [64]byte
+                       n := len(buf)
+                       n--
+                       buf[n] = ')'
+                       if pc == 0 {
+                               n--
+                               buf[n] = '0'
+                       } else {
+                               for pc > 0 {
+                                       n--
+                                       buf[n] = "0123456789abcdef"[pc&15]
+                                       pc >>= 4
+                               }
+                       }
+                       n--
+                       buf[n] = 'x'
+                       n--
+                       buf[n] = '0'
+                       n--
+                       buf[n] = '('
+                       dumpslice(buf[n:])
+                       dumpstr("?")
+                       dumpint(0)
+               } else {
+                       dumpstr(gofuncname(f))
+                       if i > 0 && pc > f.entry {
+                               pc--
+                       }
+                       var file string
+                       line := funcline(f, pc, &file)
+                       dumpstr(file)
+                       dumpint(uint64(line))
+               }
+       }
+       dumpint(uint64(allocs))
+       dumpint(uint64(frees))
+}
+
+func dumpmemprof() {
+       iterate_memprof(dumpmemprof_callback)
+       allspans := h_allspans
+       for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
+               s := allspans[spanidx]
+               if s.state != _MSpanInUse {
+                       continue
+               }
+               for sp := s.specials; sp != nil; sp = sp.next {
+                       if sp.kind != _KindSpecialProfile {
+                               continue
+                       }
+                       spp := (*specialprofile)(unsafe.Pointer(sp))
+                       p := uintptr(s.start<<_PageShift) + uintptr(spp.special.offset)
+                       dumpint(tagAllocSample)
+                       dumpint(uint64(p))
+                       dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
+               }
+       }
+}
+
+var dumphdr = []byte("go1.4 heap dump\n")
+
+func mdump() {
+       // make sure we're done sweeping
+       for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
+               s := h_allspans[i]
+               if s.state == _MSpanInUse {
+                       mSpan_EnsureSwept(s)
+               }
+       }
+       memclr(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache))
+       dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr)))
+       dumpparams()
+       dumpitabs()
+       dumpobjs()
+       dumpgs()
+       dumpms()
+       dumproots()
+       dumpmemstats()
+       dumpmemprof()
+       dumpint(tagEOF)
+       flush()
+}
+
+func writeheapdump_m() {
+       _g_ := getg()
+       fd := _g_.m.scalararg[0]
+       _g_.m.scalararg[0] = 0
+
+       casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
+       _g_.waitreason = "dumping heap"
+
+       // Update stats so we can dump them.
+       // As a side effect, flushes all the MCaches so the MSpan.freelist
+       // lists contain all the free objects.
+       updatememstats(nil)
+
+       // Set dump file.
+       dumpfd = fd
+
+       // Call dump routine.
+       mdump()
+
+       // Reset dump file.
+       dumpfd = 0
+       if tmpbuf != nil {
+               sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
+               tmpbuf = nil
+       }
+
+       casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
+}
+
+// dumpint() the kind & offset of each field in an object.
+func dumpfields(bv bitvector) {
+       dumpbv(&bv, 0)
+       dumpint(fieldKindEol)
+}
+
+// The heap dump reader needs to be able to disambiguate
+// Eface entries.  So it needs to know every type that might
+// appear in such an entry.  The following routine accomplishes that.
+// TODO(rsc, khr): Delete - no longer possible.
+
+// Dump all the types that appear in the type field of
+// any Eface described by this bit vector.
+func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
+}
+
+func makeheapobjbv(p uintptr, size uintptr) bitvector {
+       // Extend the temp buffer if necessary.
+       nptr := size / ptrSize
+       if uintptr(len(tmpbuf)) < nptr*_BitsPerPointer/8+1 {
+               if tmpbuf != nil {
+                       sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
+               }
+               n := nptr*_BitsPerPointer/8 + 1
+               p := sysAlloc(n, &memstats.other_sys)
+               if p == nil {
+                       gothrow("heapdump: out of memory")
+               }
+               tmpbuf = (*[1 << 30]byte)(p)[:n]
+       }
+       // Copy and compact the bitmap.
+       var i uintptr
+       for i = 0; i < nptr; i++ {
+               off := (p + i*ptrSize - mheap_.arena_start) / ptrSize
+               bitp := (*uint8)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1))
+               shift := uint8((off % wordsPerBitmapByte) * gcBits)
+               bits := (*bitp >> (shift + 2)) & _BitsMask
+               if bits == _BitsDead {
+                       break // end of heap object
+               }
+               tmpbuf[i*_BitsPerPointer/8] &^= (_BitsMask << ((i * _BitsPerPointer) % 8))
+               tmpbuf[i*_BitsPerPointer/8] |= bits << ((i * _BitsPerPointer) % 8)
+       }
+       return bitvector{int32(i * _BitsPerPointer), &tmpbuf[0]}
+}
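
The new heapdump.go above is the writer behind runtime/debug.WriteHeapDump: it stops the world, writes the "go1.4 heap dump" header, and then emits the tagged records produced by mdump. For context only (application side, not part of this CL), a minimal caller looks like:

	package main

	import (
		"os"
		"runtime/debug"
	)

	func main() {
		f, err := os.Create("heap.dump")
		if err != nil {
			panic(err)
		}
		defer f.Close()
		// Stops the world and writes the dump in the format produced by
		// mdump above ("go1.4 heap dump" header, then tagged records).
		debug.WriteHeapDump(f.Fd())
	}
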
diff --git a/src/runtime/malloc.c b/src/runtime/malloc.c
deleted file mode 100644 (file)
index b79c30b..0000000
--- a/src/runtime/malloc.c
+++ /dev/null
@@ -1,396 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// See malloc.h for overview.
-//
-// TODO(rsc): double-check stats.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "type.h"
-#include "typekind.h"
-#include "race.h"
-#include "stack.h"
-#include "textflag.h"
-
-// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
-#pragma dataflag NOPTR
-MHeap runtime·mheap;
-#pragma dataflag NOPTR
-MStats runtime·memstats;
-
-int32
-runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
-{
-       uintptr n, i;
-       byte *p;
-       MSpan *s;
-
-       g->m->mcache->local_nlookup++;
-       if (sizeof(void*) == 4 && g->m->mcache->local_nlookup >= (1<<30)) {
-               // purge cache stats to prevent overflow
-               runtime·lock(&runtime·mheap.lock);
-               runtime·purgecachedstats(g->m->mcache);
-               runtime·unlock(&runtime·mheap.lock);
-       }
-
-       s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
-       if(sp)
-               *sp = s;
-       if(s == nil) {
-               if(base)
-                       *base = nil;
-               if(size)
-                       *size = 0;
-               return 0;
-       }
-
-       p = (byte*)((uintptr)s->start<<PageShift);
-       if(s->sizeclass == 0) {
-               // Large object.
-               if(base)
-                       *base = p;
-               if(size)
-                       *size = s->npages<<PageShift;
-               return 1;
-       }
-
-       n = s->elemsize;
-       if(base) {
-               i = ((byte*)v - p)/n;
-               *base = p + i*n;
-       }
-       if(size)
-               *size = n;
-
-       return 1;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·purgecachedstats(MCache *c)
-{
-       MHeap *h;
-       int32 i;
-
-       // Protected by either heap or GC lock.
-       h = &runtime·mheap;
-       mstats.heap_alloc += c->local_cachealloc;
-       c->local_cachealloc = 0;
-       mstats.tinyallocs += c->local_tinyallocs;
-       c->local_tinyallocs = 0;
-       mstats.nlookup += c->local_nlookup;
-       c->local_nlookup = 0;
-       h->largefree += c->local_largefree;
-       c->local_largefree = 0;
-       h->nlargefree += c->local_nlargefree;
-       c->local_nlargefree = 0;
-       for(i=0; i<nelem(c->local_nsmallfree); i++) {
-               h->nsmallfree[i] += c->local_nsmallfree[i];
-               c->local_nsmallfree[i] = 0;
-       }
-}
-
-// Size of the trailing by_size array differs between Go and C,
-// and all data after by_size is local to C, not exported to Go.
-// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
-// sizeof_C_MStats is what C thinks about size of Go struct.
-uintptr runtime·sizeof_C_MStats = offsetof(MStats, by_size[61]);
-
-#define MaxArena32 (2U<<30)
-
-// For use by Go. If it were a C enum it would be made available automatically,
-// but the value of MaxMem is too large for enum.
-uintptr runtime·maxmem = MaxMem;
-
-void
-runtime·mallocinit(void)
-{
-       byte *p, *p1;
-       uintptr arena_size, bitmap_size, spans_size, p_size;
-       extern byte runtime·end[];
-       uintptr limit;
-       uint64 i;
-       bool reserved;
-
-       p = nil;
-       p_size = 0;
-       arena_size = 0;
-       bitmap_size = 0;
-       spans_size = 0;
-       reserved = false;
-
-       // for 64-bit build
-       USED(p);
-       USED(p_size);
-       USED(arena_size);
-       USED(bitmap_size);
-       USED(spans_size);
-
-       runtime·InitSizes();
-
-       if(runtime·class_to_size[TinySizeClass] != TinySize)
-               runtime·throw("bad TinySizeClass");
-
-       // limit = runtime·memlimit();
-       // See https://code.google.com/p/go/issues/detail?id=5049
-       // TODO(rsc): Fix after 1.1.
-       limit = 0;
-
-       // Set up the allocation arena, a contiguous area of memory where
-       // allocated data will be found.  The arena begins with a bitmap large
-       // enough to hold 4 bits per allocated word.
-       if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
-               // On a 64-bit machine, allocate from a single contiguous reservation.
-               // 128 GB (MaxMem) should be big enough for now.
-               //
-               // The code will work with the reservation at any address, but ask
-               // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
-               // Allocating a 128 GB region takes away 37 bits, and the amd64
-               // doesn't let us choose the top 17 bits, so that leaves the 11 bits
-               // in the middle of 0x00c0 for us to choose.  Choosing 0x00c0 means
-               // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
-               // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
-               // UTF-8 sequences, and they are otherwise as far away from 
-               // ff (likely a common byte) as possible.  If that fails, we try other 0xXXc0
-               // addresses.  An earlier attempt to use 0x11f8 caused out of memory errors
-               // on OS X during thread allocations.  0x00c0 causes conflicts with
-               // AddressSanitizer which reserves all memory up to 0x0100.
-               // These choices are both for debuggability and to reduce the
-               // odds of the conservative garbage collector not collecting memory
-               // because some non-pointer block of memory had a bit pattern
-               // that matched a memory address.
-               //
-               // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
-               // but it hardly matters: e0 00 is not valid UTF-8 either.
-               //
-               // If this fails we fall back to the 32 bit memory mechanism
-               arena_size = MaxMem;
-               bitmap_size = arena_size / (sizeof(void*)*8/4);
-               spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
-               spans_size = ROUND(spans_size, PageSize);
-               for(i = 0; i <= 0x7f; i++) {
-                       p = (void*)(i<<40 | 0x00c0ULL<<32);
-                       p_size = bitmap_size + spans_size + arena_size + PageSize;
-                       p = runtime·SysReserve(p, p_size, &reserved);
-                       if(p != nil)
-                               break;
-               }
-       }
-       if (p == nil) {
-               // On a 32-bit machine, we can't typically get away
-               // with a giant virtual address space reservation.
-               // Instead we map the memory information bitmap
-               // immediately after the data segment, large enough
-               // to handle another 2GB of mappings (256 MB),
-               // along with a reservation for another 512 MB of memory.
-               // When that gets used up, we'll start asking the kernel
-               // for any memory anywhere and hope it's in the 2GB
-               // following the bitmap (presumably the executable begins
-               // near the bottom of memory, so we'll have to use up
-               // most of memory before the kernel resorts to giving out
-               // memory before the beginning of the text segment).
-               //
-               // Alternatively we could reserve 512 MB bitmap, enough
-               // for 4GB of mappings, and then accept any memory the
-               // kernel threw at us, but normally that's a waste of 512 MB
-               // of address space, which is probably too much in a 32-bit world.
-               bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
-               arena_size = 512<<20;
-               spans_size = MaxArena32 / PageSize * sizeof(runtime·mheap.spans[0]);
-               if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
-                       bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
-                       arena_size = bitmap_size * 8;
-                       spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
-               }
-               spans_size = ROUND(spans_size, PageSize);
-
-               // SysReserve treats the address we ask for, end, as a hint,
-               // not as an absolute requirement.  If we ask for the end
-               // of the data segment but the operating system requires
-               // a little more space before we can start allocating, it will
-               // give out a slightly higher pointer.  Except QEMU, which
-               // is buggy, as usual: it won't adjust the pointer upward.
-               // So adjust it upward a little bit ourselves: 1/4 MB to get
-               // away from the running binary image and then round up
-               // to a MB boundary.
-               p = (byte*)ROUND((uintptr)runtime·end + (1<<18), 1<<20);
-               p_size = bitmap_size + spans_size + arena_size + PageSize;
-               p = runtime·SysReserve(p, p_size, &reserved);
-               if(p == nil)
-                       runtime·throw("runtime: cannot reserve arena virtual address space");
-       }
-
-       // PageSize can be larger than OS definition of page size,
-       // so SysReserve can give us a PageSize-unaligned pointer.
-       // To overcome this we ask for PageSize more and round up the pointer.
-       p1 = (byte*)ROUND((uintptr)p, PageSize);
-
-       runtime·mheap.spans = (MSpan**)p1;
-       runtime·mheap.bitmap = p1 + spans_size;
-       runtime·mheap.arena_start = p1 + spans_size + bitmap_size;
-       runtime·mheap.arena_used = runtime·mheap.arena_start;
-       runtime·mheap.arena_end = p + p_size;
-       runtime·mheap.arena_reserved = reserved;
-
-       if(((uintptr)runtime·mheap.arena_start & (PageSize-1)) != 0)
-               runtime·throw("misrounded allocation in mallocinit");
-
-       // Initialize the rest of the allocator.        
-       runtime·MHeap_Init(&runtime·mheap);
-       g->m->mcache = runtime·allocmcache();
-}
-
-void*
-runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
-{
-       byte *p, *p_end;
-       uintptr p_size;
-       bool reserved;
-
-       if(n > h->arena_end - h->arena_used) {
-               // We are in 32-bit mode, maybe we didn't use all possible address space yet.
-               // Reserve some more space.
-               byte *new_end;
-
-               p_size = ROUND(n + PageSize, 256<<20);
-               new_end = h->arena_end + p_size;
-               if(new_end <= h->arena_start + MaxArena32) {
-                       // TODO: It would be bad if part of the arena
-                       // is reserved and part is not.
-                       p = runtime·SysReserve(h->arena_end, p_size, &reserved);
-                       if(p == h->arena_end) {
-                               h->arena_end = new_end;
-                               h->arena_reserved = reserved;
-                       }
-                       else if(p+p_size <= h->arena_start + MaxArena32) {
-                               // Keep everything page-aligned.
-                               // Our pages are bigger than hardware pages.
-                               h->arena_end = p+p_size;
-                               h->arena_used = p + (-(uintptr)p&(PageSize-1));
-                               h->arena_reserved = reserved;
-                       } else {
-                               uint64 stat;
-                               stat = 0;
-                               runtime·SysFree(p, p_size, &stat);
-                       }
-               }
-       }
-       if(n <= h->arena_end - h->arena_used) {
-               // Keep taking from our reservation.
-               p = h->arena_used;
-               runtime·SysMap(p, n, h->arena_reserved, &mstats.heap_sys);
-               h->arena_used += n;
-               runtime·MHeap_MapBits(h);
-               runtime·MHeap_MapSpans(h);
-               if(raceenabled)
-                       runtime·racemapshadow(p, n);
-               
-               if(((uintptr)p & (PageSize-1)) != 0)
-                       runtime·throw("misrounded allocation in MHeap_SysAlloc");
-               return p;
-       }
-       
-       // If using 64-bit, our reservation is all we have.
-       if(h->arena_end - h->arena_start >= MaxArena32)
-               return nil;
-
-       // On 32-bit, once the reservation is gone we can
-       // try to get memory at a location chosen by the OS
-       // and hope that it is in the range we allocated bitmap for.
-       p_size = ROUND(n, PageSize) + PageSize;
-       p = runtime·sysAlloc(p_size, &mstats.heap_sys);
-       if(p == nil)
-               return nil;
-
-       if(p < h->arena_start || p+p_size - h->arena_start >= MaxArena32) {
-               runtime·printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
-                       p, h->arena_start, h->arena_start+MaxArena32);
-               runtime·SysFree(p, p_size, &mstats.heap_sys);
-               return nil;
-       }
-       
-       p_end = p + p_size;
-       p += -(uintptr)p & (PageSize-1);
-       if(p+n > h->arena_used) {
-               h->arena_used = p+n;
-               if(p_end > h->arena_end)
-                       h->arena_end = p_end;
-               runtime·MHeap_MapBits(h);
-               runtime·MHeap_MapSpans(h);
-               if(raceenabled)
-                       runtime·racemapshadow(p, n);
-       }
-       
-       if(((uintptr)p & (PageSize-1)) != 0)
-               runtime·throw("misrounded allocation in MHeap_SysAlloc");
-       return p;
-}
-
-void
-runtime·setFinalizer_m(void)
-{
-       FuncVal *fn;
-       void *arg;
-       uintptr nret;
-       Type *fint;
-       PtrType *ot;
-
-       fn = g->m->ptrarg[0];
-       arg = g->m->ptrarg[1];
-       nret = g->m->scalararg[0];
-       fint = g->m->ptrarg[2];
-       ot = g->m->ptrarg[3];
-       g->m->ptrarg[0] = nil;
-       g->m->ptrarg[1] = nil;
-       g->m->ptrarg[2] = nil;
-       g->m->ptrarg[3] = nil;
-
-       g->m->scalararg[0] = runtime·addfinalizer(arg, fn, nret, fint, ot);
-}
-
-void
-runtime·removeFinalizer_m(void)
-{
-       void *p;
-
-       p = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-       runtime·removefinalizer(p);
-}
-
-// mcallable cache refill
-void 
-runtime·mcacheRefill_m(void)
-{
-       runtime·MCache_Refill(g->m->mcache, (int32)g->m->scalararg[0]);
-}
-
-void
-runtime·largeAlloc_m(void)
-{
-       uintptr npages, size;
-       MSpan *s;
-       void *v;
-       int32 flag;
-
-       //runtime·printf("largeAlloc size=%D\n", g->m->scalararg[0]);
-       // Allocate directly from heap.
-       size = g->m->scalararg[0];
-       flag = (int32)g->m->scalararg[1];
-       if(size + PageSize < size)
-               runtime·throw("out of memory");
-       npages = size >> PageShift;
-       if((size & PageMask) != 0)
-               npages++;
-       s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
-       if(s == nil)
-               runtime·throw("out of memory");
-       s->limit = (byte*)(s->start<<PageShift) + size;
-       v = (void*)(s->start << PageShift);
-       // setup for mark sweep
-       runtime·markspan(v, 0, 0, true);
-       g->m->ptrarg[0] = s;
-}
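
The *_m trampolines deleted above pass arguments through g->m->scalararg and g->m->ptrarg around an onM call; the malloc.go hunks below replace that with onM(func(){...}) closures that capture the arguments directly. A sketch of that shape, with runOnM and refill standing in (hypothetically) for onM and the runtime helpers:

// Sketch only: the calling-convention change applied in the hunks below,
// shown with hypothetical stand-ins (runOnM, refill, cacheState) rather
// than the real runtime internals.
package main

import "fmt"

type cacheState struct{ sizeclass int32 }

// runOnM stands in for onM: run fn on the M (system) stack.
// Here it simply calls fn; the point is the closure-based argument passing.
func runOnM(fn func()) { fn() }

func refill(c *cacheState, sizeclass int32) { c.sizeclass = sizeclass }

func main() {
	c := &cacheState{}
	// The old style marshaled arguments through mp.scalararg/ptrarg slots
	// around onM(mcacheRefill_m); the new style closes over them directly:
	runOnM(func() {
		refill(c, 7)
	})
	fmt.Println("refilled size class", c.sizeclass)
}
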
index 8cf1c3d342664269703fe0fdc727851cddc308b0..a11724500fc97b6c1b26ad24334bf2c92121233b 100644 (file)
@@ -28,10 +28,11 @@ const (
        maxGCMask       = _MaxGCMask
        bitsDead        = _BitsDead
        bitsPointer     = _BitsPointer
+       bitsScalar      = _BitsScalar
 
        mSpanInUse = _MSpanInUse
 
-       concurrentSweep = _ConcurrentSweep != 0
+       concurrentSweep = _ConcurrentSweep
 )
 
 // Page number (address>>pageShift)
@@ -142,10 +143,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                        s = c.alloc[tinySizeClass]
                        v := s.freelist
                        if v == nil {
-                               mp := acquirem()
-                               mp.scalararg[0] = tinySizeClass
-                               onM(mcacheRefill_m)
-                               releasem(mp)
+                               onM(func() {
+                                       mCache_Refill(c, tinySizeClass)
+                               })
                                s = c.alloc[tinySizeClass]
                                v = s.freelist
                        }
@@ -173,10 +173,9 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                        s = c.alloc[sizeclass]
                        v := s.freelist
                        if v == nil {
-                               mp := acquirem()
-                               mp.scalararg[0] = uintptr(sizeclass)
-                               onM(mcacheRefill_m)
-                               releasem(mp)
+                               onM(func() {
+                                       mCache_Refill(c, int32(sizeclass))
+                               })
                                s = c.alloc[sizeclass]
                                v = s.freelist
                        }
@@ -193,13 +192,10 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
                }
                c.local_cachealloc += intptr(size)
        } else {
-               mp := acquirem()
-               mp.scalararg[0] = uintptr(size)
-               mp.scalararg[1] = uintptr(flags)
-               onM(largeAlloc_m)
-               s = (*mspan)(mp.ptrarg[0])
-               mp.ptrarg[0] = nil
-               releasem(mp)
+               var s *mspan
+               onM(func() {
+                       s = largeAlloc(size, uint32(flags))
+               })
                x = unsafe.Pointer(uintptr(s.start << pageShift))
                size = uintptr(s.elemsize)
        }
@@ -359,7 +355,7 @@ func newarray(typ *_type, n uintptr) unsafe.Pointer {
        if typ.kind&kindNoPointers != 0 {
                flags |= flagNoScan
        }
-       if int(n) < 0 || (typ.size > 0 && n > maxmem/uintptr(typ.size)) {
+       if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
                panic("runtime: allocation size out of range")
        }
        return mallocgc(uintptr(typ.size)*n, typ, flags)
@@ -585,10 +581,9 @@ func SetFinalizer(obj interface{}, finalizer interface{}) {
        ftyp := f._type
        if ftyp == nil {
                // switch to M stack and remove finalizer
-               mp := acquirem()
-               mp.ptrarg[0] = e.data
-               onM(removeFinalizer_m)
-               releasem(mp)
+               onM(func() {
+                       removefinalizer(e.data)
+               })
                return
        }
 
@@ -633,18 +628,11 @@ okarg:
        // make sure we have a finalizer goroutine
        createfing()
 
-       // switch to M stack to add finalizer record
-       mp := acquirem()
-       mp.ptrarg[0] = f.data
-       mp.ptrarg[1] = e.data
-       mp.scalararg[0] = nret
-       mp.ptrarg[2] = unsafe.Pointer(fint)
-       mp.ptrarg[3] = unsafe.Pointer(ot)
-       onM(setFinalizer_m)
-       if mp.scalararg[0] != 1 {
-               gothrow("runtime.SetFinalizer: finalizer already set")
-       }
-       releasem(mp)
+       onM(func() {
+               if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
+                       gothrow("runtime.SetFinalizer: finalizer already set")
+               }
+       })
 }
 
 // round n up to a multiple of a.  a must be a power of 2.
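
The newarray hunk above keeps the division-based overflow guard (n > _MaxMem/size), the same check makechan now uses; it rejects element counts whose product with the element size would wrap around before reaching mallocgc. A minimal sketch of that guard, with a made-up cap standing in for _MaxMem:

// Sketch only: the division-based overflow guard used by newarray and
// makechan above. maxAlloc here is a made-up cap standing in for _MaxMem.
package main

import "fmt"

const maxAlloc = uintptr(1) << 30 // hypothetical cap; the real _MaxMem is platform-dependent

// fitsAlloc reports whether n elements of size elem can be allocated
// without n*elem overflowing or exceeding the cap.
func fitsAlloc(n, elem uintptr) bool {
	if elem == 0 {
		return true
	}
	return n <= maxAlloc/elem
}

func main() {
	fmt.Println(fitsAlloc(1<<20, 64))       // well under the cap: true
	fmt.Println(fitsAlloc(^uintptr(0), 64)) // product would wrap: false
}
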
diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h
deleted file mode 100644 (file)
index adb8d3d..0000000
+++ /dev/null
@@ -1,621 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Memory allocator, based on tcmalloc.
-// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
-
-// The main allocator works in runs of pages.
-// Small allocation sizes (up to and including 32 kB) are
-// rounded to one of about 100 size classes, each of which
-// has its own free list of objects of exactly that size.
-// Any free page of memory can be split into a set of objects
-// of one size class, which are then managed using free list
-// allocators.
-//
-// The allocator's data structures are:
-//
-//     FixAlloc: a free-list allocator for fixed-size objects,
-//             used to manage storage used by the allocator.
-//     MHeap: the malloc heap, managed at page (4096-byte) granularity.
-//     MSpan: a run of pages managed by the MHeap.
-//     MCentral: a shared free list for a given size class.
-//     MCache: a per-thread (in Go, per-P) cache for small objects.
-//     MStats: allocation statistics.
-//
-// Allocating a small object proceeds up a hierarchy of caches:
-//
-//     1. Round the size up to one of the small size classes
-//        and look in the corresponding MCache free list.
-//        If the list is not empty, allocate an object from it.
-//        This can all be done without acquiring a lock.
-//
-//     2. If the MCache free list is empty, replenish it by
-//        taking a bunch of objects from the MCentral free list.
-//        Moving a bunch amortizes the cost of acquiring the MCentral lock.
-//
-//     3. If the MCentral free list is empty, replenish it by
-//        allocating a run of pages from the MHeap and then
-//        chopping that memory into objects of the given size.
-//        Allocating many objects amortizes the cost of locking
-//        the heap.
-//
-//     4. If the MHeap is empty or has no page runs large enough,
-//        allocate a new group of pages (at least 1MB) from the
-//        operating system.  Allocating a large run of pages
-//        amortizes the cost of talking to the operating system.
-//
-// Freeing a small object proceeds up the same hierarchy:
-//
-//     1. Look up the size class for the object and add it to
-//        the MCache free list.
-//
-//     2. If the MCache free list is too long or the MCache has
-//        too much memory, return some to the MCentral free lists.
-//
-//     3. If all the objects in a given span have returned to
-//        the MCentral list, return that span to the page heap.
-//
-//     4. If the heap has too much memory, return some to the
-//        operating system.
-//
-//     TODO(rsc): Step 4 is not implemented.
-//
-// Allocating and freeing a large object uses the page heap
-// directly, bypassing the MCache and MCentral free lists.
-//
-// The small objects on the MCache and MCentral free lists
-// may or may not be zeroed.  They are zeroed if and only if
-// the second word of the object is zero.  A span in the
-// page heap is zeroed unless s->needzero is set. When a span
-// is allocated to break into small objects, it is zeroed if needed
-// and s->needzero is set. There are two main benefits to delaying the
-// zeroing this way:
-//
-//     1. stack frames allocated from the small object lists
-//        or the page heap can avoid zeroing altogether.
-//     2. the cost of zeroing when reusing a small object is
-//        charged to the mutator, not the garbage collector.
-//
-// This C code was written with an eye toward translating to Go
-// in the future.  Methods have the form Type_Method(Type *t, ...).
-
-typedef struct MCentral        MCentral;
-typedef struct MHeap   MHeap;
-typedef struct MSpan   MSpan;
-typedef struct MStats  MStats;
-typedef struct MLink   MLink;
-typedef struct GCStats GCStats;
-
-enum
-{
-       PageShift       = 13,
-       PageSize        = 1<<PageShift,
-       PageMask        = PageSize - 1,
-};
-typedef        uintptr pageID;         // address >> PageShift
-
-enum
-{
-       // Computed constant.  The definition of MaxSmallSize and the
-       // algorithm in msize.c produce some number of different allocation
-       // size classes.  NumSizeClasses is that number.  It's needed here
-       // because there are static arrays of this length; when msize runs its
-       // size choosing algorithm it double-checks that NumSizeClasses agrees.
-       NumSizeClasses = 67,
-
-       // Tunable constants.
-       MaxSmallSize = 32<<10,
-
-       // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
-       TinySize = 16,
-       TinySizeClass = 2,
-
-       FixAllocChunk = 16<<10,         // Chunk size for FixAlloc
-       MaxMHeapList = 1<<(20 - PageShift),     // Maximum page length for fixed-size list in MHeap.
-       HeapAllocChunk = 1<<20,         // Chunk size for heap growth
-
-       // Per-P, per order stack segment cache size.
-       StackCacheSize = 32*1024,
-       // Number of orders that get caching.  Order 0 is FixedStack
-       // and each successive order is twice as large.
-       NumStackOrders = 3,
-
-       // Number of bits in page to span calculations (4k pages).
-       // On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
-       // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
-       // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
-#ifdef _64BIT
-#ifdef GOOS_windows
-       // Windows counts memory used by page table into committed memory
-       // of the process, so we can't reserve too much memory.
-       // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
-       MHeapMap_Bits = 35 - PageShift,
-#else
-       MHeapMap_Bits = 37 - PageShift,
-#endif
-#else
-       MHeapMap_Bits = 32 - PageShift,
-#endif
-
-       // Max number of threads to run garbage collection.
-       // 2, 3, and 4 are all plausible maximums depending
-       // on the hardware details of the machine.  The garbage
-       // collector scales well to 32 cpus.
-       MaxGcproc = 32,
-};
-
-// Maximum memory allocation size, a hint for callers.
-// This must be a #define instead of an enum because it
-// is so large.
-#ifdef _64BIT
-#define        MaxMem  (1ULL<<(MHeapMap_Bits+PageShift))       /* 128 GB or 32 GB */
-#else
-#define        MaxMem  ((uintptr)-1)
-#endif
-
-// A generic linked list of blocks.  (Typically the block is bigger than sizeof(MLink).)
-struct MLink
-{
-       MLink *next;
-};
-
-// sysAlloc obtains a large chunk of zeroed memory from the
-// operating system, typically on the order of a hundred kilobytes
-// or a megabyte.
-// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysUnused notifies the operating system that the contents
-// of the memory region are no longer needed and can be reused
-// for other purposes.
-// SysUsed notifies the operating system that the contents
-// of the memory region are needed again.
-//
-// SysFree returns it unconditionally; this is only used if
-// an out-of-memory error has been detected midway through
-// an allocation.  It is okay if SysFree is a no-op.
-//
-// SysReserve reserves address space without allocating memory.
-// If the pointer passed to it is non-nil, the caller wants the
-// reservation there, but SysReserve can still choose another
-// location if that one is unavailable.  On some systems and in some
-// cases SysReserve will simply check that the address space is
-// available and not actually reserve it.  If SysReserve returns
-// non-nil, it sets *reserved to true if the address space is
-// reserved, false if it has merely been checked.
-// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysMap maps previously reserved address space for use.
-// The reserved argument is true if the address space was really
-// reserved, not merely checked.
-//
-// SysFault marks a (already sysAlloc'd) region to fault
-// if accessed.  Used only for debugging the runtime.
-
-void*  runtime·sysAlloc(uintptr nbytes, uint64 *stat);
-void   runtime·SysFree(void *v, uintptr nbytes, uint64 *stat);
-void   runtime·SysUnused(void *v, uintptr nbytes);
-void   runtime·SysUsed(void *v, uintptr nbytes);
-void   runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat);
-void*  runtime·SysReserve(void *v, uintptr nbytes, bool *reserved);
-void   runtime·SysFault(void *v, uintptr nbytes);
-
-// FixAlloc is a simple free-list allocator for fixed size objects.
-// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
-// MCache and MSpan objects.
-//
-// Memory returned by FixAlloc_Alloc is not zeroed.
-// The caller is responsible for locking around FixAlloc calls.
-// Callers can keep state in the object but the first word is
-// smashed by freeing and reallocating.
-struct FixAlloc
-{
-       uintptr size;
-       void    (*first)(void *arg, byte *p);   // called first time p is returned
-       void*   arg;
-       MLink*  list;
-       byte*   chunk;
-       uint32  nchunk;
-       uintptr inuse;  // in-use bytes now
-       uint64* stat;
-};
-
-void   runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat);
-void*  runtime·FixAlloc_Alloc(FixAlloc *f);
-void   runtime·FixAlloc_Free(FixAlloc *f, void *p);
-
-
-// Statistics.
-// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
-struct MStats
-{
-       // General statistics.
-       uint64  alloc;          // bytes allocated and still in use
-       uint64  total_alloc;    // bytes allocated (even if freed)
-       uint64  sys;            // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
-       uint64  nlookup;        // number of pointer lookups
-       uint64  nmalloc;        // number of mallocs
-       uint64  nfree;  // number of frees
-
-       // Statistics about malloc heap.
-       // protected by mheap.lock
-       uint64  heap_alloc;     // bytes allocated and still in use
-       uint64  heap_sys;       // bytes obtained from system
-       uint64  heap_idle;      // bytes in idle spans
-       uint64  heap_inuse;     // bytes in non-idle spans
-       uint64  heap_released;  // bytes released to the OS
-       uint64  heap_objects;   // total number of allocated objects
-
-       // Statistics about allocation of low-level fixed-size structures.
-       // Protected by FixAlloc locks.
-       uint64  stacks_inuse;   // this number is included in heap_inuse above
-       uint64  stacks_sys;     // always 0 in mstats
-       uint64  mspan_inuse;    // MSpan structures
-       uint64  mspan_sys;
-       uint64  mcache_inuse;   // MCache structures
-       uint64  mcache_sys;
-       uint64  buckhash_sys;   // profiling bucket hash table
-       uint64  gc_sys;
-       uint64  other_sys;
-
-       // Statistics about garbage collector.
-       // Protected by mheap or stopping the world during GC.
-       uint64  next_gc;        // next GC (in heap_alloc time)
-       uint64  last_gc;        // last GC (in absolute time)
-       uint64  pause_total_ns;
-       uint64  pause_ns[256];  // circular buffer of recent GC pause lengths
-       uint64  pause_end[256]; // circular buffer of recent GC end times (nanoseconds since 1970)
-       uint32  numgc;
-       bool    enablegc;
-       bool    debuggc;
-
-       // Statistics about allocation size classes.
-       
-       struct MStatsBySize {
-               uint32 size;
-               uint64 nmalloc;
-               uint64 nfree;
-       } by_size[NumSizeClasses];
-       
-       uint64  tinyallocs;     // number of tiny allocations that didn't cause actual allocation; not exported to Go directly
-};
-
-
-#define mstats runtime·memstats
-extern MStats mstats;
-void   runtime·updatememstats(GCStats *stats);
-void   runtime·ReadMemStats(MStats *stats);
-
-// Size classes.  Computed and initialized by InitSizes.
-//
-// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
-//     1 <= sizeclass < NumSizeClasses, for n.
-//     Size class 0 is reserved to mean "not small".
-//
-// class_to_size[i] = largest size in class i
-// class_to_allocnpages[i] = number of pages to allocate when
-//     making new objects in class i
-
-int32  runtime·SizeToClass(int32);
-uintptr        runtime·roundupsize(uintptr);
-extern int32   runtime·class_to_size[NumSizeClasses];
-extern int32   runtime·class_to_allocnpages[NumSizeClasses];
-extern int8    runtime·size_to_class8[1024/8 + 1];
-extern int8    runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
-extern void    runtime·InitSizes(void);
-
-typedef struct MCacheList MCacheList;
-struct MCacheList
-{
-       MLink *list;
-       uint32 nlist;
-};
-
-typedef struct StackFreeList StackFreeList;
-struct StackFreeList
-{
-       MLink *list;  // linked list of free stacks
-       uintptr size; // total size of stacks in list
-};
-
-typedef struct SudoG SudoG;
-
-// Per-thread (in Go, per-P) cache for small objects.
-// No locking needed because it is per-thread (per-P).
-struct MCache
-{
-       // The following members are accessed on every malloc,
-       // so they are grouped here for better caching.
-       int32 next_sample;              // trigger heap sample after allocating this many bytes
-       intptr local_cachealloc;        // bytes allocated (or freed) from cache since last lock of heap
-       // Allocator cache for tiny objects w/o pointers.
-       // See "Tiny allocator" comment in malloc.goc.
-       byte*   tiny;
-       uintptr tinysize;
-       uintptr local_tinyallocs;       // number of tiny allocs not counted in other stats
-       // The rest is not accessed on every malloc.
-       MSpan*  alloc[NumSizeClasses];  // spans to allocate from
-
-       StackFreeList stackcache[NumStackOrders];
-
-       SudoG*  sudogcache;
-
-       void*   gcworkbuf;
-
-       // Local allocator stats, flushed during GC.
-       uintptr local_nlookup;          // number of pointer lookups
-       uintptr local_largefree;        // bytes freed for large objects (>MaxSmallSize)
-       uintptr local_nlargefree;       // number of frees for large objects (>MaxSmallSize)
-       uintptr local_nsmallfree[NumSizeClasses];       // number of frees for small objects (<=MaxSmallSize)
-};
-
-MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
-void   runtime·MCache_ReleaseAll(MCache *c);
-void   runtime·stackcache_clear(MCache *c);
-void   runtime·gcworkbuffree(void *b);
-
-enum
-{
-       KindSpecialFinalizer = 1,
-       KindSpecialProfile = 2,
-       // Note: The finalizer special must be first because if we're freeing
-       // an object, a finalizer special will cause the freeing operation
-       // to abort, and we want to keep the other special records around
-       // if that happens.
-};
-
-typedef struct Special Special;
-struct Special
-{
-       Special*        next;   // linked list in span
-       uint16          offset; // span offset of object
-       byte            kind;   // kind of Special
-};
-
-// The described object has a finalizer set for it.
-typedef struct SpecialFinalizer SpecialFinalizer;
-struct SpecialFinalizer
-{
-       Special         special;
-       FuncVal*        fn;
-       uintptr         nret;
-       Type*           fint;
-       PtrType*        ot;
-};
-
-// The described object is being heap profiled.
-typedef struct Bucket Bucket; // from mprof.h
-typedef struct SpecialProfile SpecialProfile;
-struct SpecialProfile
-{
-       Special special;
-       Bucket* b;
-};
-
-// An MSpan is a run of pages.
-enum
-{
-       MSpanInUse = 0, // allocated for garbage collected heap
-       MSpanStack,     // allocated for use by stack allocator
-       MSpanFree,
-       MSpanListHead,
-       MSpanDead,
-};
-struct MSpan
-{
-       MSpan   *next;          // in a span linked list
-       MSpan   *prev;          // in a span linked list
-       pageID  start;          // starting page number
-       uintptr npages;         // number of pages in span
-       MLink   *freelist;      // list of free objects
-       // sweep generation:
-       // if sweepgen == h->sweepgen - 2, the span needs sweeping
-       // if sweepgen == h->sweepgen - 1, the span is currently being swept
-       // if sweepgen == h->sweepgen, the span is swept and ready to use
-       // h->sweepgen is incremented by 2 after every GC
-       uint32  sweepgen;
-       uint16  ref;            // capacity - number of objects in freelist
-       uint8   sizeclass;      // size class
-       bool    incache;        // being used by an MCache
-       uint8   state;          // MSpanInUse etc
-       uint8   needzero;       // needs to be zeroed before allocation
-       uintptr elemsize;       // computed from sizeclass or from npages
-       int64   unusedsince;    // First time spotted by GC in MSpanFree state
-       uintptr npreleased;     // number of pages released to the OS
-       byte    *limit;         // end of data in span
-       Mutex   specialLock;    // guards specials list
-       Special *specials;      // linked list of special records sorted by offset.
-};
-
-void   runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages);
-void   runtime·MSpan_EnsureSwept(MSpan *span);
-bool   runtime·MSpan_Sweep(MSpan *span, bool preserve);
-
-// Every MSpan is in one doubly-linked list,
-// either one of the MHeap's free lists or one of the
-// MCentral's span lists.  We use empty MSpan structures as list heads.
-void   runtime·MSpanList_Init(MSpan *list);
-bool   runtime·MSpanList_IsEmpty(MSpan *list);
-void   runtime·MSpanList_Insert(MSpan *list, MSpan *span);
-void   runtime·MSpanList_InsertBack(MSpan *list, MSpan *span);
-void   runtime·MSpanList_Remove(MSpan *span); // from whatever list it is in
-
-
-// Central list of free objects of a given size.
-struct MCentral
-{
-       Mutex  lock;
-       int32 sizeclass;
-       MSpan nonempty; // list of spans with a free object
-       MSpan empty;    // list of spans with no free objects (or cached in an MCache)
-};
-
-void   runtime·MCentral_Init(MCentral *c, int32 sizeclass);
-MSpan* runtime·MCentral_CacheSpan(MCentral *c);
-void   runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
-bool   runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
-
-// Main malloc heap.
-// The heap itself is the "free[]" and "large" arrays,
-// but all the other global data is here too.
-struct MHeap
-{
-       Mutex  lock;
-       MSpan free[MaxMHeapList];       // free lists of given length
-       MSpan freelarge;                // free lists length >= MaxMHeapList
-       MSpan busy[MaxMHeapList];       // busy lists of large objects of given length
-       MSpan busylarge;                // busy lists of large objects length >= MaxMHeapList
-       MSpan **allspans;               // all spans out there
-       MSpan **gcspans;                // copy of allspans referenced by GC marker or sweeper
-       uint32  nspan;
-       uint32  nspancap;
-       uint32  sweepgen;               // sweep generation, see comment in MSpan
-       uint32  sweepdone;              // all spans are swept
-
-       // span lookup
-       MSpan** spans;
-       uintptr spans_mapped;
-
-       // range of addresses we might see in the heap
-       byte *bitmap;
-       uintptr bitmap_mapped;
-       byte *arena_start;
-       byte *arena_used;
-       byte *arena_end;
-       bool arena_reserved;
-
-       // central free lists for small size classes.
-       // the padding makes sure that the MCentrals are
-       // spaced CacheLineSize bytes apart, so that each MCentral.lock
-       // gets its own cache line.
-       struct MHeapCentral {
-               MCentral mcentral;
-               byte pad[CacheLineSize];
-       } central[NumSizeClasses];
-
-       FixAlloc spanalloc;     // allocator for Span*
-       FixAlloc cachealloc;    // allocator for MCache*
-       FixAlloc specialfinalizeralloc; // allocator for SpecialFinalizer*
-       FixAlloc specialprofilealloc;   // allocator for SpecialProfile*
-       Mutex speciallock; // lock for special record allocators.
-
-       // Malloc stats.
-       uint64 largefree;       // bytes freed for large objects (>MaxSmallSize)
-       uint64 nlargefree;      // number of frees for large objects (>MaxSmallSize)
-       uint64 nsmallfree[NumSizeClasses];      // number of frees for small objects (<=MaxSmallSize)
-};
-#define runtime·mheap runtime·mheap_
-extern MHeap runtime·mheap;
-
-void   runtime·MHeap_Init(MHeap *h);
-MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
-MSpan* runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
-void   runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
-void   runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
-MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
-MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
-void*  runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
-void   runtime·MHeap_MapBits(MHeap *h);
-void   runtime·MHeap_MapSpans(MHeap *h);
-void   runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit);
-
-void*  runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
-int32  runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
-uintptr        runtime·sweepone(void);
-void   runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
-void   runtime·unmarkspan(void *v, uintptr size);
-void   runtime·purgecachedstats(MCache*);
-void   runtime·tracealloc(void*, uintptr, Type*);
-void   runtime·tracefree(void*, uintptr);
-void   runtime·tracegc(void);
-
-int32  runtime·gcpercent;
-int32  runtime·readgogc(void);
-void   runtime·clearpools(void);
-
-enum
-{
-       // flags to malloc
-       FlagNoScan      = 1<<0, // GC doesn't have to scan object
-       FlagNoZero      = 1<<1, // don't zero memory
-};
-
-void   runtime·mProf_Malloc(void*, uintptr);
-void   runtime·mProf_Free(Bucket*, uintptr, bool);
-void   runtime·mProf_GC(void);
-void   runtime·iterate_memprof(void (**callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr));
-int32  runtime·gcprocs(void);
-void   runtime·helpgc(int32 nproc);
-void   runtime·gchelper(void);
-void   runtime·createfing(void);
-G*     runtime·wakefing(void);
-void   runtime·getgcmask(byte*, Type*, byte**, uintptr*);
-
-// NOTE: Layout known to queuefinalizer.
-typedef struct Finalizer Finalizer;
-struct Finalizer
-{
-       FuncVal *fn;    // function to call
-       void *arg;      // ptr to object
-       uintptr nret;   // bytes of return values from fn
-       Type *fint;     // type of first argument of fn
-       PtrType *ot;    // type of ptr to object
-};
-
-typedef struct FinBlock FinBlock;
-struct FinBlock
-{
-       FinBlock *alllink;
-       FinBlock *next;
-       int32 cnt;
-       int32 cap;
-       Finalizer fin[1];
-};
-extern Mutex   runtime·finlock;       // protects the following variables
-extern G*      runtime·fing;
-extern bool    runtime·fingwait;
-extern bool    runtime·fingwake;
-extern FinBlock        *runtime·finq;         // list of finalizers that are to be executed
-extern FinBlock        *runtime·finc;         // cache of free blocks
-
-void   runtime·setprofilebucket_m(void);
-
-bool   runtime·addfinalizer(void*, FuncVal *fn, uintptr, Type*, PtrType*);
-void   runtime·removefinalizer(void*);
-void   runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot);
-bool   runtime·freespecial(Special *s, void *p, uintptr size, bool freed);
-
-// Information from the compiler about the layout of stack frames.
-struct BitVector
-{
-       int32 n; // # of bits
-       uint8 *bytedata;
-};
-typedef struct StackMap StackMap;
-struct StackMap
-{
-       int32 n; // number of bitmaps
-       int32 nbit; // number of bits in each bitmap
-       uint8 bytedata[]; // bitmaps, each starting on a 32-bit boundary
-};
-// Returns pointer map data for the given stackmap index
-// (the index is encoded in PCDATA_StackMapIndex).
-BitVector      runtime·stackmapdata(StackMap *stackmap, int32 n);
-
-extern BitVector       runtime·gcdatamask;
-extern BitVector       runtime·gcbssmask;
-
-// defined in mgc0.go
-void   runtime·gc_m_ptr(Eface*);
-void   runtime·gc_g_ptr(Eface*);
-void   runtime·gc_itab_ptr(Eface*);
-
-void  runtime·setgcpercent_m(void);
-
-// Value we use to mark dead pointers when GODEBUG=gcdead=1.
-#define PoisonGC ((uintptr)0xf969696969696969ULL)
-#define PoisonStack ((uintptr)0x6868686868686868ULL)
diff --git a/src/runtime/malloc1.go b/src/runtime/malloc1.go
new file mode 100644 (file)
index 0000000..db02d9c
--- /dev/null
@@ -0,0 +1,318 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// See malloc.h for overview.
+//
+// TODO(rsc): double-check stats.
+
+package runtime
+
+import "unsafe"
+
+const _MaxArena32 = 2 << 30
+
+// For use by Go. If it were a C enum it would be made available automatically,
+// but the value of MaxMem is too large for enum.
+// XXX - uintptr runtime·maxmem = MaxMem;
+
+func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
+       _g_ := getg()
+
+       _g_.m.mcache.local_nlookup++
+       if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
+               // purge cache stats to prevent overflow
+               lock(&mheap_.lock)
+               purgecachedstats(_g_.m.mcache)
+               unlock(&mheap_.lock)
+       }
+
+       s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
+       if sp != nil {
+               *sp = s
+       }
+       if s == nil {
+               if base != nil {
+                       *base = 0
+               }
+               if size != nil {
+                       *size = 0
+               }
+               return 0
+       }
+
+       p := uintptr(s.start) << _PageShift
+       if s.sizeclass == 0 {
+               // Large object.
+               if base != nil {
+                       *base = p
+               }
+               if size != nil {
+                       *size = s.npages << _PageShift
+               }
+               return 1
+       }
+
+       n := s.elemsize
+       if base != nil {
+               i := (uintptr(v) - uintptr(p)) / n
+               *base = p + i*n
+       }
+       if size != nil {
+               *size = n
+       }
+
+       return 1
+}
+
+//go:nosplit
+func purgecachedstats(c *mcache) {
+       // Protected by either heap or GC lock.
+       h := &mheap_
+       memstats.heap_alloc += uint64(c.local_cachealloc)
+       c.local_cachealloc = 0
+       memstats.tinyallocs += uint64(c.local_tinyallocs)
+       c.local_tinyallocs = 0
+       memstats.nlookup += uint64(c.local_nlookup)
+       c.local_nlookup = 0
+       h.largefree += uint64(c.local_largefree)
+       c.local_largefree = 0
+       h.nlargefree += uint64(c.local_nlargefree)
+       c.local_nlargefree = 0
+       for i := 0; i < len(c.local_nsmallfree); i++ {
+               h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
+               c.local_nsmallfree[i] = 0
+       }
+}
+
+func mallocinit() {
+       initSizes()
+
+       if class_to_size[_TinySizeClass] != _TinySize {
+               gothrow("bad TinySizeClass")
+       }
+
+       var p, arena_size, bitmap_size, spans_size, p_size, limit uintptr
+       var reserved bool
+
+       // limit = runtime.memlimit();
+       // See https://code.google.com/p/go/issues/detail?id=5049
+       // TODO(rsc): Fix after 1.1.
+       limit = 0
+
+       // Set up the allocation arena, a contiguous area of memory where
+       // allocated data will be found.  The arena begins with a bitmap large
+       // enough to hold 4 bits per allocated word.
+       if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
+               // On a 64-bit machine, allocate from a single contiguous reservation.
+               // 128 GB (MaxMem) should be big enough for now.
+               //
+               // The code will work with the reservation at any address, but ask
+               // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
+               // Allocating a 128 GB region takes away 37 bits, and the amd64
+               // doesn't let us choose the top 17 bits, so that leaves the 11 bits
+               // in the middle of 0x00c0 for us to choose.  Choosing 0x00c0 means
+               // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
+               // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+               // UTF-8 sequences, and they are otherwise as far away from
+               // ff (likely a common byte) as possible.  If that fails, we try other 0xXXc0
+               // addresses.  An earlier attempt to use 0x11f8 caused out of memory errors
+               // on OS X during thread allocations.  0x00c0 causes conflicts with
+               // AddressSanitizer which reserves all memory up to 0x0100.
+               // These choices are both for debuggability and to reduce the
+               // odds of the conservative garbage collector not collecting memory
+               // because some non-pointer block of memory had a bit pattern
+               // that matched a memory address.
+               //
+               // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
+               // but it hardly matters: e0 00 is not valid UTF-8 either.
+               //
+               // If this fails we fall back to the 32 bit memory mechanism
+               arena_size = round(_MaxMem, _PageSize)
+               bitmap_size = arena_size / (ptrSize * 8 / 4)
+               spans_size = arena_size / _PageSize * ptrSize
+               spans_size = round(spans_size, _PageSize)
+               for i := 0; i <= 0x7f; i++ {
+                       p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
+                       p_size = bitmap_size + spans_size + arena_size + _PageSize
+                       p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
+                       if p != 0 {
+                               break
+                       }
+               }
+       }
+
+       if p == 0 {
+               // On a 32-bit machine, we can't typically get away
+               // with a giant virtual address space reservation.
+               // Instead we map the memory information bitmap
+               // immediately after the data segment, large enough
+               // to handle another 2GB of mappings (256 MB),
+               // along with a reservation for another 512 MB of memory.
+               // When that gets used up, we'll start asking the kernel
+               // for any memory anywhere and hope it's in the 2GB
+               // following the bitmap (presumably the executable begins
+               // near the bottom of memory, so we'll have to use up
+               // most of memory before the kernel resorts to giving out
+               // memory before the beginning of the text segment).
+               //
+               // Alternatively we could reserve 512 MB bitmap, enough
+               // for 4GB of mappings, and then accept any memory the
+               // kernel threw at us, but normally that's a waste of 512 MB
+               // of address space, which is probably too much in a 32-bit world.
+               bitmap_size = _MaxArena32 / (ptrSize * 8 / 4)
+               arena_size = 512 << 20
+               spans_size = _MaxArena32 / _PageSize * ptrSize
+               if limit > 0 && arena_size+bitmap_size+spans_size > limit {
+                       bitmap_size = (limit / 9) &^ ((1 << _PageShift) - 1)
+                       arena_size = bitmap_size * 8
+                       spans_size = arena_size / _PageSize * ptrSize
+               }
+               spans_size = round(spans_size, _PageSize)
+
+               // SysReserve treats the address we ask for, end, as a hint,
+               // not as an absolute requirement.  If we ask for the end
+               // of the data segment but the operating system requires
+               // a little more space before we can start allocating, it will
+               // give out a slightly higher pointer.  Except QEMU, which
+               // is buggy, as usual: it won't adjust the pointer upward.
+               // So adjust it upward a little bit ourselves: 1/4 MB to get
+               // away from the running binary image and then round up
+               // to a MB boundary.
+               p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
+               p_size = bitmap_size + spans_size + arena_size + _PageSize
+               p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
+               if p == 0 {
+                       gothrow("runtime: cannot reserve arena virtual address space")
+               }
+       }
+
+       // PageSize can be larger than OS definition of page size,
+       // so SysReserve can give us a PageSize-unaligned pointer.
+       // To overcome this we ask for PageSize more and round up the pointer.
+       p1 := round(p, _PageSize)
+
+       mheap_.spans = (**mspan)(unsafe.Pointer(p1))
+       mheap_.bitmap = p1 + spans_size
+       mheap_.arena_start = p1 + (spans_size + bitmap_size)
+       mheap_.arena_used = mheap_.arena_start
+       mheap_.arena_end = p + p_size
+       mheap_.arena_reserved = reserved
+
+       if mheap_.arena_start&(_PageSize-1) != 0 {
+               println("bad pagesize", hex(p), hex(p1), hex(spans_size), hex(bitmap_size), hex(_PageSize), "start", hex(mheap_.arena_start))
+               gothrow("misrounded allocation in mallocinit")
+       }
+
+       // Initialize the rest of the allocator.
+       mHeap_Init(&mheap_, spans_size)
+       _g_ := getg()
+       _g_.m.mcache = allocmcache()
+}
+
+func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
+       if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
+               // We are in 32-bit mode, maybe we didn't use all possible address space yet.
+               // Reserve some more space.
+               p_size := round(n+_PageSize, 256<<20)
+               new_end := h.arena_end + p_size
+               if new_end <= h.arena_start+_MaxArena32 {
+                       // TODO: It would be bad if part of the arena
+                       // is reserved and part is not.
+                       var reserved bool
+                       p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
+                       if p == h.arena_end {
+                               h.arena_end = new_end
+                               h.arena_reserved = reserved
+                       } else if p+p_size <= h.arena_start+_MaxArena32 {
+                               // Keep everything page-aligned.
+                               // Our pages are bigger than hardware pages.
+                               h.arena_end = p + p_size
+                               h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
+                               h.arena_reserved = reserved
+                       } else {
+                               var stat uint64
+                               sysFree((unsafe.Pointer)(p), p_size, &stat)
+                       }
+               }
+       }
+
+       if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
+               // Keep taking from our reservation.
+               p := h.arena_used
+               sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
+               h.arena_used += n
+               mHeap_MapBits(h)
+               mHeap_MapSpans(h)
+               if raceenabled {
+                       racemapshadow((unsafe.Pointer)(p), n)
+               }
+
+               if uintptr(p)&(_PageSize-1) != 0 {
+                       gothrow("misrounded allocation in MHeap_SysAlloc")
+               }
+               return (unsafe.Pointer)(p)
+       }
+
+       // If using 64-bit, our reservation is all we have.
+       if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
+               return nil
+       }
+
+       // On 32-bit, once the reservation is gone we can
+       // try to get memory at a location chosen by the OS
+       // and hope that it is in the range we allocated bitmap for.
+       p_size := round(n, _PageSize) + _PageSize
+       p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
+       if p == 0 {
+               return nil
+       }
+
+       if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
+               print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
+               sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
+               return nil
+       }
+
+       p_end := p + p_size
+       p += -p & (_PageSize - 1)
+       if uintptr(p)+n > uintptr(h.arena_used) {
+               h.arena_used = p + n
+               if p_end > h.arena_end {
+                       h.arena_end = p_end
+               }
+               mHeap_MapBits(h)
+               mHeap_MapSpans(h)
+               if raceenabled {
+                       racemapshadow((unsafe.Pointer)(p), n)
+               }
+       }
+
+       if uintptr(p)&(_PageSize-1) != 0 {
+               gothrow("misrounded allocation in MHeap_SysAlloc")
+       }
+       return (unsafe.Pointer)(p)
+}
+
+var end struct{}
+
+func largeAlloc(size uintptr, flag uint32) *mspan {
+       // print("largeAlloc size=", size, "\n")
+
+       if size+_PageSize < size {
+               gothrow("out of memory")
+       }
+       npages := size >> _PageShift
+       if size&_PageMask != 0 {
+               npages++
+       }
+       s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
+       if s == nil {
+               gothrow("out of memory")
+       }
+       s.limit = uintptr(s.start)<<_PageShift + size
+       v := unsafe.Pointer(uintptr(s.start) << _PageShift)
+       // setup for mark sweep
+       markspan(v, 0, 0, true)
+       return s
+}
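
largeAlloc above rounds the request up to whole pages with a shift plus a remainder check before asking mHeap_Alloc for that many pages. A standalone sketch of the rounding, assuming the 8 KB page size (_PageShift = 13) defined in these files:

// Sketch only: the page-count rounding used by largeAlloc above,
// assuming the 8 KB page size (_PageShift = 13) from these files.
package main

import "fmt"

const (
	pageShift = 13
	pageSize  = 1 << pageShift
	pageMask  = pageSize - 1
)

// pagesFor returns the number of whole pages needed for size bytes.
func pagesFor(size uintptr) uintptr {
	npages := size >> pageShift
	if size&pageMask != 0 {
		npages++ // a partial trailing page still costs a whole page
	}
	return npages
}

func main() {
	for _, size := range []uintptr{1, pageSize, pageSize + 1, 100 << 10} {
		fmt.Println(size, "bytes ->", pagesFor(size), "pages")
	}
}
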
diff --git a/src/runtime/malloc2.go b/src/runtime/malloc2.go
new file mode 100644 (file)
index 0000000..e4bd963
--- /dev/null
@@ -0,0 +1,475 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Memory allocator, based on tcmalloc.
+// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
+// The main allocator works in runs of pages.
+// Small allocation sizes (up to and including 32 kB) are
+// rounded to one of about 100 size classes, each of which
+// has its own free list of objects of exactly that size.
+// Any free page of memory can be split into a set of objects
+// of one size class, which are then managed using free list
+// allocators.
+//
+// The allocator's data structures are:
+//
+//     FixAlloc: a free-list allocator for fixed-size objects,
+//             used to manage storage used by the allocator.
+//     MHeap: the malloc heap, managed at page (8192-byte) granularity.
+//     MSpan: a run of pages managed by the MHeap.
+//     MCentral: a shared free list for a given size class.
+//     MCache: a per-thread (in Go, per-P) cache for small objects.
+//     MStats: allocation statistics.
+//
+// Allocating a small object proceeds up a hierarchy of caches:
+//
+//     1. Round the size up to one of the small size classes
+//        and look in the corresponding MCache free list.
+//        If the list is not empty, allocate an object from it.
+//        This can all be done without acquiring a lock.
+//
+//     2. If the MCache free list is empty, replenish it by
+//        taking a bunch of objects from the MCentral free list.
+//        Moving a bunch amortizes the cost of acquiring the MCentral lock.
+//
+//     3. If the MCentral free list is empty, replenish it by
+//        allocating a run of pages from the MHeap and then
+//        chopping that memory into objects of the given size.
+//        Allocating many objects amortizes the cost of locking
+//        the heap.
+//
+//     4. If the MHeap is empty or has no page runs large enough,
+//        allocate a new group of pages (at least 1MB) from the
+//        operating system.  Allocating a large run of pages
+//        amortizes the cost of talking to the operating system.
+//
+// Freeing a small object proceeds up the same hierarchy:
+//
+//     1. Look up the size class for the object and add it to
+//        the MCache free list.
+//
+//     2. If the MCache free list is too long or the MCache has
+//        too much memory, return some to the MCentral free lists.
+//
+//     3. If all the objects in a given span have returned to
+//        the MCentral list, return that span to the page heap.
+//
+//     4. If the heap has too much memory, return some to the
+//        operating system.
+//
+//     TODO(rsc): Step 4 is not implemented.
+//
+// Allocating and freeing a large object uses the page heap
+// directly, bypassing the MCache and MCentral free lists.
+//
+// The small objects on the MCache and MCentral free lists
+// may or may not be zeroed.  They are zeroed if and only if
+// the second word of the object is zero.  A span in the
+// page heap is zeroed unless s->needzero is set. When a span
+// is allocated to break into small objects, it is zeroed if needed
+// and s->needzero is set. There are two main benefits to delaying the
+// zeroing this way:
+//
+//     1. stack frames allocated from the small object lists
+//        or the page heap can avoid zeroing altogether.
+//     2. the cost of zeroing when reusing a small object is
+//        charged to the mutator, not the garbage collector.
+//
+// This C code was written with an eye toward translating to Go
+// in the future.  Methods have the form Type_Method(Type *t, ...).
+
+const (
+       _PageShift = 13
+       _PageSize  = 1 << _PageShift
+       _PageMask  = _PageSize - 1
+)
+
+const (
+       // _64bit = 1 on 64-bit systems, 0 on 32-bit systems
+       _64bit = 1 << (^uintptr(0) >> 63) / 2
+
+       // Computed constant.  The definition of MaxSmallSize and the
+       // algorithm in msize.c produce some number of different allocation
+       // size classes.  NumSizeClasses is that number.  It's needed here
+       // because there are static arrays of this length; when msize runs its
+       // size choosing algorithm it double-checks that NumSizeClasses agrees.
+       _NumSizeClasses = 67
+
+       // Tunable constants.
+       _MaxSmallSize = 32 << 10
+
+       // Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
+       _TinySize      = 16
+       _TinySizeClass = 2
+
+       _FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
+       _MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+       _HeapAllocChunk = 1 << 20                // Chunk size for heap growth
+
+       // Per-P, per order stack segment cache size.
+       _StackCacheSize = 32 * 1024
+
+       // Number of orders that get caching.  Order 0 is FixedStack
+       // and each successive order is twice as large.
+       _NumStackOrders = 3
+
+       // Number of bits in page to span calculations (4k pages).
+       // On Windows 64-bit we limit the arena to 32GB or 35 bits.
+       // Windows counts memory used by page table into committed memory
+       // of the process, so we can't reserve too much memory.
+       // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
+       // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
+       // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
+       _MHeapMap_TotalBits = (_64bit*_Windows)*35 + (_64bit*(1-_Windows))*37 + (1-_64bit)*32
+       _MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift
+
+       _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
+
+       // Max number of threads to run garbage collection.
+       // 2, 3, and 4 are all plausible maximums depending
+       // on the hardware details of the machine.  The garbage
+       // collector scales well to 32 cpus.
+       _MaxGcproc = 32
+)
+
+// A generic linked list of blocks.  (Typically the block is bigger than sizeof(MLink).)
+type mlink struct {
+       next *mlink
+}
+
+// sysAlloc obtains a large chunk of zeroed memory from the
+// operating system, typically on the order of a hundred kilobytes
+// or a megabyte.
+// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysUnused notifies the operating system that the contents
+// of the memory region are no longer needed and can be reused
+// for other purposes.
+// SysUsed notifies the operating system that the contents
+// of the memory region are needed again.
+//
+// SysFree returns the memory to the operating system unconditionally; this is only used if
+// an out-of-memory error has been detected midway through
+// an allocation.  It is okay if SysFree is a no-op.
+//
+// SysReserve reserves address space without allocating memory.
+// If the pointer passed to it is non-nil, the caller wants the
+// reservation there, but SysReserve can still choose another
+// location if that one is unavailable.  On some systems and in some
+// cases SysReserve will simply check that the address space is
+// available and not actually reserve it.  If SysReserve returns
+// non-nil, it sets *reserved to true if the address space is
+// reserved, false if it has merely been checked.
+// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysMap maps previously reserved address space for use.
+// The reserved argument is true if the address space was really
+// reserved, not merely checked.
+//
+// SysFault marks a (already sysAlloc'd) region to fault
+// if accessed.  Used only for debugging the runtime.
+
+// FixAlloc is a simple free-list allocator for fixed size objects.
+// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
+// MCache and MSpan objects.
+//
+// Memory returned by FixAlloc_Alloc is not zeroed.
+// The caller is responsible for locking around FixAlloc calls.
+// Callers can keep state in the object but the first word is
+// smashed by freeing and reallocating.
+type fixalloc struct {
+       size   uintptr
+       first  unsafe.Pointer // go func(unsafe.Pointer, unsafe.Pointer); f(arg, p) called first time p is returned
+       arg    unsafe.Pointer
+       list   *mlink
+       chunk  *byte
+       nchunk uint32
+       inuse  uintptr // in-use bytes now
+       stat   *uint64
+}
+
+// Statistics.
+// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
+type mstats struct {
+       // General statistics.
+       alloc       uint64 // bytes allocated and still in use
+       total_alloc uint64 // bytes allocated (even if freed)
+       sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
+       nlookup     uint64 // number of pointer lookups
+       nmalloc     uint64 // number of mallocs
+       nfree       uint64 // number of frees
+
+       // Statistics about malloc heap.
+       // protected by mheap.lock
+       heap_alloc    uint64 // bytes allocated and still in use
+       heap_sys      uint64 // bytes obtained from system
+       heap_idle     uint64 // bytes in idle spans
+       heap_inuse    uint64 // bytes in non-idle spans
+       heap_released uint64 // bytes released to the os
+       heap_objects  uint64 // total number of allocated objects
+
+       // Statistics about allocation of low-level fixed-size structures.
+       // Protected by FixAlloc locks.
+       stacks_inuse uint64 // this number is included in heap_inuse above
+       stacks_sys   uint64 // always 0 in mstats
+       mspan_inuse  uint64 // mspan structures
+       mspan_sys    uint64
+       mcache_inuse uint64 // mcache structures
+       mcache_sys   uint64
+       buckhash_sys uint64 // profiling bucket hash table
+       gc_sys       uint64
+       other_sys    uint64
+
+       // Statistics about garbage collector.
+       // Protected by mheap or stopping the world during GC.
+       next_gc        uint64 // next gc (in heap_alloc time)
+       last_gc        uint64 // last gc (in absolute time)
+       pause_total_ns uint64
+       pause_ns       [256]uint64 // circular buffer of recent gc pause lengths
+       pause_end      [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
+       numgc          uint32
+       enablegc       bool
+       debuggc        bool
+
+       // Statistics about allocation size classes.
+
+       by_size [_NumSizeClasses]struct {
+               size    uint32
+               nmalloc uint64
+               nfree   uint64
+       }
+
+       tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+}
+
+var memstats mstats
+
+// Size classes.  Computed and initialized by InitSizes.
+//
+// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+//     1 <= sizeclass < NumSizeClasses, for n.
+//     Size class 0 is reserved to mean "not small".
+//
+// class_to_size[i] = largest size in class i
+// class_to_allocnpages[i] = number of pages to allocate when
+//     making new objects in class i
+
+var class_to_size [_NumSizeClasses]int32
+var class_to_allocnpages [_NumSizeClasses]int32
+var size_to_class8 [1024/8 + 1]int8
+var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+
+type mcachelist struct {
+       list  *mlink
+       nlist uint32
+}
+
+type stackfreelist struct {
+       list *mlink  // linked list of free stacks
+       size uintptr // total size of stacks in list
+}
+
+// Per-thread (in Go, per-P) cache for small objects.
+// No locking needed because it is per-thread (per-P).
+type mcache struct {
+       // The following members are accessed on every malloc,
+       // so they are grouped here for better caching.
+       next_sample      int32  // trigger heap sample after allocating this many bytes
+       local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
+       // Allocator cache for tiny objects w/o pointers.
+       // See "Tiny allocator" comment in malloc.goc.
+       tiny             *byte
+       tinysize         uintptr
+       local_tinyallocs uintptr // number of tiny allocs not counted in other stats
+
+       // The rest is not accessed on every malloc.
+       alloc [_NumSizeClasses]*mspan // spans to allocate from
+
+       stackcache [_NumStackOrders]stackfreelist
+
+       sudogcache *sudog
+
+       gcworkbuf unsafe.Pointer
+
+       // Local allocator stats, flushed during GC.
+       local_nlookup    uintptr                  // number of pointer lookups
+       local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
+       local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
+       local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+}
+
+const (
+       _KindSpecialFinalizer = 1
+       _KindSpecialProfile   = 2
+       // Note: The finalizer special must be first because if we're freeing
+       // an object, a finalizer special will cause the freeing operation
+       // to abort, and we want to keep the other special records around
+       // if that happens.
+)
+
+type special struct {
+       next   *special // linked list in span
+       offset uint16   // span offset of object
+       kind   byte     // kind of special
+}
+
+// The described object has a finalizer set for it.
+type specialfinalizer struct {
+       special special
+       fn      *funcval
+       nret    uintptr
+       fint    *_type
+       ot      *ptrtype
+}
+
+// The described object is being heap profiled.
+type specialprofile struct {
+       special special
+       b       *bucket
+}
+
+// An MSpan is a run of pages.
+const (
+       _MSpanInUse = iota // allocated for garbage collected heap
+       _MSpanStack        // allocated for use by stack allocator
+       _MSpanFree
+       _MSpanListHead
+       _MSpanDead
+)
+
+type mspan struct {
+       next     *mspan  // in a span linked list
+       prev     *mspan  // in a span linked list
+       start    pageID  // starting page number
+       npages   uintptr // number of pages in span
+       freelist *mlink  // list of free objects
+       // sweep generation:
+       // if sweepgen == h->sweepgen - 2, the span needs sweeping
+       // if sweepgen == h->sweepgen - 1, the span is currently being swept
+       // if sweepgen == h->sweepgen, the span is swept and ready to use
+       // h->sweepgen is incremented by 2 after every GC
+       sweepgen    uint32
+       ref         uint16   // capacity - number of objects in freelist
+       sizeclass   uint8    // size class
+       incache     bool     // being used by an mcache
+       state       uint8    // mspaninuse etc
+       needzero    uint8    // needs to be zeroed before allocation
+       elemsize    uintptr  // computed from sizeclass or from npages
+       unusedsince int64    // first time spotted by gc in mspanfree state
+       npreleased  uintptr  // number of pages released to the os
+       limit       uintptr  // end of data in span
+       speciallock mutex    // guards specials list
+       specials    *special // linked list of special records sorted by offset.
+}
+
+// Every MSpan is in one doubly-linked list,
+// either one of the MHeap's free lists or one of the
+// MCentral's span lists.  We use empty MSpan structures as list heads.
+
+// Central list of free objects of a given size.
+type mcentral struct {
+       lock      mutex
+       sizeclass int32
+       nonempty  mspan // list of spans with a free object
+       empty     mspan // list of spans with no free objects (or cached in an mcache)
+}
+
+// Main malloc heap.
+// The heap itself is the "free[]" and "large" arrays,
+// but all the other global data is here too.
+type mheap struct {
+       lock      mutex
+       free      [_MaxMHeapList]mspan // free lists of given length
+       freelarge mspan                // free lists length >= _MaxMHeapList
+       busy      [_MaxMHeapList]mspan // busy lists of large objects of given length
+       busylarge mspan                // busy lists of large objects length >= _MaxMHeapList
+       allspans  **mspan              // all spans out there
+       gcspans   **mspan              // copy of allspans referenced by gc marker or sweeper
+       nspan     uint32
+       sweepgen  uint32 // sweep generation, see comment in mspan
+       sweepdone uint32 // all spans are swept
+
+       // span lookup
+       spans        **mspan
+       spans_mapped uintptr
+
+       // range of addresses we might see in the heap
+       bitmap         uintptr
+       bitmap_mapped  uintptr
+       arena_start    uintptr
+       arena_used     uintptr
+       arena_end      uintptr
+       arena_reserved bool
+
+       // central free lists for small size classes.
+       // the padding makes sure that the MCentrals are
+       // spaced CacheLineSize bytes apart, so that each MCentral.lock
+       // gets its own cache line.
+       central [_NumSizeClasses]struct {
+               mcentral mcentral
+               pad      [_CacheLineSize]byte
+       }
+
+       spanalloc             fixalloc // allocator for span*
+       cachealloc            fixalloc // allocator for mcache*
+       specialfinalizeralloc fixalloc // allocator for specialfinalizer*
+       specialprofilealloc   fixalloc // allocator for specialprofile*
+       speciallock           mutex    // lock for special record allocators.
+
+       // Malloc stats.
+       largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
+       nlargefree uint64                  // number of frees for large objects (>maxsmallsize)
+       nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
+}
+
+var mheap_ mheap
+
+const (
+       // flags to malloc
+       _FlagNoScan = 1 << 0 // GC doesn't have to scan object
+       _FlagNoZero = 1 << 1 // don't zero memory
+)
+
+// NOTE: Layout known to queuefinalizer.
+type finalizer struct {
+       fn   *funcval       // function to call
+       arg  unsafe.Pointer // ptr to object
+       nret uintptr        // bytes of return values from fn
+       fint *_type         // type of first argument of fn
+       ot   *ptrtype       // type of ptr to object
+}
+
+type finblock struct {
+       alllink *finblock
+       next    *finblock
+       cnt     int32
+       cap     int32
+       fin     [1]finalizer
+}
+
+// Information from the compiler about the layout of stack frames.
+type bitvector struct {
+       n        int32 // # of bits
+       bytedata *uint8
+}
+
+type stackmap struct {
+       n        int32   // number of bitmaps
+       nbit     int32   // number of bits in each bitmap
+       bytedata [0]byte // bitmaps, each starting on a 32-bit boundary
+}
+
+// Returns pointer map data for the given stackmap index
+// (the index is encoded in PCDATA_StackMapIndex).
+
+// defined in mgc0.go
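
Most of the structures above (fixalloc, mcache, the mspan freelists) hang off the one-word mlink type: an intrusive singly-linked free list whose next pointer is stored inside the free object itself, so an empty list costs nothing and push/pop are two pointer writes. A minimal, type-safe sketch of that pattern follows; the node and freeList names are invented for illustration, and the real runtime threads mlink through raw memory rather than through typed Go values.

package main

import "fmt"

// node plays the role of a freed object: the "next" pointer lives in the
// object itself, and the payload is simply reused when the node is handed
// out again.
type node struct {
	next *node
	buf  [64]byte // pretend payload
}

type freeList struct {
	head *node
}

func (l *freeList) push(n *node) {
	n.next = l.head
	l.head = n
}

func (l *freeList) pop() *node {
	n := l.head
	if n != nil {
		l.head = n.next
		n.next = nil
	}
	return n
}

func main() {
	var l freeList
	for i := 0; i < 3; i++ {
		l.push(new(node))
	}
	for n := l.pop(); n != nil; n = l.pop() {
		fmt.Printf("reused node at %p\n", n)
	}
}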
diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c
deleted file mode 100644 (file)
index 5fdbe32..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Per-P malloc cache for small objects.
-//
-// See malloc.h for an overview.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-extern volatile intgo runtime·MemProfileRate;
-
-// dummy MSpan that contains no free objects.
-MSpan runtime·emptymspan;
-
-MCache*
-runtime·allocmcache(void)
-{
-       intgo rate;
-       MCache *c;
-       int32 i;
-
-       runtime·lock(&runtime·mheap.lock);
-       c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
-       runtime·unlock(&runtime·mheap.lock);
-       runtime·memclr((byte*)c, sizeof(*c));
-       for(i = 0; i < NumSizeClasses; i++)
-               c->alloc[i] = &runtime·emptymspan;
-
-       // Set first allocation sample size.
-       rate = runtime·MemProfileRate;
-       if(rate > 0x3fffffff)   // make 2*rate not overflow
-               rate = 0x3fffffff;
-       if(rate != 0)
-               c->next_sample = runtime·fastrand1() % (2*rate);
-
-       return c;
-}
-
-static void
-freemcache(MCache *c)
-{
-       runtime·MCache_ReleaseAll(c);
-       runtime·stackcache_clear(c);
-       runtime·gcworkbuffree(c->gcworkbuf);
-       runtime·lock(&runtime·mheap.lock);
-       runtime·purgecachedstats(c);
-       runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
-       runtime·unlock(&runtime·mheap.lock);
-}
-
-static void
-freemcache_m(void)
-{
-       MCache *c;
-
-       c = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-       freemcache(c);
-}
-
-void
-runtime·freemcache(MCache *c)
-{
-       void (*fn)(void);
-
-       g->m->ptrarg[0] = c;
-       fn = freemcache_m;
-       runtime·onM(&fn);
-}
-
-// Gets a span that has a free object in it and assigns it
-// to be the cached span for the given sizeclass.  Returns this span.
-MSpan*
-runtime·MCache_Refill(MCache *c, int32 sizeclass)
-{
-       MSpan *s;
-
-       g->m->locks++;
-       // Return the current cached span to the central lists.
-       s = c->alloc[sizeclass];
-       if(s->freelist != nil)
-               runtime·throw("refill on a nonempty span");
-       if(s != &runtime·emptymspan)
-               s->incache = false;
-
-       // Get a new cached span from the central lists.
-       s = runtime·MCentral_CacheSpan(&runtime·mheap.central[sizeclass].mcentral);
-       if(s == nil)
-               runtime·throw("out of memory");
-       if(s->freelist == nil) {
-               runtime·printf("%d %d\n", s->ref, (int32)((s->npages << PageShift) / s->elemsize));
-               runtime·throw("empty span");
-       }
-       c->alloc[sizeclass] = s;
-       g->m->locks--;
-       return s;
-}
-
-void
-runtime·MCache_ReleaseAll(MCache *c)
-{
-       int32 i;
-       MSpan *s;
-
-       for(i=0; i<NumSizeClasses; i++) {
-               s = c->alloc[i];
-               if(s != &runtime·emptymspan) {
-                       runtime·MCentral_UncacheSpan(&runtime·mheap.central[i].mcentral, s);
-                       c->alloc[i] = &runtime·emptymspan;
-               }
-       }
-}
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
new file mode 100644 (file)
index 0000000..7482bc0
--- /dev/null
@@ -0,0 +1,86 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Per-P malloc cache for small objects.
+//
+// See malloc.h for an overview.
+
+package runtime
+
+import "unsafe"
+
+// dummy MSpan that contains no free objects.
+var emptymspan mspan
+
+func allocmcache() *mcache {
+       lock(&mheap_.lock)
+       c := (*mcache)(fixAlloc_Alloc(&mheap_.cachealloc))
+       unlock(&mheap_.lock)
+       memclr(unsafe.Pointer(c), unsafe.Sizeof(*c))
+       for i := 0; i < _NumSizeClasses; i++ {
+               c.alloc[i] = &emptymspan
+       }
+
+       // Set first allocation sample size.
+       rate := MemProfileRate
+       if rate > 0x3fffffff { // make 2*rate not overflow
+               rate = 0x3fffffff
+       }
+       if rate != 0 {
+               c.next_sample = int32(int(fastrand1()) % (2 * rate))
+       }
+
+       return c
+}
+
+func freemcache(c *mcache) {
+       onM(func() {
+               mCache_ReleaseAll(c)
+               stackcache_clear(c)
+               gcworkbuffree(c.gcworkbuf)
+               lock(&mheap_.lock)
+               purgecachedstats(c)
+               fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c))
+               unlock(&mheap_.lock)
+       })
+}
+
+// Gets a span that has a free object in it and assigns it
+// to be the cached span for the given sizeclass.  Returns this span.
+func mCache_Refill(c *mcache, sizeclass int32) *mspan {
+       _g_ := getg()
+
+       _g_.m.locks++
+       // Return the current cached span to the central lists.
+       s := c.alloc[sizeclass]
+       if s.freelist != nil {
+               gothrow("refill on a nonempty span")
+       }
+       if s != &emptymspan {
+               s.incache = false
+       }
+
+       // Get a new cached span from the central lists.
+       s = mCentral_CacheSpan(&mheap_.central[sizeclass].mcentral)
+       if s == nil {
+               gothrow("out of memory")
+       }
+       if s.freelist == nil {
+               println(s.ref, (s.npages<<_PageShift)/s.elemsize)
+               gothrow("empty span")
+       }
+       c.alloc[sizeclass] = s
+       _g_.m.locks--
+       return s
+}
+
+func mCache_ReleaseAll(c *mcache) {
+       for i := 0; i < _NumSizeClasses; i++ {
+               s := c.alloc[i]
+               if s != &emptymspan {
+                       mCentral_UncacheSpan(&mheap_.central[i].mcentral, s)
+                       c.alloc[i] = &emptymspan
+               }
+       }
+}
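
mCache_Refill above is the lock-amortization step described in the malloc2.go overview: a per-P cache goes to the locked MCentral only when its local span is exhausted, and it takes a whole span's worth of objects at once. The toy below sketches that two-level idea with ordinary Go values and a mutex; all names (central, cache, takeBatch) are illustrative, and a batch of plain ints stands in for a span of objects.

package main

import (
	"fmt"
	"sync"
)

// central is the shared, lock-protected pool; the lock is paid once per
// batch rather than once per allocation.
type central struct {
	mu   sync.Mutex
	objs []int
}

func (c *central) takeBatch(n int) []int {
	c.mu.Lock()
	defer c.mu.Unlock()
	if n > len(c.objs) {
		n = len(c.objs)
	}
	batch := c.objs[:n]
	c.objs = c.objs[n:]
	return batch
}

// cache is used by a single worker only, so no locking is needed on the
// fast path, mirroring the per-P mcache.
type cache struct {
	local []int
	src   *central
}

func (c *cache) alloc() (int, bool) {
	if len(c.local) == 0 {
		c.local = c.src.takeBatch(8) // refill amortizes the central lock
	}
	if len(c.local) == 0 {
		return 0, false
	}
	v := c.local[0]
	c.local = c.local[1:]
	return v, true
}

func main() {
	ctr := &central{objs: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}
	ca := &cache{src: ctr}
	for {
		v, ok := ca.alloc()
		if !ok {
			break
		}
		fmt.Println("got", v)
	}
}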
diff --git a/src/runtime/mcentral.c b/src/runtime/mcentral.c
deleted file mode 100644 (file)
index fe6bcfe..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Central free lists.
-//
-// See malloc.h for an overview.
-//
-// The MCentral doesn't actually contain the list of free objects; the MSpan does.
-// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
-// and those that are completely allocated (c->empty).
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-static MSpan* MCentral_Grow(MCentral *c);
-
-// Initialize a single central free list.
-void
-runtime·MCentral_Init(MCentral *c, int32 sizeclass)
-{
-       c->sizeclass = sizeclass;
-       runtime·MSpanList_Init(&c->nonempty);
-       runtime·MSpanList_Init(&c->empty);
-}
-
-// Allocate a span to use in an MCache.
-MSpan*
-runtime·MCentral_CacheSpan(MCentral *c)
-{
-       MSpan *s;
-       int32 cap, n;
-       uint32 sg;
-
-       runtime·lock(&c->lock);
-       sg = runtime·mheap.sweepgen;
-retry:
-       for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
-               if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-                       runtime·MSpanList_Remove(s);
-                       runtime·MSpanList_InsertBack(&c->empty, s);
-                       runtime·unlock(&c->lock);
-                       runtime·MSpan_Sweep(s, true);
-                       goto havespan;
-               }
-               if(s->sweepgen == sg-1) {
-                       // the span is being swept by background sweeper, skip
-                       continue;
-               }
-               // we have a nonempty span that does not require sweeping, allocate from it
-               runtime·MSpanList_Remove(s);
-               runtime·MSpanList_InsertBack(&c->empty, s);
-               runtime·unlock(&c->lock);
-               goto havespan;
-       }
-
-       for(s = c->empty.next; s != &c->empty; s = s->next) {
-               if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-                       // we have an empty span that requires sweeping,
-                       // sweep it and see if we can free some space in it
-                       runtime·MSpanList_Remove(s);
-                       // swept spans are at the end of the list
-                       runtime·MSpanList_InsertBack(&c->empty, s);
-                       runtime·unlock(&c->lock);
-                       runtime·MSpan_Sweep(s, true);
-                       if(s->freelist != nil)
-                               goto havespan;
-                       runtime·lock(&c->lock);
-                       // the span is still empty after sweep
-                       // it is already in the empty list, so just retry
-                       goto retry;
-               }
-               if(s->sweepgen == sg-1) {
-                       // the span is being swept by background sweeper, skip
-                       continue;
-               }
-               // already swept empty span,
-               // all subsequent ones must also be either swept or in process of sweeping
-               break;
-       }
-       runtime·unlock(&c->lock);
-
-       // Replenish central list if empty.
-       s = MCentral_Grow(c);
-       if(s == nil)
-               return nil;
-       runtime·lock(&c->lock);
-       runtime·MSpanList_InsertBack(&c->empty, s);
-       runtime·unlock(&c->lock);
-
-havespan:
-       // At this point s is a non-empty span, queued at the end of the empty list,
-       // c is unlocked.
-       cap = (s->npages << PageShift) / s->elemsize;
-       n = cap - s->ref;
-       if(n == 0)
-               runtime·throw("empty span");
-       if(s->freelist == nil)
-               runtime·throw("freelist empty");
-       s->incache = true;
-       return s;
-}
-
-// Return span from an MCache.
-void
-runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
-{
-       int32 cap, n;
-
-       runtime·lock(&c->lock);
-
-       s->incache = false;
-
-       if(s->ref == 0)
-               runtime·throw("uncaching full span");
-
-       cap = (s->npages << PageShift) / s->elemsize;
-       n = cap - s->ref;
-       if(n > 0) {
-               runtime·MSpanList_Remove(s);
-               runtime·MSpanList_Insert(&c->nonempty, s);
-       }
-       runtime·unlock(&c->lock);
-}
-
-// Free n objects from a span s back into the central free list c.
-// Called during sweep.
-// Returns true if the span was returned to heap.  Sets sweepgen to
-// the latest generation.
-// If preserve=true, don't return the span to heap nor relink in MCentral lists;
-// caller takes care of it.
-bool
-runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
-{
-       bool wasempty;
-
-       if(s->incache)
-               runtime·throw("freespan into cached span");
-
-       // Add the objects back to s's free list.
-       wasempty = s->freelist == nil;
-       end->next = s->freelist;
-       s->freelist = start;
-       s->ref -= n;
-
-       if(preserve) {
-               // preserve is set only when called from MCentral_CacheSpan above,
-               // the span must be in the empty list.
-               if(s->next == nil)
-                       runtime·throw("can't preserve unlinked span");
-               runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
-               return false;
-       }
-
-       runtime·lock(&c->lock);
-
-       // Move to nonempty if necessary.
-       if(wasempty) {
-               runtime·MSpanList_Remove(s);
-               runtime·MSpanList_Insert(&c->nonempty, s);
-       }
-
-       // delay updating sweepgen until here.  This is the signal that
-       // the span may be used in an MCache, so it must come after the
-       // linked list operations above (actually, just after the
-       // lock of c above.)
-       runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
-
-       if(s->ref != 0) {
-               runtime·unlock(&c->lock);
-               return false;
-       }
-
-       // s is completely freed, return it to the heap.
-       runtime·MSpanList_Remove(s);
-       s->needzero = 1;
-       s->freelist = nil;
-       runtime·unlock(&c->lock);
-       runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
-       runtime·MHeap_Free(&runtime·mheap, s, 0);
-       return true;
-}
-
-// Fetch a new span from the heap and carve into objects for the free list.
-static MSpan*
-MCentral_Grow(MCentral *c)
-{
-       uintptr size, npages, i, n;
-       MLink **tailp, *v;
-       byte *p;
-       MSpan *s;
-
-       npages = runtime·class_to_allocnpages[c->sizeclass];
-       size = runtime·class_to_size[c->sizeclass];
-       n = (npages << PageShift) / size;
-       s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
-       if(s == nil)
-               return nil;
-
-       // Carve span into sequence of blocks.
-       tailp = &s->freelist;
-       p = (byte*)(s->start << PageShift);
-       s->limit = p + size*n;
-       for(i=0; i<n; i++) {
-               v = (MLink*)p;
-               *tailp = v;
-               tailp = &v->next;
-               p += size;
-       }
-       *tailp = nil;
-       runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
-       return s;
-}
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
new file mode 100644 (file)
index 0000000..0d172a0
--- /dev/null
@@ -0,0 +1,199 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Central free lists.
+//
+// See malloc.h for an overview.
+//
+// The MCentral doesn't actually contain the list of free objects; the MSpan does.
+// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
+// and those that are completely allocated (c->empty).
+
+package runtime
+
+import "unsafe"
+
+// Initialize a single central free list.
+func mCentral_Init(c *mcentral, sizeclass int32) {
+       c.sizeclass = sizeclass
+       mSpanList_Init(&c.nonempty)
+       mSpanList_Init(&c.empty)
+}
+
+// Allocate a span to use in an MCache.
+func mCentral_CacheSpan(c *mcentral) *mspan {
+       lock(&c.lock)
+       sg := mheap_.sweepgen
+retry:
+       var s *mspan
+       for s = c.nonempty.next; s != &c.nonempty; s = s.next {
+               if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+                       mSpanList_Remove(s)
+                       mSpanList_InsertBack(&c.empty, s)
+                       unlock(&c.lock)
+                       mSpan_Sweep(s, true)
+                       goto havespan
+               }
+               if s.sweepgen == sg-1 {
+                       // the span is being swept by background sweeper, skip
+                       continue
+               }
+               // we have a nonempty span that does not require sweeping, allocate from it
+               mSpanList_Remove(s)
+               mSpanList_InsertBack(&c.empty, s)
+               unlock(&c.lock)
+               goto havespan
+       }
+
+       for s = c.empty.next; s != &c.empty; s = s.next {
+               if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+                       // we have an empty span that requires sweeping,
+                       // sweep it and see if we can free some space in it
+                       mSpanList_Remove(s)
+                       // swept spans are at the end of the list
+                       mSpanList_InsertBack(&c.empty, s)
+                       unlock(&c.lock)
+                       mSpan_Sweep(s, true)
+                       if s.freelist != nil {
+                               goto havespan
+                       }
+                       lock(&c.lock)
+                       // the span is still empty after sweep
+                       // it is already in the empty list, so just retry
+                       goto retry
+               }
+               if s.sweepgen == sg-1 {
+                       // the span is being swept by background sweeper, skip
+                       continue
+               }
+               // already swept empty span,
+               // all subsequent ones must also be either swept or in process of sweeping
+               break
+       }
+       unlock(&c.lock)
+
+       // Replenish central list if empty.
+       s = mCentral_Grow(c)
+       if s == nil {
+               return nil
+       }
+       lock(&c.lock)
+       mSpanList_InsertBack(&c.empty, s)
+       unlock(&c.lock)
+
+       // At this point s is a non-empty span, queued at the end of the empty list,
+       // c is unlocked.
+havespan:
+       cap := int32((s.npages << _PageShift) / s.elemsize)
+       n := cap - int32(s.ref)
+       if n == 0 {
+               gothrow("empty span")
+       }
+       if s.freelist == nil {
+               gothrow("freelist empty")
+       }
+       s.incache = true
+       return s
+}
+
+// Return span from an MCache.
+func mCentral_UncacheSpan(c *mcentral, s *mspan) {
+       lock(&c.lock)
+
+       s.incache = false
+
+       if s.ref == 0 {
+               gothrow("uncaching full span")
+       }
+
+       cap := int32((s.npages << _PageShift) / s.elemsize)
+       n := cap - int32(s.ref)
+       if n > 0 {
+               mSpanList_Remove(s)
+               mSpanList_Insert(&c.nonempty, s)
+       }
+       unlock(&c.lock)
+}
+
+// Free n objects from a span s back into the central free list c.
+// Called during sweep.
+// Returns true if the span was returned to heap.  Sets sweepgen to
+// the latest generation.
+// If preserve=true, don't return the span to heap nor relink in MCentral lists;
+// caller takes care of it.
+func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, preserve bool) bool {
+       if s.incache {
+               gothrow("freespan into cached span")
+       }
+
+       // Add the objects back to s's free list.
+       wasempty := s.freelist == nil
+       end.next = s.freelist
+       s.freelist = start
+       s.ref -= uint16(n)
+
+       if preserve {
+               // preserve is set only when called from MCentral_CacheSpan above,
+               // the span must be in the empty list.
+               if s.next == nil {
+                       gothrow("can't preserve unlinked span")
+               }
+               atomicstore(&s.sweepgen, mheap_.sweepgen)
+               return false
+       }
+
+       lock(&c.lock)
+
+       // Move to nonempty if necessary.
+       if wasempty {
+               mSpanList_Remove(s)
+               mSpanList_Insert(&c.nonempty, s)
+       }
+
+       // delay updating sweepgen until here.  This is the signal that
+       // the span may be used in an MCache, so it must come after the
+       // linked list operations above (actually, just after the
+       // lock of c above.)
+       atomicstore(&s.sweepgen, mheap_.sweepgen)
+
+       if s.ref != 0 {
+               unlock(&c.lock)
+               return false
+       }
+
+       // s is completely freed, return it to the heap.
+       mSpanList_Remove(s)
+       s.needzero = 1
+       s.freelist = nil
+       unlock(&c.lock)
+       unmarkspan(uintptr(s.start)<<_PageShift, s.npages<<_PageShift)
+       mHeap_Free(&mheap_, s, 0)
+       return true
+}
+
+// Fetch a new span from the heap and carve into objects for the free list.
+func mCentral_Grow(c *mcentral) *mspan {
+       npages := uintptr(class_to_allocnpages[c.sizeclass])
+       size := uintptr(class_to_size[c.sizeclass])
+       n := (npages << _PageShift) / size
+
+       s := mHeap_Alloc(&mheap_, npages, c.sizeclass, false, true)
+       if s == nil {
+               return nil
+       }
+
+       // Carve span into sequence of blocks.
+       tailp := &s.freelist
+       p := uintptr(s.start << _PageShift)
+       s.limit = p + size*n
+       for i := uintptr(0); i < n; i++ {
+               v := (*mlink)(unsafe.Pointer(p))
+               *tailp = v
+               tailp = &v.next
+               p += size
+       }
+       *tailp = nil
+       markspan(unsafe.Pointer(uintptr(s.start)<<_PageShift), size, n, size*n < s.npages<<_PageShift)
+       return s
+}
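
The carving loop at the end of mCentral_Grow walks a freshly allocated span in elemsize steps and links every block onto the span's free list. Stripped of the unsafe pointer arithmetic, the step looks like the sketch below, which records block offsets in a plain byte slice; the 8192-byte span and 144-byte element size are example values chosen for illustration, not a statement about which size classes exist.

package main

import "fmt"

// carve splits a span into as many fixed-size blocks as fit and returns
// their offsets; the real code threads an intrusive mlink list through the
// memory itself instead.
func carve(span []byte, elemSize int) []int {
	var offsets []int
	for off := 0; off+elemSize <= len(span); off += elemSize {
		offsets = append(offsets, off)
	}
	return offsets
}

func main() {
	span := make([]byte, 8192) // one 8 KB page
	blocks := carve(span, 144) // example element size
	fmt.Printf("%d-byte span -> %d blocks of 144 bytes, %d bytes unused\n",
		len(span), len(blocks), len(span)-len(blocks)*144)
}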
diff --git a/src/runtime/mem.go b/src/runtime/mem.go
index e6f1eb0e64f1da9f4f141fe6813bc9cdd4570b4e..6bd250d380bb335ce397ceb5bc32f8782c197b2b 100644 (file)
@@ -59,7 +59,11 @@ type MemStats struct {
        }
 }
 
-var sizeof_C_MStats uintptr // filled in by malloc.goc
+// The size of the trailing by_size array differs between Go and C,
+// and all data after by_size is local to the runtime, not exported.
+// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks the size of the Go struct is.
+var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
 
 func init() {
        var memStats MemStats
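
The sizeof_C_MStats expression above computes the size of the struct prefix that C still shares with Go: the offset of the trailing by_size array plus 61 of its elements. The snippet below shows the same unsafe.Offsetof/unsafe.Sizeof arithmetic on a simplified stand-in struct; the field names and types here are illustrative, not the runtime's, and only the 61 is taken from the line above.

package main

import (
	"fmt"
	"unsafe"
)

// stats is a simplified stand-in for the mstats/MemStats pair: a few scalar
// fields followed by a trailing per-size-class array.
type stats struct {
	alloc  uint64
	sys    uint64
	bySize [67]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}
}

func main() {
	var s stats
	// Size of the prefix up to and including the first 61 array elements.
	prefix := unsafe.Offsetof(s.bySize) + 61*unsafe.Sizeof(s.bySize[0])
	fmt.Println("full struct:", unsafe.Sizeof(s), "bytes; shared prefix:", prefix, "bytes")
}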
diff --git a/src/runtime/mem_darwin.c b/src/runtime/mem_darwin.c
deleted file mode 100644 (file)
index bf3ede5..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-       void *v;
-
-       v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-       if(v < (void*)4096)
-               return nil;
-       runtime·xadd64(stat, n);
-       return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-       // Linux's MADV_DONTNEED is like BSD's MADV_FREE.
-       runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-       USED(v);
-       USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-       runtime·xadd64(stat, -(uint64)n);
-       runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-       runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-       void *p;
-
-       *reserved = true;
-       p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-       if(p < (void*)4096)
-               return nil;
-       return p;
-}
-
-enum
-{
-       ENOMEM = 12,
-};
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-       void *p;
-       
-       USED(reserved);
-
-       runtime·xadd64(stat, n);
-       p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-       if(p == (void*)ENOMEM)
-               runtime·throw("runtime: out of memory");
-       if(p != v)
-               runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
new file mode 100644 (file)
index 0000000..1bee933
--- /dev/null
@@ -0,0 +1,58 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+       v := (unsafe.Pointer)(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+       if uintptr(v) < 4096 {
+               return nil
+       }
+       xadd64(stat, int64(n))
+       return v
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+       // Linux's MADV_DONTNEED is like BSD's MADV_FREE.
+       madvise(v, n, _MADV_FREE)
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+       xadd64(stat, -int64(n))
+       munmap(v, n)
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+       mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+       *reserved = true
+       p := (unsafe.Pointer)(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+       if uintptr(p) < 4096 {
+               return nil
+       }
+       return p
+}
+
+const (
+       _ENOMEM = 12
+)
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+       xadd64(stat, int64(n))
+       p := (unsafe.Pointer)(mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0))
+       if uintptr(p) == _ENOMEM {
+               gothrow("runtime: out of memory")
+       }
+       if p != v {
+               gothrow("runtime: cannot map pages in arena address space")
+       }
+}
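
sysReserve and sysMap above implement the reserve-then-commit pattern: grab address space with PROT_NONE (cheap, nothing committed), then later map the parts that are actually needed as readable and writable. The sketch below shows a user-level analogue using the standard syscall package, committing with Mprotect instead of a second MAP_FIXED mmap; it is illustrative only and is not how the runtime itself commits memory.

// +build darwin linux

package main

import (
	"fmt"
	"syscall"
)

func main() {
	const size = 1 << 20

	// "Reserve": map a region with no access rights, so it occupies
	// address space but no committed memory.
	mem, err := syscall.Mmap(-1, 0, size,
		syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		panic(err)
	}
	defer syscall.Munmap(mem)

	// "Commit" the first 64 KB by flipping its protection to read/write.
	if err := syscall.Mprotect(mem[:64<<10], syscall.PROT_READ|syscall.PROT_WRITE); err != nil {
		panic(err)
	}
	mem[0] = 42
	fmt.Println("first committed byte:", mem[0])
}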
diff --git a/src/runtime/mem_linux.c b/src/runtime/mem_linux.c
deleted file mode 100644 (file)
index bfb4056..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-       _PAGE_SIZE = 4096,
-       EACCES = 13,
-};
-
-static int32
-addrspace_free(void *v, uintptr n)
-{
-       int32 errval;
-       uintptr chunk;
-       uintptr off;
-       
-       // NOTE: vec must be just 1 byte long here.
-       // Mincore returns ENOMEM if any of the pages are unmapped,
-       // but we want to know that all of the pages are unmapped.
-       // To make these the same, we can only ask about one page
-       // at a time. See golang.org/issue/7476.
-       static byte vec[1];
-
-       for(off = 0; off < n; off += chunk) {
-               chunk = _PAGE_SIZE * sizeof vec;
-               if(chunk > (n - off))
-                       chunk = n - off;
-               errval = runtime·mincore((int8*)v + off, chunk, vec);
-               // ENOMEM means unmapped, which is what we want.
-               // Anything else we assume means the pages are mapped.
-               if (errval != -ENOMEM)
-                       return 0;
-       }
-       return 1;
-}
-
-static void *
-mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
-{
-       void *p;
-
-       p = runtime·mmap(v, n, prot, flags, fd, offset);
-       if(p != v && addrspace_free(v, n)) {
-               // On some systems, mmap ignores v without
-               // MAP_FIXED, so retry if the address space is free.
-               if(p > (void*)4096)
-                       runtime·munmap(p, n);
-               p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
-       }
-       return p;
-}
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-       void *p;
-
-       p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-       if(p < (void*)4096) {
-               if(p == (void*)EACCES) {
-                       runtime·printf("runtime: mmap: access denied\n");
-                       runtime·printf("if you're running SELinux, enable execmem for this process.\n");
-                       runtime·exit(2);
-               }
-               if(p == (void*)EAGAIN) {
-                       runtime·printf("runtime: mmap: too much locked memory (check 'ulimit -l').\n");
-                       runtime·exit(2);
-               }
-               return nil;
-       }
-       runtime·xadd64(stat, n);
-       return p;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-       runtime·madvise(v, n, MADV_DONTNEED);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-       USED(v);
-       USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-       runtime·xadd64(stat, -(uint64)n);
-       runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-       runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-       void *p;
-
-       // On 64-bit, people with ulimit -v set complain if we reserve too
-       // much address space.  Instead, assume that the reservation is okay
-       // if we can reserve at least 64K and check the assumption in SysMap.
-       // Only user-mode Linux (UML) rejects these requests.
-       if(sizeof(void*) == 8 && n > 1LL<<32) {
-               p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-               if (p != v) {
-                       if(p >= (void*)4096)
-                               runtime·munmap(p, 64<<10);
-                       return nil;
-               }
-               runtime·munmap(p, 64<<10);
-               *reserved = false;
-               return v;
-       }
-
-       p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-       if((uintptr)p < 4096)
-               return nil;
-       *reserved = true;
-       return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-       void *p;
-       
-       runtime·xadd64(stat, n);
-
-       // On 64-bit, we don't actually have v reserved, so tread carefully.
-       if(!reserved) {
-               p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-               if(p == (void*)ENOMEM)
-                       runtime·throw("runtime: out of memory");
-               if(p != v) {
-                       runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-                       runtime·throw("runtime: address space conflict");
-               }
-               return;
-       }
-
-       p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-       if(p == (void*)ENOMEM)
-               runtime·throw("runtime: out of memory");
-       if(p != v)
-               runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
new file mode 100644 (file)
index 0000000..0ef6eea
--- /dev/null
@@ -0,0 +1,135 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+       _PAGE_SIZE = 4096
+       _EACCES    = 13
+)
+
+// NOTE: vec must be just 1 byte long here.
+// Mincore returns ENOMEM if any of the pages are unmapped,
+// but we want to know that all of the pages are unmapped.
+// To make these the same, we can only ask about one page
+// at a time. See golang.org/issue/7476.
+var addrspace_vec [1]byte
+
+func addrspace_free(v unsafe.Pointer, n uintptr) bool {
+       var chunk uintptr
+       for off := uintptr(0); off < n; off += chunk {
+               chunk = _PAGE_SIZE * uintptr(len(addrspace_vec))
+               if chunk > (n - off) {
+                       chunk = n - off
+               }
+               errval := mincore(unsafe.Pointer(uintptr(v)+off), chunk, &addrspace_vec[0])
+               // ENOMEM means unmapped, which is what we want.
+               // Anything else we assume means the pages are mapped.
+               if errval != -_ENOMEM {
+                       return false
+               }
+       }
+       return true
+}
+
+func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
+       p := mmap(v, n, prot, flags, fd, offset)
+       if p != v && addrspace_free(v, n) {
+               // On some systems, mmap ignores v without
+               // MAP_FIXED, so retry if the address space is free.
+               if uintptr(p) > 4096 {
+                       munmap(p, n)
+               }
+               p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
+       }
+       return p
+}
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+       p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+       if uintptr(p) < 4096 {
+               if uintptr(p) == _EACCES {
+                       print("runtime: mmap: access denied\n")
+                       print("if you're running SELinux, enable execmem for this process.\n")
+                       exit(2)
+               }
+               if uintptr(p) == _EAGAIN {
+                       print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
+                       exit(2)
+               }
+               return nil
+       }
+       xadd64(stat, int64(n))
+       return p
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+       madvise(v, n, _MADV_DONTNEED)
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+       xadd64(stat, -int64(n))
+       munmap(v, n)
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+       mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+       // On 64-bit, people with ulimit -v set complain if we reserve too
+       // much address space.  Instead, assume that the reservation is okay
+       // if we can reserve at least 64K and check the assumption in SysMap.
+       // Only user-mode Linux (UML) rejects these requests.
+       if ptrSize == 7 && uint64(n) > 1<<32 {
+               p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+               if p != v {
+                       if uintptr(p) >= 4096 {
+                               munmap(p, 64<<10)
+                       }
+                       return nil
+               }
+               munmap(p, 64<<10)
+               *reserved = false
+               return v
+       }
+
+       p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+       if uintptr(p) < 4096 {
+               return nil
+       }
+       *reserved = true
+       return p
+}
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+       xadd64(stat, int64(n))
+
+       // On 64-bit, we don't actually have v reserved, so tread carefully.
+       if !reserved {
+               p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+               if uintptr(p) == _ENOMEM {
+                       gothrow("runtime: out of memory")
+               }
+               if p != v {
+                       print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+                       gothrow("runtime: address space conflict")
+               }
+               return
+       }
+
+       p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+       if uintptr(p) == _ENOMEM {
+               gothrow("runtime: out of memory")
+       }
+       if p != v {
+               gothrow("runtime: cannot map pages in arena address space")
+       }
+}
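
The 64-bit branch of sysReserve above avoids reserving huge ranges up front (so that processes running under ulimit -v are not rejected): it only checks that a small 64 KB mapping succeeds and defers the real work to sysMap. The sketch below reproduces just that probe step at user level with the standard syscall package; syscall.Mmap cannot pass an address hint the way the runtime's mmap wrapper does, so treat this as an illustration of the policy, not of the mechanism.

// +build linux

package main

import (
	"fmt"
	"syscall"
)

// probeReserve reports whether a very large reservation is likely to be
// acceptable, using the same trick as sysReserve: for huge requests, only
// verify that a 64 KB anonymous mapping succeeds.
func probeReserve(n uint64) bool {
	if n <= 1<<32 {
		return true // small request: a real PROT_NONE reservation would be used
	}
	probe, err := syscall.Mmap(-1, 0, 64<<10,
		syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE)
	if err != nil {
		return false // even 64 KB failed (e.g. a very tight ulimit -v)
	}
	syscall.Munmap(probe)
	return true
}

func main() {
	fmt.Println("can reserve 128 GB (probe):", probeReserve(128<<30))
}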
diff --git a/src/runtime/mfixalloc.c b/src/runtime/mfixalloc.c
deleted file mode 100644 (file)
index d670629..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Fixed-size object allocator.  Returned memory is not zeroed.
-//
-// See malloc.h for overview.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-// Initialize f to allocate objects of the given size,
-// using the allocator to obtain chunks of memory.
-void
-runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat)
-{
-       f->size = size;
-       f->first = first;
-       f->arg = arg;
-       f->list = nil;
-       f->chunk = nil;
-       f->nchunk = 0;
-       f->inuse = 0;
-       f->stat = stat;
-}
-
-void*
-runtime·FixAlloc_Alloc(FixAlloc *f)
-{
-       void *v;
-       
-       if(f->size == 0) {
-               runtime·printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n");
-               runtime·throw("runtime: internal error");
-       }
-
-       if(f->list) {
-               v = f->list;
-               f->list = *(void**)f->list;
-               f->inuse += f->size;
-               return v;
-       }
-       if(f->nchunk < f->size) {
-               f->chunk = runtime·persistentalloc(FixAllocChunk, 0, f->stat);
-               f->nchunk = FixAllocChunk;
-       }
-       v = f->chunk;
-       if(f->first)
-               f->first(f->arg, v);
-       f->chunk += f->size;
-       f->nchunk -= f->size;
-       f->inuse += f->size;
-       return v;
-}
-
-void
-runtime·FixAlloc_Free(FixAlloc *f, void *p)
-{
-       f->inuse -= f->size;
-       *(void**)p = f->list;
-       f->list = p;
-}
-
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
new file mode 100644 (file)
index 0000000..b66a17e
--- /dev/null
@@ -0,0 +1,59 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fixed-size object allocator.  Returned memory is not zeroed.
+//
+// See malloc.h for overview.
+
+package runtime
+
+import "unsafe"
+
+// Initialize f to allocate objects of the given size,
+// using the allocator to obtain chunks of memory.
+func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
+       f.size = size
+       f.first = *(*unsafe.Pointer)(unsafe.Pointer(&first))
+       f.arg = arg
+       f.list = nil
+       f.chunk = nil
+       f.nchunk = 0
+       f.inuse = 0
+       f.stat = stat
+}
+
+func fixAlloc_Alloc(f *fixalloc) unsafe.Pointer {
+       if f.size == 0 {
+               print("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n")
+               gothrow("runtime: internal error")
+       }
+
+       if f.list != nil {
+               v := unsafe.Pointer(f.list)
+               f.list = f.list.next
+               f.inuse += f.size
+               return v
+       }
+       if uintptr(f.nchunk) < f.size {
+               f.chunk = (*uint8)(persistentalloc(_FixAllocChunk, 0, f.stat))
+               f.nchunk = _FixAllocChunk
+       }
+
+       v := (unsafe.Pointer)(f.chunk)
+       if f.first != nil {
+               fn := *(*func(unsafe.Pointer, unsafe.Pointer))(unsafe.Pointer(&f.first))
+               fn(f.arg, v)
+       }
+       f.chunk = (*byte)(add(unsafe.Pointer(f.chunk), f.size))
+       f.nchunk -= uint32(f.size)
+       f.inuse += f.size
+       return v
+}
+
+func fixAlloc_Free(f *fixalloc, p unsafe.Pointer) {
+       f.inuse -= f.size
+       v := (*mlink)(p)
+       v.next = f.list
+       f.list = v
+}
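
The fixalloc conversion above keeps the classic free-list allocator shape: Alloc pops a recycled block if one exists, otherwise carves the next block off a large chunk; Free pushes the block back onto the list. An illustrative, self-contained sketch of the same technique outside the runtime (hypothetical fixPool type, separate from this CL; an ordinary slice stands in for persistentalloc, and the block size must be at least pointer-sized and a multiple of 8):

package main

import (
        "fmt"
        "unsafe"
)

// freeNode threads freed blocks together, like the runtime's mlink.
type freeNode struct{ next *freeNode }

// fixPool hands out fixed-size blocks carved from large chunks and
// recycles freed blocks through a singly linked free list.
type fixPool struct {
        size   uintptr   // block size; >= pointer size, multiple of 8
        list   *freeNode // free list of recycled blocks
        chunk  []byte    // current chunk being carved
        nchunk uintptr   // bytes still unused in chunk
        chunks [][]byte  // keeps all chunks reachable (no persistentalloc here)
}

const chunkSize = 16 << 10 // like _FixAllocChunk

func (f *fixPool) alloc() unsafe.Pointer {
        if f.list != nil { // reuse a freed block first
                v := unsafe.Pointer(f.list)
                f.list = f.list.next
                return v
        }
        if f.nchunk < f.size { // current chunk exhausted: carve a fresh one
                f.chunk = make([]byte, chunkSize)
                f.chunks = append(f.chunks, f.chunk)
                f.nchunk = chunkSize
        }
        v := unsafe.Pointer(&f.chunk[uintptr(len(f.chunk))-f.nchunk])
        f.nchunk -= f.size
        return v
}

func (f *fixPool) free(p unsafe.Pointer) {
        n := (*freeNode)(p) // write the list link into the block itself
        n.next = f.list
        f.list = n
}

func main() {
        f := &fixPool{size: 64}
        a, b := f.alloc(), f.alloc()
        f.free(a)
        c := f.alloc()              // the freed block is handed out again
        fmt.Println(a == c, b != c) // true true
}
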
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
new file mode 100644 (file)
index 0000000..569bf5d
--- /dev/null
@@ -0,0 +1,1827 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
+// It has gotten completely out of control.
+
+// Garbage collector (GC).
+//
+// GC is:
+// - mark&sweep
+// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc.)
+// - parallel (up to MaxGcproc threads)
+// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
+// - non-moving/non-compacting
+// - full (non-partial)
+//
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+//
+// Concurrent sweep.
+// The sweep phase proceeds concurrently with normal program execution.
+// The heap is swept span-by-span both lazily (when a goroutine needs another span)
+// and concurrently in a background goroutine (this helps programs that are not CPU bound).
+// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
+// and so next_gc calculation is tricky and happens as follows.
+// At the end of the stop-the-world phase next_gc is conservatively set based on total
+// heap size; all spans are marked as "needs sweeping".
+// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
+// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
+// closer to the target value. However, this is not enough to avoid over-allocating memory.
+// Consider that a goroutine wants to allocate a new span for a large object and
+// there are no free swept spans, but there are small-object unswept spans.
+// If the goroutine naively allocates a new span, it can surpass the yet-unknown
+// target next_gc value. In order to prevent such cases (1) when a goroutine needs
+// to allocate a new small-object span, it sweeps small-object spans for the same
+// object size until it frees at least one object; (2) when a goroutine needs to
+// allocate large-object span from heap, it sweeps spans until it frees at least
+// that many pages into heap. Together these two measures ensure that we don't surpass
+// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
+// but there can still be other one-page unswept spans which could be combined into a two-page span.
+// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
+// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
+// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
+// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
+// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
+// The finalizer goroutine is kicked off only when all spans are swept.
+// When the next GC starts, it sweeps all not-yet-swept spans (if any).
+
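
A worked instance of the pacing rule described above, separate from the CL itself: with GOGC=100 and 4 MB of live heap, the next collection triggers at 8 MB. The sketch below mirrors the formula applied later in this file when gc() sets memstats.next_gc (it assumes gcpercent >= 0, i.e. GOGC is not off):

package main

import "fmt"

// nextGC mirrors the pacing formula applied later in this file when
// gc() recomputes memstats.next_gc:
//      next_gc = heap_alloc + heap_alloc*gcpercent/100
// It assumes gcpercent >= 0 (GOGC not set to "off").
func nextGC(heapAlloc uint64, gcpercent int32) uint64 {
        return heapAlloc + heapAlloc*uint64(gcpercent)/100
}

func main() {
        fmt.Println(nextGC(4<<20, 100)) // 8388608: collect again once a 4 MB live heap doubles
}
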
+package runtime
+
+import "unsafe"
+
+const (
+       _DebugGC         = 0
+       _DebugGCPtrs     = false // if true, print trace of every pointer load during GC
+       _ConcurrentSweep = true
+
+       _WorkbufSize     = 4 * 1024
+       _FinBlockSize    = 4 * 1024
+       _RootData        = 0
+       _RootBss         = 1
+       _RootFinalizers  = 2
+       _RootSpans       = 3
+       _RootFlushCaches = 4
+       _RootCount       = 5
+)
+
+// ptrmask for an allocation containing a single pointer.
+var oneptr = [...]uint8{bitsPointer}
+
+// Initialized from $GOGC.  GOGC=off means no gc.
+var gcpercent int32
+
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+//     semacquire(&worldsema);
+//     m.gcing = 1;
+//     stoptheworld();
+//
+//     ... do stuff ...
+//
+//     m.gcing = 0;
+//     semrelease(&worldsema);
+//     starttheworld();
+//
+var worldsema uint32 = 1
+
+type workbuf struct {
+       node lfnode // must be first
+       nobj uintptr
+       obj  [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr
+}
+
+var data, edata, bss, ebss, gcdata, gcbss struct{}
+
+var finlock mutex  // protects the following variables
+var fing *g        // goroutine that runs finalizers
+var finq *finblock // list of finalizers that are to be executed
+var finc *finblock // cache of free blocks
+var finptrmask [_FinBlockSize / ptrSize / pointersPerByte]byte
+var fingwait bool
+var fingwake bool
+var allfin *finblock // list of all blocks
+
+var gcdatamask bitvector
+var gcbssmask bitvector
+
+var gclock mutex
+
+var badblock [1024]uintptr
+var nbadblock int32
+
+type workdata struct {
+       full    uint64                // lock-free list of full blocks
+       empty   uint64                // lock-free list of empty blocks
+       pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
+       nproc   uint32
+       tstart  int64
+       nwait   uint32
+       ndone   uint32
+       alldone note
+       markfor *parfor
+
+       // Copy of mheap.allspans for marker or sweeper.
+       spans []*mspan
+}
+
+var work workdata
+
+//go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal
+var weak_cgo_allocate byte
+
+// Is _cgo_allocate linked into the binary?
+func have_cgo_allocate() bool {
+       return &weak_cgo_allocate != nil
+}
+
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left.  Instead of using explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body.  Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
+func scanblock(b, n uintptr, ptrmask *uint8) {
+       // Cache memory arena parameters in local vars.
+       arena_start := mheap_.arena_start
+       arena_used := mheap_.arena_used
+
+       wbuf := getempty(nil)
+       nobj := wbuf.nobj
+       wp := &wbuf.obj[nobj]
+       keepworking := b == 0
+
+       var ptrbitp unsafe.Pointer
+
+       // ptrmask can have 2 possible values:
+       // 1. nil - obtain pointer mask from GC bitmap.
+       // 2. pointer to a compact mask (for stacks and data).
+       goto_scanobj := b != 0
+
+       for {
+               if goto_scanobj {
+                       goto_scanobj = false
+               } else {
+                       if nobj == 0 {
+                               // Out of work in workbuf.
+                               if !keepworking {
+                                       putempty(wbuf)
+                                       return
+                               }
+
+                               // Refill workbuf from global queue.
+                               wbuf = getfull(wbuf)
+                               if wbuf == nil {
+                                       return
+                               }
+                               nobj = wbuf.nobj
+                               if nobj < uintptr(len(wbuf.obj)) {
+                                       wp = &wbuf.obj[nobj]
+                               } else {
+                                       wp = nil
+                               }
+                       }
+
+                       // If another proc wants a pointer, give it some.
+                       if work.nwait > 0 && nobj > 4 && work.full == 0 {
+                               wbuf.nobj = nobj
+                               wbuf = handoff(wbuf)
+                               nobj = wbuf.nobj
+                               if nobj < uintptr(len(wbuf.obj)) {
+                                       wp = &wbuf.obj[nobj]
+                               } else {
+                                       wp = nil
+                               }
+                       }
+
+                       nobj--
+                       wp = &wbuf.obj[nobj]
+                       b = *wp
+                       n = arena_used - uintptr(b)
+                       ptrmask = nil // use GC bitmap for pointer info
+               }
+
+               if _DebugGCPtrs {
+                       print("scanblock ", b, " +", hex(n), " ", ptrmask, "\n")
+               }
+
+               // Find bits of the beginning of the object.
+               if ptrmask == nil {
+                       off := (uintptr(b) - arena_start) / ptrSize
+                       ptrbitp = unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)
+               }
+
+               var i uintptr
+               for i = 0; i < n; i += ptrSize {
+                       // Find bits for this word.
+                       var bits uintptr
+                       if ptrmask == nil {
+                               // Check if we have reached end of span.
+                               if (uintptr(b)+i)%_PageSize == 0 &&
+                                       h_spans[(uintptr(b)-arena_start)>>_PageShift] != h_spans[(uintptr(b)+i-arena_start)>>_PageShift] {
+                                       break
+                               }
+
+                               // Consult GC bitmap.
+                               bits = uintptr(*(*byte)(ptrbitp))
+
+                               if wordsPerBitmapByte != 2 {
+                                       gothrow("alg doesn't work for wordsPerBitmapByte != 2")
+                               }
+                               j := (uintptr(b) + i) / ptrSize & 1
+                               ptrbitp = add(ptrbitp, -j)
+                               bits >>= gcBits * j
+
+                               if bits&bitBoundary != 0 && i != 0 {
+                                       break // reached beginning of the next object
+                               }
+                               bits = (bits >> 2) & bitsMask
+                               if bits == bitsDead {
+                                       break // reached no-scan part of the object
+                               }
+                       } else {
+                               // dense mask (stack or data)
+                               bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask
+                       }
+
+                       if bits <= _BitsScalar { // BitsScalar || BitsDead
+                               continue
+                       }
+
+                       if bits != _BitsPointer {
+                               gothrow("unexpected garbage collection bits")
+                       }
+
+                       obj := *(*uintptr)(unsafe.Pointer(b + i))
+                       obj0 := obj
+
+               markobj:
+                       var s *mspan
+                       var off, bitp, shift, xbits uintptr
+
+                       // At this point we have extracted the next potential pointer.
+                       // Check if it points into heap.
+                       if obj == 0 {
+                               continue
+                       }
+                       if obj < arena_start || arena_used <= obj {
+                               if uintptr(obj) < _PhysPageSize && invalidptr != 0 {
+                                       s = nil
+                                       goto badobj
+                               }
+                               continue
+                       }
+
+                       // Mark the object.
+                       obj &^= ptrSize - 1
+                       off = (obj - arena_start) / ptrSize
+                       bitp = arena_start - off/wordsPerBitmapByte - 1
+                       shift = (off % wordsPerBitmapByte) * gcBits
+                       xbits = uintptr(*(*byte)(unsafe.Pointer(bitp)))
+                       bits = (xbits >> shift) & bitMask
+                       if (bits & bitBoundary) == 0 {
+                               // Not a beginning of a block, consult span table to find the block beginning.
+                               k := pageID(obj >> _PageShift)
+                               x := k
+                               x -= pageID(arena_start >> _PageShift)
+                               s = h_spans[x]
+                               if s == nil || k < s.start || s.limit <= obj || s.state != mSpanInUse {
+                                       // Stack pointers lie within the arena bounds but are not part of the GC heap.
+                                       // Ignore them.
+                                       if s != nil && s.state == _MSpanStack {
+                                               continue
+                                       }
+                                       goto badobj
+                               }
+                               p := uintptr(s.start) << _PageShift
+                               if s.sizeclass != 0 {
+                                       size := s.elemsize
+                                       idx := (obj - p) / size
+                                       p = p + idx*size
+                               }
+                               if p == obj {
+                                       print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
+                                       gothrow("failed to find block beginning")
+                               }
+                               obj = p
+                               goto markobj
+                       }
+
+                       if _DebugGCPtrs {
+                               print("scan *", hex(b+i), " = ", hex(obj0), " => base ", hex(obj), "\n")
+                       }
+
+                       if nbadblock > 0 && obj == badblock[nbadblock-1] {
+                               // Running garbage collection again because
+                               // we want to find the path from a root to a bad pointer.
+                               // Found possible next step; extend or finish path.
+                               for j := int32(0); j < nbadblock; j++ {
+                                       if badblock[j] == b {
+                                               goto AlreadyBad
+                                       }
+                               }
+                               print("runtime: found *(", hex(b), "+", hex(i), ") = ", hex(obj0), "+", hex(obj-obj0), "\n")
+                               if ptrmask != nil {
+                                       gothrow("bad pointer")
+                               }
+                               if nbadblock >= int32(len(badblock)) {
+                                       gothrow("badblock trace too long")
+                               }
+                               badblock[nbadblock] = uintptr(b)
+                               nbadblock++
+                       AlreadyBad:
+                       }
+
+                       // Now we have bits, bitp, and shift correct for
+                       // obj pointing at the base of the object.
+                       // We only care about objects that are not yet marked.
+                       if bits&bitMarked != 0 {
+                               continue
+                       }
+
+                       // If the object size is greater than 8 bytes, then each byte of the
+                       // GC bitmap contains info for at most one object. In that case we use
+                       // a non-atomic byte store to mark the object. This can lead to the
+                       // object being enqueued for scanning twice, but scanning is an
+                       // idempotent operation, so it is OK. It cannot lead to bitmap
+                       // corruption because the single marked bit is the only thing that
+                       // can change in the byte.
+                       // For 8-byte objects we use a non-atomic store only if the other
+                       // quadruple is already marked; otherwise we fall back to a CAS
+                       // loop for marking.
+                       if xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
+                               *(*byte)(unsafe.Pointer(bitp)) = uint8(xbits | bitMarked<<shift)
+                       } else {
+                               atomicor8((*byte)(unsafe.Pointer(bitp)), bitMarked<<shift)
+                       }
+
+                       if (xbits>>(shift+2))&bitsMask == bitsDead {
+                               continue // noscan object
+                       }
+
+                       // Queue the obj for scanning.
+                       // TODO: PREFETCH here.
+
+                       // If workbuf is full, obtain an empty one.
+                       if nobj >= uintptr(len(wbuf.obj)) {
+                               wbuf.nobj = nobj
+                               wbuf = getempty(wbuf)
+                               nobj = wbuf.nobj
+                               wp = &wbuf.obj[nobj]
+                       }
+                       *wp = obj
+                       nobj++
+                       if nobj < uintptr(len(wbuf.obj)) {
+                               wp = &wbuf.obj[nobj]
+                       } else {
+                               wp = nil
+                       }
+                       continue
+
+               badobj:
+                       // If cgo_allocate is linked into the binary, it can allocate
+                       // memory as []unsafe.Pointer that may not contain actual
+                       // pointers and must be scanned conservatively.
+                       // In this case alone, allow the bad pointer.
+                       if have_cgo_allocate() && ptrmask == nil {
+                               continue
+                       }
+
+                       // Anything else indicates a bug somewhere.
+                       // If we're in the middle of chasing down a different bad pointer,
+                       // don't confuse the trace by printing about this one.
+                       if nbadblock > 0 {
+                               continue
+                       }
+
+                       print("runtime: garbage collector found invalid heap pointer *(", hex(b), "+", hex(i), ")=", hex(obj))
+                       if s == nil {
+                               print(" s=nil\n")
+                       } else {
+                               print(" span=", uintptr(s.start)<<_PageShift, "-", s.limit, "-", (uintptr(s.start)+s.npages)<<_PageShift, " state=", s.state, "\n")
+                       }
+                       if ptrmask != nil {
+                               gothrow("invalid heap pointer")
+                       }
+                       // Add to badblock list, which will cause the garbage collection
+                       // to keep repeating until it has traced the chain of pointers
+                       // leading to obj all the way back to a root.
+                       if nbadblock == 0 {
+                               badblock[nbadblock] = uintptr(b)
+                               nbadblock++
+                       }
+               }
+               if _DebugGCPtrs {
+                       print("end scanblock ", hex(b), " +", hex(n), " ", ptrmask, "\n")
+               }
+               if _DebugGC > 0 && ptrmask == nil {
+                       // For heap objects ensure that we did not overscan.
+                       var p, n uintptr
+                       if mlookup(b, &p, &n, nil) == 0 || b != p || i > n {
+                               print("runtime: scanned (", hex(b), "+", hex(i), "), heap object (", hex(p), "+", hex(n), ")\n")
+                               gothrow("scanblock: scanned invalid object")
+                       }
+               }
+       }
+}
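
scanblock above drives marking from an explicit work list of object addresses rather than recursion. A toy sketch of the same worklist idea over an ordinary object graph, separate from the CL (plain Go values instead of bitmaps and workbufs):

package main

import "fmt"

// node is a toy heap object: a mark bit plus outgoing pointers.
type node struct {
        marked bool
        refs   []*node
}

// mark visits everything reachable from roots using an explicit work
// list, the same shape as scanblock's workbuf loop: pop an object,
// mark it, push its unmarked children, repeat until the list is empty.
// Marking is idempotent, so cycles and shared objects are handled.
func mark(roots []*node) int {
        work := append([]*node(nil), roots...)
        visited := 0
        for len(work) > 0 {
                obj := work[len(work)-1]
                work = work[:len(work)-1]
                if obj == nil || obj.marked {
                        continue
                }
                obj.marked = true
                visited++
                work = append(work, obj.refs...)
        }
        return visited
}

func main() {
        a, b, c := &node{}, &node{}, &node{}
        a.refs = []*node{b, c}
        c.refs = []*node{a} // a cycle
        unreached := &node{}
        fmt.Println(mark([]*node{a}), unreached.marked) // 3 false
}
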
+
+func markroot(desc *parfor, i uint32) {
+       // Note: if you add a case here, please also update heapdump.go:dumproots.
+       switch i {
+       case _RootData:
+               scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata)
+
+       case _RootBss:
+               scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata)
+
+       case _RootFinalizers:
+               for fb := allfin; fb != nil; fb = fb.alllink {
+                       scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0])
+               }
+
+       case _RootSpans:
+               // mark MSpan.specials
+               sg := mheap_.sweepgen
+               for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
+                       s := work.spans[spanidx]
+                       if s.state != mSpanInUse {
+                               continue
+                       }
+                       if s.sweepgen != sg {
+                               print("sweep ", s.sweepgen, " ", sg, "\n")
+                               gothrow("gc: unswept span")
+                       }
+                       for sp := s.specials; sp != nil; sp = sp.next {
+                               if sp.kind != _KindSpecialFinalizer {
+                                       continue
+                               }
+                               // don't mark finalized object, but scan it so we
+                               // retain everything it points to.
+                               spf := (*specialfinalizer)(unsafe.Pointer(sp))
+                               // A finalizer can be set for an inner byte of an object; find the object beginning.
+                               p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+                               scanblock(p, s.elemsize, nil)
+                               scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0])
+                       }
+               }
+
+       case _RootFlushCaches:
+               flushallmcaches()
+
+       default:
+               // the rest is scanning goroutine stacks
+               if uintptr(i-_RootCount) >= allglen {
+                       gothrow("markroot: bad index")
+               }
+               gp := allgs[i-_RootCount]
+               // remember when we've first observed the G blocked
+               // needed only to output in traceback
+               status := readgstatus(gp)
+               if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
+                       gp.waitsince = work.tstart
+               }
+               // Shrink a stack if not much of it is being used.
+               shrinkstack(gp)
+               if readgstatus(gp) == _Gdead {
+                       gp.gcworkdone = true
+               } else {
+                       gp.gcworkdone = false
+               }
+               restart := stopg(gp)
+               scanstack(gp)
+               if restart {
+                       restartg(gp)
+               }
+       }
+}
+
+// Get an empty work buffer off the work.empty list,
+// allocating new buffers as needed.
+func getempty(b *workbuf) *workbuf {
+       _g_ := getg()
+       if b != nil {
+               lfstackpush(&work.full, &b.node)
+       }
+       b = nil
+       c := _g_.m.mcache
+       if c.gcworkbuf != nil {
+               b = (*workbuf)(c.gcworkbuf)
+               c.gcworkbuf = nil
+       }
+       if b == nil {
+               b = (*workbuf)(lfstackpop(&work.empty))
+       }
+       if b == nil {
+               b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
+       }
+       b.nobj = 0
+       return b
+}
+
+func putempty(b *workbuf) {
+       _g_ := getg()
+       c := _g_.m.mcache
+       if c.gcworkbuf == nil {
+               c.gcworkbuf = (unsafe.Pointer)(b)
+               return
+       }
+       lfstackpush(&work.empty, &b.node)
+}
+
+func gcworkbuffree(b unsafe.Pointer) {
+       if b != nil {
+               putempty((*workbuf)(b))
+       }
+}
+
+// Get a full work buffer off the work.full list, or return nil.
+func getfull(b *workbuf) *workbuf {
+       if b != nil {
+               lfstackpush(&work.empty, &b.node)
+       }
+       b = (*workbuf)(lfstackpop(&work.full))
+       if b != nil || work.nproc == 1 {
+               return b
+       }
+
+       xadd(&work.nwait, +1)
+       for i := 0; ; i++ {
+               if work.full != 0 {
+                       xadd(&work.nwait, -1)
+                       b = (*workbuf)(lfstackpop(&work.full))
+                       if b != nil {
+                               return b
+                       }
+                       xadd(&work.nwait, +1)
+               }
+               if work.nwait == work.nproc {
+                       return nil
+               }
+               _g_ := getg()
+               if i < 10 {
+                       _g_.m.gcstats.nprocyield++
+                       procyield(20)
+               } else if i < 20 {
+                       _g_.m.gcstats.nosyield++
+                       osyield()
+               } else {
+                       _g_.m.gcstats.nsleep++
+                       usleep(100)
+               }
+       }
+}
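
getfull escalates its waiting strategy: cheap processor spins first, then an OS yield, then a short sleep, so an idle GC worker burns less CPU the longer it finds nothing to steal. A user-level sketch of the same backoff shape, separate from the CL (runtime.Gosched and time.Sleep stand in for the runtime-internal procyield, osyield, and usleep):

package main

import (
        "fmt"
        "runtime"
        "sync/atomic"
        "time"
)

// waitFor spins with escalating cost, mirroring getfull's backoff:
// busy spins first, then yield, then a short sleep.
func waitFor(flag *int32) {
        for i := 0; atomic.LoadInt32(flag) == 0; i++ {
                switch {
                case i < 10:
                        // busy spin: cheapest, wins if work appears immediately
                case i < 20:
                        runtime.Gosched() // let other goroutines/threads run
                default:
                        time.Sleep(100 * time.Microsecond) // give up the CPU briefly
                }
        }
}

func main() {
        var ready int32
        go func() {
                time.Sleep(time.Millisecond)
                atomic.StoreInt32(&ready, 1)
        }()
        waitFor(&ready)
        fmt.Println("work available")
}
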
+
+func handoff(b *workbuf) *workbuf {
+       // Make new buffer with half of b's pointers.
+       b1 := getempty(nil)
+       n := b.nobj / 2
+       b.nobj -= n
+       b1.nobj = n
+       memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0]))
+       _g_ := getg()
+       _g_.m.gcstats.nhandoff++
+       _g_.m.gcstats.nhandoffcnt += uint64(n)
+
+       // Put b on full list - let first half of b get stolen.
+       lfstackpush(&work.full, &b.node)
+       return b1
+}
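
handoff splits a full workbuf so another worker can make progress: the caller keeps the top half in a fresh buffer and publishes the rest on the shared full list. A slice-based sketch of that split, separate from the CL:

package main

import "fmt"

// splitWork mirrors handoff: keep the top half of the pending work in
// a fresh buffer for the caller, and leave the first half behind to be
// published on the shared full list for another worker to steal.
// Plain slices stand in for workbufs.
func splitWork(pending []uintptr) (keep, publish []uintptr) {
        n := len(pending) / 2
        publish = pending[:len(pending)-n]                         // first half: offered to thieves
        keep = append([]uintptr(nil), pending[len(pending)-n:]...) // caller keeps the rest
        return keep, publish
}

func main() {
        work := []uintptr{1, 2, 3, 4, 5, 6}
        keep, publish := splitWork(work)
        fmt.Println(keep, publish) // [4 5 6] [1 2 3]
}
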
+
+func stackmapdata(stkmap *stackmap, n int32) bitvector {
+       if n < 0 || n >= stkmap.n {
+               gothrow("stackmapdata: index out of range")
+       }
+       return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
+}
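
The offset arithmetic in stackmapdata pads each bitvector out to whole 32-bit words: an entry of nbit bits occupies (nbit+31)/32*4 bytes, and entry n starts that many bytes times n past bytedata. A small check of the rounding, separate from the CL:

package main

import "fmt"

// entryBytes reproduces the offset arithmetic in stackmapdata: each
// entry is a bitvector of nbit bits padded to whole 32-bit words, so
// entry n starts n*entryBytes(nbit) bytes into the bitmap data.
func entryBytes(nbit int32) int32 { return (nbit + 31) / 32 * 4 }

func main() {
        fmt.Println(entryBytes(45)) // 8: 45 bits round up to two 32-bit words
        fmt.Println(entryBytes(32)) // 4: exactly one word
}
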
+
+// Scan a stack frame: local variables and function arguments/results.
+func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
+
+       f := frame.fn
+       targetpc := frame.continpc
+       if targetpc == 0 {
+               // Frame is dead.
+               return true
+       }
+       if _DebugGC > 1 {
+               print("scanframe ", gofuncname(f), "\n")
+       }
+       if targetpc != f.entry {
+               targetpc--
+       }
+       pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+       if pcdata == -1 {
+               // We do not have a valid pcdata value but there might be a
+               // stackmap for this function.  It is likely that we are looking
+               // at the function prologue, assume so and hope for the best.
+               pcdata = 0
+       }
+
+       // Scan local variables if stack frame has been allocated.
+       size := frame.varp - frame.sp
+       var minsize uintptr
+       if thechar != '6' && thechar != '8' {
+               minsize = ptrSize
+       } else {
+               minsize = 0
+       }
+       if size > minsize {
+               stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+               if stkmap == nil || stkmap.n <= 0 {
+                       print("runtime: frame ", gofuncname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
+                       gothrow("missing stackmap")
+               }
+
+               // Locals bitmap information, scan just the pointers in locals.
+               if pcdata < 0 || pcdata >= stkmap.n {
+                       // don't know where we are
+                       print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
+                       gothrow("scanframe: bad symbol table")
+               }
+               bv := stackmapdata(stkmap, pcdata)
+               size = (uintptr(bv.n) * ptrSize) / bitsPerPointer
+               scanblock(frame.varp-size, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
+       }
+
+       // Scan arguments.
+       if frame.arglen > 0 {
+               var bv bitvector
+               if frame.argmap != nil {
+                       bv = *frame.argmap
+               } else {
+                       stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+                       if stkmap == nil || stkmap.n <= 0 {
+                               print("runtime: frame ", gofuncname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
+                               gothrow("missing stackmap")
+                       }
+                       if pcdata < 0 || pcdata >= stkmap.n {
+                               // don't know where we are
+                               print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
+                               gothrow("scanframe: bad symbol table")
+                       }
+                       bv = stackmapdata(stkmap, pcdata)
+               }
+               scanblock(frame.argp, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
+       }
+       return true
+}
+
+func scanstack(gp *g) {
+       // TODO(rsc): Due to a precedence error, this was never checked in the original C version.
+       // If you enable the check, the gothrow happens.
+       /*
+               if readgstatus(gp)&_Gscan == 0 {
+                       print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+                       gothrow("mark - bad status")
+               }
+       */
+
+       switch readgstatus(gp) &^ _Gscan {
+       default:
+               print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+               gothrow("mark - bad status")
+       case _Gdead:
+               return
+       case _Grunning:
+               print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+               gothrow("mark - world not stopped")
+       case _Grunnable, _Gsyscall, _Gwaiting:
+               // ok
+       }
+
+       if gp == getg() {
+               gothrow("can't scan our own stack")
+       }
+       mp := gp.m
+       if mp != nil && mp.helpgc != 0 {
+               gothrow("can't scan gchelper stack")
+       }
+
+       gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
+       tracebackdefers(gp, scanframe, nil)
+}
+
+// The gp has been moved to a GC safepoint. If there is any gcphase-specific
+// work, it is done here.
+func gcphasework(gp *g) {
+       switch gcphase {
+       default:
+               gothrow("gcphasework in bad gcphase")
+       case _GCoff, _GCquiesce, _GCstw, _GCsweep:
+               // No work for now.
+       case _GCmark:
+               // Disabled until concurrent GC is implemented
+               // but indicate the scan has been done.
+               // scanstack(gp);
+       }
+       gp.gcworkdone = true
+}
+
+var finalizer1 = [...]byte{
+       // Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
+       // Each byte describes 4 words.
+       // Need 4 Finalizers described by 5 bytes before pattern repeats:
+       //      ptr ptr uintptr ptr ptr
+       //      ptr ptr uintptr ptr ptr
+       //      ptr ptr uintptr ptr ptr
+       //      ptr ptr uintptr ptr ptr
+       // aka
+       //      ptr ptr uintptr ptr
+       //      ptr ptr ptr uintptr
+       //      ptr ptr ptr ptr
+       //      uintptr ptr ptr ptr
+       //      ptr uintptr ptr ptr
+       // Assumptions about Finalizer layout checked below.
+       bitsPointer | bitsPointer<<2 | bitsScalar<<4 | bitsPointer<<6,
+       bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsScalar<<6,
+       bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
+       bitsScalar | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
+       bitsPointer | bitsScalar<<2 | bitsPointer<<4 | bitsPointer<<6,
+}
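
The finalizer1 pattern can be derived mechanically: four 5-word finalizers cover 20 words, which is exactly 5 bitmap bytes at 4 words per byte, after which the layout repeats. A sketch that rebuilds the same 5 bytes, separate from the CL (it assumes the 2-bit encoding with bitsScalar = 1 and bitsPointer = 2, defined elsewhere in this package):

package main

import "fmt"

// The 2-bit per-word encoding assumed here (defined elsewhere in the runtime).
const (
        bitsScalar  = 1
        bitsPointer = 2
)

// buildMask packs a sequence of 2-bit word kinds into bitmap bytes,
// low bits first, four words per byte, the way finalizer1 is laid out.
func buildMask(kinds []byte) []byte {
        mask := make([]byte, (len(kinds)+3)/4)
        for i, k := range kinds {
                mask[i/4] |= k << (uint(i%4) * 2)
        }
        return mask
}

func main() {
        // One finalizer is 5 words: fn(ptr) arg(ptr) nret(uintptr) fint(ptr) ot(ptr).
        fin := []byte{bitsPointer, bitsPointer, bitsScalar, bitsPointer, bitsPointer}
        // Four finalizers = 20 words = exactly 5 bytes, then the pattern repeats.
        var kinds []byte
        for i := 0; i < 4; i++ {
                kinds = append(kinds, fin...)
        }
        fmt.Printf("%#v\n", buildMask(kinds)) // the same 5 bytes as finalizer1
}
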
+
+func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) {
+       lock(&finlock)
+       if finq == nil || finq.cnt == finq.cap {
+               if finc == nil {
+                       finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys))
+                       finc.cap = int32((_FinBlockSize-unsafe.Sizeof(finblock{}))/unsafe.Sizeof(finalizer{}) + 1)
+                       finc.alllink = allfin
+                       allfin = finc
+                       if finptrmask[0] == 0 {
+                               // Build pointer mask for Finalizer array in block.
+                               // Check assumptions made in finalizer1 array above.
+                               if (unsafe.Sizeof(finalizer{}) != 5*ptrSize ||
+                                       unsafe.Offsetof(finalizer{}.fn) != 0 ||
+                                       unsafe.Offsetof(finalizer{}.arg) != ptrSize ||
+                                       unsafe.Offsetof(finalizer{}.nret) != 2*ptrSize ||
+                                       unsafe.Offsetof(finalizer{}.fint) != 3*ptrSize ||
+                                       unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize ||
+                                       bitsPerPointer != 2) {
+                                       gothrow("finalizer out of sync")
+                               }
+                               for i := range finptrmask {
+                                       finptrmask[i] = finalizer1[i%len(finalizer1)]
+                               }
+                       }
+               }
+               block := finc
+               finc = block.next
+               block.next = finq
+               finq = block
+       }
+       f := (*finalizer)(add(unsafe.Pointer(&finq.fin[0]), uintptr(finq.cnt)*unsafe.Sizeof(finq.fin[0])))
+       finq.cnt++
+       f.fn = fn
+       f.nret = nret
+       f.fint = fint
+       f.ot = ot
+       f.arg = p
+       fingwake = true
+       unlock(&finlock)
+}
+
+func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrtype)) {
+       for fb := allfin; fb != nil; fb = fb.alllink {
+               for i := int32(0); i < fb.cnt; i++ {
+                       f := &fb.fin[i]
+                       callback(f.fn, f.arg, f.nret, f.fint, f.ot)
+               }
+       }
+}
+
+func mSpan_EnsureSwept(s *mspan) {
+       // Caller must disable preemption.
+       // Otherwise when this function returns the span can become unswept again
+       // (if GC is triggered on another goroutine).
+       _g_ := getg()
+       if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+               gothrow("MSpan_EnsureSwept: m is not locked")
+       }
+
+       sg := mheap_.sweepgen
+       if atomicload(&s.sweepgen) == sg {
+               return
+       }
+       if cas(&s.sweepgen, sg-2, sg-1) {
+               mSpan_Sweep(s, false)
+               return
+       }
+       // This is an unfortunate condition, and we don't have an efficient way to wait for it.
+       for atomicload(&s.sweepgen) != sg {
+               osyield()
+       }
+}
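
The sweepgen values used here form a small state machine relative to the heap's sweepgen sg: sg-2 means the span needs sweeping, sg-1 means someone is sweeping it, and sg means it is swept. Claiming a span is therefore one CAS from sg-2 to sg-1, as in mSpan_EnsureSwept above and sweepone below. A compact sketch of that protocol, separate from the CL:

package main

import (
        "fmt"
        "sync/atomic"
)

// trySweep sketches the claim protocol visible in mSpan_EnsureSwept and
// sweepone: relative to the heap's sweepgen sg, a span generation of
// sg-2 means "needs sweeping", sg-1 means "being swept", and sg means
// "swept". Claiming is a single CAS from sg-2 to sg-1.
func trySweep(spanGen *uint32, sg uint32) bool {
        if atomic.LoadUint32(spanGen) == sg {
                return false // already swept
        }
        if atomic.CompareAndSwapUint32(spanGen, sg-2, sg-1) {
                // ... sweep the span here ...
                atomic.StoreUint32(spanGen, sg) // publish "swept"
                return true
        }
        return false // another worker claimed it; caller may spin until it reaches sg
}

func main() {
        sg := uint32(10)
        span := sg - 2                               // needs sweeping
        fmt.Println(trySweep(&span, sg), span == sg) // true true
        fmt.Println(trySweep(&span, sg))             // false: nothing left to do
}
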
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
+// Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in MCentral lists;
+// caller takes care of it.
+func mSpan_Sweep(s *mspan, preserve bool) bool {
+       // It's critical that we enter this function with preemption disabled,
+       // GC must not start while we are in the middle of this function.
+       _g_ := getg()
+       if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+               gothrow("MSpan_Sweep: m is not locked")
+       }
+       sweepgen := mheap_.sweepgen
+       if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+               print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+               gothrow("MSpan_Sweep: bad span state")
+       }
+       arena_start := mheap_.arena_start
+       cl := s.sizeclass
+       size := s.elemsize
+       var n int32
+       var npages int32
+       if cl == 0 {
+               n = 1
+       } else {
+               // Chunk full of small blocks.
+               npages = class_to_allocnpages[cl]
+               n = (npages << _PageShift) / int32(size)
+       }
+       res := false
+       nfree := 0
+       var head mlink
+       end := &head
+       c := _g_.m.mcache
+       sweepgenset := false
+
+       // Mark any free objects in this span so we don't collect them.
+       for link := s.freelist; link != nil; link = link.next {
+               off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize
+               bitp := arena_start - off/wordsPerBitmapByte - 1
+               shift := (off % wordsPerBitmapByte) * gcBits
+               *(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
+       }
+
+       // Unlink & free special records for any objects we're about to free.
+       specialp := &s.specials
+       special := *specialp
+       for special != nil {
+               // A finalizer can be set for an inner byte of an object; find the object beginning.
+               p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
+               off := (p - arena_start) / ptrSize
+               bitp := arena_start - off/wordsPerBitmapByte - 1
+               shift := (off % wordsPerBitmapByte) * gcBits
+               bits := (*(*byte)(unsafe.Pointer(bitp)) >> shift) & bitMask
+               if bits&bitMarked == 0 {
+                       // Find the exact byte for which the special was setup
+                       // (as opposed to object beginning).
+                       p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
+                       // about to free object: splice out special record
+                       y := special
+                       special = special.next
+                       *specialp = special
+                       if !freespecial(y, unsafe.Pointer(p), size, false) {
+                               // stop freeing of object if it has a finalizer
+                               *(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
+                       }
+               } else {
+                       // object is still live: keep special record
+                       specialp = &special.next
+                       special = *specialp
+               }
+       }
+
+       // Sweep through n objects of given size starting at p.
+       // This thread owns the span now, so it can manipulate
+       // the block bitmap without atomic operations.
+       p := uintptr(s.start << _PageShift)
+       off := (p - arena_start) / ptrSize
+       bitp := arena_start - off/wordsPerBitmapByte - 1
+       shift := uint(0)
+       step := size / (ptrSize * wordsPerBitmapByte)
+       // Rewind to the previous quadruple, since we advance to the next one
+       // at the beginning of the loop.
+       bitp += step
+       if step == 0 {
+               // 8-byte objects.
+               bitp++
+               shift = gcBits
+       }
+       for ; n > 0; n, p = n-1, p+size {
+               bitp -= step
+               if step == 0 {
+                       if shift != 0 {
+                               bitp--
+                       }
+                       shift = gcBits - shift
+               }
+
+               xbits := *(*byte)(unsafe.Pointer(bitp))
+               bits := (xbits >> shift) & bitMask
+
+               // Allocated and marked object, reset bits to allocated.
+               if bits&bitMarked != 0 {
+                       *(*byte)(unsafe.Pointer(bitp)) &^= bitMarked << shift
+                       continue
+               }
+
+               // At this point we know that we are looking at garbage object
+               // that needs to be collected.
+               if debug.allocfreetrace != 0 {
+                       tracefree(unsafe.Pointer(p), size)
+               }
+
+               // Reset to allocated+noscan.
+               *(*byte)(unsafe.Pointer(bitp)) = uint8(uintptr(xbits&^((bitMarked|bitsMask<<2)<<shift)) | uintptr(bitsDead)<<(shift+2))
+               if cl == 0 {
+                       // Free large span.
+                       if preserve {
+                               gothrow("can't preserve large span")
+                       }
+                       unmarkspan(p, s.npages<<_PageShift)
+                       s.needzero = 1
+
+                       // important to set sweepgen before returning it to heap
+                       atomicstore(&s.sweepgen, sweepgen)
+                       sweepgenset = true
+
+                       // NOTE(rsc,dvyukov): The original implementation of efence
+                       // in CL 22060046 used SysFree instead of SysFault, so that
+                       // the operating system would eventually give the memory
+                       // back to us again, so that an efence program could run
+                       // longer without running out of memory. Unfortunately,
+                       // calling SysFree here without any kind of adjustment of the
+                       // heap data structures means that when the memory does
+                       // come back to us, we have the wrong metadata for it, either in
+                       // the MSpan structures or in the garbage collection bitmap.
+                       // Using SysFault here means that the program will run out of
+                       // memory fairly quickly in efence mode, but at least it won't
+                       // have mysterious crashes due to confused memory reuse.
+                       // It should be possible to switch back to SysFree if we also
+                       // implement and then call some kind of MHeap_DeleteSpan.
+                       if debug.efence > 0 {
+                               s.limit = 0 // prevent mlookup from finding this span
+                               sysFault(unsafe.Pointer(p), size)
+                       } else {
+                               mHeap_Free(&mheap_, s, 1)
+                       }
+                       c.local_nlargefree++
+                       c.local_largefree += size
+                       xadd64(&memstats.next_gc, -int64(size)*int64(gcpercent+100)/100)
+                       res = true
+               } else {
+                       // Free small object.
+                       if size > 2*ptrSize {
+                               *(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
+                       } else if size > ptrSize {
+                               *(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
+                       }
+                       end.next = (*mlink)(unsafe.Pointer(p))
+                       end = end.next
+                       nfree++
+               }
+       }
+
+       // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
+       // because of the potential for a concurrent free/SetFinalizer.
+       // But we need to set it before we make the span available for allocation
+       // (return it to heap or mcentral), because allocation code assumes that a
+       // span is already swept if available for allocation.
+       if !sweepgenset && nfree == 0 {
+               // The span must be in our exclusive ownership until we update sweepgen,
+               // check for potential races.
+               if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+                       print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+                       gothrow("MSpan_Sweep: bad span state after sweep")
+               }
+               atomicstore(&s.sweepgen, sweepgen)
+       }
+       if nfree > 0 {
+               c.local_nsmallfree[cl] += uintptr(nfree)
+               c.local_cachealloc -= intptr(uintptr(nfree) * size)
+               xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100)
+               res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve)
+               // MCentral_FreeSpan updates sweepgen
+       }
+       return res
+}
+
+// State of background sweep.
+// Protected by gclock.
+type sweepdata struct {
+       g       *g
+       parked  bool
+       started bool
+
+       spanidx uint32 // background sweeper position
+
+       nbgsweep    uint32
+       npausesweep uint32
+}
+
+var sweep sweepdata
+
+// sweepone sweeps one span.
+// It returns the number of pages returned to the heap, or ^uintptr(0) if there is nothing to sweep.
+func sweepone() uintptr {
+       _g_ := getg()
+
+       // Increment locks to ensure that the goroutine is not preempted
+       // in the middle of the sweep, which would leave the span in an
+       // inconsistent state for the next GC.
+       _g_.m.locks++
+       sg := mheap_.sweepgen
+       for {
+               idx := xadd(&sweep.spanidx, 1) - 1
+               if idx >= uint32(len(work.spans)) {
+                       mheap_.sweepdone = 1
+                       _g_.m.locks--
+                       return ^uintptr(0)
+               }
+               s := work.spans[idx]
+               if s.state != mSpanInUse {
+                       s.sweepgen = sg
+                       continue
+               }
+               if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
+                       continue
+               }
+               npages := s.npages
+               if !mSpan_Sweep(s, false) {
+                       npages = 0
+               }
+               _g_.m.locks--
+               return npages
+       }
+}
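
sweepone hands out spans to concurrent sweepers with a single atomic increment: xadd(&sweep.spanidx, 1) - 1 yields a unique index per caller, with no lock and no double-sweeping. A standalone sketch of the same claim-by-atomic-add idiom, separate from the CL:

package main

import (
        "fmt"
        "sync"
        "sync/atomic"
)

// Workers claim the next unprocessed index with one atomic add, the
// same idiom as sweepone's xadd(&sweep.spanidx, 1) - 1: no locks, no
// index processed twice, and everyone stops once the counter runs past
// the end of the slice.
func main() {
        spans := make([]int, 100) // stand-ins for work.spans
        var next uint32           // like sweep.spanidx
        var wg sync.WaitGroup
        for w := 0; w < 4; w++ {
                wg.Add(1)
                go func(id int) {
                        defer wg.Done()
                        for {
                                idx := atomic.AddUint32(&next, 1) - 1
                                if idx >= uint32(len(spans)) {
                                        return // nothing left to sweep
                                }
                                spans[idx] = id + 1 // "sweep" span idx
                        }
                }(w)
        }
        wg.Wait()
        fmt.Println(spans[0] != 0, spans[99] != 0) // true true: every span claimed once
}
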
+
+func gosweepone() uintptr {
+       var ret uintptr
+       onM(func() {
+               ret = sweepone()
+       })
+       return ret
+}
+
+func gosweepdone() bool {
+       return mheap_.sweepdone != 0
+}
+
+func gchelper() {
+       _g_ := getg()
+       _g_.m.traceback = 2
+       gchelperstart()
+
+       // parallel mark over GC roots
+       parfordo(work.markfor)
+
+       // help other threads scan secondary blocks
+       scanblock(0, 0, nil)
+
+       nproc := work.nproc // work.nproc can change right after we increment work.ndone
+       if xadd(&work.ndone, +1) == nproc-1 {
+               notewakeup(&work.alldone)
+       }
+       _g_.m.traceback = 0
+}
+
+func cachestats() {
+       for i := 0; ; i++ {
+               p := allp[i]
+               if p == nil {
+                       break
+               }
+               c := p.mcache
+               if c == nil {
+                       continue
+               }
+               purgecachedstats(c)
+       }
+}
+
+func flushallmcaches() {
+       for i := 0; ; i++ {
+               p := allp[i]
+               if p == nil {
+                       break
+               }
+               c := p.mcache
+               if c == nil {
+                       continue
+               }
+               mCache_ReleaseAll(c)
+               stackcache_clear(c)
+       }
+}
+
+func updatememstats(stats *gcstats) {
+       if stats != nil {
+               *stats = gcstats{}
+       }
+       for mp := allm; mp != nil; mp = mp.alllink {
+               if stats != nil {
+                       src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
+                       dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
+                       for i, v := range src {
+                               dst[i] += v
+                       }
+                       mp.gcstats = gcstats{}
+               }
+       }
+
+       memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
+       memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
+       memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
+               memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
+
+       // Calculate memory allocator stats.
+       // During program execution we only count the number of frees and the amount of freed memory.
+       // The current number of live objects in the heap and the amount of live heap memory
+       // are calculated by scanning all spans.
+       // The total number of mallocs is calculated as the number of frees plus the number of live objects.
+       // Similarly, the total amount of allocated memory is calculated as the amount of freed memory
+       // plus the amount of live heap memory.
+       memstats.alloc = 0
+       memstats.total_alloc = 0
+       memstats.nmalloc = 0
+       memstats.nfree = 0
+       for i := 0; i < len(memstats.by_size); i++ {
+               memstats.by_size[i].nmalloc = 0
+               memstats.by_size[i].nfree = 0
+       }
+
+       // Flush MCache's to MCentral.
+       onM(flushallmcaches)
+
+       // Aggregate local stats.
+       cachestats()
+
+       // Scan all spans and count number of alive objects.
+       lock(&mheap_.lock)
+       for i := uint32(0); i < mheap_.nspan; i++ {
+               s := h_allspans[i]
+               if s.state != mSpanInUse {
+                       continue
+               }
+               if s.sizeclass == 0 {
+                       memstats.nmalloc++
+                       memstats.alloc += uint64(s.elemsize)
+               } else {
+                       memstats.nmalloc += uint64(s.ref)
+                       memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
+                       memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
+               }
+       }
+       unlock(&mheap_.lock)
+
+       // Aggregate by size class.
+       smallfree := uint64(0)
+       memstats.nfree = mheap_.nlargefree
+       for i := 0; i < len(memstats.by_size); i++ {
+               memstats.nfree += mheap_.nsmallfree[i]
+               memstats.by_size[i].nfree = mheap_.nsmallfree[i]
+               memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
+               smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
+       }
+       memstats.nfree += memstats.tinyallocs
+       memstats.nmalloc += memstats.nfree
+
+       // Calculate derived stats.
+       memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
+       memstats.heap_alloc = memstats.alloc
+       memstats.heap_objects = memstats.nmalloc - memstats.nfree
+}
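
A worked instance of the bookkeeping updatememstats performs, separate from the CL: only frees are counted while the program runs, scanning the spans yields the live figures, and the totals are derived from the two.

package main

import "fmt"

func main() {
        var (
                liveObjects uint64 = 1500     // counted by scanning in-use spans
                liveBytes   uint64 = 3 << 20  // likewise, ref counts times elemsize
                nfree       uint64 = 8500     // accumulated while the program ran
                freedBytes  uint64 = 13 << 20 // likewise
        )
        nmalloc := nfree + liveObjects // total allocations ever performed
        totalAlloc := freedBytes + liveBytes
        heapObjects := nmalloc - nfree // back to the live object count
        fmt.Println(nmalloc, totalAlloc, heapObjects) // 10000 16777216 1500
}
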
+
+// Structure of arguments passed to function gc().
+// This allows the arguments to be passed via mcall.
+type gc_args struct {
+       start_time int64 // start time of GC in ns (just before stoptheworld)
+       eagersweep bool
+}
+
+func gcinit() {
+       if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
+               gothrow("runtime: size of Workbuf is suboptimal")
+       }
+
+       work.markfor = parforalloc(_MaxGcproc)
+       gcpercent = readgogc()
+       gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
+       gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
+}
+
+func gc_m() {
+       _g_ := getg()
+       gp := _g_.m.curg
+       casgstatus(gp, _Grunning, _Gwaiting)
+       gp.waitreason = "garbage collection"
+
+       var a gc_args
+       a.start_time = int64(_g_.m.scalararg[0]) | int64(uintptr(_g_.m.scalararg[1]))<<32
+       a.eagersweep = _g_.m.scalararg[2] != 0
+       gc(&a)
+
+       if nbadblock > 0 {
+               // Work out path from root to bad block.
+               for {
+                       gc(&a)
+                       if nbadblock >= int32(len(badblock)) {
+                               gothrow("cannot find path to bad pointer")
+                       }
+               }
+       }
+
+       casgstatus(gp, _Gwaiting, _Grunning)
+}
+
+func gc(args *gc_args) {
+       if _DebugGCPtrs {
+               print("GC start\n")
+       }
+
+       if debug.allocfreetrace > 0 {
+               tracegc()
+       }
+
+       _g_ := getg()
+       _g_.m.traceback = 2
+       t0 := args.start_time
+       work.tstart = args.start_time
+
+       var t1 int64
+       if debug.gctrace > 0 {
+               t1 = nanotime()
+       }
+
+       // Sweep whatever is not swept by bgsweep.
+       for sweepone() != ^uintptr(0) {
+               sweep.npausesweep++
+       }
+
+       // Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
+       // resizing/freeing allspans.
+       // New spans can be created while GC progresses, but they are not garbage for
+       // this round:
+       //  - new stack spans can be created even while the world is stopped.
+       //  - new malloc spans can be created during the concurrent sweep
+
+       // Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
+       lock(&mheap_.lock)
+       // Free the old cached sweep array if necessary.
+       if work.spans != nil && &work.spans[0] != &h_allspans[0] {
+               sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
+       }
+       // Cache the current array for marking.
+       mheap_.gcspans = mheap_.allspans
+       work.spans = h_allspans
+       unlock(&mheap_.lock)
+
+       work.nwait = 0
+       work.ndone = 0
+       work.nproc = uint32(gcprocs())
+       parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot)
+       if work.nproc > 1 {
+               noteclear(&work.alldone)
+               helpgc(int32(work.nproc))
+       }
+
+       var t2 int64
+       if debug.gctrace > 0 {
+               t2 = nanotime()
+       }
+
+       gchelperstart()
+       parfordo(work.markfor)
+       scanblock(0, 0, nil)
+
+       var t3 int64
+       if debug.gctrace > 0 {
+               t3 = nanotime()
+       }
+
+       if work.nproc > 1 {
+               notesleep(&work.alldone)
+       }
+
+       shrinkfinish()
+
+       cachestats()
+       // The next_gc calculation is tricky with concurrent sweep since we don't know the size of the live heap.
+       // Estimate what the live heap size was after the previous GC (for printing only).
+       heap0 := memstats.next_gc * 100 / (uint64(gcpercent) + 100)
+       // Conservatively set next_gc to a high value assuming that everything is live;
+       // concurrent/lazy sweep will reduce this number as it discovers new garbage.
+       memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
+
+       t4 := nanotime()
+       atomicstore64(&memstats.last_gc, uint64(unixnanotime())) // must be Unix time to make sense to user
+       memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(t4 - t0)
+       memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(t4)
+       memstats.pause_total_ns += uint64(t4 - t0)
+       memstats.numgc++
+       if memstats.debuggc {
+               print("pause ", t4-t0, "\n")
+       }
+
+       if debug.gctrace > 0 {
+               heap1 := memstats.heap_alloc
+               var stats gcstats
+               updatememstats(&stats)
+               if heap1 != memstats.heap_alloc {
+                       print("runtime: mstats skew: heap=", heap1, "/", memstats.heap_alloc, "\n")
+                       gothrow("mstats skew")
+               }
+               obj := memstats.nmalloc - memstats.nfree
+
+               stats.nprocyield += work.markfor.nprocyield
+               stats.nosyield += work.markfor.nosyield
+               stats.nsleep += work.markfor.nsleep
+
+               print("gc", memstats.numgc, "(", work.nproc, "): ",
+                       (t1-t0)/1000, "+", (t2-t1)/1000, "+", (t3-t2)/1000, "+", (t4-t3)/1000, " us, ",
+                       heap0>>20, " -> ", heap1>>20, " MB, ",
+                       obj, " (", memstats.nmalloc, "-", memstats.nfree, ") objects, ",
+                       gcount(), " goroutines, ",
+                       len(work.spans), "/", sweep.nbgsweep, "/", sweep.npausesweep, " sweeps, ",
+                       stats.nhandoff, "(", stats.nhandoffcnt, ") handoff, ",
+                       work.markfor.nsteal, "(", work.markfor.nstealcnt, ") steal, ",
+                       stats.nprocyield, "/", stats.nosyield, "/", stats.nsleep, " yields\n")
+               sweep.nbgsweep = 0
+               sweep.npausesweep = 0
+       }
+
+       // See the comment earlier in this function for why the following is needed.
+       // Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from the heap.
+       lock(&mheap_.lock)
+       // Free the old cached mark array if necessary.
+       if work.spans != nil && &work.spans[0] != &h_allspans[0] {
+               sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
+       }
+
+       // Cache the current array for sweeping.
+       mheap_.gcspans = mheap_.allspans
+       mheap_.sweepgen += 2
+       mheap_.sweepdone = 0
+       work.spans = h_allspans
+       sweep.spanidx = 0
+       unlock(&mheap_.lock)
+
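+       // If concurrent sweep is enabled and an eager sweep was not requested,
+       // hand the remaining spans to the background sweeper goroutine; otherwise
+       // sweep every span now, on this goroutine.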
+       if _ConcurrentSweep && !args.eagersweep {
+               lock(&gclock)
+               if !sweep.started {
+                       go bgsweep()
+                       sweep.started = true
+               } else if sweep.parked {
+                       sweep.parked = false
+                       ready(sweep.g)
+               }
+               unlock(&gclock)
+       } else {
+               // Sweep all spans eagerly.
+               for sweepone() != ^uintptr(0) {
+                       sweep.npausesweep++
+               }
+               // Do an additional mProf_GC, because all 'free' events are now real as well.
+               mProf_GC()
+       }
+
+       mProf_GC()
+       _g_.m.traceback = 0
+
+       if _DebugGCPtrs {
+               print("GC end\n")
+       }
+}
+
+func readmemstats_m() {
+       _g_ := getg()
+       stats := (*mstats)(_g_.m.ptrarg[0])
+       _g_.m.ptrarg[0] = nil
+
+       updatememstats(nil)
+
+       // The size of the trailing by_size array differs between the Go and C structs:
+       // NumSizeClasses was changed, but the Go struct cannot change for backward compatibility.
+       memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
+
+       // Stack numbers are part of the heap numbers; separate those out for user consumption.
+       stats.stacks_sys = stats.stacks_inuse
+       stats.heap_inuse -= stats.stacks_inuse
+       stats.heap_sys -= stats.stacks_inuse
+}
+
+//go:linkname readGCStats runtime/debug.readGCStats
+func readGCStats(pauses *[]uint64) {
+       onM(func() {
+               readGCStats_m(pauses)
+       })
+}
+
+func readGCStats_m(pauses *[]uint64) {
+       p := *pauses
+       // Calling code in runtime/debug should make the slice large enough.
+       if cap(p) < len(memstats.pause_ns)+3 {
+               gothrow("runtime: short slice passed to readGCStats")
+       }
+
+       // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
+       lock(&mheap_.lock)
+
+       n := memstats.numgc
+       if n > uint32(len(memstats.pause_ns)) {
+               n = uint32(len(memstats.pause_ns))
+       }
+
+       // The pause buffer is circular. The most recent pause is at
+       // pause_ns[(numgc-1)%len(pause_ns)], and earlier pauses are found by
+       // walking backward from there. We deliver the times most recent
+       // first (in p[0]).
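+       // For example (illustrative), with numgc=3 and a 256-entry buffer:
+       // p[0]=pause_ns[2], p[1]=pause_ns[1], p[2]=pause_ns[0], and the matching
+       // pause_end times follow at p[n]..p[2n-1].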
+       p = p[:cap(p)]
+       for i := uint32(0); i < n; i++ {
+               j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
+               p[i] = memstats.pause_ns[j]
+               p[n+i] = memstats.pause_end[j]
+       }
+
+       p[n+n] = memstats.last_gc
+       p[n+n+1] = uint64(memstats.numgc)
+       p[n+n+2] = memstats.pause_total_ns
+       unlock(&mheap_.lock)
+       *pauses = p[:n+n+3]
+}
+
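+// setGCPercent sets gcpercent (the GOGC value) and returns the previous
+// setting. Negative inputs are normalized to -1 (the GOGC=off setting:
+// no collection is triggered automatically by heap growth).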
+func setGCPercent(in int32) (out int32) {
+       lock(&mheap_.lock)
+       out = gcpercent
+       if in < 0 {
+               in = -1
+       }
+       gcpercent = in
+       unlock(&mheap_.lock)
+       return out
+}
+
+func gchelperstart() {
+       _g_ := getg()
+
+       if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
+               gothrow("gchelperstart: bad m->helpgc")
+       }
+       if _g_ != _g_.m.g0 {
+               gothrow("gchelper not running on g0 stack")
+       }
+}
+
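+// wakefing returns the finalizer goroutine to wake if it is parked waiting
+// (fingwait) and there is new work for it (fingwake); otherwise nil.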
+func wakefing() *g {
+       var res *g
+       lock(&finlock)
+       if fingwait && fingwake {
+               fingwait = false
+               fingwake = false
+               res = fing
+       }
+       unlock(&finlock)
+       return res
+}
+
+func addb(p *byte, n uintptr) *byte {
+       return (*byte)(add(unsafe.Pointer(p), n))
+}
+
+// unrollgcprog1 recursively unrolls the GC program in prog.
+// maskp is where to store the result.
+// ppos is a pointer to the current position in the mask, in bits.
+// If inplace is set, the bits are stored directly into the heap bitmap for
+// the object at maskp instead of into a separate mask.
+// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a
+// 2-bit-per-word mask for data/bss).
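+//
+// The program is a byte stream interpreted by the loop below (see also
+// unrollglobgcprog and unrollgcprog_m):
+//   insData  <siz: 1 byte> <ceil(siz*BitsPerPointer/8) bytes of 2-bit entries>
+//   insArray <count: ptrSize bytes, little-endian> <element program> insArrayEnd
+//   insEnd / insArrayEnd terminate the current (sub)program.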
+func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte {
+       arena_start := mheap_.arena_start
+       pos := *ppos
+       mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
+       for {
+               switch *prog {
+               default:
+                       gothrow("unrollgcprog: unknown instruction")
+
+               case insData:
+                       prog = addb(prog, 1)
+                       siz := int(*prog)
+                       prog = addb(prog, 1)
+                       p := (*[1 << 30]byte)(unsafe.Pointer(prog))
+                       for i := 0; i < siz; i++ {
+                               v := p[i/_PointersPerByte]
+                               v >>= (uint(i) % _PointersPerByte) * _BitsPerPointer
+                               v &= _BitsMask
+                               if inplace {
+                                       // Store directly into GC bitmap.
+                                       off := (uintptr(unsafe.Pointer(&mask[pos])) - arena_start) / ptrSize
+                                       bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+                                       shift := (off % wordsPerBitmapByte) * gcBits
+                                       if shift == 0 {
+                                               *bitp = 0
+                                       }
+                                       *bitp |= v << (shift + 2)
+                                       pos += ptrSize
+                               } else if sparse {
+                                       // 4-bits per word
+                                       v <<= (pos % 8) + 2
+                                       mask[pos/8] |= v
+                                       pos += gcBits
+                               } else {
+                                       // 2-bits per word
+                                       v <<= pos % 8
+                                       mask[pos/8] |= v
+                                       pos += _BitsPerPointer
+                               }
+                       }
+                       prog = addb(prog, round(uintptr(siz)*_BitsPerPointer, 8)/8)
+
+               case insArray:
+                       prog = (*byte)(add(unsafe.Pointer(prog), 1))
+                       siz := uintptr(0)
+                       for i := uintptr(0); i < ptrSize; i++ {
+                               siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
+                       }
+                       prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
+                       var prog1 *byte
+                       for i := uintptr(0); i < siz; i++ {
+                               prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse)
+                       }
+                       if *prog1 != insArrayEnd {
+                               gothrow("unrollgcprog: array does not end with insArrayEnd")
+                       }
+                       prog = (*byte)(add(unsafe.Pointer(prog1), 1))
+
+               case insArrayEnd, insEnd:
+                       *ppos = pos
+                       return prog
+               }
+       }
+}
+
+// Unrolls GC program prog for data/bss, returns dense GC mask.
+func unrollglobgcprog(prog *byte, size uintptr) bitvector {
+       masksize := round(round(size, ptrSize)/ptrSize*bitsPerPointer, 8) / 8
+       mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
+       mask[masksize] = 0xa1
+       pos := uintptr(0)
+       prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
+       if pos != size/ptrSize*bitsPerPointer {
+               print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*bitsPerPointer, "\n")
+               gothrow("unrollglobgcprog: bad program size")
+       }
+       if *prog != insEnd {
+               gothrow("unrollglobgcprog: program does not end with insEnd")
+       }
+       if mask[masksize] != 0xa1 {
+               gothrow("unrollglobgcprog: overflow")
+       }
+       return bitvector{int32(masksize * 8), &mask[0]}
+}
+
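+// unrollgcproginplace_m unrolls the GC program for type typ directly into the
+// heap bitmap for the object at v. The arguments arrive via m.ptrarg (v, typ)
+// and m.scalararg (size, size0); the program is unrolled, repeating as needed,
+// until it describes size0 bytes, and if size0 < size the following word is
+// marked BitsDead.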
+func unrollgcproginplace_m() {
+       _g_ := getg()
+
+       v := _g_.m.ptrarg[0]
+       typ := (*_type)(_g_.m.ptrarg[1])
+       size := _g_.m.scalararg[0]
+       size0 := _g_.m.scalararg[1]
+       _g_.m.ptrarg[0] = nil
+       _g_.m.ptrarg[1] = nil
+
+       pos := uintptr(0)
+       prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+       for pos != size0 {
+               unrollgcprog1((*byte)(v), prog, &pos, true, true)
+       }
+
+       // Mark the first word with bitBoundary.
+       arena_start := mheap_.arena_start
+       off := (uintptr(v) - arena_start) / ptrSize
+       bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+       shift := (off % wordsPerBitmapByte) * gcBits
+       *bitp |= bitBoundary << shift
+
+       // Mark word after last as BitsDead.
+       if size0 < size {
+               off := (uintptr(v) + size0 - arena_start) / ptrSize
+               bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+               shift := (off % wordsPerBitmapByte) * gcBits
+               *bitp &= uint8(^(bitPtrMask << shift) | uintptr(bitsDead)<<(shift+2))
+       }
+}
+
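+// unroll protects unrolling of GC programs into type masks (typ.gc[0]).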
+var unroll mutex
+
+// Unrolls GC program in typ.gc[1] into typ.gc[0]
+func unrollgcprog_m() {
+       _g_ := getg()
+
+       typ := (*_type)(_g_.m.ptrarg[0])
+       _g_.m.ptrarg[0] = nil
+
+       lock(&unroll)
+       mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
+       if *mask == 0 {
+               pos := uintptr(8) // skip the unroll flag
+               prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+               prog = unrollgcprog1(mask, prog, &pos, false, true)
+               if *prog != insEnd {
+                       gothrow("unrollgcprog: program does not end with insEnd")
+               }
+               if typ.size/ptrSize%2 != 0 {
+                       // repeat the program
+                       prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+                       unrollgcprog1(mask, prog, &pos, false, true)
+               }
+               // Atomic way to say mask[0] = 1: read the word containing mask[0],
+               // set its first byte to 1, and atomically store the whole word back.
+               x := *(*uintptr)(unsafe.Pointer(mask))
+               *(*byte)(unsafe.Pointer(&x)) = 1
+               atomicstoreuintptr((*uintptr)(unsafe.Pointer(mask)), x)
+       }
+       unlock(&unroll)
+}
+
+// markspan marks the span of memory at v as having n blocks of the given size.
+// If leftover is true, there is leftover space at the end of the span.
+func markspan(v unsafe.Pointer, size uintptr, n uintptr, leftover bool) {
+       if uintptr(v)+size*n > mheap_.arena_used || uintptr(v) < mheap_.arena_start {
+               gothrow("markspan: bad pointer")
+       }
+
+       // Find bits of the beginning of the span.
+       off := (uintptr(v) - uintptr(mheap_.arena_start)) / ptrSize
+       if off%wordsPerBitmapByte != 0 {
+               gothrow("markspan: unaligned length")
+       }
+       b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+
+       // Okay to use non-atomic ops here, because we control
+       // the entire span, and each bitmap byte has bits for only
+       // one span, so no other goroutines are changing these bitmap words.
+
+       if size == ptrSize {
+               // Possible only on 64-bit systems (the minimal size class is 8 bytes).
+               // Set memory to 0x11.
+               if (bitBoundary|bitsDead)<<gcBits|bitBoundary|bitsDead != 0x11 {
+                       gothrow("markspan: bad bits")
+               }
+               if n%(wordsPerBitmapByte*ptrSize) != 0 {
+                       gothrow("markspan: unaligned length")
+               }
+               b = b - n/wordsPerBitmapByte + 1 // find first byte
+               if b%ptrSize != 0 {
+                       gothrow("markspan: unaligned pointer")
+               }
+               for i := uintptr(0); i < n; i, b = i+wordsPerBitmapByte*ptrSize, b+ptrSize {
+                       *(*uintptr)(unsafe.Pointer(b)) = uintptrMask & 0x1111111111111111 // bitBoundary | bitsDead, repeated
+               }
+               return
+       }
+
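+       // General case: one bitmap byte per block. Blocks are size bytes apart
+       // in the arena, which is step = size/(ptrSize*wordsPerBitmapByte) bytes
+       // apart in the bitmap; the bitmap grows downward from just below
+       // arena_start, so b decreases by step on each iteration below.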
+       if leftover {
+               n++ // mark a boundary just past end of last block too
+       }
+       step := size / (ptrSize * wordsPerBitmapByte)
+       for i := uintptr(0); i < n; i, b = i+1, b-step {
+               *(*byte)(unsafe.Pointer(b)) = bitBoundary | bitsDead<<2
+       }
+}
+
+// unmarkspan unmarks the span of memory at v of length n bytes.
+func unmarkspan(v, n uintptr) {
+       if v+n > mheap_.arena_used || v < mheap_.arena_start {
+               gothrow("unmarkspan: bad pointer")
+       }
+
+       off := (v - mheap_.arena_start) / ptrSize // word offset
+       if off%(ptrSize*wordsPerBitmapByte) != 0 {
+               gothrow("unmarkspan: unaligned pointer")
+       }
+
+       b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+       n /= ptrSize
+       if n%(ptrSize*wordsPerBitmapByte) != 0 {
+               gothrow("unmarkspan: unaligned length")
+       }
+
+       // Okay to use non-atomic ops here, because we control
+       // the entire span, and each bitmap word has bits for only
+       // one span, so no other goroutines are changing these
+       // bitmap words.
+       n /= wordsPerBitmapByte
+       memclr(unsafe.Pointer(b-n+1), n)
+}
+
+func mHeap_MapBits(h *mheap) {
+       // Caller has added extra mappings to the arena.
+       // Add extra mappings of bitmap words as needed.
+       // We allocate extra bitmap pieces in chunks of bitmapChunk.
+       const bitmapChunk = 8192
+
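+       // One bitmap byte describes ptrSize*wordsPerBitmapByte bytes of arena
+       // (16 bytes per bitmap byte on 64-bit, assuming ptrSize=8 and
+       // wordsPerBitmapByte=2), so n here is the number of bitmap bytes needed
+       // for the portion of the arena in use, before rounding.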
+       n := (h.arena_used - h.arena_start) / (ptrSize * wordsPerBitmapByte)
+       n = round(n, bitmapChunk)
+       n = round(n, _PhysPageSize)
+       if h.bitmap_mapped >= n {
+               return
+       }
+
+       sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
+       h.bitmap_mapped = n
+}
+
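+// getgcmaskcb is the gentraceback callback used by getgcmask: once it sees the
+// frame containing target.sp it copies that frame into *target and returns
+// false to stop the traceback.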
+func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
+       target := (*stkframe)(ctxt)
+       if frame.sp <= target.sp && target.sp < frame.varp {
+               *target = *frame
+               return false
+       }
+       return true
+}
+
+// Returns GC type info for object p for testing.
+func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
+       *mask = nil
+       *len = 0
+
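+       // The object may be in the data segment, the bss segment, the heap, or
+       // a goroutine stack; each case below extracts the 2-bit pointer info
+       // for every word from the corresponding mask or bitmap.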
+       // data
+       if uintptr(unsafe.Pointer(&data)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&edata)) {
+               n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+               *len = n / ptrSize
+               *mask = &make([]byte, *len)[0]
+               for i := uintptr(0); i < n; i += ptrSize {
+                       off := (uintptr(p) + i - uintptr(unsafe.Pointer(&data))) / ptrSize
+                       bits := (*(*byte)(add(unsafe.Pointer(gcdatamask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
+                       *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+               }
+               return
+       }
+
+       // bss
+       if uintptr(unsafe.Pointer(&bss)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&ebss)) {
+               n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+               *len = n / ptrSize
+               *mask = &make([]byte, *len)[0]
+               for i := uintptr(0); i < n; i += ptrSize {
+                       off := (uintptr(p) + i - uintptr(unsafe.Pointer(&bss))) / ptrSize
+                       bits := (*(*byte)(add(unsafe.Pointer(gcbssmask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
+                       *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+               }
+               return
+       }
+
+       // heap
+       var n uintptr
+       var base uintptr
+       if mlookup(uintptr(p), &base, &n, nil) != 0 {
+               *len = n / ptrSize
+               *mask = &make([]byte, *len)[0]
+               for i := uintptr(0); i < n; i += ptrSize {
+                       off := (uintptr(base) + i - mheap_.arena_start) / ptrSize
+                       b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+                       shift := (off % wordsPerBitmapByte) * gcBits
+                       bits := (*(*byte)(unsafe.Pointer(b)) >> (shift + 2)) & bitsMask
+                       *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+               }
+               return
+       }
+
+       // stack
+       var frame stkframe
+       frame.sp = uintptr(p)
+       _g_ := getg()
+       gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
+       if frame.fn != nil {
+               f := frame.fn
+               targetpc := frame.continpc
+               if targetpc == 0 {
+                       return
+               }
+               if targetpc != f.entry {
+                       targetpc--
+               }
+               pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+               if pcdata == -1 {
+                       return
+               }
+               stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+               if stkmap == nil || stkmap.n <= 0 {
+                       return
+               }
+               bv := stackmapdata(stkmap, pcdata)
+               size := uintptr(bv.n) / bitsPerPointer * ptrSize
+               n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+               *len = n / ptrSize
+               *mask = &make([]byte, *len)[0]
+               for i := uintptr(0); i < n; i += ptrSize {
+                       off := (uintptr(p) + i - frame.varp + size) / ptrSize
+                       bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*bitsPerPointer/8))) >> ((off * bitsPerPointer) % 8)) & bitsMask
+                       *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+               }
+       }
+}
+
+func unixnanotime() int64 {
+       var now int64
+       gc_unixnanotime(&now)
+       return now
+}
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
deleted file mode 100644 (file)
index 7754bad..0000000
+++ /dev/null
@@ -1,2010 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Garbage collector (GC).
-//
-// GC is:
-// - mark&sweep
-// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
-// - parallel (up to MaxGcproc threads)
-// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
-// - non-moving/non-compacting
-// - full (non-partial)
-//
-// GC rate.
-// Next GC is after we've allocated an extra amount of memory proportional to
-// the amount already in use. The proportion is controlled by GOGC environment variable
-// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
-// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
-// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
-// (and also the amount of extra memory used).
-//
-// Concurrent sweep.
-// The sweep phase proceeds concurrently with normal program execution.
-// The heap is swept span-by-span both lazily (when a goroutine needs another span)
-// and concurrently in a background goroutine (this helps programs that are not CPU bound).
-// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
-// and so next_gc calculation is tricky and happens as follows.
-// At the end of the stop-the-world phase next_gc is conservatively set based on total
-// heap size; all spans are marked as "needs sweeping".
-// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
-// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
-// closer to the target value. However, this is not enough to avoid over-allocating memory.
-// Consider that a goroutine wants to allocate a new span for a large object and
-// there are no free swept spans, but there are small-object unswept spans.
-// If the goroutine naively allocates a new span, it can surpass the yet-unknown
-// target next_gc value. In order to prevent such cases (1) when a goroutine needs
-// to allocate a new small-object span, it sweeps small-object spans for the same
-// object size until it frees at least one object; (2) when a goroutine needs to
-// allocate large-object span from heap, it sweeps spans until it frees at least
-// that many pages into heap. Together these two measures ensure that we don't surpass
-// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
-// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
-// but there can still be other one-page unswept spans which could be combined into a two-page span.
-// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
-// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
-// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
-// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
-// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
-// The finalizer goroutine is kicked off only when all spans are swept.
-// When the next GC starts, it sweeps all not-yet-swept spans (if any).
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "stack.h"
-#include "mgc0.h"
-#include "chan.h"
-#include "race.h"
-#include "type.h"
-#include "typekind.h"
-#include "funcdata.h"
-#include "textflag.h"
-
-enum {
-       Debug           = 0,
-       DebugPtrs       = 0, // if 1, print trace of every pointer load during GC
-       ConcurrentSweep = 1,
-
-       WorkbufSize     = 4*1024,
-       FinBlockSize    = 4*1024,
-       RootData        = 0,
-       RootBss         = 1,
-       RootFinalizers  = 2,
-       RootSpans       = 3,
-       RootFlushCaches = 4,
-       RootCount       = 5,
-};
-
-// ptrmask for an allocation containing a single pointer.
-static byte oneptr[] = {BitsPointer};
-
-// Initialized from $GOGC.  GOGC=off means no gc.
-extern int32 runtime·gcpercent;
-
-// Holding worldsema grants an M the right to try to stop the world.
-// The procedure is:
-//
-//     runtime·semacquire(&runtime·worldsema);
-//     m->gcing = 1;
-//     runtime·stoptheworld();
-//
-//     ... do stuff ...
-//
-//     m->gcing = 0;
-//     runtime·semrelease(&runtime·worldsema);
-//     runtime·starttheworld();
-//
-uint32 runtime·worldsema = 1;
-
-typedef struct Workbuf Workbuf;
-struct Workbuf
-{
-       LFNode  node; // must be first
-       uintptr nobj;
-       byte*   obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
-};
-
-extern byte runtime·data[];
-extern byte runtime·edata[];
-extern byte runtime·bss[];
-extern byte runtime·ebss[];
-
-extern byte runtime·gcdata[];
-extern byte runtime·gcbss[];
-
-Mutex  runtime·finlock;       // protects the following variables
-G*     runtime·fing;          // goroutine that runs finalizers
-FinBlock*      runtime·finq;  // list of finalizers that are to be executed
-FinBlock*      runtime·finc;  // cache of free blocks
-static byte finptrmask[FinBlockSize/PtrSize/PointersPerByte];
-bool   runtime·fingwait;
-bool   runtime·fingwake;
-FinBlock       *runtime·allfin;       // list of all blocks
-
-BitVector      runtime·gcdatamask;
-BitVector      runtime·gcbssmask;
-
-Mutex  runtime·gclock;
-
-static uintptr badblock[1024];
-static int32   nbadblock;
-
-static Workbuf* getempty(Workbuf*);
-static Workbuf* getfull(Workbuf*);
-static void    putempty(Workbuf*);
-static Workbuf* handoff(Workbuf*);
-static void    gchelperstart(void);
-static void    flushallmcaches(void);
-static bool    scanframe(Stkframe *frame, void *unused);
-static void    scanstack(G *gp);
-static BitVector       unrollglobgcprog(byte *prog, uintptr size);
-
-void runtime·bgsweep(void);
-static FuncVal bgsweepv = {runtime·bgsweep};
-
-typedef struct WorkData WorkData;
-struct WorkData {
-       uint64  full;  // lock-free list of full blocks
-       uint64  empty; // lock-free list of empty blocks
-       byte    pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
-       uint32  nproc;
-       int64   tstart;
-       volatile uint32 nwait;
-       volatile uint32 ndone;
-       Note    alldone;
-       ParFor* markfor;
-
-       // Copy of mheap.allspans for marker or sweeper.
-       MSpan** spans;
-       uint32  nspan;
-};
-WorkData runtime·work;
-
-// Is _cgo_allocate linked into the binary?
-static bool
-have_cgo_allocate(void)
-{
-       extern  byte    go·weak·runtime·_cgo_allocate_internal[1];
-       return go·weak·runtime·_cgo_allocate_internal != nil;
-}
-
-// scanblock scans a block of n bytes starting at pointer b for references
-// to other objects, scanning any it finds recursively until there are no
-// unscanned objects left.  Instead of using an explicit recursion, it keeps
-// a work list in the Workbuf* structures and loops in the main function
-// body.  Keeping an explicit work list is easier on the stack allocator and
-// more efficient.
-static void
-scanblock(byte *b, uintptr n, byte *ptrmask)
-{
-       byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
-       uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
-       Workbuf *wbuf;
-       Iface *iface;
-       Eface *eface;
-       Type *typ;
-       MSpan *s;
-       pageID k;
-       bool keepworking;
-
-       // Cache memory arena parameters in local vars.
-       arena_start = runtime·mheap.arena_start;
-       arena_used = runtime·mheap.arena_used;
-
-       wbuf = getempty(nil);
-       nobj = wbuf->nobj;
-       wp = &wbuf->obj[nobj];
-       keepworking = b == nil;
-       scanbufpos = 0;
-       for(i = 0; i < nelem(scanbuf); i++)
-               scanbuf[i] = nil;
-
-       ptrbitp = nil;
-
-       // ptrmask can have 2 possible values:
-       // 1. nil - obtain pointer mask from GC bitmap.
-       // 2. pointer to a compact mask (for stacks and data).
-       if(b != nil)
-               goto scanobj;
-       for(;;) {
-               if(nobj == 0) {
-                       // Out of work in workbuf.
-                       // First, see if there is any work in scanbuf.
-                       for(i = 0; i < nelem(scanbuf); i++) {
-                               b = scanbuf[scanbufpos];
-                               scanbuf[scanbufpos++] = nil;
-                               scanbufpos %= nelem(scanbuf);
-                               if(b != nil) {
-                                       n = arena_used - b; // scan until bitBoundary or BitsDead
-                                       ptrmask = nil; // use GC bitmap for pointer info
-                                       goto scanobj;
-                               }
-                       }
-                       if(!keepworking) {
-                               putempty(wbuf);
-                               return;
-                       }
-                       // Refill workbuf from global queue.
-                       wbuf = getfull(wbuf);
-                       if(wbuf == nil)
-                               return;
-                       nobj = wbuf->nobj;
-                       wp = &wbuf->obj[nobj];
-               }
-
-               // If another proc wants a pointer, give it some.
-               if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
-                       wbuf->nobj = nobj;
-                       wbuf = handoff(wbuf);
-                       nobj = wbuf->nobj;
-                       wp = &wbuf->obj[nobj];
-               }
-
-               wp--;
-               nobj--;
-               b = *wp;
-               n = arena_used - b; // scan until next bitBoundary or BitsDead
-               ptrmask = nil; // use GC bitmap for pointer info
-
-       scanobj:
-               if(DebugPtrs)
-                       runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
-               // Find bits of the beginning of the object.
-               if(ptrmask == nil) {
-                       off = (uintptr*)b - (uintptr*)arena_start;
-                       ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
-               }
-               for(i = 0; i < n; i += PtrSize) {
-                       obj = nil;
-                       // Find bits for this word.
-                       if(ptrmask == nil) {
-                               // Check if we have reached the end of the span.
-                               if((((uintptr)b+i)%PageSize) == 0 &&
-                                       runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
-                                       break;
-                               // Consult GC bitmap.
-                               bits = *ptrbitp;
-
-                               if(wordsPerBitmapByte != 2)
-                                       runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
-                               j = ((uintptr)b+i)/PtrSize & 1;
-                               ptrbitp -= j;
-                               bits >>= gcBits*j;
-
-                               if((bits&bitBoundary) != 0 && i != 0)
-                                       break; // reached beginning of the next object
-                               bits = (bits>>2)&BitsMask;
-                               if(bits == BitsDead)
-                                       break; // reached no-scan part of the object
-                       } else // dense mask (stack or data)
-                               bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
-
-                       if(bits <= BitsScalar) // BitsScalar || BitsDead
-                               continue;
-                       if(bits == BitsPointer) {
-                               obj = *(byte**)(b+i);
-                               obj0 = obj;
-                               goto markobj;
-                       }
-
-                       // With those three out of the way, must be multi-word.
-                       if(Debug && bits != BitsMultiWord)
-                               runtime·throw("unexpected garbage collection bits");
-                       // Find the next pair of bits.
-                       if(ptrmask == nil) {
-                               bits = *ptrbitp;
-                               j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
-                               ptrbitp -= j;
-                               bits >>= gcBits*j;
-                               bits = (bits>>2)&BitsMask;
-                       } else
-                               bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
-
-                       if(Debug && bits != BitsIface && bits != BitsEface)
-                               runtime·throw("unexpected garbage collection bits");
-
-                       if(bits == BitsIface) {
-                               iface = (Iface*)(b+i);
-                               if(iface->tab != nil) {
-                                       typ = iface->tab->type;
-                                       if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
-                                               obj = iface->data;
-                               }
-                       } else {
-                               eface = (Eface*)(b+i);
-                               typ = eface->type;
-                               if(typ != nil) {
-                                       if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
-                                               obj = eface->data;
-                               }
-                       }
-
-                       i += PtrSize;
-
-                       obj0 = obj;
-               markobj:
-                       // At this point we have extracted the next potential pointer.
-                       // Check if it points into heap.
-                       if(obj == nil)
-                               continue;
-                       if(obj < arena_start || obj >= arena_used) {
-                               if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
-                                       s = nil;
-                                       goto badobj;
-                               }
-                               continue;
-                       }
-                       // Mark the object.
-                       obj = (byte*)((uintptr)obj & ~(PtrSize-1));
-                       off = (uintptr*)obj - (uintptr*)arena_start;
-                       bitp = arena_start - off/wordsPerBitmapByte - 1;
-                       shift = (off % wordsPerBitmapByte) * gcBits;
-                       xbits = *bitp;
-                       bits = (xbits >> shift) & bitMask;
-                       if((bits&bitBoundary) == 0) {
-                               // Not a beginning of a block, consult span table to find the block beginning.
-                               k = (uintptr)obj>>PageShift;
-                               x = k;
-                               x -= (uintptr)arena_start>>PageShift;
-                               s = runtime·mheap.spans[x];
-                               if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) {
-                                       // Stack pointers lie within the arena bounds but are not part of the GC heap.
-                                       // Ignore them.
-                                       if(s != nil && s->state == MSpanStack)
-                                               continue;
-                               
-                               badobj:
-                                       // If cgo_allocate is linked into the binary, it can allocate
-                                       // memory as []unsafe.Pointer that may not contain actual
-                                       // pointers and must be scanned conservatively.
-                                       // In this case alone, allow the bad pointer.
-                                       if(have_cgo_allocate() && ptrmask == nil)
-                                               continue;
-
-                                       // Anything else indicates a bug somewhere.
-                                       // If we're in the middle of chasing down a different bad pointer,
-                                       // don't confuse the trace by printing about this one.
-                                       if(nbadblock > 0)
-                                               continue;
-
-                                       runtime·printf("runtime: garbage collector found invalid heap pointer *(%p+%p)=%p", b, i, obj);
-                                       if(s == nil)
-                                               runtime·printf(" s=nil\n");
-                                       else
-                                               runtime·printf(" span=%p-%p-%p state=%d\n", (uintptr)s->start<<PageShift, s->limit, (uintptr)(s->start+s->npages)<<PageShift, s->state);
-                                       if(ptrmask != nil)
-                                               runtime·throw("invalid heap pointer");
-                                       // Add to badblock list, which will cause the garbage collection
-                                       // to keep repeating until it has traced the chain of pointers
-                                       // leading to obj all the way back to a root.
-                                       if(nbadblock == 0)
-                                               badblock[nbadblock++] = (uintptr)b;
-                                       continue;
-                               }
-                               p = (byte*)((uintptr)s->start<<PageShift);
-                               if(s->sizeclass != 0) {
-                                       size = s->elemsize;
-                                       idx = ((byte*)obj - p)/size;
-                                       p = p+idx*size;
-                               }
-                               if(p == obj) {
-                                       runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
-                                               p, s->start*PageSize, s->limit);
-                                       runtime·throw("failed to find block beginning");
-                               }
-                               obj = p;
-                               goto markobj;
-                       }
-                       if(DebugPtrs)
-                               runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
-
-                       if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
-                               // Running garbage collection again because
-                               // we want to find the path from a root to a bad pointer.
-                               // Found possible next step; extend or finish path.
-                               for(j=0; j<nbadblock; j++)
-                                       if(badblock[j] == (uintptr)b)
-                                               goto AlreadyBad;
-                               runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
-                               if(ptrmask != nil)
-                                       runtime·throw("bad pointer");
-                               if(nbadblock >= nelem(badblock))
-                                       runtime·throw("badblock trace too long");
-                               badblock[nbadblock++] = (uintptr)b;
-                       AlreadyBad:;
-                       }
-
-                       // Now we have bits, bitp, and shift correct for
-                       // obj pointing at the base of the object.
-                       // Only care about not marked objects.
-                       if((bits&bitMarked) != 0)
-                               continue;
-                       // If obj size is greater than 8, then each byte of GC bitmap
-                       // contains info for at most one object. In such case we use
-                       // non-atomic byte store to mark the object. This can lead
-                       // to double enqueue of the object for scanning, but scanning
-                       // is an idempotent operation, so it is OK. This cannot lead
-                       // to bitmap corruption because the single marked bit is the
-                       // only thing that can change in the byte.
-                       // For 8-byte objects we use non-atomic store, if the other
-                       // quadruple is already marked. Otherwise we resort to CAS
-                       // loop for marking.
-                       if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
-                               runtime·work.nproc == 1)
-                               *bitp = xbits | (bitMarked<<shift);
-                       else
-                               runtime·atomicor8(bitp, bitMarked<<shift);
-
-                       if(((xbits>>(shift+2))&BitsMask) == BitsDead)
-                               continue;  // noscan object
-
-                       // Queue the obj for scanning.
-                       PREFETCH(obj);
-                       p = scanbuf[scanbufpos];
-                       scanbuf[scanbufpos++] = obj;
-                       scanbufpos %= nelem(scanbuf);
-                       if(p == nil)
-                               continue;
-
-                       // If workbuf is full, obtain an empty one.
-                       if(nobj >= nelem(wbuf->obj)) {
-                               wbuf->nobj = nobj;
-                               wbuf = getempty(wbuf);
-                               nobj = wbuf->nobj;
-                               wp = &wbuf->obj[nobj];
-                       }
-                       *wp = p;
-                       wp++;
-                       nobj++;
-               }
-               if(DebugPtrs)
-                       runtime·printf("end scanblock %p +%p %p\n", b, n, ptrmask);
-
-               if(Debug && ptrmask == nil) {
-                       // For heap objects ensure that we did not overscan.
-                       n = 0;
-                       p = nil;
-                       if(!runtime·mlookup(b, &p, &n, nil) || b != p || i > n) {
-                               runtime·printf("runtime: scanned (%p,%p), heap object (%p,%p)\n", b, i, p, n);
-                               runtime·throw("scanblock: scanned invalid object");
-                       }
-               }
-       }
-}
-
-static void
-markroot(ParFor *desc, uint32 i)
-{
-       FinBlock *fb;
-       MSpan *s;
-       uint32 spanidx, sg;
-       G *gp;
-       void *p;
-       uint32 status;
-       bool restart;
-
-       USED(&desc);
-       // Note: if you add a case here, please also update heapdump.c:dumproots.
-       switch(i) {
-       case RootData:
-               scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
-               break;
-
-       case RootBss:
-               scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
-               break;
-
-       case RootFinalizers:
-               for(fb=runtime·allfin; fb; fb=fb->alllink)
-                       scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
-               break;
-
-       case RootSpans:
-               // mark MSpan.specials
-               sg = runtime·mheap.sweepgen;
-               for(spanidx=0; spanidx<runtime·work.nspan; spanidx++) {
-                       Special *sp;
-                       SpecialFinalizer *spf;
-
-                       s = runtime·work.spans[spanidx];
-                       if(s->state != MSpanInUse)
-                               continue;
-                       if(s->sweepgen != sg) {
-                               runtime·printf("sweep %d %d\n", s->sweepgen, sg);
-                               runtime·throw("gc: unswept span");
-                       }
-                       for(sp = s->specials; sp != nil; sp = sp->next) {
-                               if(sp->kind != KindSpecialFinalizer)
-                                       continue;
-                               // don't mark finalized object, but scan it so we
-                               // retain everything it points to.
-                               spf = (SpecialFinalizer*)sp;
-                               // A finalizer can be set for an inner byte of an object, find object beginning.
-                               p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
-                               scanblock(p, s->elemsize, nil);
-                               scanblock((void*)&spf->fn, PtrSize, oneptr);
-                       }
-               }
-               break;
-
-       case RootFlushCaches:
-               flushallmcaches();
-               break;
-
-       default:
-               // the rest is scanning goroutine stacks
-               if(i - RootCount >= runtime·allglen)
-                       runtime·throw("markroot: bad index");
-               gp = runtime·allg[i - RootCount];
-               // remember when we've first observed the G blocked
-               // needed only to output in traceback
-               status = runtime·readgstatus(gp);
-               if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
-                       gp->waitsince = runtime·work.tstart;
-               // Shrink a stack if not much of it is being used.
-               runtime·shrinkstack(gp);
-               if(runtime·readgstatus(gp) == Gdead) 
-                       gp->gcworkdone = true;
-               else 
-                       gp->gcworkdone = false; 
-               restart = runtime·stopg(gp);
-               scanstack(gp);
-               if(restart)
-                       runtime·restartg(gp);
-               break;
-       }
-}
-
-// Get an empty work buffer off the work.empty list,
-// allocating new buffers as needed.
-static Workbuf*
-getempty(Workbuf *b)
-{
-       MCache *c;
-
-       if(b != nil)
-               runtime·lfstackpush(&runtime·work.full, &b->node);
-       b = nil;
-       c = g->m->mcache;
-       if(c->gcworkbuf != nil) {
-               b = c->gcworkbuf;
-               c->gcworkbuf = nil;
-       }
-       if(b == nil)
-               b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
-       if(b == nil)
-               b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
-       b->nobj = 0;
-       return b;
-}
-
-static void
-putempty(Workbuf *b)
-{
-       MCache *c;
-
-       c = g->m->mcache;
-       if(c->gcworkbuf == nil) {
-               c->gcworkbuf = b;
-               return;
-       }
-       runtime·lfstackpush(&runtime·work.empty, &b->node);
-}
-
-void
-runtime·gcworkbuffree(void *b)
-{
-       if(b != nil)
-               putempty(b);
-}
-
-// Get a full work buffer off the work.full list, or return nil.
-static Workbuf*
-getfull(Workbuf *b)
-{
-       int32 i;
-
-       if(b != nil)
-               runtime·lfstackpush(&runtime·work.empty, &b->node);
-       b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
-       if(b != nil || runtime·work.nproc == 1)
-               return b;
-
-       runtime·xadd(&runtime·work.nwait, +1);
-       for(i=0;; i++) {
-               if(runtime·work.full != 0) {
-                       runtime·xadd(&runtime·work.nwait, -1);
-                       b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
-                       if(b != nil)
-                               return b;
-                       runtime·xadd(&runtime·work.nwait, +1);
-               }
-               if(runtime·work.nwait == runtime·work.nproc)
-                       return nil;
-               if(i < 10) {
-                       g->m->gcstats.nprocyield++;
-                       runtime·procyield(20);
-               } else if(i < 20) {
-                       g->m->gcstats.nosyield++;
-                       runtime·osyield();
-               } else {
-                       g->m->gcstats.nsleep++;
-                       runtime·usleep(100);
-               }
-       }
-}
-
-static Workbuf*
-handoff(Workbuf *b)
-{
-       int32 n;
-       Workbuf *b1;
-
-       // Make new buffer with half of b's pointers.
-       b1 = getempty(nil);
-       n = b->nobj/2;
-       b->nobj -= n;
-       b1->nobj = n;
-       runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
-       g->m->gcstats.nhandoff++;
-       g->m->gcstats.nhandoffcnt += n;
-
-       // Put b on full list - let first half of b get stolen.
-       runtime·lfstackpush(&runtime·work.full, &b->node);
-       return b1;
-}
-
-BitVector
-runtime·stackmapdata(StackMap *stackmap, int32 n)
-{
-       if(n < 0 || n >= stackmap->n)
-               runtime·throw("stackmapdata: index out of range");
-       return (BitVector){stackmap->nbit, stackmap->bytedata + n*((stackmap->nbit+31)/32*4)};
-}
-
-// Scan a stack frame: local variables and function arguments/results.
-static bool
-scanframe(Stkframe *frame, void *unused)
-{
-       Func *f;
-       StackMap *stackmap;
-       BitVector bv;
-       uintptr size, minsize;
-       uintptr targetpc;
-       int32 pcdata;
-
-       USED(unused);
-       f = frame->fn;
-       targetpc = frame->continpc;
-       if(targetpc == 0) {
-               // Frame is dead.
-               return true;
-       }
-       if(Debug > 1)
-               runtime·printf("scanframe %s\n", runtime·funcname(f));
-       if(targetpc != f->entry)
-               targetpc--;
-       pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
-       if(pcdata == -1) {
-               // We do not have a valid pcdata value but there might be a
-               // stackmap for this function.  It is likely that we are looking
-               // at the function prologue, assume so and hope for the best.
-               pcdata = 0;
-       }
-
-       // Scan local variables if stack frame has been allocated.
-       size = frame->varp - frame->sp;
-       if(thechar != '6' && thechar != '8')
-               minsize = sizeof(uintptr);
-       else
-               minsize = 0;
-       if(size > minsize) {
-               stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-               if(stackmap == nil || stackmap->n <= 0) {
-                       runtime·printf("runtime: frame %s untyped locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
-                       runtime·throw("missing stackmap");
-               }
-
-               // Locals bitmap information, scan just the pointers in locals.
-               if(pcdata < 0 || pcdata >= stackmap->n) {
-                       // don't know where we are
-                       runtime·printf("runtime: pcdata is %d and %d locals stack map entries for %s (targetpc=%p)\n",
-                               pcdata, stackmap->n, runtime·funcname(f), targetpc);
-                       runtime·throw("scanframe: bad symbol table");
-               }
-               bv = runtime·stackmapdata(stackmap, pcdata);
-               size = (bv.n * PtrSize) / BitsPerPointer;
-               scanblock((byte*)(frame->varp - size), bv.n/BitsPerPointer*PtrSize, bv.bytedata);
-       }
-
-       // Scan arguments.
-       if(frame->arglen > 0) {
-               if(frame->argmap != nil)
-                       bv = *frame->argmap;
-               else {
-                       stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
-                       if(stackmap == nil || stackmap->n <= 0) {
-                               runtime·printf("runtime: frame %s untyped args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
-                               runtime·throw("missing stackmap");
-                       }
-                       if(pcdata < 0 || pcdata >= stackmap->n) {
-                               // don't know where we are
-                               runtime·printf("runtime: pcdata is %d and %d args stack map entries for %s (targetpc=%p)\n",
-                                       pcdata, stackmap->n, runtime·funcname(f), targetpc);
-                               runtime·throw("scanframe: bad symbol table");
-                       }
-                       bv = runtime·stackmapdata(stackmap, pcdata);
-               }
-               scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
-       }
-       return true;
-}
-
-static void
-scanstack(G *gp)
-{
-       M *mp;
-       bool (*fn)(Stkframe*, void*);
-
-       if(runtime·readgstatus(gp)&Gscan == 0) {
-               runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-               runtime·throw("mark - bad status");
-       }
-
-       switch(runtime·readgstatus(gp)&~Gscan) {
-       default:
-               runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-               runtime·throw("mark - bad status");
-       case Gdead:
-               return;
-       case Grunning:
-               runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-               runtime·throw("mark - world not stopped");
-       case Grunnable:
-       case Gsyscall:
-       case Gwaiting:
-               break;
-       }
-
-       if(gp == g)
-               runtime·throw("can't scan our own stack");
-       if((mp = gp->m) != nil && mp->helpgc)
-               runtime·throw("can't scan gchelper stack");
-
-       fn = scanframe;
-       runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &fn, nil, 0);
-       runtime·tracebackdefers(gp, &fn, nil);
-}
-
-// The gp has been moved to a gc safepoint. If there is gcphase specific
-// work it is done here. 
-void
-runtime·gcphasework(G *gp)
-{
-       switch(runtime·gcphase) {
-       default:
-               runtime·throw("gcphasework in bad gcphase");
-       case GCoff:
-       case GCquiesce:
-       case GCstw:
-       case GCsweep:
-               // No work for now.
-               break;
-       case GCmark:
-               // Disabled until concurrent GC is implemented
-               // but indicate the scan has been done. 
-               // scanstack(gp);
-               break;
-       }
-       gp->gcworkdone = true;
-}
-
-#pragma dataflag NOPTR
-static byte finalizer1[] = {
-       // Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
-       // Each byte describes 4 words.
-       // Need 4 Finalizers described by 5 bytes before pattern repeats:
-       //      ptr ptr uintptr ptr ptr
-       //      ptr ptr uintptr ptr ptr
-       //      ptr ptr uintptr ptr ptr
-       //      ptr ptr uintptr ptr ptr
-       // aka
-       //      ptr ptr uintptr ptr
-       //      ptr ptr ptr uintptr
-       //      ptr ptr ptr ptr
-       //      uintptr ptr ptr ptr
-       //      ptr uintptr ptr ptr
-       // Assumptions about Finalizer layout checked below.
-       BitsPointer | BitsPointer<<2 | BitsScalar<<4 | BitsPointer<<6,
-       BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsScalar<<6,
-       BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
-       BitsScalar | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
-       BitsPointer | BitsScalar<<2 | BitsPointer<<4 | BitsPointer<<6,
-};
-
-void
-runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
-{
-       FinBlock *block;
-       Finalizer *f;
-       int32 i;
-
-       runtime·lock(&runtime·finlock);
-       if(runtime·finq == nil || runtime·finq->cnt == runtime·finq->cap) {
-               if(runtime·finc == nil) {
-                       runtime·finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
-                       runtime·finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
-                       runtime·finc->alllink = runtime·allfin;
-                       runtime·allfin = runtime·finc;
-                       if(finptrmask[0] == 0) {
-                               // Build pointer mask for Finalizer array in block.
-                               // Check assumptions made in finalizer1 array above.
-                               if(sizeof(Finalizer) != 5*PtrSize ||
-                                       offsetof(Finalizer, fn) != 0 ||
-                                       offsetof(Finalizer, arg) != PtrSize ||
-                                       offsetof(Finalizer, nret) != 2*PtrSize ||
-                                       offsetof(Finalizer, fint) != 3*PtrSize ||
-                                       offsetof(Finalizer, ot) != 4*PtrSize ||
-                                       BitsPerPointer != 2) {
-                                       runtime·throw("finalizer out of sync");
-                               }
-                               for(i=0; i<nelem(finptrmask); i++)
-                                       finptrmask[i] = finalizer1[i%nelem(finalizer1)];
-                       }
-               }
-               block = runtime·finc;
-               runtime·finc = block->next;
-               block->next = runtime·finq;
-               runtime·finq = block;
-       }
-       f = &runtime·finq->fin[runtime·finq->cnt];
-       runtime·finq->cnt++;
-       f->fn = fn;
-       f->nret = nret;
-       f->fint = fint;
-       f->ot = ot;
-       f->arg = p;
-       runtime·fingwake = true;
-       runtime·unlock(&runtime·finlock);
-}
-
-void
-runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
-{
-       FinBlock *fb;
-       Finalizer *f;
-       uintptr i;
-
-       for(fb = runtime·allfin; fb; fb = fb->alllink) {
-               for(i = 0; i < fb->cnt; i++) {
-                       f = &fb->fin[i];
-                       callback(f->fn, f->arg, f->nret, f->fint, f->ot);
-               }
-       }
-}
-
-void
-runtime·MSpan_EnsureSwept(MSpan *s)
-{
-       uint32 sg;
-
-       // Caller must disable preemption.
-       // Otherwise when this function returns the span can become unswept again
-       // (if GC is triggered on another goroutine).
-       if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
-               runtime·throw("MSpan_EnsureSwept: m is not locked");
-
-       sg = runtime·mheap.sweepgen;
-       if(runtime·atomicload(&s->sweepgen) == sg)
-               return;
-       if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-               runtime·MSpan_Sweep(s, false);
-               return;
-       }
-       // unfortunate condition, and we don't have efficient means to wait
-       while(runtime·atomicload(&s->sweepgen) != sg)
-               runtime·osyield();
-}
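
sweepgen is a small per-span state machine relative to mheap.sweepgen (sg): sg-2 means the span needs sweeping, sg-1 means it is currently being swept, and sg means it has been swept this cycle. A hedged Go sketch of the claim-or-wait logic above, with a made-up span type standing in for MSpan:

package sweepsketch

import (
	"runtime"
	"sync/atomic"
)

// span stands in for MSpan; only the sweepgen field matters for this sketch.
type span struct{ sweepgen uint32 }

// ensureSwept mirrors MSpan_EnsureSwept above, with sg = mheap.sweepgen.
func ensureSwept(s *span, sg uint32, sweep func(*span)) {
	if atomic.LoadUint32(&s.sweepgen) == sg {
		return // already swept this cycle
	}
	// Try to claim the span for sweeping ourselves (sg-2 -> sg-1).
	if atomic.CompareAndSwapUint32(&s.sweepgen, sg-2, sg-1) {
		sweep(s)
		atomic.StoreUint32(&s.sweepgen, sg) // the real sweep publishes this itself
		return
	}
	// Someone else is sweeping it; wait until they publish sg.
	for atomic.LoadUint32(&s.sweepgen) != sg {
		runtime.Gosched() // stand-in for osyield
	}
}
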
-
-// Sweep frees or collects finalizers for blocks not marked in the mark phase.
-// It clears the mark bits in preparation for the next GC round.
-// Returns true if the span was returned to heap.
-// If preserve=true, don't return it to heap nor relink in MCentral lists;
-// caller takes care of it.
-bool
-runtime·MSpan_Sweep(MSpan *s, bool preserve)
-{
-       int32 cl, n, npages, nfree;
-       uintptr size, off, step;
-       uint32 sweepgen;
-       byte *p, *bitp, shift, xbits, bits;
-       MCache *c;
-       byte *arena_start;
-       MLink head, *end, *link;
-       Special *special, **specialp, *y;
-       bool res, sweepgenset;
-
-       // It's critical that we enter this function with preemption disabled,
-       // GC must not start while we are in the middle of this function.
-       if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
-               runtime·throw("MSpan_Sweep: m is not locked");
-       sweepgen = runtime·mheap.sweepgen;
-       if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
-               runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
-                       s->state, s->sweepgen, sweepgen);
-               runtime·throw("MSpan_Sweep: bad span state");
-       }
-       arena_start = runtime·mheap.arena_start;
-       cl = s->sizeclass;
-       size = s->elemsize;
-       if(cl == 0) {
-               n = 1;
-       } else {
-               // Chunk full of small blocks.
-               npages = runtime·class_to_allocnpages[cl];
-               n = (npages << PageShift) / size;
-       }
-       res = false;
-       nfree = 0;
-       end = &head;
-       c = g->m->mcache;
-       sweepgenset = false;
-
-       // Mark any free objects in this span so we don't collect them.
-       for(link = s->freelist; link != nil; link = link->next) {
-               off = (uintptr*)link - (uintptr*)arena_start;
-               bitp = arena_start - off/wordsPerBitmapByte - 1;
-               shift = (off % wordsPerBitmapByte) * gcBits;
-               *bitp |= bitMarked<<shift;
-       }
-
-       // Unlink & free special records for any objects we're about to free.
-       specialp = &s->specials;
-       special = *specialp;
-       while(special != nil) {
-               // A finalizer can be set for an inner byte of an object; find the object's beginning.
-               p = (byte*)(s->start << PageShift) + special->offset/size*size;
-               off = (uintptr*)p - (uintptr*)arena_start;
-               bitp = arena_start - off/wordsPerBitmapByte - 1;
-               shift = (off % wordsPerBitmapByte) * gcBits;
-               bits = (*bitp>>shift) & bitMask;
-               if((bits&bitMarked) == 0) {
-                       // Find the exact byte for which the special was setup
-                       // (as opposed to object beginning).
-                       p = (byte*)(s->start << PageShift) + special->offset;
-                       // about to free object: splice out special record
-                       y = special;
-                       special = special->next;
-                       *specialp = special;
-                       if(!runtime·freespecial(y, p, size, false)) {
-                               // stop freeing of object if it has a finalizer
-                               *bitp |= bitMarked << shift;
-                       }
-               } else {
-                       // object is still live: keep special record
-                       specialp = &special->next;
-                       special = *specialp;
-               }
-       }
-
-       // Sweep through n objects of given size starting at p.
-       // This thread owns the span now, so it can manipulate
-       // the block bitmap without atomic operations.
-       p = (byte*)(s->start << PageShift);
-       // Find bits for the beginning of the span.
-       off = (uintptr*)p - (uintptr*)arena_start;
-       bitp = arena_start - off/wordsPerBitmapByte - 1;
-       shift = 0;
-       step = size/(PtrSize*wordsPerBitmapByte);
-       // Rewind to the previous quadruple as we move to the next
-       // in the beginning of the loop.
-       bitp += step;
-       if(step == 0) {
-               // 8-byte objects.
-               bitp++;
-               shift = gcBits;
-       }
-       for(; n > 0; n--, p += size) {
-               bitp -= step;
-               if(step == 0) {
-                       if(shift != 0)
-                               bitp--;
-                       shift = gcBits - shift;
-               }
-
-               xbits = *bitp;
-               bits = (xbits>>shift) & bitMask;
-
-               // Allocated and marked object, reset bits to allocated.
-               if((bits&bitMarked) != 0) {
-                       *bitp &= ~(bitMarked<<shift);
-                       continue;
-               }
-               // At this point we know that we are looking at garbage object
-               // that needs to be collected.
-               if(runtime·debug.allocfreetrace)
-                       runtime·tracefree(p, size);
-               // Reset to allocated+noscan.
-               *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
-               if(cl == 0) {
-                       // Free large span.
-                       if(preserve)
-                               runtime·throw("can't preserve large span");
-                       runtime·unmarkspan(p, s->npages<<PageShift);
-                       s->needzero = 1;
-                       // important to set sweepgen before returning it to heap
-                       runtime·atomicstore(&s->sweepgen, sweepgen);
-                       sweepgenset = true;
-                       // NOTE(rsc,dvyukov): The original implementation of efence
-                       // in CL 22060046 used SysFree instead of SysFault, so that
-                       // the operating system would eventually give the memory
-                       // back to us again, so that an efence program could run
-                       // longer without running out of memory. Unfortunately,
-                       // calling SysFree here without any kind of adjustment of the
-                       // heap data structures means that when the memory does
-                       // come back to us, we have the wrong metadata for it, either in
-                       // the MSpan structures or in the garbage collection bitmap.
-                       // Using SysFault here means that the program will run out of
-                       // memory fairly quickly in efence mode, but at least it won't
-                       // have mysterious crashes due to confused memory reuse.
-                       // It should be possible to switch back to SysFree if we also
-                       // implement and then call some kind of MHeap_DeleteSpan.
-                       if(runtime·debug.efence) {
-                               s->limit = nil; // prevent mlookup from finding this span
-                               runtime·SysFault(p, size);
-                       } else
-                               runtime·MHeap_Free(&runtime·mheap, s, 1);
-                       c->local_nlargefree++;
-                       c->local_largefree += size;
-                       runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
-                       res = true;
-               } else {
-                       // Free small object.
-                       if(size > 2*sizeof(uintptr))
-                               ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;       // mark as "needs to be zeroed"
-                       else if(size > sizeof(uintptr))
-                               ((uintptr*)p)[1] = 0;
-
-                       end->next = (MLink*)p;
-                       end = (MLink*)p;
-                       nfree++;
-               }
-       }
-
-       // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
-       // because of the potential for a concurrent free/SetFinalizer.
-       // But we need to set it before we make the span available for allocation
-       // (return it to heap or mcentral), because allocation code assumes that a
-       // span is already swept if available for allocation.
-
-       if(!sweepgenset && nfree == 0) {
-               // The span must be in our exclusive ownership until we update sweepgen,
-               // check for potential races.
-               if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
-                       runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
-                               s->state, s->sweepgen, sweepgen);
-                       runtime·throw("MSpan_Sweep: bad span state after sweep");
-               }
-               runtime·atomicstore(&s->sweepgen, sweepgen);
-       }
-       if(nfree > 0) {
-               c->local_nsmallfree[cl] += nfree;
-               c->local_cachealloc -= nfree * size;
-               runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
-               res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
-               // MCentral_FreeSpan updates sweepgen
-       }
-       return res;
-}
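
The sweep loop keeps converting heap addresses into a bitmap byte and nibble: the bitmap grows downward from arena_start, each byte describing wordsPerBitmapByte words at gcBits bits each. A small sketch of that address arithmetic using plain integers instead of real pointers; the 64-bit word size is an assumption for the example:

package main

import "fmt"

const (
	ptrSize            = 8 // assuming a 64-bit word size
	gcBits             = 4
	wordsPerBitmapByte = 8 / gcBits
)

// bitmapFor mirrors the recurring arithmetic above:
//	off   = (uintptr*)p - (uintptr*)arena_start
//	bitp  = arena_start - off/wordsPerBitmapByte - 1
//	shift = (off % wordsPerBitmapByte) * gcBits
// It returns how many bytes below arena_start the bitmap byte lives and
// which nibble of that byte holds the 4 GC bits for the word at addr.
func bitmapFor(arenaStart, addr uint64) (bytesBelowStart uint64, shift uint) {
	off := (addr - arenaStart) / ptrSize          // word index within the arena
	bytesBelowStart = off/wordsPerBitmapByte + 1  // bitmap grows downward from arena_start
	shift = uint(off%wordsPerBitmapByte) * gcBits // low nibble for even words, high for odd
	return
}

func main() {
	fmt.Println(bitmapFor(0x10000, 0x10000+3*ptrSize)) // word 3: 2 bytes below arena_start, shift 4
}
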
-
-// State of background runtime·sweep.
-// Protected by runtime·gclock.
-typedef struct SweepData SweepData;
-struct SweepData
-{
-       G*      g;
-       bool    parked;
-
-       uint32  spanidx;        // background sweeper position
-
-       uint32  nbgsweep;
-       uint32  npausesweep;
-};
-SweepData runtime·sweep;
-
-// sweeps one span
-// returns number of pages returned to heap, or -1 if there is nothing to sweep
-uintptr
-runtime·sweepone(void)
-{
-       MSpan *s;
-       uint32 idx, sg;
-       uintptr npages;
-
-       // increment locks to ensure that the goroutine is not preempted
-       // in the middle of a sweep, thus leaving the span in an inconsistent state for the next GC
-       g->m->locks++;
-       sg = runtime·mheap.sweepgen;
-       for(;;) {
-               idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
-               if(idx >= runtime·work.nspan) {
-                       runtime·mheap.sweepdone = true;
-                       g->m->locks--;
-                       return -1;
-               }
-               s = runtime·work.spans[idx];
-               if(s->state != MSpanInUse) {
-                       s->sweepgen = sg;
-                       continue;
-               }
-               if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
-                       continue;
-               npages = s->npages;
-               if(!runtime·MSpan_Sweep(s, false))
-                       npages = 0;
-               g->m->locks--;
-               return npages;
-       }
-}
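
sweepone hands out indices into the cached spans array with a bare atomic add, so the background sweeper and allocating goroutines can claim spans in parallel without holding a lock over the whole array. A tiny sketch of that claiming pattern; the names are invented for illustration:

package main

import (
	"fmt"
	"sync/atomic"
)

// spanIdx is the shared cursor into the cached spans array, claimed with an
// atomic add exactly like sweep.spanidx in sweepone above.
var spanIdx uint32

// nextSpan claims one index; ok is false once every span has been handed out.
func nextSpan(nspan uint32) (idx uint32, ok bool) {
	idx = atomic.AddUint32(&spanIdx, 1) - 1
	return idx, idx < nspan
}

func main() {
	for {
		idx, ok := nextSpan(4)
		if !ok {
			break // the real code would set sweepdone here
		}
		fmt.Println("sweep span", idx)
	}
}
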
-
-static void
-sweepone_m(void)
-{
-       g->m->scalararg[0] = runtime·sweepone();
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·gosweepone(void)
-{
-       void (*fn)(void);
-       
-       fn = sweepone_m;
-       runtime·onM(&fn);
-       return g->m->scalararg[0];
-}
-
-#pragma textflag NOSPLIT
-bool
-runtime·gosweepdone(void)
-{
-       return runtime·mheap.sweepdone;
-}
-
-void
-runtime·gchelper(void)
-{
-       uint32 nproc;
-
-       g->m->traceback = 2;
-       gchelperstart();
-
-       // parallel mark over gc roots
-       runtime·parfordo(runtime·work.markfor);
-
-       // help other threads scan secondary blocks
-       scanblock(nil, 0, nil);
-
-       nproc = runtime·work.nproc;  // runtime·work.nproc can change right after we increment runtime·work.ndone
-       if(runtime·xadd(&runtime·work.ndone, +1) == nproc-1)
-               runtime·notewakeup(&runtime·work.alldone);
-       g->m->traceback = 0;
-}
-
-static void
-cachestats(void)
-{
-       MCache *c;
-       P *p, **pp;
-
-       for(pp=runtime·allp; p=*pp; pp++) {
-               c = p->mcache;
-               if(c==nil)
-                       continue;
-               runtime·purgecachedstats(c);
-       }
-}
-
-static void
-flushallmcaches(void)
-{
-       P *p, **pp;
-       MCache *c;
-
-       // Flush MCache's to MCentral.
-       for(pp=runtime·allp; p=*pp; pp++) {
-               c = p->mcache;
-               if(c==nil)
-                       continue;
-               runtime·MCache_ReleaseAll(c);
-               runtime·stackcache_clear(c);
-       }
-}
-
-static void
-flushallmcaches_m(G *gp)
-{
-       flushallmcaches();
-       runtime·gogo(&gp->sched);
-}
-
-void
-runtime·updatememstats(GCStats *stats)
-{
-       M *mp;
-       MSpan *s;
-       int32 i;
-       uint64 smallfree;
-       uint64 *src, *dst;
-       void (*fn)(G*);
-
-       if(stats)
-               runtime·memclr((byte*)stats, sizeof(*stats));
-       for(mp=runtime·allm; mp; mp=mp->alllink) {
-               if(stats) {
-                       src = (uint64*)&mp->gcstats;
-                       dst = (uint64*)stats;
-                       for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
-                               dst[i] += src[i];
-                       runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
-               }
-       }
-       mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
-       mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
-       mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
-               mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
-       
-       // Calculate memory allocator stats.
-       // During program execution we only count number of frees and amount of freed memory.
-       // The current number of live objects in the heap and the amount of live heap memory
-       // are calculated by scanning all spans.
-       // Total number of mallocs is calculated as number of frees plus number of alive objects.
-       // Similarly, total amount of allocated memory is calculated as amount of freed memory
-       // plus amount of alive heap memory.
-       mstats.alloc = 0;
-       mstats.total_alloc = 0;
-       mstats.nmalloc = 0;
-       mstats.nfree = 0;
-       for(i = 0; i < nelem(mstats.by_size); i++) {
-               mstats.by_size[i].nmalloc = 0;
-               mstats.by_size[i].nfree = 0;
-       }
-
-       // Flush MCache's to MCentral.
-       if(g == g->m->g0)
-               flushallmcaches();
-       else {
-               fn = flushallmcaches_m;
-               runtime·mcall(&fn);
-       }
-
-       // Aggregate local stats.
-       cachestats();
-
-       // Scan all spans and count number of alive objects.
-       runtime·lock(&runtime·mheap.lock);
-       for(i = 0; i < runtime·mheap.nspan; i++) {
-               s = runtime·mheap.allspans[i];
-               if(s->state != MSpanInUse)
-                       continue;
-               if(s->sizeclass == 0) {
-                       mstats.nmalloc++;
-                       mstats.alloc += s->elemsize;
-               } else {
-                       mstats.nmalloc += s->ref;
-                       mstats.by_size[s->sizeclass].nmalloc += s->ref;
-                       mstats.alloc += s->ref*s->elemsize;
-               }
-       }
-       runtime·unlock(&runtime·mheap.lock);
-
-       // Aggregate by size class.
-       smallfree = 0;
-       mstats.nfree = runtime·mheap.nlargefree;
-       for(i = 0; i < nelem(mstats.by_size); i++) {
-               mstats.nfree += runtime·mheap.nsmallfree[i];
-               mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
-               mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
-               smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
-       }
-       mstats.nfree += mstats.tinyallocs;
-       mstats.nmalloc += mstats.nfree;
-
-       // Calculate derived stats.
-       mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
-       mstats.heap_alloc = mstats.alloc;
-       mstats.heap_objects = mstats.nmalloc - mstats.nfree;
-}
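
Only frees are counted while the program runs, so updatememstats reconstructs the rest: nmalloc = nfree + live objects, total_alloc = live bytes + freed bytes. A short numeric sketch of that bookkeeping with made-up figures:

package main

import "fmt"

func main() {
	// Hypothetical numbers, only to show the derivation used above.
	liveObjects := uint64(1500)   // counted by scanning in-use spans
	liveBytes := uint64(3 << 20)  // sum of ref*elemsize over those spans
	nfree := uint64(4000)         // frees recorded while the program ran
	freedBytes := uint64(9 << 20) // largefree + smallfree

	nmalloc := nfree + liveObjects       // total mallocs = frees + still-live objects
	totalAlloc := liveBytes + freedBytes // total allocated = live bytes + freed bytes
	heapObjects := nmalloc - nfree       // == liveObjects

	fmt.Println(nmalloc, totalAlloc>>20, heapObjects)
}
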
-
-// Structure of arguments passed to function gc().
-// This allows the arguments to be passed via runtime·mcall.
-struct gc_args
-{
-       int64 start_time; // start time of GC in ns (just before stoptheworld)
-       bool  eagersweep;
-};
-
-static void gc(struct gc_args *args);
-
-int32
-runtime·readgogc(void)
-{
-       byte *p;
-
-       p = runtime·getenv("GOGC");
-       if(p == nil || p[0] == '\0')
-               return 100;
-       if(runtime·strcmp(p, (byte*)"off") == 0)
-               return -1;
-       return runtime·atoi(p);
-}
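
readgogc turns the GOGC environment variable into a percentage: unset or empty means 100, "off" means -1 (collection disabled), anything else is parsed as an integer. A user-level Go sketch of the same policy; this is not the runtime's code path, just the behavior it implements:

package main

import (
	"fmt"
	"os"
	"strconv"
)

func readGOGC() int {
	s := os.Getenv("GOGC")
	if s == "" {
		return 100 // default: collect again once the heap grows by 100%
	}
	if s == "off" {
		return -1 // disable collection entirely
	}
	n, err := strconv.Atoi(s)
	if err != nil {
		return 100 // the runtime's atoi has no error path; assume the default here
	}
	return n
}

func main() { fmt.Println(readGOGC()) }
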
-
-void
-runtime·gcinit(void)
-{
-       if(sizeof(Workbuf) != WorkbufSize)
-               runtime·throw("runtime: size of Workbuf is suboptimal");
-
-       runtime·work.markfor = runtime·parforalloc(MaxGcproc);
-       runtime·gcpercent = runtime·readgogc();
-       runtime·gcdatamask = unrollglobgcprog(runtime·gcdata, runtime·edata - runtime·data);
-       runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
-}
-
-void
-runtime·gc_m(void)
-{
-       struct gc_args a;
-       G *gp;
-
-       gp = g->m->curg;
-       runtime·casgstatus(gp, Grunning, Gwaiting);
-       gp->waitreason = runtime·gostringnocopy((byte*)"garbage collection");
-
-       a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
-       a.eagersweep = g->m->scalararg[2];
-       gc(&a);
-
-       if(nbadblock > 0) {
-               // Work out path from root to bad block.
-               for(;;) {
-                       gc(&a);
-                       if(nbadblock >= nelem(badblock))
-                               runtime·throw("cannot find path to bad pointer");
-               }
-       }
-
-       runtime·casgstatus(gp, Gwaiting, Grunning);
-}
-
-static void
-gc(struct gc_args *args)
-{
-       int64 t0, t1, t2, t3, t4;
-       uint64 heap0, heap1, obj;
-       GCStats stats;
-
-       if(DebugPtrs)
-               runtime·printf("GC start\n");
-
-       if(runtime·debug.allocfreetrace)
-               runtime·tracegc();
-
-       g->m->traceback = 2;
-       t0 = args->start_time;
-       runtime·work.tstart = args->start_time; 
-
-       t1 = 0;
-       if(runtime·debug.gctrace)
-               t1 = runtime·nanotime();
-
-       // Sweep whatever was not swept by bgsweep.
-       while(runtime·sweepone() != -1)
-               runtime·sweep.npausesweep++;
-
-       // Cache runtime.mheap.allspans in work.spans to avoid conflicts with
-       // resizing/freeing allspans.
-       // New spans can be created while GC progresses, but they are not garbage for
-       // this round:
-       //  - new stack spans can be created even while the world is stopped.
-       //  - new malloc spans can be created during the concurrent sweep
-
-       // Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
-       runtime·lock(&runtime·mheap.lock);
-       // Free the old cached sweep array if necessary.
-       if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
-               runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
-       // Cache the current array for marking.
-       runtime·mheap.gcspans = runtime·mheap.allspans;
-       runtime·work.spans = runtime·mheap.allspans;
-       runtime·work.nspan = runtime·mheap.nspan;
-       runtime·unlock(&runtime·mheap.lock);
-
-       runtime·work.nwait = 0;
-       runtime·work.ndone = 0;
-       runtime·work.nproc = runtime·gcprocs();
-       runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
-       if(runtime·work.nproc > 1) {
-               runtime·noteclear(&runtime·work.alldone);
-               runtime·helpgc(runtime·work.nproc);
-       }
-
-       t2 = 0;
-       if(runtime·debug.gctrace)
-               t2 = runtime·nanotime();
-
-       gchelperstart();
-       runtime·parfordo(runtime·work.markfor);
-       scanblock(nil, 0, nil);
-
-       t3 = 0;
-       if(runtime·debug.gctrace)
-               t3 = runtime·nanotime();
-
-       if(runtime·work.nproc > 1)
-               runtime·notesleep(&runtime·work.alldone);
-
-       runtime·shrinkfinish();
-
-       cachestats();
-       // next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
-       // estimate what was live heap size after previous GC (for tracing only)
-       heap0 = mstats.next_gc*100/(runtime·gcpercent+100);
-       // conservatively set next_gc to high value assuming that everything is live
-       // concurrent/lazy sweep will reduce this number while discovering new garbage
-       mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100;
-
-       t4 = runtime·nanotime();
-       runtime·atomicstore64(&mstats.last_gc, runtime·unixnanotime());  // must be Unix time to make sense to user
-       mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
-       mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = t4;
-       mstats.pause_total_ns += t4 - t0;
-       mstats.numgc++;
-       if(mstats.debuggc)
-               runtime·printf("pause %D\n", t4-t0);
-
-       if(runtime·debug.gctrace) {
-               heap1 = mstats.heap_alloc;
-               runtime·updatememstats(&stats);
-               if(heap1 != mstats.heap_alloc) {
-                       runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
-                       runtime·throw("mstats skew");
-               }
-               obj = mstats.nmalloc - mstats.nfree;
-
-               stats.nprocyield += runtime·work.markfor->nprocyield;
-               stats.nosyield += runtime·work.markfor->nosyield;
-               stats.nsleep += runtime·work.markfor->nsleep;
-
-               runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
-                               " %d goroutines,"
-                               " %d/%d/%d sweeps,"
-                               " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
-                       mstats.numgc, runtime·work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
-                       heap0>>20, heap1>>20, obj,
-                       mstats.nmalloc, mstats.nfree,
-                       runtime·gcount(),
-                       runtime·work.nspan, runtime·sweep.nbgsweep, runtime·sweep.npausesweep,
-                       stats.nhandoff, stats.nhandoffcnt,
-                       runtime·work.markfor->nsteal, runtime·work.markfor->nstealcnt,
-                       stats.nprocyield, stats.nosyield, stats.nsleep);
-               runtime·sweep.nbgsweep = runtime·sweep.npausesweep = 0;
-       }
-
-       // See the comment in the beginning of this function as to why we need the following.
-       // Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
-       runtime·lock(&runtime·mheap.lock);
-       // Free the old cached mark array if necessary.
-       if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
-               runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
-       // Cache the current array for sweeping.
-       runtime·mheap.gcspans = runtime·mheap.allspans;
-       runtime·mheap.sweepgen += 2;
-       runtime·mheap.sweepdone = false;
-       runtime·work.spans = runtime·mheap.allspans;
-       runtime·work.nspan = runtime·mheap.nspan;
-       runtime·sweep.spanidx = 0;
-       runtime·unlock(&runtime·mheap.lock);
-
-       if(ConcurrentSweep && !args->eagersweep) {
-               runtime·lock(&runtime·gclock);
-               if(runtime·sweep.g == nil)
-                       runtime·sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, gc);
-               else if(runtime·sweep.parked) {
-                       runtime·sweep.parked = false;
-                       runtime·ready(runtime·sweep.g);
-               }
-               runtime·unlock(&runtime·gclock);
-       } else {
-               // Sweep all spans eagerly.
-               while(runtime·sweepone() != -1)
-                       runtime·sweep.npausesweep++;
-               // Do an additional mProf_GC, because all 'free' events are now real as well.
-               runtime·mProf_GC();
-       }
-
-       runtime·mProf_GC();
-       g->m->traceback = 0;
-
-       if(DebugPtrs)
-               runtime·printf("GC end\n");
-}
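
Two formulas above drive pacing: after marking, next_gc is set to heap_alloc*(1+gcpercent/100), and the traced "heap before GC" figure is recovered as next_gc*100/(gcpercent+100). A quick numeric check of both, assuming the default GOGC of 100:

package main

import "fmt"

func main() {
	const gcpercent = 100         // GOGC default
	heapAlloc := uint64(64 << 20) // assume 64 MB marked live

	nextGC := heapAlloc + heapAlloc*gcpercent/100 // collect again once the heap doubles
	heap0 := nextGC * 100 / (gcpercent + 100)     // inverse: estimated live heap after the previous GC

	fmt.Println(nextGC>>20, heap0>>20) // 128 64 (in MB)
}
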
-
-extern uintptr runtime·sizeof_C_MStats;
-
-static void readmemstats_m(void);
-
-void
-runtime·readmemstats_m(void)
-{
-       MStats *stats;
-       
-       stats = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-
-       runtime·updatememstats(nil);
-       // The size of the trailing by_size array differs between Go and C:
-       // NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
-       runtime·memmove(stats, &mstats, runtime·sizeof_C_MStats);
-
-       // Stack numbers are part of the heap numbers, separate those out for user consumption
-       stats->stacks_sys = stats->stacks_inuse;
-       stats->heap_inuse -= stats->stacks_inuse;
-       stats->heap_sys -= stats->stacks_inuse;
-}
-
-static void readgcstats_m(void);
-
-#pragma textflag NOSPLIT
-void
-runtime∕debug·readGCStats(Slice *pauses)
-{
-       void (*fn)(void);
-       
-       g->m->ptrarg[0] = pauses;
-       fn = readgcstats_m;
-       runtime·onM(&fn);
-}
-
-static void
-readgcstats_m(void)
-{
-       Slice *pauses;  
-       uint64 *p;
-       uint32 i, j, n;
-       
-       pauses = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-
-       // Calling code in runtime/debug should make the slice large enough.
-       if(pauses->cap < nelem(mstats.pause_ns)+3)
-               runtime·throw("runtime: short slice passed to readGCStats");
-
-       // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
-       p = (uint64*)pauses->array;
-       runtime·lock(&runtime·mheap.lock);
-
-       n = mstats.numgc;
-       if(n > nelem(mstats.pause_ns))
-               n = nelem(mstats.pause_ns);
-
-       // The pause buffer is circular. The most recent pause is at
-       // pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
-       // from there to go back farther in time. We deliver the times
-       // most recent first (in p[0]).
-       for(i=0; i<n; i++) {
-               j = (mstats.numgc-1-i)%nelem(mstats.pause_ns);
-               p[i] = mstats.pause_ns[j];
-               p[n+i] = mstats.pause_end[j];
-       }
-
-       p[n+n] = mstats.last_gc;
-       p[n+n+1] = mstats.numgc;
-       p[n+n+2] = mstats.pause_total_ns;       
-       runtime·unlock(&runtime·mheap.lock);
-       pauses->len = n+n+3;
-}
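
The pause history is a ring: the pause for GC number k lives at index (k-1) mod len(pause_ns), and the copy above walks backwards so the newest pause lands in p[0]. A sketch of that unrolling over a hypothetical fixed-size ring:

package main

import "fmt"

func recentPauses(pauseNs []uint64, numgc uint32) []uint64 {
	n := numgc
	if n > uint32(len(pauseNs)) {
		n = uint32(len(pauseNs))
	}
	out := make([]uint64, n)
	for i := uint32(0); i < n; i++ {
		j := (numgc - 1 - i) % uint32(len(pauseNs)) // walk backwards through the ring
		out[i] = pauseNs[j]                         // out[0] is the most recent pause
	}
	return out
}

func main() {
	// A ring of 4 entries after 6 GCs: the pause of GC k sits at index (k-1)%4.
	ring := []uint64{5, 6, 3, 4}       // pauses of GC 5, 6, 3, 4
	fmt.Println(recentPauses(ring, 6)) // [6 5 4 3]: newest first
}
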
-
-void
-runtime·setgcpercent_m(void)
-{
-       int32 in;
-       int32 out;
-
-       in = (int32)(intptr)g->m->scalararg[0];
-
-       runtime·lock(&runtime·mheap.lock);
-       out = runtime·gcpercent;
-       if(in < 0)
-               in = -1;
-       runtime·gcpercent = in;
-       runtime·unlock(&runtime·mheap.lock);
-
-       g->m->scalararg[0] = (uintptr)(intptr)out;
-}
-
-static void
-gchelperstart(void)
-{
-       if(g->m->helpgc < 0 || g->m->helpgc >= MaxGcproc)
-               runtime·throw("gchelperstart: bad m->helpgc");
-       if(g != g->m->g0)
-               runtime·throw("gchelper not running on g0 stack");
-}
-
-G*
-runtime·wakefing(void)
-{
-       G *res;
-
-       res = nil;
-       runtime·lock(&runtime·finlock);
-       if(runtime·fingwait && runtime·fingwake) {
-               runtime·fingwait = false;
-               runtime·fingwake = false;
-               res = runtime·fing;
-       }
-       runtime·unlock(&runtime·finlock);
-       return res;
-}
-
-// Recursively unrolls GC program in prog.
-// mask is where to store the result.
-// ppos is a pointer to position in mask, in bits.
-// sparse says to generate a 4-bit-per-word mask for the heap (otherwise 2 bits per word, for data/bss).
-static byte*
-unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
-{
-       uintptr pos, siz, i, off;
-       byte *arena_start, *prog1, v, *bitp, shift;
-
-       arena_start = runtime·mheap.arena_start;
-       pos = *ppos;
-       for(;;) {
-               switch(prog[0]) {
-               case insData:
-                       prog++;
-                       siz = prog[0];
-                       prog++;
-                       for(i = 0; i < siz; i++) {
-                               v = prog[i/PointersPerByte];
-                               v >>= (i%PointersPerByte)*BitsPerPointer;
-                               v &= BitsMask;
-                               if(inplace) {
-                                       // Store directly into GC bitmap.
-                                       off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
-                                       bitp = arena_start - off/wordsPerBitmapByte - 1;
-                                       shift = (off % wordsPerBitmapByte) * gcBits;
-                                       if(shift==0)
-                                               *bitp = 0;
-                                       *bitp |= v<<(shift+2);
-                                       pos += PtrSize;
-                               } else if(sparse) {
-                                       // 4-bits per word
-                                       v <<= (pos%8)+2;
-                                       mask[pos/8] |= v;
-                                       pos += gcBits;
-                               } else {
-                                       // 2-bits per word
-                                       v <<= pos%8;
-                                       mask[pos/8] |= v;
-                                       pos += BitsPerPointer;
-                               }
-                       }
-                       prog += ROUND(siz*BitsPerPointer, 8)/8;
-                       break;
-               case insArray:
-                       prog++;
-                       siz = 0;
-                       for(i = 0; i < PtrSize; i++)
-                               siz = (siz<<8) + prog[PtrSize-i-1];
-                       prog += PtrSize;
-                       prog1 = nil;
-                       for(i = 0; i < siz; i++)
-                               prog1 = unrollgcprog1(mask, prog, &pos, inplace, sparse);
-                       if(prog1[0] != insArrayEnd)
-                               runtime·throw("unrollgcprog: array does not end with insArrayEnd");
-                       prog = prog1+1;
-                       break;
-               case insArrayEnd:
-               case insEnd:
-                       *ppos = pos;
-                       return prog;
-               default:
-                       runtime·throw("unrollgcprog: unknown instruction");
-               }
-       }
-}
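
unrollgcprog1 interprets a small bytecode: insData carries a count and packed 2-bit type codes, insArray repeats its body a PtrSize-byte count of times, and insArrayEnd/insEnd terminate. A hedged sketch of a dense (data/bss-style) unroller for that format, reusing the worked example from the mgc1.go comments further down; it unrolls each array element once and repeats it, which yields the same mask as re-walking the program:

package main

import "fmt"

const (
	insData = iota + 1
	insArray
	insArrayEnd
	insEnd

	bitsScalar      = 1
	bitsPointer     = 2
	bitsPerPointer  = 2
	pointersPerByte = 8 / bitsPerPointer
	bitsMask        = 1<<bitsPerPointer - 1
	ptrSize         = 8 // array counts are encoded in PtrSize little-endian bytes
)

// unroll expands the bytecode into one 2-bit type code per word (dense form).
// rest points at the insArrayEnd/insEnd instruction that stopped the walk.
func unroll(prog []byte) (mask []byte, rest []byte) {
	for {
		switch prog[0] {
		case insData:
			n := int(prog[1])
			packed := prog[2:]
			for i := 0; i < n; i++ {
				v := packed[i/pointersPerByte] >> (uint(i%pointersPerByte) * bitsPerPointer) & bitsMask
				mask = append(mask, v)
			}
			prog = packed[(n*bitsPerPointer+7)/8:]
		case insArray:
			count := 0
			for i := ptrSize - 1; i >= 0; i-- { // little-endian element count
				count = count<<8 + int(prog[1+i])
			}
			elem, after := unroll(prog[1+ptrSize:]) // unroll the element once...
			for i := 0; i < count; i++ {
				mask = append(mask, elem...) // ...and repeat it count times
			}
			prog = after[1:] // skip insArrayEnd
		case insArrayEnd, insEnd:
			return mask, prog
		default:
			panic("unroll: unknown instruction")
		}
	}
}

func main() {
	// The worked example from the mgc1.go comments:
	// struct { x []byte; y [20]struct{ z int; w *byte } }
	prog := []byte{
		insData, 3, bitsPointer | bitsScalar<<2 | bitsScalar<<4, // x: ptr scalar scalar
		insArray, 20, 0, 0, 0, 0, 0, 0, 0, // 20 elements, count as 8 little-endian bytes
		insData, 2, bitsScalar | bitsPointer<<2, // each element: scalar ptr
		insArrayEnd,
		insEnd,
	}
	mask, _ := unroll(prog)
	fmt.Println(len(prog), len(mask)) // 17-byte program, 43 words: 3 + 20*2
}
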
-
-// Unrolls GC program prog for data/bss, returns dense GC mask.
-static BitVector
-unrollglobgcprog(byte *prog, uintptr size)
-{
-       byte *mask;
-       uintptr pos, masksize;
-
-       masksize = ROUND(ROUND(size, PtrSize)/PtrSize*BitsPerPointer, 8)/8;
-       mask = runtime·persistentalloc(masksize+1, 0, &mstats.gc_sys);
-       mask[masksize] = 0xa1;
-       pos = 0;
-       prog = unrollgcprog1(mask, prog, &pos, false, false);
-       if(pos != size/PtrSize*BitsPerPointer) {
-               runtime·printf("unrollglobgcprog: bad program size, got %D, expect %D\n",
-                       (uint64)pos, (uint64)size/PtrSize*BitsPerPointer);
-               runtime·throw("unrollglobgcprog: bad program size");
-       }
-       if(prog[0] != insEnd)
-               runtime·throw("unrollglobgcprog: program does not end with insEnd");
-       if(mask[masksize] != 0xa1)
-               runtime·throw("unrollglobgcprog: overflow");
-       return (BitVector){masksize*8, mask};
-}
-
-void
-runtime·unrollgcproginplace_m(void)
-{
-       uintptr size, size0, pos, off;
-       byte *arena_start, *prog, *bitp, shift;
-       Type *typ;
-       void *v;
-
-       v = g->m->ptrarg[0];
-       typ = g->m->ptrarg[1];
-       size = g->m->scalararg[0];
-       size0 = g->m->scalararg[1];
-       g->m->ptrarg[0] = nil;
-       g->m->ptrarg[1] = nil;
-
-       pos = 0;
-       prog = (byte*)typ->gc[1];
-       while(pos != size0)
-               unrollgcprog1(v, prog, &pos, true, true);
-       // Mark first word as bitAllocated.
-       arena_start = runtime·mheap.arena_start;
-       off = (uintptr*)v - (uintptr*)arena_start;
-       bitp = arena_start - off/wordsPerBitmapByte - 1;
-       shift = (off % wordsPerBitmapByte) * gcBits;
-       *bitp |= bitBoundary<<shift;
-       // Mark word after last as BitsDead.
-       if(size0 < size) {
-               off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
-               bitp = arena_start - off/wordsPerBitmapByte - 1;
-               shift = (off % wordsPerBitmapByte) * gcBits;
-               *bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
-       }
-}
-
-// Unrolls GC program in typ->gc[1] into typ->gc[0]
-void
-runtime·unrollgcprog_m(void)
-{
-       static Mutex lock;
-       Type *typ;
-       byte *mask, *prog;
-       uintptr pos;
-       uint32 x;
-
-       typ = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-
-       runtime·lock(&lock);
-       mask = (byte*)typ->gc[0];
-       if(mask[0] == 0) {
-               pos = 8;  // skip the unroll flag
-               prog = (byte*)typ->gc[1];
-               prog = unrollgcprog1(mask, prog, &pos, false, true);
-               if(prog[0] != insEnd)
-                       runtime·throw("unrollgcprog: program does not end with insEnd");
-               if(((typ->size/PtrSize)%2) != 0) {
-                       // repeat the program twice
-                       prog = (byte*)typ->gc[1];
-                       unrollgcprog1(mask, prog, &pos, false, true);
-               }
-               // atomic way to say mask[0] = 1
-               x = ((uint32*)mask)[0];
-               runtime·atomicstore((uint32*)mask, x|1);
-       }
-       runtime·unlock(&lock);
-}
-
-// mark the span of memory at v as having n blocks of the given size.
-// if leftover is true, there is left over space at the end of the span.
-void
-runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
-{
-       uintptr i, off, step;
-       byte *b;
-
-       if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
-               runtime·throw("markspan: bad pointer");
-
-       // Find bits of the beginning of the span.
-       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
-       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-       if((off%wordsPerBitmapByte) != 0)
-               runtime·throw("markspan: unaligned length");
-
-       // Okay to use non-atomic ops here, because we control
-       // the entire span, and each bitmap byte has bits for only
-       // one span, so no other goroutines are changing these bitmap words.
-
-       if(size == PtrSize) {
-               // Possible only on 64-bits (minimal size class is 8 bytes).
-               // Poor man's memset(0x11).
-               if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
-                       runtime·throw("markspan: bad bits");
-               if((n%(wordsPerBitmapByte*PtrSize)) != 0)
-                       runtime·throw("markspan: unaligned length");
-               b = b - n/wordsPerBitmapByte + 1;       // find first byte
-               if(((uintptr)b%PtrSize) != 0)
-                       runtime·throw("markspan: unaligned pointer");
-               for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
-                       *(uintptr*)b = (uintptr)0x1111111111111111ULL;  // bitBoundary+BitsDead
-               return;
-       }
-
-       if(leftover)
-               n++;    // mark a boundary just past end of last block too
-       step = size/(PtrSize*wordsPerBitmapByte);
-       for(i = 0; i != n; i++, b -= step)
-               *b = bitBoundary|(BitsDead<<2);
-}
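
The 0x11 check above is the "poor man's memset": with one object per 8-byte word, both nibbles of every bitmap byte carry bitBoundary+BitsDead, so eight bitmap bytes can be written at once as 0x1111111111111111. A one-line verification of that identity with the constants from mgc1.go:

package main

import "fmt"

const (
	gcBits      = 4
	bitBoundary = 1
	bitsDead    = 0
)

func main() {
	// Both nibbles of a bitmap byte are boundary+dead when every word is an object.
	b := (bitBoundary+bitsDead)<<gcBits + (bitBoundary + bitsDead)
	fmt.Printf("%#x\n", b) // 0x11, so whole uintptrs of 0x11 bytes can be stored at once
}
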
-
-// unmark the span of memory at v of length n bytes.
-void
-runtime·unmarkspan(void *v, uintptr n)
-{
-       uintptr off;
-       byte *b;
-
-       if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
-               runtime·throw("markspan: bad pointer");
-
-       off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
-       if((off % (PtrSize*wordsPerBitmapByte)) != 0)
-               runtime·throw("markspan: unaligned pointer");
-       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-       n /= PtrSize;
-       if(n%(PtrSize*wordsPerBitmapByte) != 0)
-               runtime·throw("unmarkspan: unaligned length");
-       // Okay to use non-atomic ops here, because we control
-       // the entire span, and each bitmap word has bits for only
-       // one span, so no other goroutines are changing these
-       // bitmap words.
-       n /= wordsPerBitmapByte;
-       runtime·memclr(b - n + 1, n);
-}
-
-void
-runtime·MHeap_MapBits(MHeap *h)
-{
-       // Caller has added extra mappings to the arena.
-       // Add extra mappings of bitmap words as needed.
-       // We allocate extra bitmap pieces in chunks of bitmapChunk.
-       enum {
-               bitmapChunk = 8192
-       };
-       uintptr n;
-
-       n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
-       n = ROUND(n, bitmapChunk);
-       n = ROUND(n, PhysPageSize);
-       if(h->bitmap_mapped >= n)
-               return;
-
-       runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
-       h->bitmap_mapped = n;
-}
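
The bitmap to map is sized as (arena_used-arena_start)/(PtrSize*wordsPerBitmapByte), then rounded up to bitmapChunk and to the physical page size so mappings grow in large, page-aligned steps. A small sketch of that size computation; the 4 KB page size is an assumption for the example:

package main

import "fmt"

const (
	ptrSize            = 8
	wordsPerBitmapByte = 2
	bitmapChunk        = 8192
	physPageSize       = 4096 // assumed page size
)

// round rounds n up to a multiple of a (a must be a power of two, as in the runtime's ROUND).
func round(n, a uint64) uint64 { return (n + a - 1) &^ (a - 1) }

func bitmapBytesNeeded(arenaUsed, arenaStart uint64) uint64 {
	n := (arenaUsed - arenaStart) / (ptrSize * wordsPerBitmapByte)
	n = round(n, bitmapChunk)
	return round(n, physPageSize)
}

func main() {
	fmt.Println(bitmapBytesNeeded(1<<20, 0)) // 1 MB of arena needs 65536 bytes of bitmap
}
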
-
-static bool
-getgcmaskcb(Stkframe *frame, void *ctxt)
-{
-       Stkframe *frame0;
-
-       frame0 = ctxt;
-       if(frame->sp <= frame0->sp && frame0->sp < frame->varp) {
-               *frame0 = *frame;
-               return false;
-       }
-       return true;
-}
-
-// Returns GC type info for object p for testing.
-void
-runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
-{
-       Stkframe frame;
-       uintptr i, n, off;
-       byte *base, bits, shift, *b;
-       bool (*cb)(Stkframe*, void*);
-
-       *mask = nil;
-       *len = 0;
-
-       // data
-       if(p >= runtime·data && p < runtime·edata) {
-               n = ((PtrType*)t)->elem->size;
-               *len = n/PtrSize;
-               *mask = runtime·mallocgc(*len, nil, FlagNoScan);
-               for(i = 0; i < n; i += PtrSize) {
-                       off = (p+i-runtime·data)/PtrSize;
-                       bits = (runtime·gcdatamask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
-                       (*mask)[i/PtrSize] = bits;
-               }
-               return;
-       }
-       // bss
-       if(p >= runtime·bss && p < runtime·ebss) {
-               n = ((PtrType*)t)->elem->size;
-               *len = n/PtrSize;
-               *mask = runtime·mallocgc(*len, nil, FlagNoScan);
-               for(i = 0; i < n; i += PtrSize) {
-                       off = (p+i-runtime·bss)/PtrSize;
-                       bits = (runtime·gcbssmask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
-                       (*mask)[i/PtrSize] = bits;
-               }
-               return;
-       }
-       // heap
-       if(runtime·mlookup(p, &base, &n, nil)) {
-               *len = n/PtrSize;
-               *mask = runtime·mallocgc(*len, nil, FlagNoScan);
-               for(i = 0; i < n; i += PtrSize) {
-                       off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
-                       b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-                       shift = (off % wordsPerBitmapByte) * gcBits;
-                       bits = (*b >> (shift+2))&BitsMask;
-                       (*mask)[i/PtrSize] = bits;
-               }
-               return;
-       }
-       // stack
-       frame.fn = nil;
-       frame.sp = (uintptr)p;
-       cb = getgcmaskcb;
-       runtime·gentraceback(g->m->curg->sched.pc, g->m->curg->sched.sp, 0, g->m->curg, 0, nil, 1000, &cb, &frame, 0);
-       if(frame.fn != nil) {
-               Func *f;
-               StackMap *stackmap;
-               BitVector bv;
-               uintptr size;
-               uintptr targetpc;
-               int32 pcdata;
-
-               f = frame.fn;
-               targetpc = frame.continpc;
-               if(targetpc == 0)
-                       return;
-               if(targetpc != f->entry)
-                       targetpc--;
-               pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
-               if(pcdata == -1)
-                       return;
-               stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-               if(stackmap == nil || stackmap->n <= 0)
-                       return;
-               bv = runtime·stackmapdata(stackmap, pcdata);
-               size = bv.n/BitsPerPointer*PtrSize;
-               n = ((PtrType*)t)->elem->size;
-               *len = n/PtrSize;
-               *mask = runtime·mallocgc(*len, nil, FlagNoScan);
-               for(i = 0; i < n; i += PtrSize) {
-                       off = (p+i-(byte*)frame.varp+size)/PtrSize;
-                       bits = (bv.bytedata[off*BitsPerPointer/8] >> ((off*BitsPerPointer)%8))&BitsMask;
-                       (*mask)[i/PtrSize] = bits;
-               }
-       }
-}
-
-void runtime·gc_unixnanotime(int64 *now);
-
-int64
-runtime·unixnanotime(void)
-{
-       int64 now;
-
-       runtime·gc_unixnanotime(&now);
-       return now;
-}
index 3a7204b54f27c116b999615d360405cd181749fd..f7e01c898c1f10d6e668940851249672fe265610 100644 (file)
@@ -60,10 +60,8 @@ func clearpools() {
        }
 }
 
-func gosweepone() uintptr
-func gosweepdone() bool
-
 func bgsweep() {
+       sweep.g = getg()
        getg().issystem = true
        for {
                for gosweepone() != ^uintptr(0) {
similarity index 73%
rename from src/runtime/mgc0.h
rename to src/runtime/mgc1.go
index 64f8189143b1e8bf90f11cdef4c36d14a1972c76..d1aab4554614462a1aa2a4643569e6be2b291c9b 100644 (file)
@@ -4,11 +4,15 @@
 
 // Garbage collector (GC)
 
-enum {
+package runtime
+
+const (
        // Four bits per word (see #defines below).
-       gcBits = 4,
-       wordsPerBitmapByte = 8/gcBits,
+       gcBits             = 4
+       wordsPerBitmapByte = 8 / gcBits
+)
 
+const (
        // GC type info programs.
        // The programs allow to store type info required for GC in a compact form.
        // Most importantly arrays take O(1) space instead of O(n).
@@ -26,38 +30,33 @@ enum {
        // For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
        // the program looks as:
        //
-       // insData 3 (BitsMultiWord BitsSlice BitsScalar)
+       // insData 3 (BitsPointer BitsScalar BitsScalar)
        //      insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd
        //
        // Total size of the program is 17 bytes (13 bytes on 32-bits).
        // The corresponding GC mask would take 43 bytes (it would be repeated
        // because the type has odd number of words).
-       insData = 1,
-       insArray,
-       insArrayEnd,
-       insEnd,
+       insData = 1 + iota
+       insArray
+       insArrayEnd
+       insEnd
+)
 
+const (
        // Pointer map
-       BitsPerPointer  = 2,
-       BitsMask        = (1<<BitsPerPointer)-1,
-       PointersPerByte = 8/BitsPerPointer,
+       _BitsPerPointer  = 2
+       _BitsMask        = (1 << _BitsPerPointer) - 1
+       _PointersPerByte = 8 / _BitsPerPointer
 
        // If you change these, also change scanblock.
        // scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
-       BitsDead        = 0,
-       BitsScalar      = 1,
-       BitsPointer     = 2,
-       BitsMultiWord   = 3,
-       // BitsMultiWord will be set for the first word of a multi-word item.
-       // When it is set, one of the following will be set for the second word.
-       // NOT USED ANYMORE: BitsString = 0,
-       // NOT USED ANYMORE: BitsSlice  = 1,
-       BitsIface       = 2,
-       BitsEface       = 3,
+       _BitsDead    = 0
+       _BitsScalar  = 1
+       _BitsPointer = 2
 
        // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
-       MaxGCMask       = 64,
-};
+       _MaxGCMask = 64
+)
 
 // Bits in per-word bitmap.
 // #defines because we shift the values beyond 32 bits.
@@ -70,9 +69,9 @@ enum {
 // there.  On a 64-bit system the off'th word in the arena is tracked by
 // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
 // the only difference is that the divisor is 8.)
-enum {
-       bitBoundary = 1, // boundary of an object
-       bitMarked = 2, // marked object
-       bitMask = bitBoundary | bitMarked,
-       bitPtrMask = BitsMask<<2,
-};
+const (
+       bitBoundary = 1 // boundary of an object
+       bitMarked   = 2 // marked object
+       bitMask     = bitBoundary | bitMarked
+       bitPtrMask  = _BitsMask << 2
+)
diff --git a/src/runtime/mheap.c b/src/runtime/mheap.c
deleted file mode 100644 (file)
index bb203d5..0000000
+++ /dev/null
@@ -1,889 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Page heap.
-//
-// See malloc.h for overview.
-//
-// When a MSpan is in the heap free list, state == MSpanFree
-// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
-//
-// When a MSpan is allocated, state == MSpanInUse or MSpanStack
-// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-static MSpan *MHeap_AllocSpanLocked(MHeap*, uintptr);
-static void MHeap_FreeSpanLocked(MHeap*, MSpan*, bool, bool);
-static bool MHeap_Grow(MHeap*, uintptr);
-static MSpan *MHeap_AllocLarge(MHeap*, uintptr);
-static MSpan *BestFit(MSpan*, uintptr, MSpan*);
-
-static void
-RecordSpan(void *vh, byte *p)
-{
-       MHeap *h;
-       MSpan *s;
-       MSpan **all;
-       uint32 cap;
-
-       h = vh;
-       s = (MSpan*)p;
-       if(h->nspan >= h->nspancap) {
-               cap = 64*1024/sizeof(all[0]);
-               if(cap < h->nspancap*3/2)
-                       cap = h->nspancap*3/2;
-               all = (MSpan**)runtime·sysAlloc(cap*sizeof(all[0]), &mstats.other_sys);
-               if(all == nil)
-                       runtime·throw("runtime: cannot allocate memory");
-               if(h->allspans) {
-                       runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
-                       // Don't free the old array if it's referenced by sweep.
-                       // See the comment in mgc0.c.
-                       if(h->allspans != runtime·mheap.gcspans)
-                               runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
-               }
-               h->allspans = all;
-               h->nspancap = cap;
-       }
-       h->allspans[h->nspan++] = s;
-}
-
-// Initialize the heap; fetch memory using alloc.
-void
-runtime·MHeap_Init(MHeap *h)
-{
-       uint32 i;
-
-       runtime·FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
-       runtime·FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
-       runtime·FixAlloc_Init(&h->specialfinalizeralloc, sizeof(SpecialFinalizer), nil, nil, &mstats.other_sys);
-       runtime·FixAlloc_Init(&h->specialprofilealloc, sizeof(SpecialProfile), nil, nil, &mstats.other_sys);
-       // h->mapcache needs no init
-       for(i=0; i<nelem(h->free); i++) {
-               runtime·MSpanList_Init(&h->free[i]);
-               runtime·MSpanList_Init(&h->busy[i]);
-       }
-       runtime·MSpanList_Init(&h->freelarge);
-       runtime·MSpanList_Init(&h->busylarge);
-       for(i=0; i<nelem(h->central); i++)
-               runtime·MCentral_Init(&h->central[i].mcentral, i);
-}
-
-void
-runtime·MHeap_MapSpans(MHeap *h)
-{
-       uintptr n;
-
-       // Map spans array, PageSize at a time.
-       n = (uintptr)h->arena_used;
-       n -= (uintptr)h->arena_start;
-       n = n / PageSize * sizeof(h->spans[0]);
-       n = ROUND(n, PhysPageSize);
-       if(h->spans_mapped >= n)
-               return;
-       runtime·SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, h->arena_reserved, &mstats.other_sys);
-       h->spans_mapped = n;
-}
-
-// Sweeps spans in list until it reclaims at least npages into the heap.
-// Returns the actual number of pages reclaimed.
-static uintptr
-MHeap_ReclaimList(MHeap *h, MSpan *list, uintptr npages)
-{
-       MSpan *s;
-       uintptr n;
-       uint32 sg;
-
-       n = 0;
-       sg = runtime·mheap.sweepgen;
-retry:
-       for(s = list->next; s != list; s = s->next) {
-               if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-                       runtime·MSpanList_Remove(s);
-                       // swept spans are at the end of the list
-                       runtime·MSpanList_InsertBack(list, s);
-                       runtime·unlock(&h->lock);
-                       n += runtime·MSpan_Sweep(s, false);
-                       runtime·lock(&h->lock);
-                       if(n >= npages)
-                               return n;
-                       // the span could have been moved elsewhere
-                       goto retry;
-               }
-               if(s->sweepgen == sg-1) {
-                       // the span is being swept by the background sweeper, skip it
-                       continue;
-               }
-               // already swept empty span,
-               // all subsequent ones must also be either swept or in process of sweeping
-               break;
-       }
-       return n;
-}
-
-// Sweeps and reclaims at least npage pages into heap.
-// Called before allocating npage pages.
-static void
-MHeap_Reclaim(MHeap *h, uintptr npage)
-{
-       uintptr reclaimed, n;
-
-       // First try to sweep busy spans with large objects of size >= npage,
-       // this has good chances of reclaiming the necessary space.
-       for(n=npage; n < nelem(h->busy); n++) {
-               if(MHeap_ReclaimList(h, &h->busy[n], npage))
-                       return;  // Bingo!
-       }
-
-       // Then -- even larger objects.
-       if(MHeap_ReclaimList(h, &h->busylarge, npage))
-               return;  // Bingo!
-
-       // Now try smaller objects.
-       // One such object is not enough, so we need to reclaim several of them.
-       reclaimed = 0;
-       for(n=0; n < npage && n < nelem(h->busy); n++) {
-               reclaimed += MHeap_ReclaimList(h, &h->busy[n], npage-reclaimed);
-               if(reclaimed >= npage)
-                       return;
-       }
-
-       // Now sweep everything that is not yet swept.
-       runtime·unlock(&h->lock);
-       for(;;) {
-               n = runtime·sweepone();
-               if(n == -1)  // all spans are swept
-                       break;
-               reclaimed += n;
-               if(reclaimed >= npage)
-                       break;
-       }
-       runtime·lock(&h->lock);
-}
-
-// Allocate a new span of npage pages from the heap for GC'd memory
-// and record its size class in the HeapMap and HeapMapCache.
-static MSpan*
-mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
-{
-       MSpan *s;
-
-       if(g != g->m->g0)
-               runtime·throw("mheap_alloc not on M stack");
-       runtime·lock(&h->lock);
-
-       // To prevent excessive heap growth, before allocating n pages
-       // we need to sweep and reclaim at least n pages.
-       if(!h->sweepdone)
-               MHeap_Reclaim(h, npage);
-
-       // transfer stats from cache to global
-       mstats.heap_alloc += g->m->mcache->local_cachealloc;
-       g->m->mcache->local_cachealloc = 0;
-       mstats.tinyallocs += g->m->mcache->local_tinyallocs;
-       g->m->mcache->local_tinyallocs = 0;
-
-       s = MHeap_AllocSpanLocked(h, npage);
-       if(s != nil) {
-               // Record span info, because gc needs to be
-               // able to map interior pointer to containing span.
-               runtime·atomicstore(&s->sweepgen, h->sweepgen);
-               s->state = MSpanInUse;
-               s->freelist = nil;
-               s->ref = 0;
-               s->sizeclass = sizeclass;
-               s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
-
-               // update stats, sweep lists
-               if(large) {
-                       mstats.heap_objects++;
-                       mstats.heap_alloc += npage<<PageShift;
-                       // Swept spans are at the end of lists.
-                       if(s->npages < nelem(h->free))
-                               runtime·MSpanList_InsertBack(&h->busy[s->npages], s);
-                       else
-                               runtime·MSpanList_InsertBack(&h->busylarge, s);
-               }
-       }
-       runtime·unlock(&h->lock);
-       return s;
-}
-
-static void
-mheap_alloc_m(G *gp)
-{
-       MHeap *h;
-       MSpan *s;
-
-       h = g->m->ptrarg[0];
-       g->m->ptrarg[0] = nil;
-       s = mheap_alloc(h, g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
-       g->m->ptrarg[0] = s;
-
-       runtime·gogo(&gp->sched);
-}
-
-MSpan*
-runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
-{
-       MSpan *s;
-       void (*fn)(G*);
-
-       // Don't do any operations that lock the heap on the G stack.
-       // It might trigger stack growth, and the stack growth code needs
-       // to be able to allocate heap.
-       if(g == g->m->g0) {
-               s = mheap_alloc(h, npage, sizeclass, large);
-       } else {
-               g->m->ptrarg[0] = h;
-               g->m->scalararg[0] = npage;
-               g->m->scalararg[1] = sizeclass;
-               g->m->scalararg[2] = large;
-               fn = mheap_alloc_m;
-               runtime·mcall(&fn);
-               s = g->m->ptrarg[0];
-               g->m->ptrarg[0] = nil;
-       }
-       if(s != nil) {
-               if(needzero && s->needzero)
-                       runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
-               s->needzero = 0;
-       }
-       return s;
-}
-
-MSpan*
-runtime·MHeap_AllocStack(MHeap *h, uintptr npage)
-{
-       MSpan *s;
-
-       if(g != g->m->g0)
-               runtime·throw("mheap_allocstack not on M stack");
-       runtime·lock(&h->lock);
-       s = MHeap_AllocSpanLocked(h, npage);
-       if(s != nil) {
-               s->state = MSpanStack;
-               s->freelist = nil;
-               s->ref = 0;
-               mstats.stacks_inuse += s->npages<<PageShift;
-       }
-       runtime·unlock(&h->lock);
-       return s;
-}
-
-// Allocates a span of the given size.  h must be locked.
-// The returned span has been removed from the
-// free list, but its state is still MSpanFree.
-static MSpan*
-MHeap_AllocSpanLocked(MHeap *h, uintptr npage)
-{
-       uintptr n;
-       MSpan *s, *t;
-       pageID p;
-
-       // Try in fixed-size lists up to max.
-       for(n=npage; n < nelem(h->free); n++) {
-               if(!runtime·MSpanList_IsEmpty(&h->free[n])) {
-                       s = h->free[n].next;
-                       goto HaveSpan;
-               }
-       }
-
-       // Best fit in list of large spans.
-       if((s = MHeap_AllocLarge(h, npage)) == nil) {
-               if(!MHeap_Grow(h, npage))
-                       return nil;
-               if((s = MHeap_AllocLarge(h, npage)) == nil)
-                       return nil;
-       }
-
-HaveSpan:
-       // Mark span in use.
-       if(s->state != MSpanFree)
-               runtime·throw("MHeap_AllocLocked - MSpan not free");
-       if(s->npages < npage)
-               runtime·throw("MHeap_AllocLocked - bad npages");
-       runtime·MSpanList_Remove(s);
-       if(s->next != nil || s->prev != nil)
-               runtime·throw("still in list");
-       if(s->npreleased > 0) {
-               runtime·SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
-               mstats.heap_released -= s->npreleased<<PageShift;
-               s->npreleased = 0;
-       }
-
-       if(s->npages > npage) {
-               // Trim extra and put it back in the heap.
-               t = runtime·FixAlloc_Alloc(&h->spanalloc);
-               runtime·MSpan_Init(t, s->start + npage, s->npages - npage);
-               s->npages = npage;
-               p = t->start;
-               p -= ((uintptr)h->arena_start>>PageShift);
-               if(p > 0)
-                       h->spans[p-1] = s;
-               h->spans[p] = t;
-               h->spans[p+t->npages-1] = t;
-               t->needzero = s->needzero;
-               s->state = MSpanStack; // prevent coalescing with s
-               t->state = MSpanStack;
-               MHeap_FreeSpanLocked(h, t, false, false);
-               t->unusedsince = s->unusedsince; // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
-               s->state = MSpanFree;
-       }
-       s->unusedsince = 0;
-
-       p = s->start;
-       p -= ((uintptr)h->arena_start>>PageShift);
-       for(n=0; n<npage; n++)
-               h->spans[p+n] = s;
-
-       mstats.heap_inuse += npage<<PageShift;
-       mstats.heap_idle -= npage<<PageShift;
-
-       //runtime·printf("spanalloc %p\n", s->start << PageShift);
-       if(s->next != nil || s->prev != nil)
-               runtime·throw("still in list");
-       return s;
-}
-
-// Allocate a span of exactly npage pages from the list of large spans.
-static MSpan*
-MHeap_AllocLarge(MHeap *h, uintptr npage)
-{
-       return BestFit(&h->freelarge, npage, nil);
-}
-
-// Search list for smallest span with >= npage pages.
-// If there are multiple smallest spans, take the one
-// with the earliest starting address.
-static MSpan*
-BestFit(MSpan *list, uintptr npage, MSpan *best)
-{
-       MSpan *s;
-
-       for(s=list->next; s != list; s=s->next) {
-               if(s->npages < npage)
-                       continue;
-               if(best == nil
-               || s->npages < best->npages
-               || (s->npages == best->npages && s->start < best->start))
-                       best = s;
-       }
-       return best;
-}
-
-// Try to add at least npage pages of memory to the heap,
-// returning whether it worked.
-static bool
-MHeap_Grow(MHeap *h, uintptr npage)
-{
-       uintptr ask;
-       void *v;
-       MSpan *s;
-       pageID p;
-
-       // Ask for a big chunk, to reduce the number of mappings
-       // the operating system needs to track; also amortizes
-       // the overhead of an operating system mapping.
-       // Allocate a multiple of 64kB.
-       npage = ROUND(npage, (64<<10)/PageSize);
-       ask = npage<<PageShift;
-       if(ask < HeapAllocChunk)
-               ask = HeapAllocChunk;
-
-       v = runtime·MHeap_SysAlloc(h, ask);
-       if(v == nil) {
-               if(ask > (npage<<PageShift)) {
-                       ask = npage<<PageShift;
-                       v = runtime·MHeap_SysAlloc(h, ask);
-               }
-               if(v == nil) {
-                       runtime·printf("runtime: out of memory: cannot allocate %D-byte block (%D in use)\n", (uint64)ask, mstats.heap_sys);
-                       return false;
-               }
-       }
-
-       // Create a fake "in use" span and free it, so that the
-       // right coalescing happens.
-       s = runtime·FixAlloc_Alloc(&h->spanalloc);
-       runtime·MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
-       p = s->start;
-       p -= ((uintptr)h->arena_start>>PageShift);
-       h->spans[p] = s;
-       h->spans[p + s->npages - 1] = s;
-       runtime·atomicstore(&s->sweepgen, h->sweepgen);
-       s->state = MSpanInUse;
-       MHeap_FreeSpanLocked(h, s, false, true);
-       return true;
-}
-
-// Look up the span at the given address.
-// Address is guaranteed to be in map
-// and is guaranteed to be start or end of span.
-MSpan*
-runtime·MHeap_Lookup(MHeap *h, void *v)
-{
-       uintptr p;
-       
-       p = (uintptr)v;
-       p -= (uintptr)h->arena_start;
-       return h->spans[p >> PageShift];
-}
-
-// Look up the span at the given address.
-// Address is *not* guaranteed to be in map
-// and may be anywhere in the span.
-// Map entries for the middle of a span are only
-// valid for allocated spans.  Free spans may have
-// other garbage in their middles, so we have to
-// check for that.
-MSpan*
-runtime·MHeap_LookupMaybe(MHeap *h, void *v)
-{
-       MSpan *s;
-       pageID p, q;
-
-       if((byte*)v < h->arena_start || (byte*)v >= h->arena_used)
-               return nil;
-       p = (uintptr)v>>PageShift;
-       q = p;
-       q -= (uintptr)h->arena_start >> PageShift;
-       s = h->spans[q];
-       if(s == nil || p < s->start || v >= s->limit || s->state != MSpanInUse)
-               return nil;
-       return s;
-}
-
-// Free the span back into the heap.
-static void
-mheap_free(MHeap *h, MSpan *s, int32 acct)
-{
-       if(g != g->m->g0)
-               runtime·throw("mheap_free not on M stack");
-       runtime·lock(&h->lock);
-       mstats.heap_alloc += g->m->mcache->local_cachealloc;
-       g->m->mcache->local_cachealloc = 0;
-       mstats.tinyallocs += g->m->mcache->local_tinyallocs;
-       g->m->mcache->local_tinyallocs = 0;
-       if(acct) {
-               mstats.heap_alloc -= s->npages<<PageShift;
-               mstats.heap_objects--;
-       }
-       MHeap_FreeSpanLocked(h, s, true, true);
-       runtime·unlock(&h->lock);
-}
-
-static void
-mheap_free_m(G *gp)
-{
-       MHeap *h;
-       MSpan *s;
-       
-       h = g->m->ptrarg[0];
-       s = g->m->ptrarg[1];
-       g->m->ptrarg[0] = nil;
-       g->m->ptrarg[1] = nil;
-       mheap_free(h, s, g->m->scalararg[0]);
-       runtime·gogo(&gp->sched);
-}
-
-void
-runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
-{
-       void (*fn)(G*);
-
-       if(g == g->m->g0) {
-               mheap_free(h, s, acct);
-       } else {
-               g->m->ptrarg[0] = h;
-               g->m->ptrarg[1] = s;
-               g->m->scalararg[0] = acct;
-               fn = mheap_free_m;
-               runtime·mcall(&fn);
-       }
-}
-
-void
-runtime·MHeap_FreeStack(MHeap *h, MSpan *s)
-{
-       if(g != g->m->g0)
-               runtime·throw("mheap_freestack not on M stack");
-       s->needzero = 1;
-       runtime·lock(&h->lock);
-       mstats.stacks_inuse -= s->npages<<PageShift;
-       MHeap_FreeSpanLocked(h, s, true, true);
-       runtime·unlock(&h->lock);
-}
-
-static void
-MHeap_FreeSpanLocked(MHeap *h, MSpan *s, bool acctinuse, bool acctidle)
-{
-       MSpan *t;
-       pageID p;
-
-       switch(s->state) {
-       case MSpanStack:
-               if(s->ref != 0)
-                       runtime·throw("MHeap_FreeSpanLocked - invalid stack free");
-               break;
-       case MSpanInUse:
-               if(s->ref != 0 || s->sweepgen != h->sweepgen) {
-                       runtime·printf("MHeap_FreeSpanLocked - span %p ptr %p ref %d sweepgen %d/%d\n",
-                                      s, s->start<<PageShift, s->ref, s->sweepgen, h->sweepgen);
-                       runtime·throw("MHeap_FreeSpanLocked - invalid free");
-               }
-               break;
-       default:
-               runtime·throw("MHeap_FreeSpanLocked - invalid span state");
-               break;
-       }
-       if(acctinuse)
-               mstats.heap_inuse -= s->npages<<PageShift;
-       if(acctidle)
-               mstats.heap_idle += s->npages<<PageShift;
-       s->state = MSpanFree;
-       runtime·MSpanList_Remove(s);
-       // Stamp newly unused spans. The scavenger will use that
-       // info to potentially give back some pages to the OS.
-       s->unusedsince = runtime·nanotime();
-       s->npreleased = 0;
-
-       // Coalesce with earlier, later spans.
-       p = s->start;
-       p -= (uintptr)h->arena_start >> PageShift;
-       if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
-               s->start = t->start;
-               s->npages += t->npages;
-               s->npreleased = t->npreleased; // absorb released pages
-               s->needzero |= t->needzero;
-               p -= t->npages;
-               h->spans[p] = s;
-               runtime·MSpanList_Remove(t);
-               t->state = MSpanDead;
-               runtime·FixAlloc_Free(&h->spanalloc, t);
-       }
-       if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
-               s->npages += t->npages;
-               s->npreleased += t->npreleased;
-               s->needzero |= t->needzero;
-               h->spans[p + s->npages - 1] = s;
-               runtime·MSpanList_Remove(t);
-               t->state = MSpanDead;
-               runtime·FixAlloc_Free(&h->spanalloc, t);
-       }
-
-       // Insert s into appropriate list.
-       if(s->npages < nelem(h->free))
-               runtime·MSpanList_Insert(&h->free[s->npages], s);
-       else
-               runtime·MSpanList_Insert(&h->freelarge, s);
-}
-
-static uintptr
-scavengelist(MSpan *list, uint64 now, uint64 limit)
-{
-       uintptr released, sumreleased;
-       MSpan *s;
-
-       if(runtime·MSpanList_IsEmpty(list))
-               return 0;
-
-       sumreleased = 0;
-       for(s=list->next; s != list; s=s->next) {
-               if((now - s->unusedsince) > limit && s->npreleased != s->npages) {
-                       released = (s->npages - s->npreleased) << PageShift;
-                       mstats.heap_released += released;
-                       sumreleased += released;
-                       s->npreleased = s->npages;
-                       runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
-               }
-       }
-       return sumreleased;
-}
-
-void
-runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit)
-{
-       uint32 i;
-       uintptr sumreleased;
-       MHeap *h;
-       
-       h = &runtime·mheap;
-       runtime·lock(&h->lock);
-       sumreleased = 0;
-       for(i=0; i < nelem(h->free); i++)
-               sumreleased += scavengelist(&h->free[i], now, limit);
-       sumreleased += scavengelist(&h->freelarge, now, limit);
-       runtime·unlock(&h->lock);
-
-       if(runtime·debug.gctrace > 0) {
-               if(sumreleased > 0)
-                       runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
-               // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
-               // But we can't call ReadMemStats on g0 holding locks.
-               runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
-                       k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
-                       mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
-       }
-}
-
-void
-runtime·scavenge_m(void)
-{
-       runtime·MHeap_Scavenge(-1, ~(uintptr)0, 0);
-}
-
-// Initialize a new span with the given start and npages.
-void
-runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages)
-{
-       span->next = nil;
-       span->prev = nil;
-       span->start = start;
-       span->npages = npages;
-       span->freelist = nil;
-       span->ref = 0;
-       span->sizeclass = 0;
-       span->incache = false;
-       span->elemsize = 0;
-       span->state = MSpanDead;
-       span->unusedsince = 0;
-       span->npreleased = 0;
-       span->specialLock.key = 0;
-       span->specials = nil;
-       span->needzero = 0;
-}
-
-// Initialize an empty doubly-linked list.
-void
-runtime·MSpanList_Init(MSpan *list)
-{
-       list->state = MSpanListHead;
-       list->next = list;
-       list->prev = list;
-}
-
-void
-runtime·MSpanList_Remove(MSpan *span)
-{
-       if(span->prev == nil && span->next == nil)
-               return;
-       span->prev->next = span->next;
-       span->next->prev = span->prev;
-       span->prev = nil;
-       span->next = nil;
-}
-
-bool
-runtime·MSpanList_IsEmpty(MSpan *list)
-{
-       return list->next == list;
-}
-
-void
-runtime·MSpanList_Insert(MSpan *list, MSpan *span)
-{
-       if(span->next != nil || span->prev != nil) {
-               runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
-               runtime·throw("MSpanList_Insert");
-       }
-       span->next = list->next;
-       span->prev = list;
-       span->next->prev = span;
-       span->prev->next = span;
-}
-
-void
-runtime·MSpanList_InsertBack(MSpan *list, MSpan *span)
-{
-       if(span->next != nil || span->prev != nil) {
-               runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
-               runtime·throw("MSpanList_Insert");
-       }
-       span->next = list;
-       span->prev = list->prev;
-       span->next->prev = span;
-       span->prev->next = span;
-}
-
-// Adds the special record s to the list of special records for
-// the object p.  All fields of s should be filled in except for
-// offset & next, which this routine will fill in.
-// Returns true if the special was successfully added, false otherwise.
-// (The add will fail only if a record with the same p and s->kind
-//  already exists.)
-static bool
-addspecial(void *p, Special *s)
-{
-       MSpan *span;
-       Special **t, *x;
-       uintptr offset;
-       byte kind;
-
-       span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
-       if(span == nil)
-               runtime·throw("addspecial on invalid pointer");
-
-       // Ensure that the span is swept.
-       // GC accesses specials list w/o locks. And it's just much safer.
-       g->m->locks++;
-       runtime·MSpan_EnsureSwept(span);
-
-       offset = (uintptr)p - (span->start << PageShift);
-       kind = s->kind;
-
-       runtime·lock(&span->specialLock);
-
-       // Find splice point, check for existing record.
-       t = &span->specials;
-       while((x = *t) != nil) {
-               if(offset == x->offset && kind == x->kind) {
-                       runtime·unlock(&span->specialLock);
-                       g->m->locks--;
-                       return false; // already exists
-               }
-               if(offset < x->offset || (offset == x->offset && kind < x->kind))
-                       break;
-               t = &x->next;
-       }
-       // Splice in record, fill in offset.
-       s->offset = offset;
-       s->next = x;
-       *t = s;
-       runtime·unlock(&span->specialLock);
-       g->m->locks--;
-       return true;
-}
-
-// Removes the Special record of the given kind for the object p.
-// Returns the record if the record existed, nil otherwise.
-// The caller must FixAlloc_Free the result.
-static Special*
-removespecial(void *p, byte kind)
-{
-       MSpan *span;
-       Special *s, **t;
-       uintptr offset;
-
-       span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
-       if(span == nil)
-               runtime·throw("removespecial on invalid pointer");
-
-       // Ensure that the span is swept.
-       // GC accesses specials list w/o locks. And it's just much safer.
-       g->m->locks++;
-       runtime·MSpan_EnsureSwept(span);
-
-       offset = (uintptr)p - (span->start << PageShift);
-
-       runtime·lock(&span->specialLock);
-       t = &span->specials;
-       while((s = *t) != nil) {
-               // This function is used for finalizers only, so we don't check for
-               // "interior" specials (p must be exactly equal to s->offset).
-               if(offset == s->offset && kind == s->kind) {
-                       *t = s->next;
-                       runtime·unlock(&span->specialLock);
-                       g->m->locks--;
-                       return s;
-               }
-               t = &s->next;
-       }
-       runtime·unlock(&span->specialLock);
-       g->m->locks--;
-       return nil;
-}
-
-// Adds a finalizer to the object p.  Returns true if it succeeded.
-bool
-runtime·addfinalizer(void *p, FuncVal *f, uintptr nret, Type *fint, PtrType *ot)
-{
-       SpecialFinalizer *s;
-
-       runtime·lock(&runtime·mheap.speciallock);
-       s = runtime·FixAlloc_Alloc(&runtime·mheap.specialfinalizeralloc);
-       runtime·unlock(&runtime·mheap.speciallock);
-       s->special.kind = KindSpecialFinalizer;
-       s->fn = f;
-       s->nret = nret;
-       s->fint = fint;
-       s->ot = ot;
-       if(addspecial(p, &s->special))
-               return true;
-
-       // There was an old finalizer
-       runtime·lock(&runtime·mheap.speciallock);
-       runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
-       runtime·unlock(&runtime·mheap.speciallock);
-       return false;
-}
-
-// Removes the finalizer (if any) from the object p.
-void
-runtime·removefinalizer(void *p)
-{
-       SpecialFinalizer *s;
-
-       s = (SpecialFinalizer*)removespecial(p, KindSpecialFinalizer);
-       if(s == nil)
-               return; // there wasn't a finalizer to remove
-       runtime·lock(&runtime·mheap.speciallock);
-       runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
-       runtime·unlock(&runtime·mheap.speciallock);
-}
-
-// Set the heap profile bucket associated with addr to b.
-void
-runtime·setprofilebucket_m(void)
-{      
-       void *p;
-       Bucket *b;
-       SpecialProfile *s;
-       
-       p = g->m->ptrarg[0];
-       b = g->m->ptrarg[1];
-       g->m->ptrarg[0] = nil;
-       g->m->ptrarg[1] = nil;
-
-       runtime·lock(&runtime·mheap.speciallock);
-       s = runtime·FixAlloc_Alloc(&runtime·mheap.specialprofilealloc);
-       runtime·unlock(&runtime·mheap.speciallock);
-       s->special.kind = KindSpecialProfile;
-       s->b = b;
-       if(!addspecial(p, &s->special))
-               runtime·throw("setprofilebucket: profile already set");
-}
-
-// Do whatever cleanup needs to be done to deallocate s.  It has
-// already been unlinked from the MSpan specials list.
-// Returns true if we should keep working on deallocating p.
-bool
-runtime·freespecial(Special *s, void *p, uintptr size, bool freed)
-{
-       SpecialFinalizer *sf;
-       SpecialProfile *sp;
-
-       switch(s->kind) {
-       case KindSpecialFinalizer:
-               sf = (SpecialFinalizer*)s;
-               runtime·queuefinalizer(p, sf->fn, sf->nret, sf->fint, sf->ot);
-               runtime·lock(&runtime·mheap.speciallock);
-               runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, sf);
-               runtime·unlock(&runtime·mheap.speciallock);
-               return false; // don't free p until finalizer is done
-       case KindSpecialProfile:
-               sp = (SpecialProfile*)s;
-               runtime·mProf_Free(sp->b, size, freed);
-               runtime·lock(&runtime·mheap.speciallock);
-               runtime·FixAlloc_Free(&runtime·mheap.specialprofilealloc, sp);
-               runtime·unlock(&runtime·mheap.speciallock);
-               return true;
-       default:
-               runtime·throw("bad special kind");
-               return true;
-       }
-}
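
The heart of MHeap_AllocSpanLocked above (in both the deleted C and the new Go version later in this diff) is a best-fit search over the free spans followed by trimming the excess pages back into the heap. As a rough standalone sketch of that shape only — the span struct, the allocSpan helper, and the slice-based free list here are invented for the example and are not runtime code — something like the following captures the trim-and-return-the-tail behaviour:

package main

import (
	"fmt"
	"sort"
)

// A toy free list of spans, each with a start page and a page count.
type span struct {
	start, npages int
}

// allocSpan finds the best-fit free span (smallest npages >= want, earliest
// start on ties), removes it, and trims any excess back onto the free list.
func allocSpan(free []span, want int) (got span, rest []span, ok bool) {
	best := -1
	for i, s := range free {
		if s.npages < want {
			continue
		}
		if best == -1 || s.npages < free[best].npages ||
			(s.npages == free[best].npages && s.start < free[best].start) {
			best = i
		}
	}
	if best == -1 {
		return span{}, free, false
	}
	got = free[best]
	rest = append(free[:best:best], free[best+1:]...)
	if got.npages > want {
		// Trim the tail and return it to the free list.
		rest = append(rest, span{start: got.start + want, npages: got.npages - want})
		got.npages = want
	}
	sort.Slice(rest, func(i, j int) bool { return rest[i].start < rest[j].start })
	return got, rest, true
}

func main() {
	free := []span{{start: 0, npages: 4}, {start: 10, npages: 16}}
	got, free, ok := allocSpan(free, 6)
	fmt.Println(ok, got, free) // true {10 6} [{0 4} {16 10}]
}

The real allocator additionally keeps fixed-size free lists per page count, a page-indexed spans map used for coalescing, and the temporary MSpanStack state that stops the trimmed tail from merging back into the span being handed out.
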
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
new file mode 100644 (file)
index 0000000..b451b63
--- /dev/null
@@ -0,0 +1,785 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Page heap.
+//
+// See malloc.h for overview.
+//
+// When a MSpan is in the heap free list, state == MSpanFree
+// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
+//
+// When a MSpan is allocated, state == MSpanInUse or MSpanStack
+// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
+
+package runtime
+
+import "unsafe"
+
+var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go
+var h_spans []*mspan    // TODO: make this h.spans once mheap can be defined in Go
+
+func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
+       h := (*mheap)(vh)
+       s := (*mspan)(p)
+       if len(h_allspans) >= cap(h_allspans) {
+               n := 64 * 1024 / ptrSize
+               if n < cap(h_allspans)*3/2 {
+                       n = cap(h_allspans) * 3 / 2
+               }
+               var new []*mspan
+               sp := (*slice)(unsafe.Pointer(&new))
+               sp.array = (*byte)(sysAlloc(uintptr(n)*ptrSize, &memstats.other_sys))
+               if sp.array == nil {
+                       gothrow("runtime: cannot allocate memory")
+               }
+               sp.len = uint(len(h_allspans))
+               sp.cap = uint(n)
+               if len(h_allspans) > 0 {
+                       copy(new, h_allspans)
+                       // Don't free the old array if it's referenced by sweep.
+                       // See the comment in mgc0.c.
+                       if h.allspans != mheap_.gcspans {
+                               sysFree(unsafe.Pointer(h.allspans), uintptr(cap(h_allspans))*ptrSize, &memstats.other_sys)
+                       }
+               }
+               h_allspans = new
+               h.allspans = (**mspan)(unsafe.Pointer(sp.array))
+       }
+       h_allspans = append(h_allspans, s)
+       h.nspan = uint32(len(h_allspans))
+}
+
+// Initialize the heap.
+func mHeap_Init(h *mheap, spans_size uintptr) {
+       fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
+       fixAlloc_Init(&h.cachealloc, unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
+       fixAlloc_Init(&h.specialfinalizeralloc, unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
+       fixAlloc_Init(&h.specialprofilealloc, unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
+
+       // h->mapcache needs no init
+       for i := range h.free {
+               mSpanList_Init(&h.free[i])
+               mSpanList_Init(&h.busy[i])
+       }
+
+       mSpanList_Init(&h.freelarge)
+       mSpanList_Init(&h.busylarge)
+       for i := range h.central {
+               mCentral_Init(&h.central[i].mcentral, int32(i))
+       }
+
+       sp := (*slice)(unsafe.Pointer(&h_spans))
+       sp.array = (*byte)(unsafe.Pointer(h.spans))
+       sp.len = uint(spans_size / ptrSize)
+       sp.cap = uint(spans_size / ptrSize)
+}
+
+func mHeap_MapSpans(h *mheap) {
+       // Map spans array, PageSize at a time.
+       n := uintptr(unsafe.Pointer(h.arena_used))
+       n -= uintptr(unsafe.Pointer(h.arena_start))
+       n = n / _PageSize * ptrSize
+       n = round(n, _PhysPageSize)
+       if h.spans_mapped >= n {
+               return
+       }
+       sysMap(add(unsafe.Pointer(h.spans), h.spans_mapped), n-h.spans_mapped, h.arena_reserved, &memstats.other_sys)
+       h.spans_mapped = n
+}
+
+// Sweeps spans in list until it reclaims at least npages into heap.
+// Returns the actual number of pages reclaimed.
+func mHeap_ReclaimList(h *mheap, list *mspan, npages uintptr) uintptr {
+       n := uintptr(0)
+       sg := mheap_.sweepgen
+retry:
+       for s := list.next; s != list; s = s.next {
+               if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+                       mSpanList_Remove(s)
+                       // swept spans are at the end of the list
+                       mSpanList_InsertBack(list, s)
+                       unlock(&h.lock)
+                       if mSpan_Sweep(s, false) {
+                               // TODO(rsc,dvyukov): This is probably wrong.
+                               // It is undercounting the number of pages reclaimed.
+                               // See golang.org/issue/9048.
+                               // Note that if we want to add the true count of s's pages,
+                               // we must record that before calling mSpan_Sweep,
+                               // because if mSpan_Sweep returns true the span has
+                               // been freed.
+                               n++
+                       }
+                       lock(&h.lock)
+                       if n >= npages {
+                               return n
+                       }
+                       // the span could have been moved elsewhere
+                       goto retry
+               }
+               if s.sweepgen == sg-1 {
+                       // the span is being swept by the background sweeper, skip
+                       continue
+               }
+               // already swept empty span,
+               // all subsequent ones must also be either swept or in process of sweeping
+               break
+       }
+       return n
+}
+
+// Sweeps and reclaims at least npage pages into heap.
+// Called before allocating npage pages.
+func mHeap_Reclaim(h *mheap, npage uintptr) {
+       // First try to sweep busy spans with large objects of size >= npage,
+       // this has good chances of reclaiming the necessary space.
+       for i := int(npage); i < len(h.busy); i++ {
+               if mHeap_ReclaimList(h, &h.busy[i], npage) != 0 {
+                       return // Bingo!
+               }
+       }
+
+       // Then -- even larger objects.
+       if mHeap_ReclaimList(h, &h.busylarge, npage) != 0 {
+               return // Bingo!
+       }
+
+       // Now try smaller objects.
+       // One such object is not enough, so we need to reclaim several of them.
+       reclaimed := uintptr(0)
+       for i := 0; i < int(npage) && i < len(h.busy); i++ {
+               reclaimed += mHeap_ReclaimList(h, &h.busy[i], npage-reclaimed)
+               if reclaimed >= npage {
+                       return
+               }
+       }
+
+       // Now sweep everything that is not yet swept.
+       unlock(&h.lock)
+       for {
+               n := sweepone()
+               if n == ^uintptr(0) { // all spans are swept
+                       break
+               }
+               reclaimed += n
+               if reclaimed >= npage {
+                       break
+               }
+       }
+       lock(&h.lock)
+}
+
+// Allocate a new span of npage pages from the heap for GC'd memory
+// and record its size class in the HeapMap and HeapMapCache.
+func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan {
+       _g_ := getg()
+       if _g_ != _g_.m.g0 {
+               gothrow("_mheap_alloc not on M stack")
+       }
+       lock(&h.lock)
+
+       // To prevent excessive heap growth, before allocating n pages
+       // we need to sweep and reclaim at least n pages.
+       if h.sweepdone == 0 {
+               mHeap_Reclaim(h, npage)
+       }
+
+       // transfer stats from cache to global
+       memstats.heap_alloc += uint64(_g_.m.mcache.local_cachealloc)
+       _g_.m.mcache.local_cachealloc = 0
+       memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
+       _g_.m.mcache.local_tinyallocs = 0
+
+       s := mHeap_AllocSpanLocked(h, npage)
+       if s != nil {
+               // Record span info, because gc needs to be
+               // able to map interior pointer to containing span.
+               atomicstore(&s.sweepgen, h.sweepgen)
+               s.state = _MSpanInUse
+               s.freelist = nil
+               s.ref = 0
+               s.sizeclass = uint8(sizeclass)
+               if sizeclass == 0 {
+                       s.elemsize = s.npages << _PageShift
+               } else {
+                       s.elemsize = uintptr(class_to_size[sizeclass])
+               }
+
+               // update stats, sweep lists
+               if large {
+                       memstats.heap_objects++
+                       memstats.heap_alloc += uint64(npage << _PageShift)
+                       // Swept spans are at the end of lists.
+                       if s.npages < uintptr(len(h.free)) {
+                               mSpanList_InsertBack(&h.busy[s.npages], s)
+                       } else {
+                               mSpanList_InsertBack(&h.busylarge, s)
+                       }
+               }
+       }
+       unlock(&h.lock)
+       return s
+}
+
+func mHeap_Alloc(h *mheap, npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
+       // Don't do any operations that lock the heap on the G stack.
+       // It might trigger stack growth, and the stack growth code needs
+       // to be able to allocate heap.
+       var s *mspan
+       onM(func() {
+               s = mHeap_Alloc_m(h, npage, sizeclass, large)
+       })
+
+       if s != nil {
+               if needzero && s.needzero != 0 {
+                       memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
+               }
+               s.needzero = 0
+       }
+       return s
+}
+
+func mHeap_AllocStack(h *mheap, npage uintptr) *mspan {
+       _g_ := getg()
+       if _g_ != _g_.m.g0 {
+               gothrow("mheap_allocstack not on M stack")
+       }
+       lock(&h.lock)
+       s := mHeap_AllocSpanLocked(h, npage)
+       if s != nil {
+               s.state = _MSpanStack
+               s.freelist = nil
+               s.ref = 0
+               memstats.stacks_inuse += uint64(s.npages << _PageShift)
+       }
+       unlock(&h.lock)
+       return s
+}
+
+// Allocates a span of the given size.  h must be locked.
+// The returned span has been removed from the
+// free list, but its state is still MSpanFree.
+func mHeap_AllocSpanLocked(h *mheap, npage uintptr) *mspan {
+       var s *mspan
+
+       // Try in fixed-size lists up to max.
+       for i := int(npage); i < len(h.free); i++ {
+               if !mSpanList_IsEmpty(&h.free[i]) {
+                       s = h.free[i].next
+                       goto HaveSpan
+               }
+       }
+
+       // Best fit in list of large spans.
+       s = mHeap_AllocLarge(h, npage)
+       if s == nil {
+               if !mHeap_Grow(h, npage) {
+                       return nil
+               }
+               s = mHeap_AllocLarge(h, npage)
+               if s == nil {
+                       return nil
+               }
+       }
+
+HaveSpan:
+       // Mark span in use.
+       if s.state != _MSpanFree {
+               gothrow("MHeap_AllocLocked - MSpan not free")
+       }
+       if s.npages < npage {
+               gothrow("MHeap_AllocLocked - bad npages")
+       }
+       mSpanList_Remove(s)
+       if s.next != nil || s.prev != nil {
+               gothrow("still in list")
+       }
+       if s.npreleased > 0 {
+               sysUsed((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+               memstats.heap_released -= uint64(s.npreleased << _PageShift)
+               s.npreleased = 0
+       }
+
+       if s.npages > npage {
+               // Trim extra and put it back in the heap.
+               t := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
+               mSpan_Init(t, s.start+pageID(npage), s.npages-npage)
+               s.npages = npage
+               p := uintptr(t.start)
+               p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+               if p > 0 {
+                       h_spans[p-1] = s
+               }
+               h_spans[p] = t
+               h_spans[p+t.npages-1] = t
+               t.needzero = s.needzero
+               s.state = _MSpanStack // prevent coalescing with s
+               t.state = _MSpanStack
+               mHeap_FreeSpanLocked(h, t, false, false)
+               t.unusedsince = s.unusedsince // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
+               s.state = _MSpanFree
+       }
+       s.unusedsince = 0
+
+       p := uintptr(s.start)
+       p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+       for n := uintptr(0); n < npage; n++ {
+               h_spans[p+n] = s
+       }
+
+       memstats.heap_inuse += uint64(npage << _PageShift)
+       memstats.heap_idle -= uint64(npage << _PageShift)
+
+       //println("spanalloc", hex(s.start<<_PageShift))
+       if s.next != nil || s.prev != nil {
+               gothrow("still in list")
+       }
+       return s
+}
+
+// Allocate a span of exactly npage pages from the list of large spans.
+func mHeap_AllocLarge(h *mheap, npage uintptr) *mspan {
+       return bestFit(&h.freelarge, npage, nil)
+}
+
+// Search list for smallest span with >= npage pages.
+// If there are multiple smallest spans, take the one
+// with the earliest starting address.
+func bestFit(list *mspan, npage uintptr, best *mspan) *mspan {
+       for s := list.next; s != list; s = s.next {
+               if s.npages < npage {
+                       continue
+               }
+               if best == nil || s.npages < best.npages || (s.npages == best.npages && s.start < best.start) {
+                       best = s
+               }
+       }
+       return best
+}
+
+// Try to add at least npage pages of memory to the heap,
+// returning whether it worked.
+func mHeap_Grow(h *mheap, npage uintptr) bool {
+       // Ask for a big chunk, to reduce the number of mappings
+       // the operating system needs to track; also amortizes
+       // the overhead of an operating system mapping.
+       // Allocate a multiple of 64kB.
+       npage = round(npage, (64<<10)/_PageSize)
+       ask := npage << _PageShift
+       if ask < _HeapAllocChunk {
+               ask = _HeapAllocChunk
+       }
+
+       v := mHeap_SysAlloc(h, ask)
+       if v == nil {
+               if ask > npage<<_PageShift {
+                       ask = npage << _PageShift
+                       v = mHeap_SysAlloc(h, ask)
+               }
+               if v == nil {
+                       print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+                       return false
+               }
+       }
+
+       // Create a fake "in use" span and free it, so that the
+       // right coalescing happens.
+       s := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
+       mSpan_Init(s, pageID(uintptr(v)>>_PageShift), ask>>_PageShift)
+       p := uintptr(s.start)
+       p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+       h_spans[p] = s
+       h_spans[p+s.npages-1] = s
+       atomicstore(&s.sweepgen, h.sweepgen)
+       s.state = _MSpanInUse
+       mHeap_FreeSpanLocked(h, s, false, true)
+       return true
+}
+
+// Look up the span at the given address.
+// Address is guaranteed to be in map
+// and is guaranteed to be start or end of span.
+func mHeap_Lookup(h *mheap, v unsafe.Pointer) *mspan {
+       p := uintptr(v)
+       p -= uintptr(unsafe.Pointer(h.arena_start))
+       return h_spans[p>>_PageShift]
+}
+
+// Look up the span at the given address.
+// Address is *not* guaranteed to be in map
+// and may be anywhere in the span.
+// Map entries for the middle of a span are only
+// valid for allocated spans.  Free spans may have
+// other garbage in their middles, so we have to
+// check for that.
+func mHeap_LookupMaybe(h *mheap, v unsafe.Pointer) *mspan {
+       if uintptr(v) < uintptr(unsafe.Pointer(h.arena_start)) || uintptr(v) >= uintptr(unsafe.Pointer(h.arena_used)) {
+               return nil
+       }
+       p := uintptr(v) >> _PageShift
+       q := p
+       q -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
+       s := h_spans[q]
+       if s == nil || p < uintptr(s.start) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
+               return nil
+       }
+       return s
+}
+
+// Free the span back into the heap.
+func mHeap_Free(h *mheap, s *mspan, acct int32) {
+       onM(func() {
+               mp := getg().m
+               lock(&h.lock)
+               memstats.heap_alloc += uint64(mp.mcache.local_cachealloc)
+               mp.mcache.local_cachealloc = 0
+               memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
+               mp.mcache.local_tinyallocs = 0
+               if acct != 0 {
+                       memstats.heap_alloc -= uint64(s.npages << _PageShift)
+                       memstats.heap_objects--
+               }
+               mHeap_FreeSpanLocked(h, s, true, true)
+               unlock(&h.lock)
+       })
+}
+
+func mHeap_FreeStack(h *mheap, s *mspan) {
+       _g_ := getg()
+       if _g_ != _g_.m.g0 {
+               gothrow("mheap_freestack not on M stack")
+       }
+       s.needzero = 1
+       lock(&h.lock)
+       memstats.stacks_inuse -= uint64(s.npages << _PageShift)
+       mHeap_FreeSpanLocked(h, s, true, true)
+       unlock(&h.lock)
+}
+
+func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool) {
+       switch s.state {
+       case _MSpanStack:
+               if s.ref != 0 {
+                       gothrow("MHeap_FreeSpanLocked - invalid stack free")
+               }
+       case _MSpanInUse:
+               if s.ref != 0 || s.sweepgen != h.sweepgen {
+                       print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
+                       gothrow("MHeap_FreeSpanLocked - invalid free")
+               }
+       default:
+               gothrow("MHeap_FreeSpanLocked - invalid span state")
+       }
+
+       if acctinuse {
+               memstats.heap_inuse -= uint64(s.npages << _PageShift)
+       }
+       if acctidle {
+               memstats.heap_idle += uint64(s.npages << _PageShift)
+       }
+       s.state = _MSpanFree
+       mSpanList_Remove(s)
+
+       // Stamp newly unused spans. The scavenger will use that
+       // info to potentially give back some pages to the OS.
+       s.unusedsince = nanotime()
+       s.npreleased = 0
+
+       // Coalesce with earlier, later spans.
+       p := uintptr(s.start)
+       p -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
+       if p > 0 {
+               t := h_spans[p-1]
+               if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
+                       s.start = t.start
+                       s.npages += t.npages
+                       s.npreleased = t.npreleased // absorb released pages
+                       s.needzero |= t.needzero
+                       p -= t.npages
+                       h_spans[p] = s
+                       mSpanList_Remove(t)
+                       t.state = _MSpanDead
+                       fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+               }
+       }
+       if (p+s.npages)*ptrSize < h.spans_mapped {
+               t := h_spans[p+s.npages]
+               if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
+                       s.npages += t.npages
+                       s.npreleased += t.npreleased
+                       s.needzero |= t.needzero
+                       h_spans[p+s.npages-1] = s
+                       mSpanList_Remove(t)
+                       t.state = _MSpanDead
+                       fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+               }
+       }
+
+       // Insert s into appropriate list.
+       if s.npages < uintptr(len(h.free)) {
+               mSpanList_Insert(&h.free[s.npages], s)
+       } else {
+               mSpanList_Insert(&h.freelarge, s)
+       }
+}
+
+func scavengelist(list *mspan, now, limit uint64) uintptr {
+       if mSpanList_IsEmpty(list) {
+               return 0
+       }
+
+       var sumreleased uintptr
+       for s := list.next; s != list; s = s.next {
+               if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
+                       released := (s.npages - s.npreleased) << _PageShift
+                       memstats.heap_released += uint64(released)
+                       sumreleased += released
+                       s.npreleased = s.npages
+                       sysUnused((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+               }
+       }
+       return sumreleased
+}
+
+func mHeap_Scavenge(k int32, now, limit uint64) {
+       h := &mheap_
+       lock(&h.lock)
+       var sumreleased uintptr
+       for i := 0; i < len(h.free); i++ {
+               sumreleased += scavengelist(&h.free[i], now, limit)
+       }
+       sumreleased += scavengelist(&h.freelarge, now, limit)
+       unlock(&h.lock)
+
+       if debug.gctrace > 0 {
+               if sumreleased > 0 {
+                       print("scvg", k, ": ", sumreleased>>20, " MB released\n")
+               }
+               // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
+               // But we can't call ReadMemStats on g0 holding locks.
+               print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+       }
+}
+
+func scavenge_m() {
+       mHeap_Scavenge(-1, ^uint64(0), 0)
+}
+
+// Initialize a new span with the given start and npages.
+func mSpan_Init(span *mspan, start pageID, npages uintptr) {
+       span.next = nil
+       span.prev = nil
+       span.start = start
+       span.npages = npages
+       span.freelist = nil
+       span.ref = 0
+       span.sizeclass = 0
+       span.incache = false
+       span.elemsize = 0
+       span.state = _MSpanDead
+       span.unusedsince = 0
+       span.npreleased = 0
+       span.speciallock.key = 0
+       span.specials = nil
+       span.needzero = 0
+}
+
+// Initialize an empty doubly-linked list.
+func mSpanList_Init(list *mspan) {
+       list.state = _MSpanListHead
+       list.next = list
+       list.prev = list
+}
+
+func mSpanList_Remove(span *mspan) {
+       if span.prev == nil && span.next == nil {
+               return
+       }
+       span.prev.next = span.next
+       span.next.prev = span.prev
+       span.prev = nil
+       span.next = nil
+}
+
+func mSpanList_IsEmpty(list *mspan) bool {
+       return list.next == list
+}
+
+func mSpanList_Insert(list *mspan, span *mspan) {
+       if span.next != nil || span.prev != nil {
+               println("failed MSpanList_Insert", span, span.next, span.prev)
+               gothrow("MSpanList_Insert")
+       }
+       span.next = list.next
+       span.prev = list
+       span.next.prev = span
+       span.prev.next = span
+}
+
+func mSpanList_InsertBack(list *mspan, span *mspan) {
+       if span.next != nil || span.prev != nil {
+               println("failed MSpanList_InsertBack", span, span.next, span.prev)
+               gothrow("MSpanList_InsertBack")
+       }
+       span.next = list
+       span.prev = list.prev
+       span.next.prev = span
+       span.prev.next = span
+}
+
+// Adds the special record s to the list of special records for
+// the object p.  All fields of s should be filled in except for
+// offset & next, which this routine will fill in.
+// Returns true if the special was successfully added, false otherwise.
+// (The add will fail only if a record with the same p and s->kind
+//  already exists.)
+func addspecial(p unsafe.Pointer, s *special) bool {
+       span := mHeap_LookupMaybe(&mheap_, p)
+       if span == nil {
+               gothrow("addspecial on invalid pointer")
+       }
+
+       // Ensure that the span is swept.
+       // GC accesses specials list w/o locks. And it's just much safer.
+       mp := acquirem()
+       mSpan_EnsureSwept(span)
+
+       offset := uintptr(p) - uintptr(span.start<<_PageShift)
+       kind := s.kind
+
+       lock(&span.speciallock)
+
+       // Find splice point, check for existing record.
+       t := &span.specials
+       for {
+               x := *t
+               if x == nil {
+                       break
+               }
+               if offset == uintptr(x.offset) && kind == x.kind {
+                       unlock(&span.speciallock)
+                       releasem(mp)
+                       return false // already exists
+               }
+               if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
+                       break
+               }
+               t = &x.next
+       }
+
+       // Splice in record, fill in offset.
+       s.offset = uint16(offset)
+       s.next = *t
+       *t = s
+       unlock(&span.speciallock)
+       releasem(mp)
+
+       return true
+}
+
+// Removes the Special record of the given kind for the object p.
+// Returns the record if the record existed, nil otherwise.
+// The caller must FixAlloc_Free the result.
+func removespecial(p unsafe.Pointer, kind uint8) *special {
+       span := mHeap_LookupMaybe(&mheap_, p)
+       if span == nil {
+               gothrow("removespecial on invalid pointer")
+       }
+
+       // Ensure that the span is swept.
+       // GC accesses specials list w/o locks. And it's just much safer.
+       mp := acquirem()
+       mSpan_EnsureSwept(span)
+
+       offset := uintptr(p) - uintptr(span.start<<_PageShift)
+
+       lock(&span.speciallock)
+       t := &span.specials
+       for {
+               s := *t
+               if s == nil {
+                       break
+               }
+               // This function is used for finalizers only, so we don't check for
+               // "interior" specials (p must be exactly equal to s->offset).
+               if offset == uintptr(s.offset) && kind == s.kind {
+                       *t = s.next
+                       unlock(&span.speciallock)
+                       releasem(mp)
+                       return s
+               }
+               t = &s.next
+       }
+       unlock(&span.speciallock)
+       releasem(mp)
+       return nil
+}
+
+// Adds a finalizer to the object p.  Returns true if it succeeded.
+func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
+       lock(&mheap_.speciallock)
+       s := (*specialfinalizer)(fixAlloc_Alloc(&mheap_.specialfinalizeralloc))
+       unlock(&mheap_.speciallock)
+       s.special.kind = _KindSpecialFinalizer
+       s.fn = f
+       s.nret = nret
+       s.fint = fint
+       s.ot = ot
+       if addspecial(p, &s.special) {
+               return true
+       }
+
+       // There was an old finalizer
+       lock(&mheap_.speciallock)
+       fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+       unlock(&mheap_.speciallock)
+       return false
+}
+
+// Removes the finalizer (if any) from the object p.
+func removefinalizer(p unsafe.Pointer) {
+       s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
+       if s == nil {
+               return // there wasn't a finalizer to remove
+       }
+       lock(&mheap_.speciallock)
+       fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+       unlock(&mheap_.speciallock)
+}
+
+// Set the heap profile bucket associated with addr to b.
+func setprofilebucket(p unsafe.Pointer, b *bucket) {
+       lock(&mheap_.speciallock)
+       s := (*specialprofile)(fixAlloc_Alloc(&mheap_.specialprofilealloc))
+       unlock(&mheap_.speciallock)
+       s.special.kind = _KindSpecialProfile
+       s.b = b
+       if !addspecial(p, &s.special) {
+               gothrow("setprofilebucket: profile already set")
+       }
+}
+
+// Do whatever cleanup needs to be done to deallocate s.  It has
+// already been unlinked from the MSpan specials list.
+// Returns true if we should keep working on deallocating p.
+func freespecial(s *special, p unsafe.Pointer, size uintptr, freed bool) bool {
+       switch s.kind {
+       case _KindSpecialFinalizer:
+               sf := (*specialfinalizer)(unsafe.Pointer(s))
+               queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
+               lock(&mheap_.speciallock)
+               fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(sf))
+               unlock(&mheap_.speciallock)
+               return false // don't free p until finalizer is done
+       case _KindSpecialProfile:
+               sp := (*specialprofile)(unsafe.Pointer(s))
+               mProf_Free(sp.b, size, freed)
+               lock(&mheap_.speciallock)
+               fixAlloc_Free(&mheap_.specialprofilealloc, (unsafe.Pointer)(sp))
+               unlock(&mheap_.speciallock)
+               return true
+       default:
+               gothrow("bad special kind")
+               panic("not reached")
+       }
+}
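
One detail worth calling out in the new mheap.go above: mSpanList_Init, mSpanList_Insert, mSpanList_InsertBack and mSpanList_Remove implement an intrusive, circular doubly-linked list whose head is a sentinel mspan that points at itself, so an empty list is simply head.next == head, and nil next/prev links mean a span is on no list at all. A standalone sketch of the same invariants (node, listInit and the other names are made up for illustration, not runtime types):

package main

import "fmt"

// node stands in for mspan: the runtime links spans directly through their
// own next/prev fields instead of using a separate container type.
type node struct {
	next, prev *node
	id         int
}

// listInit mirrors mSpanList_Init: the head is a sentinel whose next and
// prev point back at itself, so "empty" means head.next == head.
func listInit(head *node) {
	head.next = head
	head.prev = head
}

// insertFront mirrors mSpanList_Insert.
func insertFront(head, n *node) {
	n.next = head.next
	n.prev = head
	n.next.prev = n
	n.prev.next = n
}

// insertBack mirrors mSpanList_InsertBack.
func insertBack(head, n *node) {
	n.next = head
	n.prev = head.prev
	n.next.prev = n
	n.prev.next = n
}

// remove mirrors mSpanList_Remove: nil links mean "not on any list".
func remove(n *node) {
	if n.next == nil && n.prev == nil {
		return
	}
	n.prev.next = n.next
	n.next.prev = n.prev
	n.next = nil
	n.prev = nil
}

func main() {
	var head node
	listInit(&head)
	a, b := &node{id: 1}, &node{id: 2}
	insertFront(&head, a)
	insertBack(&head, b)
	for n := head.next; n != &head; n = n.next {
		fmt.Println("span", n.id) // prints 1 then 2
	}
	remove(a)
	fmt.Println("after remove, first is", head.next.id) // 2
}

InsertBack matters for the reclaim path above: swept spans are pushed to the tail of the busy lists, so mHeap_ReclaimList can stop as soon as it reaches an already-swept span.
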
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index d409c6c306a3fe9f32ae5a43ceac739b785f0eaf..6ff33743b78614a33e4fdb460994391ba3325efd 100644 (file)
@@ -190,8 +190,6 @@ func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket
        return b
 }
 
-func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer
-
 func eqslice(x, y []uintptr) bool {
        if len(x) != len(y) {
                return false
@@ -246,16 +244,9 @@ func mProf_Malloc(p unsafe.Pointer, size uintptr) {
        // This reduces potential contention and chances of deadlocks.
        // Since the object must be alive during call to mProf_Malloc,
        // it's fine to do this non-atomically.
-       setprofilebucket(p, b)
-}
-
-func setprofilebucket_m() // mheap.c
-
-func setprofilebucket(p unsafe.Pointer, b *bucket) {
-       g := getg()
-       g.m.ptrarg[0] = p
-       g.m.ptrarg[1] = unsafe.Pointer(b)
-       onM(setprofilebucket_m)
+       onM(func() {
+               setprofilebucket(p, b)
+       })
 }
 
 // Called when freeing a profiled block.
@@ -519,8 +510,6 @@ func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
        return
 }
 
-var allgs []*g // proc.c
-
 // GoroutineProfile returns n, the number of records in the active goroutine stack profile.
 // If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
 // If len(p) < n, GoroutineProfile does not change p and returns n, false.
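
The mprof.go hunk above also shows the conversion idiom this CL uses in place of the old C trampolines: setprofilebucket_m, which read p and b back out of g.m.ptrarg after the stack switch, becomes an onM call with a closure that simply captures its arguments. A toy, outside-the-runtime sketch of the two shapes — onSystemStack and mScratch below are stand-ins invented for the example, not real runtime APIs:

package main

import "fmt"

// onSystemStack plays the role of onM; here it just calls fn, while the
// real thing switches to the g0 stack first.
func onSystemStack(fn func()) { fn() }

// mScratch plays the role of the m.ptrarg scratch slots.
var mScratch [2]interface{}

// Old pattern: arguments are parked in scratch slots, a bare function runs
// on the other stack and reads the slots back, then clears them.
func setprofilebucketOld(p, b interface{}) {
	mScratch[0], mScratch[1] = p, b
	onSystemStack(setprofilebucketM)
}

func setprofilebucketM() {
	p, b := mScratch[0], mScratch[1]
	mScratch[0], mScratch[1] = nil, nil
	fmt.Println("old style:", p, b)
}

// New pattern: the closure captures p and b directly, so the scratch slots
// and the manual clearing disappear.
func setprofilebucketNew(p, b interface{}) {
	onSystemStack(func() {
		fmt.Println("new style:", p, b)
	})
}

func main() {
	setprofilebucketOld("0xc0ffee", "bucket")
	setprofilebucketNew("0xc0ffee", "bucket")
}
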
diff --git a/src/runtime/msize.c b/src/runtime/msize.c
deleted file mode 100644 (file)
index 7cb65da..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Malloc small size classes.
-//
-// See malloc.h for overview.
-//
-// The size classes are chosen so that rounding an allocation
-// request up to the next size class wastes at most 12.5% (1.125x).
-//
-// Each size class has its own page count that gets allocated
-// and chopped up when new objects of the size class are needed.
-// That page count is chosen so that chopping up the run of
-// pages into objects of the given size wastes at most 12.5% (1.125x)
-// of the memory.  It is not necessary that the cutoff here be
-// the same as above.
-//
-// The two sources of waste multiply, so the worst possible case
-// for the above constraints would be that allocations of some
-// size might have a 26.6% (1.266x) overhead.
-// In practice, only one of the wastes comes into play for a
-// given size (sizes < 512 waste mainly on the round-up,
-// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "textflag.h"
-
-#pragma dataflag NOPTR
-int32 runtime·class_to_size[NumSizeClasses];
-#pragma dataflag NOPTR
-int32 runtime·class_to_allocnpages[NumSizeClasses];
-
-// The SizeToClass lookup is implemented using two arrays,
-// one mapping sizes <= 1024 to their class and one mapping
-// sizes >= 1024 and <= MaxSmallSize to their class.
-// All objects are 8-aligned, so the first array is indexed by
-// the size divided by 8 (rounded up).  Objects >= 1024 bytes
-// are 128-aligned, so the second array is indexed by the
-// size divided by 128 (rounded up).  The arrays are filled in
-// by InitSizes.
-
-#pragma dataflag NOPTR
-int8 runtime·size_to_class8[1024/8 + 1];
-#pragma dataflag NOPTR
-int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
-
-void runtime·testdefersizes(void);
-
-int32
-runtime·SizeToClass(int32 size)
-{
-       if(size > MaxSmallSize)
-               runtime·throw("SizeToClass - invalid size");
-       if(size > 1024-8)
-               return runtime·size_to_class128[(size-1024+127) >> 7];
-       return runtime·size_to_class8[(size+7)>>3];
-}
-
-void
-runtime·InitSizes(void)
-{
-       int32 align, sizeclass, size, nextsize, n;
-       uint32 i;
-       uintptr allocsize, npages;
-
-       // Initialize the runtime·class_to_size table (and choose class sizes in the process).
-       runtime·class_to_size[0] = 0;
-       sizeclass = 1;  // 0 means no class
-       align = 8;
-       for(size = align; size <= MaxSmallSize; size += align) {
-               if((size&(size-1)) == 0) {      // bump alignment once in a while
-                       if(size >= 2048)
-                               align = 256;
-                       else if(size >= 128)
-                               align = size / 8;
-                       else if(size >= 16)
-                               align = 16;     // required for x86 SSE instructions, if we want to use them
-               }
-               if((align&(align-1)) != 0)
-                       runtime·throw("InitSizes - bug");
-
-               // Make the allocnpages big enough that
-               // the leftover is less than 1/8 of the total,
-               // so wasted space is at most 12.5%.
-               allocsize = PageSize;
-               while(allocsize%size > allocsize/8)
-                       allocsize += PageSize;
-               npages = allocsize >> PageShift;
-
-               // If the previous sizeclass chose the same
-               // allocation size and fit the same number of
-               // objects into the page, we might as well
-               // use just this size instead of having two
-               // different sizes.
-               if(sizeclass > 1 &&
-                       npages == runtime·class_to_allocnpages[sizeclass-1] &&
-                       allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
-                       runtime·class_to_size[sizeclass-1] = size;
-                       continue;
-               }
-
-               runtime·class_to_allocnpages[sizeclass] = npages;
-               runtime·class_to_size[sizeclass] = size;
-               sizeclass++;
-       }
-       if(sizeclass != NumSizeClasses) {
-               runtime·printf("sizeclass=%d NumSizeClasses=%d\n", sizeclass, NumSizeClasses);
-               runtime·throw("InitSizes - bad NumSizeClasses");
-       }
-
-       // Initialize the size_to_class tables.
-       nextsize = 0;
-       for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
-               for(; nextsize < 1024 && nextsize <= runtime·class_to_size[sizeclass]; nextsize+=8)
-                       runtime·size_to_class8[nextsize/8] = sizeclass;
-               if(nextsize >= 1024)
-                       for(; nextsize <= runtime·class_to_size[sizeclass]; nextsize += 128)
-                               runtime·size_to_class128[(nextsize-1024)/128] = sizeclass;
-       }
-
-       // Double-check SizeToClass.
-       if(0) {
-               for(n=0; n < MaxSmallSize; n++) {
-                       sizeclass = runtime·SizeToClass(n);
-                       if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime·class_to_size[sizeclass] < n) {
-                               runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
-                               runtime·printf("incorrect SizeToClass");
-                               goto dump;
-                       }
-                       if(sizeclass > 1 && runtime·class_to_size[sizeclass-1] >= n) {
-                               runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
-                               runtime·printf("SizeToClass too big");
-                               goto dump;
-                       }
-               }
-       }
-
-       runtime·testdefersizes();
-
-       // Copy out for statistics table.
-       for(i=0; i<nelem(runtime·class_to_size); i++)
-               mstats.by_size[i].size = runtime·class_to_size[i];
-       return;
-
-dump:
-       if(1){
-               runtime·printf("NumSizeClasses=%d\n", NumSizeClasses);
-               runtime·printf("runtime·class_to_size:");
-               for(sizeclass=0; sizeclass<NumSizeClasses; sizeclass++)
-                       runtime·printf(" %d", runtime·class_to_size[sizeclass]);
-               runtime·printf("\n\n");
-               runtime·printf("size_to_class8:");
-               for(i=0; i<nelem(runtime·size_to_class8); i++)
-                       runtime·printf(" %d=>%d(%d)\n", i*8, runtime·size_to_class8[i],
-                               runtime·class_to_size[runtime·size_to_class8[i]]);
-               runtime·printf("\n");
-               runtime·printf("size_to_class128:");
-               for(i=0; i<nelem(runtime·size_to_class128); i++)
-                       runtime·printf(" %d=>%d(%d)\n", i*128, runtime·size_to_class128[i],
-                               runtime·class_to_size[runtime·size_to_class128[i]]);
-               runtime·printf("\n");
-       }
-       runtime·throw("InitSizes failed");
-}
-
-// Returns size of the memory block that mallocgc will allocate if you ask for the size.
-uintptr
-runtime·roundupsize(uintptr size)
-{
-       if(size < MaxSmallSize) {
-               if(size <= 1024-8)
-                       return runtime·class_to_size[runtime·size_to_class8[(size+7)>>3]];
-               else
-                       return runtime·class_to_size[runtime·size_to_class128[(size-1024+127) >> 7]];
-       }
-       if(size + PageSize < size)
-               return size;
-       return ROUND(size, PageSize);
-}
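
The 26.6% figure in the header comment (carried over verbatim into the Go port below) is just the two 12.5% bounds compounding: 1.125 × 1.125 = 1.265625, roughly a 26.6% overhead. A standalone sketch of the arithmetic, outside the patch:

package main

import "fmt"

func main() {
	// An allocation can lose up to 1/8 to size-class round-up, and the span
	// it comes from can lose up to 1/8 of its pages to chopping.
	roundUp := 1.125  // request rounded up to the next size class
	chopping := 1.125 // span chopped into fixed-size objects
	worst := roundUp * chopping
	fmt.Printf("worst case: %vx (~%.1f%% overhead)\n", worst, (worst-1)*100)
	// worst case: 1.265625x (~26.6% overhead)
}
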
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
new file mode 100644 (file)
index 0000000..aa2b43e
--- /dev/null
@@ -0,0 +1,174 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc small size classes.
+//
+// See malloc.h for overview.
+//
+// The size classes are chosen so that rounding an allocation
+// request up to the next size class wastes at most 12.5% (1.125x).
+//
+// Each size class has its own page count that gets allocated
+// and chopped up when new objects of the size class are needed.
+// That page count is chosen so that chopping up the run of
+// pages into objects of the given size wastes at most 12.5% (1.125x)
+// of the memory.  It is not necessary that the cutoff here be
+// the same as above.
+//
+// The two sources of waste multiply, so the worst possible case
+// for the above constraints would be that allocations of some
+// size might have a 26.6% (1.266x) overhead.
+// In practice, only one of the wastes comes into play for a
+// given size (sizes < 512 waste mainly on the round-up,
+// sizes > 512 waste mainly on the page chopping).
+//
+// TODO(rsc): Compute max waste for any given size.
+
+package runtime
+
+//var class_to_size [_NumSizeClasses]int32
+//var class_to_allocnpages [_NumSizeClasses]int32
+
+// The SizeToClass lookup is implemented using two arrays,
+// one mapping sizes <= 1024 to their class and one mapping
+// sizes >= 1024 and <= MaxSmallSize to their class.
+// All objects are 8-aligned, so the first array is indexed by
+// the size divided by 8 (rounded up).  Objects >= 1024 bytes
+// are 128-aligned, so the second array is indexed by the
+// size divided by 128 (rounded up).  The arrays are filled in
+// by InitSizes.
+//var size_to_class8 [1024/8 + 1]int8
+//var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+
+func sizeToClass(size int32) int32 {
+       if size > _MaxSmallSize {
+               gothrow("SizeToClass - invalid size")
+       }
+       if size > 1024-8 {
+               return int32(size_to_class128[(size-1024+127)>>7])
+       }
+       return int32(size_to_class8[(size+7)>>3])
+}
+
+func initSizes() {
+       // Initialize the runtime·class_to_size table (and choose class sizes in the process).
+       class_to_size[0] = 0
+       sizeclass := 1 // 0 means no class
+       align := 8
+       for size := align; size <= _MaxSmallSize; size += align {
+               if size&(size-1) == 0 { // bump alignment once in a while
+                       if size >= 2048 {
+                               align = 256
+                       } else if size >= 128 {
+                               align = size / 8
+                       } else if size >= 16 {
+                               align = 16 // required for x86 SSE instructions, if we want to use them
+                       }
+               }
+               if align&(align-1) != 0 {
+                       gothrow("InitSizes - bug")
+               }
+
+               // Make the allocnpages big enough that
+               // the leftover is less than 1/8 of the total,
+               // so wasted space is at most 12.5%.
+               allocsize := _PageSize
+               for allocsize%size > allocsize/8 {
+                       allocsize += _PageSize
+               }
+               npages := allocsize >> _PageShift
+
+               // If the previous sizeclass chose the same
+               // allocation size and fit the same number of
+               // objects into the page, we might as well
+               // use just this size instead of having two
+               // different sizes.
+               if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
+                       class_to_size[sizeclass-1] = int32(size)
+                       continue
+               }
+
+               class_to_allocnpages[sizeclass] = int32(npages)
+               class_to_size[sizeclass] = int32(size)
+               sizeclass++
+       }
+       if sizeclass != _NumSizeClasses {
+               print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
+               gothrow("InitSizes - bad NumSizeClasses")
+       }
+
+       // Initialize the size_to_class tables.
+       nextsize := 0
+       for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
+               for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
+                       size_to_class8[nextsize/8] = int8(sizeclass)
+               }
+               if nextsize >= 1024 {
+                       for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
+                               size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
+                       }
+               }
+       }
+
+       // Double-check SizeToClass.
+       if false {
+               for n := int32(0); n < _MaxSmallSize; n++ {
+                       sizeclass := sizeToClass(n)
+                       if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
+                               print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+                               print("incorrect SizeToClass\n")
+                               goto dump
+                       }
+                       if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
+                               print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+                               print("SizeToClass too big\n")
+                               goto dump
+                       }
+               }
+       }
+
+       testdefersizes()
+
+       // Copy out for statistics table.
+       for i := 0; i < len(class_to_size); i++ {
+               memstats.by_size[i].size = uint32(class_to_size[i])
+       }
+       return
+
+dump:
+       if true {
+               print("NumSizeClasses=", _NumSizeClasses, "\n")
+               print("runtime·class_to_size:")
+               for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
+                       print(" ", class_to_size[sizeclass], "")
+               }
+               print("\n\n")
+               print("size_to_class8:")
+               for i := 0; i < len(size_to_class8); i++ {
+                       print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
+               }
+               print("\n")
+               print("size_to_class128:")
+               for i := 0; i < len(size_to_class128); i++ {
+                       print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
+               }
+               print("\n")
+       }
+       gothrow("InitSizes failed")
+}
+
+// Returns size of the memory block that mallocgc will allocate if you ask for the size.
+func roundupsize(size uintptr) uintptr {
+       if size < _MaxSmallSize {
+               if size <= 1024-8 {
+                       return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
+               } else {
+                       return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
+               }
+       }
+       if size+_PageSize < size {
+               return size
+       }
+       return round(size, _PageSize)
+}
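
For reference, the index arithmetic shared by sizeToClass and roundupsize above, as a standalone sketch outside the patch: sizes up to 1024 are 8-aligned, so the first table is indexed by ceil(size/8) via (size+7)>>3, while larger small sizes are 128-aligned and index the second table by ceil((size-1024)/128) via (size-1024+127)>>7. The 32KB value for maxSmallSize below is an assumption taken from the malloc headers of this era, not something shown in this hunk.

package main

import "fmt"

const maxSmallSize = 32 << 10 // assumed value of _MaxSmallSize (32KB)

// index8 and index128 mirror the shift expressions used by sizeToClass.
func index8(size int) int   { return (size + 7) >> 3 }          // ceil(size/8)
func index128(size int) int { return (size - 1024 + 127) >> 7 } // ceil((size-1024)/128)

func main() {
	for _, size := range []int{1, 8, 9, 1016, 1017, 2048, maxSmallSize} {
		if size <= 1024-8 {
			fmt.Printf("size %5d -> size_to_class8[%d]\n", size, index8(size))
		} else {
			fmt.Printf("size %5d -> size_to_class128[%d]\n", size, index128(size))
		}
	}
}
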
index 171087d7f6fe51c0c243e77d6338c04b4eff7282..93cea5cc38bd812a44b1535c73ae15215d5e7918 100644 (file)
@@ -22,11 +22,11 @@ func makeslice(t *slicetype, len64 int64, cap64 int64) sliceStruct {
        // but since the cap is only being supplied implicitly, saying len is clearer.
        // See issue 4085.
        len := int(len64)
-       if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > maxmem/uintptr(t.elem.size) {
+       if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > _MaxMem/uintptr(t.elem.size) {
                panic(errorString("makeslice: len out of range"))
        }
        cap := int(cap64)
-       if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
+       if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
                panic(errorString("makeslice: cap out of range"))
        }
        p := newarray(t.elem, uintptr(cap))
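
Alongside the new _MaxMem bound, the makeslice check above keeps the int64(len) != len64 test, which rejects lengths that do not survive narrowing to int on 32-bit platforms. A standalone sketch of that round-trip test, outside the patch:

package main

import "fmt"

// fitsInInt mirrors the int64(len) != len64 guard in makeslice: convert to
// int, convert back, and see whether anything was lost.
func fitsInInt(len64 int64) bool {
	l := int(len64)          // truncates when int is 32 bits wide
	return int64(l) == len64 // round-trips only if no truncation occurred
}

func main() {
	fmt.Println(fitsInInt(1 << 20)) // true everywhere
	fmt.Println(fitsInInt(1 << 40)) // false where int is 32 bits, true on 64-bit
}
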
@@ -42,7 +42,7 @@ func growslice(t *slicetype, old sliceStruct, n int64) sliceStruct {
        cap64 := int64(old.cap) + n
        cap := int(cap64)
 
-       if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
+       if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
                panic(errorString("growslice: cap out of range"))
        }
 
@@ -72,7 +72,7 @@ func growslice(t *slicetype, old sliceStruct, n int64) sliceStruct {
                }
        }
 
-       if uintptr(newcap) >= maxmem/uintptr(et.size) {
+       if uintptr(newcap) >= _MaxMem/uintptr(et.size) {
                panic(errorString("growslice: cap out of range"))
        }
        lenmem := uintptr(old.len) * uintptr(et.size)
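
The recurring maxmem -> _MaxMem substitution in these hunks (and in the rawruneslice hunk below) keeps the overflow-safe shape of the capacity check: dividing _MaxMem by the element size, rather than multiplying the element count by it, means the comparison itself can never wrap. A standalone sketch outside the patch, with an illustrative limit standing in for the real, platform-dependent _MaxMem:

package main

import "fmt"

const maxMem = 1 << 38 // illustrative stand-in for _MaxMem

// fits is the overflow-safe equivalent of n*elemSize <= maxMem, written in
// the same divided form as the checks in makeslice and growslice.
func fits(n, elemSize uint64) bool {
	return elemSize == 0 || n <= maxMem/elemSize
}

func main() {
	fmt.Println(fits(1<<20, 8)) // true: well under the limit
	fmt.Println(fits(1<<61, 8)) // false: the product would overflow 64 bits
}
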
index 0809f89bc1fe1796b3fcb54885185cb270522814..0845c94e248e7b0c6368c5610c9de65e642c940b 100644 (file)
@@ -225,7 +225,7 @@ func rawbyteslice(size int) (b []byte) {
 
 // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
 func rawruneslice(size int) (b []rune) {
-       if uintptr(size) > maxmem/4 {
+       if uintptr(size) > _MaxMem/4 {
                gothrow("out of memory")
        }
        mem := goroundupsize(uintptr(size) * 4)
@@ -255,9 +255,6 @@ func gostringsize(n int) string {
        return s
 }
 
-//go:noescape
-func findnull(*byte) int
-
 func gostring(p *byte) string {
        l := findnull(p)
        if l == 0 {
@@ -296,3 +293,12 @@ func contains(s, t string) bool {
 func hasprefix(s, t string) bool {
        return len(s) >= len(t) && s[:len(t)] == t
 }
+
+func goatoi(s string) int {
+       n := 0
+       for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+               n = n*10 + int(s[0]) - '0'
+               s = s[1:]
+       }
+       return n
+}
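
goatoi, appended to string.go above, is a minimal digit parser (presumably for numeric runtime settings): it consumes leading decimal digits only, with no sign handling and no overflow check. A short usage sketch outside the patch, with the function body copied for illustration:

package main

import "fmt"

// goatoi is copied from the hunk above for illustration.
func goatoi(s string) int {
	n := 0
	for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
		n = n*10 + int(s[0]) - '0'
		s = s[1:]
	}
	return n
}

func main() {
	fmt.Println(goatoi("42"))    // 42
	fmt.Println(goatoi("7days")) // 7: stops at the first non-digit
	fmt.Println(goatoi("-3"))    // 0: no sign handling
	fmt.Println(goatoi(""))      // 0: empty input parses as zero
}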