#include "arch_GOARCH.h"
#include "malloc.h"
#include "stack.h"
+#include "mgc0.h"
#include "race.h"
enum {
Debug = 0,
DebugMark = 0, // run second pass to check mark
- DataBlock = 8*1024,
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
+
+ handoffThreshold = 4,
+ IntermediateBufferCapacity = 64,
};
// Bits in per-word bitmap.
static int32 gctrace;
+typedef struct Obj Obj;
+struct Obj
+{
+ byte *p; // data pointer
+ uintptr n; // size of data in bytes
+ uintptr ti; // type info
+};
+
+// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
{
- LFNode node; // must be first
+#define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
+ LFNode node; // must be first
uintptr nobj;
- byte *obj[512-(sizeof(LFNode)+sizeof(uintptr))/sizeof(byte*)];
+ Obj obj[SIZE/sizeof(Obj) - 1];
+ uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
+#undef SIZE
};
typedef struct Finalizer Finalizer;
};
extern byte data[];
-extern byte etext[];
+extern byte edata[];
+extern byte bss[];
extern byte ebss[];
+extern byte gcdata[];
+extern byte gcbss[];
+
static G *fing;
static FinBlock *finq; // list of finalizers that are to be executed
static FinBlock *finc; // cache of free blocks
static void putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);
-typedef struct GcRoot GcRoot;
-struct GcRoot
-{
- byte *p;
- uintptr n;
-};
-
static struct {
uint64 full; // lock-free list of full blocks
uint64 empty; // lock-free list of empty blocks
byte *chunk;
uintptr nchunk;
- GcRoot *roots;
+ Obj *roots;
uint32 nroot;
uint32 rootcap;
} work;
-// scanblock scans a block of n bytes starting at pointer b for references
-// to other objects, scanning any it finds recursively until there are no
-// unscanned objects left. Instead of using an explicit recursion, it keeps
-// a work list in the Workbuf* structures and loops in the main function
-// body. Keeping an explicit work list is easier on the stack allocator and
-// more efficient.
+enum {
+ // TODO(atom): to be expanded in a later CL
+ GC_DEFAULT_PTR = GC_NUM_INSTR,
+};
+
+// PtrTarget and BitTarget are structures used by intermediate buffers.
+// The intermediate buffers hold GC data before it
+// is moved/flushed to the work buffer (Workbuf).
+// The size of an intermediate buffer is very small,
+// such as 32 or 64 elements.
+struct PtrTarget
+{
+ void *p;
+ uintptr ti;
+};
+
+struct BitTarget
+{
+ void *p;
+ uintptr ti;
+ uintptr *bitp, shift;
+};
+
+struct BufferList
+{
+ struct PtrTarget ptrtarget[IntermediateBufferCapacity];
+ struct BitTarget bittarget[IntermediateBufferCapacity];
+ struct BufferList *next;
+};
+static struct BufferList *bufferList;
+
+static Lock lock;
+
+// flushptrbuf moves data from the PtrTarget buffer to the work buffer.
+// The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
+// while the work buffer contains blocks which have been marked
+// and are prepared to be scanned by the garbage collector.
+//
+// _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
+// bitbuf holds temporary data generated by this function.
+//
+// A simplified drawing explaining how the todo-list moves from one structure to another:
+//
+// scanblock
+// (find pointers)
+// Obj ------> PtrTarget (pointer targets)
+// ↑ |
+// | | flushptrbuf (1st part,
+// | | find block start)
+// | ↓
+// `--------- BitTarget (pointer targets and the corresponding locations in bitmap)
+// flushptrbuf
+// (2nd part, mark and enqueue)
static void
-scanblock(byte *b, uintptr n)
+flushptrbuf(struct PtrTarget *ptrbuf, uintptr n, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj, struct BitTarget *bitbuf)
{
- byte *obj, *arena_start, *arena_used, *p;
- void **vp;
- uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
+ byte *p, *arena_start, *obj;
+ uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti;
MSpan *s;
PageID k;
- void **wp;
+ Obj *wp;
Workbuf *wbuf;
- bool keepworking;
-
- if((intptr)n < 0) {
- runtime·printf("scanblock %p %D\n", b, (int64)n);
- runtime·throw("scanblock");
- }
+ struct PtrTarget *ptrbuf_end;
+ struct BitTarget *bitbufpos, *bt;
- // Memory arena parameters.
arena_start = runtime·mheap.arena_start;
- arena_used = runtime·mheap.arena_used;
- nproc = work.nproc;
- wbuf = nil; // current work buffer
- wp = nil; // storage for next queued pointer (write pointer)
- nobj = 0; // number of queued objects
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
- // Scanblock helpers pass b==nil.
- // Procs needs to return to make more
- // calls to scanblock. But if work.nproc==1 then
- // might as well process blocks as soon as we
- // have them.
- keepworking = b == nil || work.nproc == 1;
+ ptrbuf_end = ptrbuf + n;
- // Align b to a word boundary.
- off = (uintptr)b & (PtrSize-1);
- if(off != 0) {
- b += PtrSize - off;
- n -= PtrSize - off;
+ // If buffer is nearly full, get a new one.
+ if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+
+ if(n >= nelem(wbuf->obj))
+ runtime·throw("ptrbuf has to be smaller than WorkBuf");
}
- for(;;) {
- // Each iteration scans the block b of length n, queueing pointers in
- // the work buffer.
- if(Debug > 1)
- runtime·printf("scanblock %p %D\n", b, (int64)n);
+ // TODO(atom): This block is a branch of an if-then-else statement.
+ // The single-threaded branch may be added in a later CL.
+ {
+ // Multi-threaded version.
- vp = (void**)b;
- n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
- for(i=0; i<n; i++) {
- obj = (byte*)vp[i];
+ bitbufpos = bitbuf;
- // Words outside the arena cannot be pointers.
- if((byte*)obj < arena_start || (byte*)obj >= arena_used)
- continue;
+ while(ptrbuf < ptrbuf_end) {
+ obj = ptrbuf->p;
+ ti = ptrbuf->ti;
+ ptrbuf++;
+
+ // obj belongs to interval [mheap.arena_start, mheap.arena_used).
+ if(Debug > 1) {
+ if(obj < runtime·mheap.arena_start || obj >= runtime·mheap.arena_used)
+ runtime·throw("object is outside of mheap");
+ }
// obj may be a pointer to a live object.
// Try to find the beginning of the object.
// Round down to word boundary.
- obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+ if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+ ti = 0;
+ }
// Find bits for this word.
off = (uintptr*)obj - (uintptr*)arena_start;
if((bits & (bitAllocated|bitBlockBoundary)) != 0)
goto found;
+ ti = 0;
+
// Pointing just past the beginning?
// Scan backward a little to find a block boundary.
for(j=shift; j-->0; ) {
s = runtime·mheap.map[x];
if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
continue;
- p = (byte*)((uintptr)s->start<<PageShift);
+ p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
obj = p;
} else {
if((byte*)obj >= (byte*)s->limit)
continue;
- size = runtime·class_to_size[s->sizeclass];
+ size = s->elemsize;
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
bits = xbits >> shift;
found:
- // If another proc wants a pointer, give it some.
- if(work.nwait > 0 && nobj > 4 && work.full == 0) {
- wbuf->nobj = nobj;
- wbuf = handoff(wbuf);
- nobj = wbuf->nobj;
- wp = wbuf->obj + nobj;
- }
-
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
// Only care about allocated and not marked.
if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
- if(nproc == 1)
- *bitp |= bitMarked<<shift;
- else {
- for(;;) {
- x = *bitp;
- if(x & (bitMarked<<shift))
- goto continue_obj;
- if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
- break;
- }
- }
+
+ *bitbufpos = (struct BitTarget){obj, ti, bitp, shift};
+ bitbufpos++;
+ }
+
+ runtime·lock(&lock);
+ for(bt=bitbuf; bt<bitbufpos; bt++){
+ xbits = *bt->bitp;
+ bits = xbits >> bt->shift;
+ if((bits & bitMarked) != 0)
+ continue;
+
+ // Mark the block
+ *bt->bitp = xbits | (bitMarked << bt->shift);
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
+ obj = bt->p;
+
+ // Ask span about size class.
+ // (Manually inlined copy of MHeap_Lookup.)
+ x = (uintptr)obj >> PageShift;
+ if(sizeof(void*) == 8)
+ x -= (uintptr)arena_start>>PageShift;
+ s = runtime·mheap.map[x];
+
PREFETCH(obj);
- // If buffer is full, get a new one.
- if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
- if(wbuf != nil)
- wbuf->nobj = nobj;
- wbuf = getempty(wbuf);
- wp = wbuf->obj;
- nobj = 0;
- }
- *wp++ = obj;
+ *wp = (Obj){obj, s->elemsize, bt->ti};
+ wp++;
nobj++;
- continue_obj:;
}
+ runtime·unlock(&lock);
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+ }
+
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
+// Program that scans the whole block and treats every block element as a potential pointer
+static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
+// scanblock scans a block of n bytes starting at pointer b for references
+// to other objects, scanning any it finds recursively until there are no
+// unscanned objects left. Instead of using an explicit recursion, it keeps
+// a work list in the Workbuf* structures and loops in the main function
+// body. Keeping an explicit work list is easier on the stack allocator and
+// more efficient.
+//
+// wbuf: current work buffer
+// wp: storage for next queued pointer (write pointer)
+// nobj: number of queued objects
+static void
+scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
+{
+ byte *b, *arena_start, *arena_used;
+ uintptr n, i, end_b;
+ void *obj;
+
+ // TODO(atom): to be expanded in a later CL
+ struct Frame {uintptr count, b; uintptr *loop_or_ret;};
+ struct Frame stack_top;
+
+ uintptr *pc;
+
+ struct BufferList *scanbuffers;
+ struct PtrTarget *ptrbuf, *ptrbuf_end;
+ struct BitTarget *bitbuf;
+
+ struct PtrTarget *ptrbufpos;
+
+ // End of local variable declarations.
+
+ if(sizeof(Workbuf) % PageSize != 0)
+ runtime·throw("scanblock: size of Workbuf is suboptimal");
+
+ // Memory arena parameters.
+ arena_start = runtime·mheap.arena_start;
+ arena_used = runtime·mheap.arena_used;
+
+ // Allocate ptrbuf, bitbuf
+ {
+ runtime·lock(&lock);
+
+ if(bufferList == nil) {
+ bufferList = runtime·SysAlloc(sizeof(*bufferList));
+ bufferList->next = nil;
+ }
+ scanbuffers = bufferList;
+ bufferList = bufferList->next;
+
+ ptrbuf = &scanbuffers->ptrtarget[0];
+ ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
+ bitbuf = &scanbuffers->bittarget[0];
+
+ runtime·unlock(&lock);
+ }
+
+ ptrbufpos = ptrbuf;
+
+ goto next_block;
+
+ for(;;) {
+ // Each iteration scans the block b of length n, queueing pointers in
+ // the work buffer.
+ if(Debug > 1) {
+ runtime·printf("scanblock %p %D\n", b, (int64)n);
+ }
+
+ // TODO(atom): to be replaced in a later CL
+ pc = defaultProg;
+
+ pc++;
+ stack_top.b = (uintptr)b;
+
+ end_b = (uintptr)b + n - PtrSize;
+
+ next_instr:
+ // TODO(atom): to be expanded in a later CL
+ switch(pc[0]) {
+ case GC_DEFAULT_PTR:
+ while(true) {
+ i = stack_top.b;
+ if(i > end_b)
+ goto next_block;
+ stack_top.b += PtrSize;
+
+ obj = *(byte**)i;
+ if(obj >= arena_start && obj < arena_used) {
+ *ptrbufpos = (struct PtrTarget){obj, 0};
+ ptrbufpos++;
+ if(ptrbufpos == ptrbuf_end)
+ goto flush_buffers;
+ }
+ }
+
+ default:
+ runtime·throw("scanblock: invalid GC instruction");
+ return;
+ }
+
+ flush_buffers:
+ flushptrbuf(ptrbuf, ptrbufpos-ptrbuf, &wp, &wbuf, &nobj, bitbuf);
+ ptrbufpos = ptrbuf;
+ goto next_instr;
+
+ next_block:
// Done scanning [b, b+n). Prepare for the next iteration of
- // the loop by setting b and n to the parameters for the next block.
+ // the loop by setting b, n to the parameters for the next block.
- // Fetch b from the work buffer.
if(nobj == 0) {
- if(!keepworking) {
- if(wbuf)
- putempty(wbuf);
- return;
+ flushptrbuf(ptrbuf, ptrbufpos-ptrbuf, &wp, &wbuf, &nobj, bitbuf);
+ ptrbufpos = ptrbuf;
+
+ if(nobj == 0) {
+ if(!keepworking) {
+ if(wbuf)
+ putempty(wbuf);
+ goto endscan;
+ }
+ // Emptied our buffer: refill.
+ wbuf = getfull(wbuf);
+ if(wbuf == nil)
+ goto endscan;
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + wbuf->nobj;
}
- // Emptied our buffer: refill.
- wbuf = getfull(wbuf);
- if(wbuf == nil)
- return;
- nobj = wbuf->nobj;
- wp = wbuf->obj + wbuf->nobj;
}
- b = *--wp;
- nobj--;
- // Ask span about size class.
- // (Manually inlined copy of MHeap_Lookup.)
- x = (uintptr)b>>PageShift;
- if(sizeof(void*) == 8)
- x -= (uintptr)arena_start>>PageShift;
- s = runtime·mheap.map[x];
- if(s->sizeclass == 0)
- n = s->npages<<PageShift;
- else
- n = runtime·class_to_size[s->sizeclass];
+ // Fetch b from the work buffer.
+ --wp;
+ b = wp->p;
+ n = wp->n;
+ nobj--;
}
+
+endscan:
+ runtime·lock(&lock);
+ scanbuffers->next = bufferList;
+ bufferList = scanbuffers;
+ runtime·unlock(&lock);
}
// debug_scanblock is the debug copy of scanblock.
continue;
p = (byte*)((uintptr)s->start<<PageShift);
+ size = s->elemsize;
if(s->sizeclass == 0) {
obj = p;
- size = (uintptr)s->npages<<PageShift;
} else {
if((byte*)obj >= (byte*)s->limit)
continue;
- size = runtime·class_to_size[s->sizeclass];
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
}
}
+// Append obj to the work buffer.
+// _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
+static void
+enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
+{
+ uintptr nobj, off;
+ Obj *wp;
+ Workbuf *wbuf;
+
+ if(Debug > 1)
+ runtime·printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
+
+ // Align obj.b to a word boundary.
+ off = (uintptr)obj.p & (PtrSize-1);
+ if(off != 0) {
+ obj.p += PtrSize - off;
+ obj.n -= PtrSize - off;
+ obj.ti = 0;
+ }
+
+ if(obj.p == nil || obj.n == 0)
+ return;
+
+ // Load work buffer state
+ wp = *_wp;
+ wbuf = *_wbuf;
+ nobj = *_nobj;
+
+ // If another proc wants a pointer, give it some.
+ if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+
+ // If buffer is full, get a new one.
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
+ wbuf = getempty(wbuf);
+ wp = wbuf->obj;
+ nobj = 0;
+ }
+
+ *wp = obj;
+ wp++;
+ nobj++;
+
+ // Save work buffer state
+ *_wp = wp;
+ *_wbuf = wbuf;
+ *_nobj = nobj;
+}
+
static void
markroot(ParFor *desc, uint32 i)
{
+ Obj *wp;
+ Workbuf *wbuf;
+ uintptr nobj;
+
USED(&desc);
- scanblock(work.roots[i].p, work.roots[i].n);
+ wp = nil;
+ wbuf = nil;
+ nobj = 0;
+ enqueue(work.roots[i], &wbuf, &wp, &nobj);
+ scanblock(wbuf, wp, nobj, false);
}
// Get an empty work buffer off the work.empty list,
}
static void
-addroot(byte *p, uintptr n)
+addroot(Obj obj)
{
uint32 cap;
- GcRoot *new;
+ Obj *new;
if(work.nroot >= work.rootcap) {
- cap = PageSize/sizeof(GcRoot);
+ cap = PageSize/sizeof(Obj);
if(cap < 2*work.rootcap)
cap = 2*work.rootcap;
- new = (GcRoot*)runtime·SysAlloc(cap*sizeof(GcRoot));
+ new = (Obj*)runtime·SysAlloc(cap*sizeof(Obj));
if(work.roots != nil) {
- runtime·memmove(new, work.roots, work.rootcap*sizeof(GcRoot));
- runtime·SysFree(work.roots, work.rootcap*sizeof(GcRoot));
+ runtime·memmove(new, work.roots, work.rootcap*sizeof(Obj));
+ runtime·SysFree(work.roots, work.rootcap*sizeof(Obj));
}
work.roots = new;
work.rootcap = cap;
}
- work.roots[work.nroot].p = p;
- work.roots[work.nroot].n = n;
+ work.roots[work.nroot] = obj;
work.nroot++;
}
runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk);
runtime·throw("scanstack");
}
- addroot(sp, (byte*)stk - sp);
+ addroot((Obj){sp, (byte*)stk - sp, 0});
sp = (byte*)stk->gobuf.sp;
guard = stk->stackguard;
stk = (Stktop*)stk->stackbase;
runtime·throw("mark - finalizer inconsistency");
// do not mark the finalizer block itself. just mark the things it points at.
- addroot(v, size);
+ addroot((Obj){v, size, 0});
}
static void
{
G *gp;
FinBlock *fb;
- byte *p;
MSpan *s, **allspans;
uint32 spanidx;
work.nroot = 0;
- // mark data+bss.
- for(p=data; p<ebss; p+=DataBlock)
- addroot(p, p+DataBlock < ebss ? DataBlock : ebss-p);
+ // data & bss
+ // TODO(atom): load balancing
+ addroot((Obj){data, edata - data, (uintptr)gcdata});
+ addroot((Obj){bss, ebss - bss, (uintptr)gcbss});
// MSpan.types
allspans = runtime·mheap.allspans;
break;
case MTypes_Words:
case MTypes_Bytes:
- addroot((byte*)&s->types.data, sizeof(void*));
+ // TODO(atom): consider using defaultProg instead of 0
+ addroot((Obj){(byte*)&s->types.data, sizeof(void*), 0});
break;
}
}
}
+ // stacks
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
switch(gp->status){
default:
runtime·walkfintab(addfinroots);
for(fb=allfin; fb; fb=fb->alllink)
- addroot((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
+ addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
}
static bool
{
// parallel mark for over gc roots
runtime·parfordo(work.markfor);
+
// help other threads scan secondary blocks
- scanblock(nil, 0);
+ scanblock(nil, nil, 0, true);
if(DebugMark) {
// wait while the main thread executes mark(debug_scanblock)
obj0 = mstats.nmalloc - mstats.nfree;
}
+ m->locks++; // disable gc during mallocs in parforalloc
+ if(work.markfor == nil)
+ work.markfor = runtime·parforalloc(MaxGcproc);
+ if(work.sweepfor == nil)
+ work.sweepfor = runtime·parforalloc(MaxGcproc);
+ m->locks--;
+
work.nwait = 0;
work.ndone = 0;
work.debugmarkdone = 0;
work.nproc = runtime·gcprocs();
addroots();
- m->locks++; // disable gc during mallocs in parforalloc
- if(work.markfor == nil)
- work.markfor = runtime·parforalloc(MaxGcproc);
runtime·parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
- if(work.sweepfor == nil)
- work.sweepfor = runtime·parforalloc(MaxGcproc);
runtime·parforsetup(work.sweepfor, work.nproc, runtime·mheap.nspan, nil, true, sweepspan);
- m->locks--;
if(work.nproc > 1) {
runtime·noteclear(&work.alldone);
runtime·helpgc(work.nproc);
}
runtime·parfordo(work.markfor);
- scanblock(nil, 0);
+ scanblock(nil, nil, 0, true);
if(DebugMark) {
for(i=0; i<work.nroot; i++)