enum {
Debug = 0,
- UseCas = 1,
PtrSize = sizeof(void*),
-
+ DebugMark = 0, // run second pass to check mark
+
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+// TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
+static uint64 nhandoff;	// pointers passed to other procs via handoff (reported by GOGCTRACE)
+
static int32 gctrace;
typedef struct Workbuf Workbuf;
struct Workbuf
{
Workbuf *next;
- uintptr nw;
- byte *w[2048-2];
+ uintptr nobj;
+ byte *obj[512-2];
};
extern byte data[];
static void runfinq(void);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
+static void putempty(Workbuf*);
+static Workbuf* handoff(Workbuf*);
+
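+// Shared state for the parallel collector: the full/empty work-buffer
+// lists and their locks, bookkeeping for the helper procs, the phase
+// gates, the span list for sweeping, and a simple chunk allocator
+// backing new Workbufs.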
+static struct {
+ Lock fmu; // protects full
+ Workbuf *full; // queue of full work buffers
+ Lock emu; // protects empty
+ Workbuf *empty; // queue of empty work buffers
+ uint32 nproc; // number of procs doing this gc (main proc + helpers)
+ volatile uint32 nwait; // procs waiting in getfull for work
+ volatile uint32 ndone; // helpers that have finished
+ Note alldone; // signaled when the last helper is done
+ Lock markgate; // holds helpers back until the mark phase starts
+ Lock sweepgate; // holds helpers back until the sweep phase starts
+ MSpan *spans; // spans remaining to sweep (popped with casp)
+
+ Lock; // protects chunk, nchunk
+ byte *chunk;
+ uintptr nchunk;
+} work;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left.
static void
scanblock(byte *b, int64 n)
{
- byte *obj, *arena_start, *p;
+ byte *obj, *arena_start, *arena_used, *p;
void **vp;
- uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+ uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
MSpan *s;
PageID k;
- void **bw, **w, **ew;
+ void **wp;
Workbuf *wbuf;
+ bool keepworking;
if((int64)(uintptr)n != n || n < 0) {
runtime·printf("scanblock %p %D\n", b, n);
// Memory arena parameters.
arena_start = runtime·mheap.arena_start;
-
+ arena_used = runtime·mheap.arena_used;
+ nproc = work.nproc;
+
wbuf = nil; // current work buffer
- ew = nil; // end of work buffer
- bw = nil; // beginning of work buffer
- w = nil; // current pointer into work buffer
+ wp = nil; // storage for next queued pointer (write pointer)
+ nobj = 0; // number of queued objects
+
+ // Scanblock helpers pass b==nil.
+ // The main proc needs to return to make more
+ // calls to scanblock. But if work.nproc==1 then
+ // might as well process blocks as soon as we
+ // have them.
+ keepworking = b == nil || work.nproc == 1;
// Align b to a word boundary.
off = (uintptr)b & (PtrSize-1);
vp = (void**)b;
- n /= PtrSize;
+ n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
for(i=0; i<n; i++) {
obj = (byte*)vp[i];
-
+
// Words outside the arena cannot be pointers.
- if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used)
+ if((byte*)obj < arena_start || (byte*)obj >= arena_used)
continue;
-
+
// obj may be a pointer to a live object.
// Try to find the beginning of the object.
-
+
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
found:
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
- // If not allocated or already marked, done.
- if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
- *bitp |= bitMarked<<shift;
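+ // Mark the object. With a single proc a plain store suffices;
+ // with several procs, use a compare-and-swap loop on the bitmap
+ // word, since another proc may be updating other bits in the same
+ // word or may already have marked this object.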
+ if(nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ goto continue_obj;
+ if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
+ }
+ }
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
+ // If another proc wants a pointer, give it some.
+ if(nobj > 4 && work.nwait > 0 && work.full == nil) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + nobj;
+ }
+
// If buffer is full, get a new one.
- if(w >= ew) {
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
wbuf = getempty(wbuf);
- bw = wbuf->w;
- w = bw;
- ew = bw + nelem(wbuf->w);
+ wp = wbuf->obj;
+ nobj = 0;
}
- *w++ = obj;
+ *wp++ = obj;
+ nobj++;
+ continue_obj:;
}
-
+
// Done scanning [b, b+n). Prepare for the next iteration of
// the loop by setting b and n to the parameters for the next block.
- // Fetch b from the work buffers.
- if(w <= bw) {
+ // Fetch b from the work buffer.
+ if(nobj == 0) {
+ if(!keepworking) {
+ putempty(wbuf);
+ return;
+ }
// Emptied our buffer: refill.
wbuf = getfull(wbuf);
if(wbuf == nil)
- break;
- bw = wbuf->w;
- ew = wbuf->w + nelem(wbuf->w);
- w = bw+wbuf->nw;
+ return;
+ nobj = wbuf->nobj;
+ wp = wbuf->obj + wbuf->nobj;
}
- b = *--w;
-
+ b = *--wp;
+ nobj--;
+
// Figure out n = size of b. Start by loading bits for b.
off = (uintptr*)b - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
-
+
// Might be small; look for nearby block boundary.
// A block boundary is marked by either bitBlockBoundary
// or bitAllocated being set (see notes near their definition).
// Apply a mask to keep only the bits corresponding
// to shift+j < bitShift aka j < bitShift-shift.
bits &= (boundary<<(bitShift-shift)) - boundary;
-
+
// A block boundary j words before b is indicated by
// xbits>>(shift-j) & boundary
// (assuming shift >= j). There is no cleverness here
// avoiding the test, because when j gets too large the shift
- // turns negative, which is undefined in C.
+ // turns negative, which is undefined in C.
for(j=1; j<bitShift; j++) {
if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
goto scan;
}
}
-
+
// Fall back to asking span about size class.
// (Manually inlined copy of MHeap_Lookup.)
nlookup++;
}
}
-static struct {
- Workbuf *full;
- Workbuf *empty;
- byte *chunk;
- uintptr nchunk;
-} work;
+// debug_scanblock is the debug copy of scanblock.
+// it is simpler, slower, single-threaded, recursive,
+// and uses bitSpecial as the mark bit.
+static void
+debug_scanblock(byte *b, int64 n)
+{
+ byte *obj, *p;
+ void **vp;
+ uintptr size, *bitp, bits, shift, i, xbits, off;
+ MSpan *s;
+
+ if(!DebugMark)
+ runtime·throw("debug_scanblock without DebugMark");
+
+ if((int64)(uintptr)n != n || n < 0) {
+ runtime·printf("debug_scanblock %p %D\n", b, n);
+ runtime·throw("debug_scanblock");
+ }
+
+ // Align b to a word boundary.
+ off = (uintptr)b & (PtrSize-1);
+ if(off != 0) {
+ b += PtrSize - off;
+ n -= PtrSize - off;
+ }
+
+ vp = (void**)b;
+ n /= PtrSize;
+ for(i=0; i<n; i++) {
+ obj = (byte*)vp[i];
+
+ // Words outside the arena cannot be pointers.
+ if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
+ continue;
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Consult span table to find beginning.
+ s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
+ if(s == nil)
+ continue;
+
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ size = (uintptr)s->npages<<PageShift;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ continue;
+ size = runtime·class_to_size[s->sizeclass];
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
+ bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // If not allocated or already marked, done.
+ if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
+ continue;
+ *bitp |= bitSpecial<<shift;
+ if(!(bits & bitMarked))
+ runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
+
+ // If object has no pointers, don't need to scan further.
+ if((bits & bitNoPointers) != 0)
+ continue;
+
+ debug_scanblock(obj, size);
+ }
+}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- if(b != nil) {
- b->nw = nelem(b->w);
- b->next = work.full;
- work.full = b;
- }
- b = work.empty;
- if(b != nil) {
- work.empty = b->next;
- return b;
+ if(work.nproc == 1) {
+ // Put b on full list.
+ if(b != nil) {
+ b->next = work.full;
+ work.full = b;
+ }
+ // Grab from empty list if possible.
+ b = work.empty;
+ if(b != nil) {
+ work.empty = b->next;
+ goto haveb;
+ }
+ } else {
+ // Put b on full list.
+ if(b != nil) {
+ runtime·lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime·unlock(&work.fmu);
+ }
+ // Grab from empty list if possible.
+ runtime·lock(&work.emu);
+ b = work.empty;
+ if(b != nil)
+ work.empty = b->next;
+ runtime·unlock(&work.emu);
+ if(b != nil)
+ goto haveb;
}
-
+
+ // Need to allocate.
+ runtime·lock(&work);
if(work.nchunk < sizeof *b) {
work.nchunk = 1<<20;
work.chunk = runtime·SysAlloc(work.nchunk);
}
b = (Workbuf*)work.chunk;
work.chunk += sizeof *b;
work.nchunk -= sizeof *b;
+ runtime·unlock(&work);
+
+haveb:
+ b->nobj = 0;
return b;
}
+static void
+putempty(Workbuf *b)
+{
+ if(b == nil)
+ return;
+
+ if(work.nproc == 1) {
+ b->next = work.empty;
+ work.empty = b;
+ return;
+ }
+
+ runtime·lock(&work.emu);
+ b->next = work.empty;
+ work.empty = b;
+ runtime·unlock(&work.emu);
+}
+
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
- if(b != nil) {
- b->nw = 0;
- b->next = work.empty;
- work.empty = b;
+ int32 i;
+ Workbuf *b1;
+
+ if(work.nproc == 1) {
+ // Put b on empty list.
+ if(b != nil) {
+ b->next = work.empty;
+ work.empty = b;
+ }
+ // Grab from full list if possible.
+ // Since work.nproc==1, no one else is
+ // going to give us work.
+ b = work.full;
+ if(b != nil)
+ work.full = b->next;
+ return b;
+ }
+
+ putempty(b);
+
+ // Grab buffer from full list if possible.
+ for(;;) {
+ b1 = work.full;
+ if(b1 == nil)
+ break;
+ runtime·lock(&work.fmu);
+ if(work.full != nil) {
+ b1 = work.full;
+ work.full = b1->next;
+ runtime·unlock(&work.fmu);
+ return b1;
+ }
+ runtime·unlock(&work.fmu);
+ }
+
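+ // No full buffers available. Count ourselves as waiting and poll
+ // for one, spinning briefly, then yielding, then sleeping. If every
+ // proc ends up waiting, there is no more work: return nil.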
+ runtime·xadd(&work.nwait, +1);
+ for(i=0;; i++) {
+ b1 = work.full;
+ if(b1 != nil) {
+ runtime·lock(&work.fmu);
+ if(work.full != nil) {
+ runtime·xadd(&work.nwait, -1);
+ b1 = work.full;
+ work.full = b1->next;
+ runtime·unlock(&work.fmu);
+ return b1;
+ }
+ runtime·unlock(&work.fmu);
+ continue;
+ }
+ if(work.nwait == work.nproc)
+ return nil;
+ if(i < 10)
+ runtime·procyield(20);
+ else if(i < 20)
+ runtime·osyield();
+ else
+ runtime·usleep(100);
}
- b = work.full;
- if(b != nil)
- work.full = b->next;
- return b;
+}
+
+static Workbuf*
+handoff(Workbuf *b)
+{
+ int32 n;
+ Workbuf *b1;
+
+ // Make new buffer with half of b's pointers.
+ b1 = getempty(nil);
+ n = b->nobj/2;
+ b->nobj -= n;
+ b1->nobj = n;
+ runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
+ nhandoff += n;
+
+ // Put b on full list - let first half of b get stolen.
+ runtime·lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime·unlock(&work.fmu);
+
+ return b1;
}
// Scanstack calls scanblock on each of gp's stack segments.
static void
-scanstack(G *gp)
+scanstack(void (*scanblock)(byte*, int64), G *gp)
{
int32 n;
Stktop *stk;
if(gp == g) {
// Scanning our own stack: start at &gp.
sp = (byte*)&gp;
+ } else if(gp->m != nil && gp->m->helpgc) {
+ // Gc helper scans its own stack.
+ return;
} else {
// Scanning another goroutine's stack.
// The goroutine is usually asleep (the world is stopped).
scanblock(v, size);
}
-// Mark
static void
-mark(void)
+debug_markfin(void *v)
+{
+ uintptr size;
+
+ if(!runtime·mlookup(v, &v, &size, nil))
+ runtime·throw("debug_mark - finalizer inconsistency");
+ debug_scanblock(v, size);
+}
+
+// Mark
+static void
+mark(void (*scan)(byte*, int64))
{
G *gp;
// mark data+bss.
// skip runtime·mheap itself, which has no interesting pointers
// and is mostly zeroed and would not otherwise be paged in.
- scanblock(data, (byte*)&runtime·mheap - data);
- scanblock((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1));
+ scan(data, (byte*)&runtime·mheap - data);
+ scan((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1));
// mark stacks
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
case Grunning:
if(gp != g)
runtime·throw("mark - world not stopped");
- scanstack(gp);
+ scanstack(scan, gp);
break;
case Grunnable:
case Gsyscall:
case Gwaiting:
- scanstack(gp);
+ scanstack(scan, gp);
break;
}
}
// mark things pointed at by objects with finalizers
- runtime·walkfintab(markfin);
+ if(scan == debug_scanblock)
+ runtime·walkfintab(debug_markfin);
+ else
+ runtime·walkfintab(markfin);
+
+ // in multiproc mode, join in the queued work.
+ scan(nil, 0);
}
// Sweep frees or calls finalizers for blocks not marked in the mark phase.
byte *p;
MCache *c;
Finalizer *f;
+ byte *arena_start;
+
+ arena_start = runtime·mheap.arena_start;
+
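+ // Claim spans one at a time off the shared work.spans list with
+ // casp, so concurrent sweep procs never process the same span.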
+ for(;;) {
+ s = work.spans;
+ if(s == nil)
+ break;
+ if(!runtime·casp(&work.spans, s, s->allnext))
+ continue;
- for(s = runtime·mheap.allspans; s != nil; s = s->allnext) {
if(s->state != MSpanInUse)
continue;
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
-
- // sweep through n objects of given size starting at p.
+
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
for(; n > 0; n--, p += size) {
uintptr off, *bitp, shift, bits;
- off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
- bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *bitp>>shift;
continue;
if((bits & bitMarked) != 0) {
+ if(DebugMark) {
+ if(!(bits & bitSpecial))
+ runtime·printf("found spurious mark on %p\n", p);
+ *bitp &= ~(bitSpecial<<shift);
+ }
*bitp &= ~(bitMarked<<shift);
continue;
}
- if((bits & bitSpecial) != 0) {
+ if(DebugMark || (bits & bitSpecial) != 0) {
// Special means it has a finalizer or is being profiled.
+ // In DebugMark mode, the bit has been coopted so
+ // we have to assume all blocks are special.
f = runtime·getfinalizer(p, 1);
if(f != nil) {
f->arg = p;
- f->next = finq;
- finq = f;
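+ // Several sweep procs may push onto finq concurrently, so
+ // link the finalizer in with a CAS loop instead of a plain store.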
+ for(;;) {
+ f->next = finq;
+ if(runtime·casp(&finq, f->next, f))
+ break;
+ }
continue;
}
runtime·MProf_Free(p, size);
}
}
+void
+runtime·gchelper(void)
+{
+ // Wait until main proc is ready for mark help.
+ runtime·lock(&work.markgate);
+ runtime·unlock(&work.markgate);
+ scanblock(nil, 0);
+
+ // Wait until main proc is ready for sweep help.
+ runtime·lock(&work.sweepgate);
+ runtime·unlock(&work.sweepgate);
+ sweep();
+
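+ // The main proc does not call gchelper, so the nproc-1 helpers
+ // account for all of ndone; the last one to finish wakes the
+ // main proc, which is sleeping on alldone.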
+ if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
+ runtime·notewakeup(&work.alldone);
+}
+
// Semaphore, not Lock, so that the goroutine
// reschedules when there is contention rather
// than spinning.
stealcache(void)
{
M *m;
-
+
for(m=runtime·allm; m; m=m->alllink)
runtime·MCache_ReleaseAll(m->mcache);
}
uint64 heap0, heap1, obj0, obj1;
byte *p;
Finalizer *fp;
+ bool extra;
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
gcpercent = -1;
else
gcpercent = runtime·atoi(p);
-
+
p = runtime·getenv("GOGCTRACE");
if(p != nil)
gctrace = runtime·atoi(p);
nlookup = 0;
nsizelookup = 0;
naddrlookup = 0;
+ nhandoff = 0;
m->gcing = 1;
runtime·stoptheworld();
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
- mark();
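+ // Helpers started below block acquiring these gates in
+ // runtime·gchelper; unlocking each gate releases them into
+ // the corresponding phase.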
+ runtime·lock(&work.markgate);
+ runtime·lock(&work.sweepgate);
+
+ work.nproc = 1;
+ if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) {
+ runtime·noteclear(&work.alldone);
+ work.nproc += runtime·helpgc();
+ }
+ work.nwait = 0;
+ work.ndone = 0;
+
+ runtime·unlock(&work.markgate); // let the helpers in
+ mark(scanblock);
+ if(DebugMark)
+ mark(debug_scanblock);
t1 = runtime·nanotime();
+
+ work.spans = runtime·mheap.allspans;
+ runtime·unlock(&work.sweepgate); // let the helpers in
sweep();
+ if(work.nproc > 1)
+ runtime·notesleep(&work.alldone);
t2 = runtime·nanotime();
+
stealcache();
cachestats();
mstats.numgc++;
if(mstats.debuggc)
runtime·printf("pause %D\n", t3-t0);
-
+
if(gctrace) {
- runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n",
- mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
+ runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D handoff\n",
+ mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
heap0>>20, heap1>>20, obj0, obj1,
mstats.nmalloc, mstats.nfree,
- nlookup, nsizelookup, naddrlookup);
+ nlookup, nsizelookup, naddrlookup, nhandoff);
}
runtime·semrelease(&gcsema);
- runtime·starttheworld();
-
+
+ // If we could have used another helper proc, start one now,
+ // in the hope that it will be available next time.
+ // It would have been even better to start it before the collection,
+ // but doing so requires allocating memory, so it's tricky to
+ // coordinate. This lazy approach works out in practice:
+ // we don't mind if the first couple gc rounds don't have quite
+ // the maximum number of procs.
+ extra = work.nproc < runtime·gomaxprocs && work.nproc < MaxGcproc;
+
+ runtime·starttheworld(extra);
+
// give the queued finalizers, if any, a chance to run
if(fp != nil)
runtime·gosched();
-
+
if(gctrace > 1 && !force)
runtime·gc(1);
}
cachestats();
m->gcing = 0;
runtime·semrelease(&gcsema);
- runtime·starttheworld();
+ runtime·starttheworld(0);
}
static void
{
uintptr *b, off, shift;
+ if(DebugMark)
+ return true;
+
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
{
uintptr *b, off, shift, bits, obits;
+ if(DebugMark)
+ return;
+
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
}
}
}
-
+
void
runtime·MHeap_MapBits(MHeap *h)
{
bitmapChunk = 8192
};
uintptr n;
-
+
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
if(h->bitmap_mapped >= n)
static void schedule(G*);
static void acquireproc(void);
static void releaseproc(void);
+static M *startm(void);
typedef struct Sched Sched;
m->fastrand = 0x49f6428aUL + m->id;
m->stackalloc = runtime·malloc(sizeof(*m->stackalloc));
runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil);
+
+ if(m->mcache == nil)
+ m->mcache = runtime·allocmcache();
}
// Try to increment mcpu. Report whether succeeded.
M *m;
// if g has its own m, use it.
- if((m = g->lockedm) != nil)
+ if(g && (m = g->lockedm) != nil)
return m;
// otherwise use general m pool.
G *gp;
uint32 v;
+top:
if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
runtime·throw("negative mcpu");
schedunlock();
runtime·notesleep(&m->havenextg);
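+ // If we were woken by runtime·helpgc rather than to run a
+ // goroutine, do the gc work, then go back to looking for a g.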
+ if(m->helpgc) {
+ runtime·gchelper();
+ m->helpgc = 0;
+ runtime·lock(&runtime·sched);
+ goto top;
+ }
if((gp = m->nextg) == nil)
runtime·throw("bad m->nextg in nextgoroutine");
m->nextg = nil;
return gp;
}
+int32
+runtime·helpgc(void)
+{
+ M *m;
+ int32 n, max;
+
+ // Figure out how many CPUs to use.
+ // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+ max = runtime·gomaxprocs;
+ if(max > runtime·ncpu)
+ max = runtime·ncpu;
+ if(max > MaxGcproc)
+ max = MaxGcproc;
+
+ // We're going to use one CPU no matter what.
+ // Figure out the max number of additional CPUs.
+ max--;
+
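+ // Recruit idle ms: setting helpgc and waking each one sends it
+ // into runtime·gchelper from its scheduling loop.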
+ runtime·lock(&runtime·sched);
+ n = 0;
+ while(n < max && (m = mget(nil)) != nil) {
+ n++;
+ m->helpgc = 1;
+ m->waitnextg = 0;
+ runtime·notewakeup(&m->havenextg);
+ }
+ runtime·unlock(&runtime·sched);
+ return n;
+}
+
void
runtime·stoptheworld(void)
{
schedunlock();
}
-// TODO(rsc): Remove. This is only temporary,
-// for the mark and sweep collector.
void
-runtime·starttheworld(void)
+runtime·starttheworld(bool extra)
{
+ M *m;
+
schedlock();
runtime·gcwaiting = 0;
setmcpumax(runtime·gomaxprocs);
matchmg();
+ if(extra && canaddmcpu()) {
+ // Start a new m that will (we hope) be idle
+ // and so available to help when the next
+ // garbage collection happens.
+ // canaddmcpu above did mcpu++
+ // (necessary, because m will be doing various
+ // initialization work so is definitely running),
+ // but m is not running a specific goroutine,
+ // so set the helpgc flag as a signal to m's
+ // first schedule(nil) to mcpu--.
+ m = startm();
+ m->helpgc = 1;
+ }
schedunlock();
}
{
if(g != m->g0)
runtime·throw("bad runtime·mstart");
- if(m->mcache == nil)
- m->mcache = runtime·allocmcache();
// Record top of stack for use by mcall.
// Once we call schedule we're never coming back,
static void
matchmg(void)
{
- G *g;
+ G *gp;
+ M *mp;
if(m->mallocing || m->gcing)
return;
while(haveg() && canaddmcpu()) {
- g = gget();
- if(g == nil)
+ gp = gget();
+ if(gp == nil)
runtime·throw("gget inconsistency");
- // Find the m that will run g.
- M *m;
- if((m = mget(g)) == nil){
- m = runtime·malloc(sizeof(M));
- mcommoninit(m);
-
- if(runtime·iscgo) {
- CgoThreadStart ts;
-
- if(libcgo_thread_start == nil)
- runtime·throw("libcgo_thread_start missing");
- // pthread_create will make us a stack.
- m->g0 = runtime·malg(-1);
- ts.m = m;
- ts.g = m->g0;
- ts.fn = runtime·mstart;
- runtime·asmcgocall(libcgo_thread_start, &ts);
- } else {
- if(Windows)
- // windows will layout sched stack on os stack
- m->g0 = runtime·malg(-1);
- else
- m->g0 = runtime·malg(8192);
- runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart);
- }
- }
- mnextg(m, g);
+ // Find the m that will run gp.
+ if((mp = mget(gp)) == nil)
+ mp = startm();
+ mnextg(mp, gp);
}
}
+static M*
+startm(void)
+{
+ M *m;
+
+ m = runtime·malloc(sizeof(M));
+ mcommoninit(m);
+
+ if(runtime·iscgo) {
+ CgoThreadStart ts;
+
+ if(libcgo_thread_start == nil)
+ runtime·throw("libcgo_thread_start missing");
+ // pthread_create will make us a stack.
+ m->g0 = runtime·malg(-1);
+ ts.m = m;
+ ts.g = m->g0;
+ ts.fn = runtime·mstart;
+ runtime·asmcgocall(libcgo_thread_start, &ts);
+ } else {
+ if(Windows)
+ // windows will lay out sched stack on os stack
+ m->g0 = runtime·malg(-1);
+ else
+ m->g0 = runtime·malg(8192);
+ runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart);
+ }
+
+ return m;
+}
+
// One round of scheduler: find a goroutine and run it.
// The argument is the goroutine that was running before
// schedule was called, or nil if this is the first call.
gp->readyonstop = 0;
readylocked(gp);
}
+ } else if(m->helpgc) {
+ // atomic { mcpu-- }
+ v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
+ if(atomic_mcpu(v) > maxgomaxprocs)
+ runtime·throw("negative mcpu in scheduler");
+ m->helpgc = 0;
}
// Find (or wait for) g to run. Unlocks runtime·sched.
//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
siz = narg + nret;
siz = (siz+7) & ~7;
-
+
// We could instead create a secondary stack frame
// and make it look like goexit was on the original but
// the call to the actual goroutine function was split.