We call scanblock for lots of small root pieces,
e.g. for the args and locals areas of every stack frame.
Every scanblock invocation calls getempty/putempty,
which access the lock-free stack shared among all worker threads.
A one-element local cache allows most scanblock calls
to proceed without accessing the shared stack.
LGTM=rsc
R=golang-codereviews, rlh
CC=golang-codereviews, khr, rsc
https://golang.org/cl/121250043
StackFreeList stackcache[NumStackOrders];
+ void* gcworkbuf;
+
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
void runtime·MCache_ReleaseAll(MCache *c);
void runtime·stackcache_clear(MCache *c);
+void runtime·gcworkbuffree(void *b);
enum
{
{
runtime·MCache_ReleaseAll(c);
runtime·stackcache_clear(c);
+ runtime·gcworkbuffree(c->gcworkbuf);
runtime·lock(&runtime·mheap);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
static Workbuf*
getempty(Workbuf *b)
{
+ MCache *c;
+
if(b != nil)
runtime·lfstackpush(&work.full, &b->node);
- b = (Workbuf*)runtime·lfstackpop(&work.empty);
+ b = nil;
+ c = g->m->mcache;
+ if(c->gcworkbuf != nil) {
+ b = c->gcworkbuf;
+ c->gcworkbuf = nil;
+ }
+ if(b == nil)
+ b = (Workbuf*)runtime·lfstackpop(&work.empty);
if(b == nil)
b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
b->nobj = 0;
// Hand the empty work buffer b back for reuse. If the one-element
// gcworkbuf slot in the current M's mcache is empty, b is parked there
// and the shared work.empty stack is not touched; otherwise b is pushed
// onto work.empty.
static void
putempty(Workbuf *b)
{
+ MCache *c;
+
+ c = g->m->mcache;
+ if(c->gcworkbuf == nil) { // local slot is free: keep b here and return early
+ c->gcworkbuf = b;
+ return;
+ }
runtime·lfstackpush(&work.empty, &b->node); // local slot already occupied: fall back to the shared list
}
// Give the work buffer b back via putempty. A nil b is ignored.
// b is taken as void*, matching the void* type of the MCache
// gcworkbuf field it is called with when an MCache is freed.
+void
+runtime·gcworkbuffree(void *b)
+{
+ if(b != nil) // nothing cached: nothing to return
+ putempty(b);
+}
+
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)