]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: implement transfer cache
authorDmitriy Vyukov <dvyukov@google.com>
Mon, 18 Aug 2014 12:52:31 +0000 (16:52 +0400)
committerDmitriy Vyukov <dvyukov@google.com>
Mon, 18 Aug 2014 12:52:31 +0000 (16:52 +0400)
Currently we do the following dance after sweeping a span:
1. lock mcentral
2. remove the span from a list
3. unlock mcentral
4. unmark span
5. lock mheap
6. insert the span into heap
7. unlock mheap
8. lock mcentral
9. observe empty list
10. unlock mcentral
11. lock mheap
12. grab the span
13. unlock mheap
14. mark span
15. lock mcentral
16. insert the span into empty list
17. unlock mcentral

This change short-circuits this sequence to nothing,
that is, we just cache and use the span after sweeping.

This gives us functionality similar (even better) to tcmalloc's transfer cache.

benchmark            old ns/op     new ns/op     delta
BenchmarkMalloc8     22.2          19.5          -12.16%
BenchmarkMalloc16    31.0          26.6          -14.19%

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/119550043

src/pkg/runtime/malloc.h
src/pkg/runtime/mcentral.c
src/pkg/runtime/mgc0.c
src/pkg/runtime/mheap.c

index 4612dddb16e4535f8e66926650f04ad8b0c8d0bb..963e71c42f9f907e797380b7346e66b72fc8a658 100644 (file)
@@ -423,7 +423,7 @@ struct MSpan
 
 void   runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
 void   runtime·MSpan_EnsureSwept(MSpan *span);
-bool   runtime·MSpan_Sweep(MSpan *span);
+bool   runtime·MSpan_Sweep(MSpan *span, bool preserve);
 
 // Every MSpan is in one doubly-linked list,
 // either one of the MHeap's free lists or one of the
@@ -447,7 +447,7 @@ struct MCentral
 void   runtime·MCentral_Init(MCentral *c, int32 sizeclass);
 MSpan* runtime·MCentral_CacheSpan(MCentral *c);
 void   runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
-bool   runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
+bool   runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
 
 // Main malloc heap.
 // The heap itself is the "free[]" and "large" arrays,
index 6b2de02c42143f2063ad568a72e4f9e01fffbe66..fe6bcfeb132abd126d0c27631e7a17a62b5455c2 100644 (file)
@@ -9,16 +9,12 @@
 // The MCentral doesn't actually contain the list of free objects; the MSpan does.
 // Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
 // and those that are completely allocated (c->empty).
-//
-// TODO(rsc): tcmalloc uses a "transfer cache" to split the list
-// into sections of class_to_transfercount[sizeclass] objects
-// so that it is faster to move those lists between MCaches and MCentrals.
 
 #include "runtime.h"
 #include "arch_GOARCH.h"
 #include "malloc.h"
 
-static bool MCentral_Grow(MCentral *c);
+static MSpan* MCentral_Grow(MCentral *c);
 
 // Initialize a single central free list.
 void
@@ -42,17 +38,20 @@ runtime·MCentral_CacheSpan(MCentral *c)
 retry:
        for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
                if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
+                       runtime·MSpanList_Remove(s);
+                       runtime·MSpanList_InsertBack(&c->empty, s);
                        runtime·unlock(&c->lock);
-                       runtime·MSpan_Sweep(s);
-                       runtime·lock(&c->lock);
-                       // the span could have been moved to heap, retry
-                       goto retry;
+                       runtime·MSpan_Sweep(s, true);
+                       goto havespan;
                }
                if(s->sweepgen == sg-1) {
                        // the span is being swept by background sweeper, skip
                        continue;
                }
                // we have a nonempty span that does not require sweeping, allocate from it
+               runtime·MSpanList_Remove(s);
+               runtime·MSpanList_InsertBack(&c->empty, s);
+               runtime·unlock(&c->lock);
                goto havespan;
        }
 
@@ -64,9 +63,12 @@ retry:
                        // swept spans are at the end of the list
                        runtime·MSpanList_InsertBack(&c->empty, s);
                        runtime·unlock(&c->lock);
-                       runtime·MSpan_Sweep(s);
+                       runtime·MSpan_Sweep(s, true);
+                       if(s->freelist != nil)
+                               goto havespan;
                        runtime·lock(&c->lock);
-                       // the span could be moved to nonempty or heap, retry
+                       // the span is still empty after sweep
+                       // it is already in the empty list, so just retry
                        goto retry;
                }
                if(s->sweepgen == sg-1) {
@@ -77,25 +79,26 @@ retry:
                // all subsequent ones must also be either swept or in process of sweeping
                break;
        }
+       runtime·unlock(&c->lock);
 
        // Replenish central list if empty.
-       if(!MCentral_Grow(c)) {
-               runtime·unlock(&c->lock);
+       s = MCentral_Grow(c);
+       if(s == nil)
                return nil;
-       }
-       goto retry;
+       runtime·lock(&c->lock);
+       runtime·MSpanList_InsertBack(&c->empty, s);
+       runtime·unlock(&c->lock);
 
 havespan:
+       // At this point s is a non-empty span, queued at the end of the empty list,
+       // c is unlocked.
        cap = (s->npages << PageShift) / s->elemsize;
        n = cap - s->ref;
        if(n == 0)
                runtime·throw("empty span");
        if(s->freelist == nil)
                runtime·throw("freelist empty");
-       runtime·MSpanList_Remove(s);
-       runtime·MSpanList_InsertBack(&c->empty, s);
        s->incache = true;
-       runtime·unlock(&c->lock);
        return s;
 }
 
@@ -125,24 +128,39 @@ runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
 // Called during sweep.
 // Returns true if the span was returned to heap.  Sets sweepgen to
 // the latest generation.
+// If preserve=true, don't return the span to heap nor relink in MCentral lists;
+// caller takes care of it.
 bool
-runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
+runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
 {
+       bool wasempty;
+
        if(s->incache)
                runtime·throw("freespan into cached span");
+
+       // Add the objects back to s's free list.
+       wasempty = s->freelist == nil;
+       end->next = s->freelist;
+       s->freelist = start;
+       s->ref -= n;
+
+       if(preserve) {
+               // preserve is set only when called from MCentral_CacheSpan above,
+               // the span must be in the empty list.
+               if(s->next == nil)
+                       runtime·throw("can't preserve unlinked span");
+               runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
+               return false;
+       }
+
        runtime·lock(&c->lock);
 
        // Move to nonempty if necessary.
-       if(s->freelist == nil) {
+       if(wasempty) {
                runtime·MSpanList_Remove(s);
                runtime·MSpanList_Insert(&c->nonempty, s);
        }
 
-       // Add the objects back to s's free list.
-       end->next = s->freelist;
-       s->freelist = start;
-       s->ref -= n;
-       
        // delay updating sweepgen until here.  This is the signal that
        // the span may be used in an MCache, so it must come after the
        // linked list operations above (actually, just after the
@@ -164,9 +182,8 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *
        return true;
 }
 
-// Fetch a new span from the heap and
-// carve into objects for the free list.
-static bool
+// Fetch a new span from the heap and carve into objects for the free list.
+static MSpan*
 MCentral_Grow(MCentral *c)
 {
        uintptr size, npages, i, n;
@@ -174,16 +191,12 @@ MCentral_Grow(MCentral *c)
        byte *p;
        MSpan *s;
 
-       runtime·unlock(&c->lock);
        npages = runtime·class_to_allocnpages[c->sizeclass];
        size = runtime·class_to_size[c->sizeclass];
        n = (npages << PageShift) / size;
        s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
-       if(s == nil) {
-               // TODO(rsc): Log out of memory
-               runtime·lock(&c->lock);
-               return false;
-       }
+       if(s == nil)
+               return nil;
 
        // Carve span into sequence of blocks.
        tailp = &s->freelist;
@@ -197,8 +210,5 @@ MCentral_Grow(MCentral *c)
        }
        *tailp = nil;
        runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
-
-       runtime·lock(&c->lock);
-       runtime·MSpanList_Insert(&c->nonempty, s);
-       return true;
+       return s;
 }
index 5389538eae7fa49d66ea436377ac8ba161b164cc..ef44d7f786f81552e531ac24b376c3038872d262 100644 (file)
@@ -884,7 +884,7 @@ runtime·MSpan_EnsureSwept(MSpan *s)
        if(runtime·atomicload(&s->sweepgen) == sg)
                return;
        if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-               runtime·MSpan_Sweep(s);
+               runtime·MSpan_Sweep(s, false);
                return;
        }
        // unfortunate condition, and we don't have efficient means to wait
@@ -895,8 +895,10 @@ runtime·MSpan_EnsureSwept(MSpan *s)
 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
 // It clears the mark bits in preparation for the next GC round.
 // Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in MCentral lists;
+// caller takes care of it.
 bool
-runtime·MSpan_Sweep(MSpan *s)
+runtime·MSpan_Sweep(MSpan *s, bool preserve)
 {
        int32 cl, n, npages, nfree;
        uintptr size, off, *bitp, shift, xbits, bits;
@@ -995,6 +997,8 @@ runtime·MSpan_Sweep(MSpan *s)
                *bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
                if(cl == 0) {
                        // Free large span.
+                       if(preserve)
+                               runtime·throw("can't preserve large span");
                        runtime·unmarkspan(p, s->npages<<PageShift);
                        s->needzero = 1;
                        // important to set sweepgen before returning it to heap
@@ -1056,7 +1060,7 @@ runtime·MSpan_Sweep(MSpan *s)
                c->local_nsmallfree[cl] += nfree;
                c->local_cachealloc -= nfree * size;
                runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
-               res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end);
+               res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
                // MCentral_FreeSpan updates sweepgen
        }
        return res;
@@ -1129,7 +1133,7 @@ runtime·sweepone(void)
                if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
                        continue;
                npages = s->npages;
-               if(!runtime·MSpan_Sweep(s))
+               if(!runtime·MSpan_Sweep(s, false))
                        npages = 0;
                g->m->locks--;
                return npages;
index 186fd48d47aefa3eba773b07a268bd433f82e5ee..46cf80007b542734b0d75ee2a3cbd03629ce2279 100644 (file)
@@ -107,7 +107,7 @@ retry:
                        // swept spans are at the end of the list
                        runtime·MSpanList_InsertBack(list, s);
                        runtime·unlock(&h->lock);
-                       n += runtime·MSpan_Sweep(s);
+                       n += runtime·MSpan_Sweep(s, false);
                        runtime·lock(&h->lock);
                        if(n >= npages)
                                return n;