runtime: release unused memory to the OS.
author    Sébastien Paolacci <sebastien.paolacci@gmail.com>
          Thu, 16 Feb 2012 18:30:04 +0000 (13:30 -0500)
committer Russ Cox <rsc@golang.org>
          Thu, 16 Feb 2012 18:30:04 +0000 (13:30 -0500)
Periodically scan MHeap's free lists for long-unused spans and release them to the OS if any are found.

Current hardcoded settings:
        - a GC is forced if none has occurred over the last 2 minutes.
        - spans are handed back to the OS after 5 minutes of disuse.

SysUnused (for Unix) is a wrapper around madvise: MADV_DONTNEED on Linux and MADV_FREE on the BSDs.

R=rsc, dvyukov, remyoudompheng
CC=golang-dev
https://golang.org/cl/5451057
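
To make SysUnused concrete, here is a minimal user-space sketch of the same madvise call on Linux, written in Go against the syscall package. It is an illustration only, not the runtime's C implementation; the 1 MB size and the anonymous mapping are assumptions made for the example.

        package main

        import (
                "fmt"
                "syscall"
        )

        func main() {
                // Map 1 MB of anonymous memory, standing in for a span's pages.
                mem, err := syscall.Mmap(-1, 0, 1<<20,
                        syscall.PROT_READ|syscall.PROT_WRITE,
                        syscall.MAP_ANON|syscall.MAP_PRIVATE)
                if err != nil {
                        panic(err)
                }
                mem[0] = 1 // touch a page so it is backed by physical memory

                // Essentially what SysUnused does on Linux: tell the kernel the
                // pages are unused so it may reclaim the physical memory. The
                // virtual mapping stays valid and refaults as zero pages.
                if err := syscall.Madvise(mem, syscall.MADV_DONTNEED); err != nil {
                        panic(err)
                }
                fmt.Println("pages advised MADV_DONTNEED")
        }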

src/pkg/runtime/malloc.h
src/pkg/runtime/mem.go
src/pkg/runtime/mgc0.c
src/pkg/runtime/mheap.c
src/pkg/runtime/proc.c

index d79c86d1244e865782ada0d4f15761f61ad39900..5f03693f52a7b003350fb378d99fb6ca5a7d9029 100644 (file)
@@ -205,6 +205,7 @@ struct MStats
        uint64  heap_sys;       // bytes obtained from system
        uint64  heap_idle;      // bytes in idle spans
        uint64  heap_inuse;     // bytes in non-idle spans
+       uint64  heap_released;  // bytes released to the OS
        uint64  heap_objects;   // total number of allocated objects
 
        // Statistics about allocation of low-level fixed-size structures.
@@ -220,6 +221,7 @@ struct MStats
        // Statistics about garbage collector.
        // Protected by stopping the world during GC.
        uint64  next_gc;        // next GC (in heap_alloc time)
+       uint64  last_gc;        // last GC (in absolute time)
        uint64  pause_total_ns;
        uint64  pause_ns[256];
        uint32  numgc;
@@ -304,14 +306,16 @@ struct MSpan
 {
        MSpan   *next;          // in a span linked list
        MSpan   *prev;          // in a span linked list
-       MSpan   *allnext;               // in the list of all spans
+       MSpan   *allnext;       // in the list of all spans
        PageID  start;          // starting page number
        uintptr npages;         // number of pages in span
        MLink   *freelist;      // list of free objects
        uint32  ref;            // number of allocated objects in this span
        uint32  sizeclass;      // size class
        uint32  state;          // MSpanInUse etc
-       byte    *limit; // end of data in span
+       int64   unusedsince;    // First time spotted by GC in MSpanFree state
+       uintptr npreleased;     // number of pages released to the OS
+       byte    *limit;         // end of data in span
 };
 
 void   runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
@@ -381,6 +385,7 @@ MSpan*      runtime·MHeap_LookupMaybe(MHeap *h, void *v);
 void   runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
 void*  runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
 void   runtime·MHeap_MapBits(MHeap *h);
+void   runtime·MHeap_Scavenger(void);
 
 void*  runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
 int32  runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
index 3ad906ac276b684bd41bd918ebf42464700a7eff..76680086cd471925690b6c77a8cc203afc9169ff 100644 (file)
@@ -17,11 +17,12 @@ type MemStats struct {
        Frees      uint64 // number of frees
 
        // Main allocation heap statistics.
-       HeapAlloc   uint64 // bytes allocated and still in use
-       HeapSys     uint64 // bytes obtained from system
-       HeapIdle    uint64 // bytes in idle spans
-       HeapInuse   uint64 // bytes in non-idle span
-       HeapObjects uint64 // total number of allocated objects
+       HeapAlloc    uint64 // bytes allocated and still in use
+       HeapSys      uint64 // bytes obtained from system
+       HeapIdle     uint64 // bytes in idle spans
+       HeapInuse    uint64 // bytes in non-idle span
+       HeapReleased uint64 // bytes released to the OS
+       HeapObjects  uint64 // total number of allocated objects
 
        // Low-level fixed-size structure allocator statistics.
        //      Inuse is bytes used now.
@@ -35,7 +36,8 @@ type MemStats struct {
        BuckHashSys uint64 // profiling bucket hash table
 
        // Garbage collector statistics.
-       NextGC       uint64
+       NextGC       uint64 // next run in HeapAlloc time (bytes)
+       LastGC       uint64 // last run in absolute time (ns)
        PauseTotalNs uint64
        PauseNs      [256]uint64 // most recent GC pause times
        NumGC        uint32
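
For a sense of how the new fields surface to users, a small Go program can read them back. This sketch assumes a release where the stats are obtained via runtime.ReadMemStats and LastGC is nanoseconds since the epoch (both true in later Go versions; the API was still settling at the time of this CL).

        package main

        import (
                "fmt"
                "runtime"
                "time"
        )

        func main() {
                var m runtime.MemStats
                runtime.ReadMemStats(&m)
                fmt.Printf("heap idle:     %d MB\n", m.HeapIdle>>20)
                fmt.Printf("heap released: %d MB\n", m.HeapReleased>>20)
                if m.LastGC != 0 {
                        ago := time.Duration(uint64(time.Now().UnixNano()) - m.LastGC)
                        fmt.Printf("last GC ran %v ago\n", ago)
                }
        }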
index 1b959286ac3d483482675c21885d620cc68ff1d4..8efa7afc0324150926da81de6203f6335ada0e87 100644 (file)
@@ -716,8 +716,10 @@ sweep(void)
        byte *p;
        MCache *c;
        byte *arena_start;
+       int64 now;
 
        arena_start = runtime·mheap.arena_start;
+       now = runtime·nanotime();
 
        for(;;) {
                s = work.spans;
@@ -726,6 +728,11 @@ sweep(void)
                if(!runtime·casp(&work.spans, s, s->allnext))
                        continue;
 
+               // Stamp newly unused spans. The scavenger will use that
+               // info to potentially give back some pages to the OS.
+               if(s->state == MSpanFree && s->unusedsince == 0)
+                       s->unusedsince = now;
+
                if(s->state != MSpanInUse)
                        continue;
 
@@ -963,6 +970,7 @@ runtime·gc(int32 force)
        obj1 = mstats.nmalloc - mstats.nfree;
 
        t3 = runtime·nanotime();
+       mstats.last_gc = t3;
        mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
        mstats.pause_total_ns += t3 - t0;
        mstats.numgc++;
index d75c18d828ae9d13b30e37aeb77c9d1f518a575f..a40a14512b6b4f0129f30d4a585df6134e47a1fc 100644 (file)
@@ -103,6 +103,8 @@ HaveSpan:
        runtime·MSpanList_Remove(s);
        s->state = MSpanInUse;
        mstats.heap_idle -= s->npages<<PageShift;
+       mstats.heap_released -= s->npreleased<<PageShift;
+       s->npreleased = 0;
 
        if(s->npages > npage) {
                // Trim extra and put it back in the heap.
@@ -280,6 +282,8 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
        }
        mstats.heap_idle += s->npages<<PageShift;
        s->state = MSpanFree;
+       s->unusedsince = 0;
+       s->npreleased = 0;
        runtime·MSpanList_Remove(s);
        sp = (uintptr*)(s->start<<PageShift);
 
@@ -292,6 +296,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
                *tp |= *sp;     // propagate "needs zeroing" mark
                s->start = t->start;
                s->npages += t->npages;
+               s->npreleased = t->npreleased; // absorb released pages
                p -= t->npages;
                h->map[p] = s;
                runtime·MSpanList_Remove(t);
@@ -304,6 +309,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
                tp = (uintptr*)(t->start<<PageShift);
                *sp |= *tp;     // propagate "needs zeroing" mark
                s->npages += t->npages;
+               s->npreleased += t->npreleased;
                h->map[p + s->npages - 1] = s;
                runtime·MSpanList_Remove(t);
                t->state = MSpanDead;
@@ -317,8 +323,81 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
                runtime·MSpanList_Insert(&h->free[s->npages], s);
        else
                runtime·MSpanList_Insert(&h->large, s);
+}
 
-       // TODO(rsc): IncrementalScavenge() to return memory to OS.
+// Release (part of) unused memory to OS.
+// Goroutine created in runtime·schedinit.
+// Loop forever.
+void
+runtime·MHeap_Scavenger(void)
+{
+       MHeap *h;
+       MSpan *s, *list;
+       uint64 tick, now, forcegc, limit;
+       uint32 k, i;
+       uintptr released, sumreleased;
+       byte *env;
+       bool trace;
+       Note note;
+
+       // If we go two minutes without a garbage collection, force one to run.
+       forcegc = 2*60*1e9;
+       // If a span goes unused for 5 minutes after a garbage collection,
+       // we hand it back to the operating system.
+       limit = 5*60*1e9;
+       // Make wake-up period small enough for the sampling to be correct.
+       tick = forcegc < limit ? forcegc/2 : limit/2;
+
+       trace = false;
+       env = runtime·getenv("GOGCTRACE");
+       if(env != nil)
+               trace = runtime·atoi(env) > 0;
+
+       h = &runtime·mheap;
+       for(k=0;; k++) {
+               runtime·noteclear(&note);
+               runtime·entersyscall();
+               runtime·notetsleep(&note, tick);
+               runtime·exitsyscall();
+
+               runtime·lock(h);
+               now = runtime·nanotime();
+               if(now - mstats.last_gc > forcegc) {
+                       runtime·unlock(h);
+                       runtime·gc(1);
+                       runtime·lock(h);
+                       now = runtime·nanotime();
+                       if (trace)
+                               runtime·printf("scvg%d: GC forced\n", k);
+               }
+               sumreleased = 0;
+               for(i=0; i < nelem(h->free)+1; i++) {
+                       if(i < nelem(h->free))
+                               list = &h->free[i];
+                       else
+                               list = &h->large;
+                       if(runtime·MSpanList_IsEmpty(list))
+                               continue;
+                       for(s=list->next; s != list; s=s->next) {
+                               if(s->unusedsince != 0 && (now - s->unusedsince) > limit) {
+                                       released = (s->npages - s->npreleased) << PageShift;
+                                       mstats.heap_released += released;
+                                       sumreleased += released;
+                                       s->npreleased = s->npages;
+                                       runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
+                               }
+                       }
+               }
+               runtime·unlock(h);
+
+               if(trace) {
+                       if(sumreleased > 0)
+                               runtime·printf("scvg%d: %p MB released\n", k, sumreleased>>20);
+                       runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
+                               k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
+                               mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
+               }
+       }
 }
 
 // Initialize a new span with the given start and npages.
@@ -333,6 +412,8 @@ runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages)
        span->ref = 0;
        span->sizeclass = 0;
        span->state = 0;
+       span->unusedsince = 0;
+       span->npreleased = 0;
 }
 
 // Initialize an empty doubly-linked list.
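
A usage note, based on the printf calls in the scavenger above: running a program with GOGCTRACE=1 makes the scavenger print a status line on each wake-up. The sample below is invented for illustration, but the format follows the code, including the %p verb, which renders the released megabyte count in hex (0x19 = 25).

        $ GOGCTRACE=1 ./myprog
        scvg0: inuse: 10, idle: 32, sys: 43, released: 0, consumed: 43 (MB)
        scvg1: 0x19 MB released
        scvg1: inuse: 10, idle: 32, sys: 43, released: 25, consumed: 18 (MB)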
index 9a4d205901110d533652ef772abcdf61a9d84648..3dbf77a105d9fbb2ed4638ae476a6565095d4f4d 100644 (file)
@@ -164,6 +164,9 @@ setmcpumax(uint32 n)
        }
 }
 
+// Keep track of the scavenger's goroutine for deadlock detection.

+static G *scvg;
+
 // The bootstrap sequence is:
 //
 //     call osinit
@@ -206,6 +209,8 @@ runtime·schedinit(void)
 
        mstats.enablegc = 1;
        m->nomemprof--;
+
+       scvg = runtime·newproc1((byte*)runtime·MHeap_Scavenger, nil, 0, 0, runtime·schedinit);
 }
 
 extern void main·init(void);
@@ -582,9 +587,12 @@ top:
                mput(m);
        }
 
-       v = runtime·atomicload(&runtime·sched.atomic);
-       if(runtime·sched.grunning == 0)
-               runtime·throw("all goroutines are asleep - deadlock!");
+       // Look for a deadlock situation: a single active g, which happens to be scvg.
+       if(runtime·sched.grunning == 1 && runtime·sched.gwait == 0) {
+               if(scvg->status == Grunning || scvg->status == Gsyscall)
+                       runtime·throw("all goroutines are asleep - deadlock!");
+       }
+
        m->nextg = nil;
        m->waitnextg = 1;
        runtime·noteclear(&m->havenextg);
@@ -593,6 +601,7 @@ top:
        // Entersyscall might have decremented mcpu too, but if so
        // it will see the waitstop and take the slow path.
        // Exitsyscall never increments mcpu beyond mcpumax.
+       v = runtime·atomicload(&runtime·sched.atomic);
        if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
                // set waitstop = 0 (known to be 1)
                runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
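
The scheduler change above is needed because the scavenger is a permanently present goroutine: a plain grunning == 0 check would never fire again once it exists. The special case keeps trivial deadlocks detectable, as in this sketch:

        package main

        // main is the only user goroutine and it blocks forever. Even though
        // the scavenger goroutine always exists, the runtime must still
        // diagnose this as "all goroutines are asleep - deadlock!".
        func main() {
                ch := make(chan int)
                <-ch // no other goroutine can ever send
        }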