runtime: add per-p page allocation cache
author     Michael Anthony Knyszek <mknyszek@google.com>
           Mon, 16 Sep 2019 21:23:24 +0000 (21:23 +0000)
committer  Michael Knyszek <mknyszek@google.com>
           Fri, 8 Nov 2019 18:00:54 +0000 (18:00 +0000)
This change adds a per-p free page cache from which the page
allocator can allocate without taking a lock. It also introduces a
completely lockless page allocator fast path.

Although the cache holds at most 64 pages (and usually fewer), the
vast majority (85%+) of page allocations are exactly 1 page in size,
so even a small cache absorbs most allocations.
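
As a rough illustration of the idea (a sketch, not the runtime's
actual code): the per-p cache can be a base address plus two 64-bit
bitmaps, making the common 1-page allocation a handful of bit
operations with no lock. The 8 KiB pageSize and the restriction to
single-page allocs below are assumptions; only the field names follow
the diff.

    package main

    import (
        "fmt"
        "math/bits"
    )

    const pageSize = 8192 // assumed 8 KiB runtime page

    // pageCache sketches the per-p cache the diff reads as p.pcache:
    // a 64-page aligned region described by two bitmaps.
    type pageCache struct {
        base  uintptr // base address of the 64-page region
        cache uint64  // bitmap of free pages (1 = free)
        scav  uint64  // bitmap of free pages that are also scavenged
    }

    func (c *pageCache) empty() bool { return c.cache == 0 }

    // alloc serves the dominant 1-page case: find the lowest set bit,
    // clear it, and report whether that page was scavenged. No lock is
    // needed because each P owns its cache exclusively.
    func (c *pageCache) alloc() (base, scav uintptr) {
        if c.cache == 0 {
            return 0, 0
        }
        i := uintptr(bits.TrailingZeros64(c.cache))
        scav = uintptr((c.scav>>i)&1) * pageSize
        c.cache &^= 1 << i // page is now in use
        c.scav &^= 1 << i  // and no longer counts as scavenged
        return c.base + i*pageSize, scav
    }

    func main() {
        c := pageCache{base: 0x100000, cache: 1<<0 | 1<<5, scav: 1 << 5}
        for !c.empty() {
            base, scav := c.alloc()
            fmt.Printf("page %#x, scavenged bytes %d\n", base, scav)
        }
    }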

Updates #35112.

Change-Id: I170bf0a9375873e7e3230845eb1df7e5cf741b78
Reviewed-on: https://go-review.googlesource.com/c/go/+/195701
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Austin Clements <austin@google.com>
src/runtime/export_test.go
src/runtime/malloc_test.go
src/runtime/mheap.go
src/runtime/proc.go
src/runtime/runtime2.go

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index b1ebfba0d119e145cfac19c5da476a4a616be3aa..ea3f1c177635813bdbf14ccc2c974ca3727c65c2 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -7,6 +7,7 @@
 package runtime
 
 import (
+       "math/bits"
        "runtime/internal/atomic"
        "runtime/internal/sys"
        "unsafe"
@@ -358,6 +359,10 @@ func ReadMemStatsSlow() (base, slow MemStats) {
                        pg := mheap_.pages.chunks[i].scavenged.popcntRange(0, pallocChunkPages)
                        slow.HeapReleased += uint64(pg) * pageSize
                }
+               for _, p := range allp {
+                       pg := bits.OnesCount64(p.pcache.scav)
+                       slow.HeapReleased += uint64(pg) * pageSize
+               }
 
                // Unused space in the current arena also counts as released space.
                slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
@@ -879,3 +884,20 @@ func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
        })
        return
 }
+
+func PageCachePagesLeaked() (leaked uintptr) {
+       stopTheWorld("PageCachePagesLeaked")
+
+       // Walk over destroyed Ps and look for unflushed caches.
+       deadp := allp[len(allp):cap(allp)]
+       for _, p := range deadp {
+               // Since we're going past len(allp) we may see nil Ps.
+               // Just ignore them.
+               if p != nil {
+                       leaked += uintptr(bits.OnesCount64(p.pcache.cache))
+               }
+       }
+
+       startTheWorld()
+       return
+}
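
The two hunks above are the accounting side of the change, and a
standalone sketch may make them easier to read (all types here are
stand-ins, with an assumed 8 KiB page): scavenged pages parked in
per-P caches still count toward HeapReleased, and the caches of Ps
destroyed by a GOMAXPROCS shrink, which live in
allp[len(allp):cap(allp)], must be empty or their pages are leaked.

    package main

    import (
        "fmt"
        "math/bits"
    )

    const pageSize = 8192 // assumed 8 KiB runtime page

    // pageCache and p mirror just the fields the hunks above touch.
    type pageCache struct {
        cache uint64 // bitmap of free pages still held by this P
        scav  uint64 // bitmap of those pages that are scavenged
    }
    type p struct{ pcache pageCache }

    // releasedInCaches follows the ReadMemStatsSlow hunk: count
    // scavenged pages sitting in live Ps' caches toward HeapReleased.
    func releasedInCaches(allp []*p) (released uint64) {
        for _, pp := range allp {
            released += uint64(bits.OnesCount64(pp.pcache.scav)) * pageSize
        }
        return
    }

    // leakedInDeadPs follows PageCachePagesLeaked: destroyed Ps sit
    // past len(allp), and any bits still set in their caches are
    // pages the page allocator can never hand out again.
    func leakedInDeadPs(allp []*p) (leaked uintptr) {
        for _, pp := range allp[len(allp):cap(allp)] {
            if pp != nil { // slots past len(allp) may be nil
                leaked += uintptr(bits.OnesCount64(pp.pcache.cache))
            }
        }
        return
    }

    func main() {
        backing := make([]*p, 2, 3)
        backing[0] = &p{pcache: pageCache{cache: 0b1010, scav: 0b0010}}
        backing[1] = &p{}
        allp := backing[:2]
        // Simulate a destroyed P whose cache was (incorrectly) not flushed.
        backing = backing[:3]
        backing[2] = &p{pcache: pageCache{cache: 0b1}}
        fmt.Println(releasedInCaches(allp), leakedInDeadPs(allp)) // 8192 1
    }
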
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index 1040fb6a8f5588741cf24f42b9ca074209d2db68..5ed4feb77d88b69f10528499acef5bda62b1f184 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -168,6 +168,14 @@ func TestTinyAlloc(t *testing.T) {
        }
 }
 
+func TestPageCacheLeak(t *testing.T) {
+       defer GOMAXPROCS(GOMAXPROCS(1))
+       leaked := PageCachePagesLeaked()
+       if leaked != 0 {
+               t.Fatalf("found %d leaked pages in page caches", leaked)
+       }
+}
+
 func TestPhysicalMemoryUtilization(t *testing.T) {
        got := runTestProg(t, "testprog", "GCPhys")
        want := "OK\n"
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index c9f9d24bba0dc44aa4f29594d076cfb85c445dfe..e87da933263d05ba9ee2108bdeb9e5a498c3ac1c 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -1073,28 +1073,60 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS
        gp := getg()
        base, scav := uintptr(0), uintptr(0)
 
-       // Try to allocate a cached span.
-       s = h.tryAllocMSpan()
+       // If the allocation is small enough, try the page cache!
+       pp := gp.m.p.ptr()
+       if pp != nil && npages < pageCachePages/4 {
+               c := &pp.pcache
 
-       // We failed to do what we need to do without the lock.
-       lock(&h.lock)
+               // If the cache is empty, refill it.
+               if c.empty() {
+                       lock(&h.lock)
+                       *c = h.pages.allocToCache()
+                       unlock(&h.lock)
+               }
 
-       // Try to acquire a base address.
-       base, scav = h.pages.alloc(npages)
-       if base != 0 {
-               goto HaveBase
-       }
-       if !h.grow(npages) {
-               unlock(&h.lock)
-               return nil
-       }
-       base, scav = h.pages.alloc(npages)
-       if base != 0 {
-               goto HaveBase
+               // Try to allocate from the cache.
+               base, scav = c.alloc(npages)
+               if base != 0 {
+                       s = h.tryAllocMSpan()
+
+                       if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
+                               goto HaveSpan
+                       }
+                       // We're either running during GC, failed to acquire an
+                       // mspan, or the allocation is for a large object. This
+                       // means we have to lock the heap and do a bunch of extra
+                       // work, so fall through to the locked path below.
+                       //
+                       // We must do this during GC to avoid skew with heap_scan
+                       // since we flush mcache stats whenever we lock.
+                       //
+                       // TODO(mknyszek): It would be nice to not have to
+                       // lock the heap if it's a large allocation, but
+                       // it's fine for now. The critical section here is
+                       // short and large object allocations are relatively
+                       // infrequent.
+               }
        }
-       throw("grew heap, but no adequate free space found")
 
-HaveBase:
+       // For one reason or another, we couldn't get the
+       // whole job done without the heap lock.
+       lock(&h.lock)
+
+       if base == 0 {
+               // Try to acquire a base address.
+               base, scav = h.pages.alloc(npages)
+               if base == 0 {
+                       if !h.grow(npages) {
+                               unlock(&h.lock)
+                               return nil
+                       }
+                       base, scav = h.pages.alloc(npages)
+                       if base == 0 {
+                               throw("grew heap, but no adequate free space found")
+                       }
+               }
+       }
        if s == nil {
                // We failed to get an mspan earlier, so grab
                // one now that we have the heap lock.
@@ -1124,7 +1156,9 @@ HaveBase:
        }
        unlock(&h.lock)
 
-       // Initialize the span.
+HaveSpan:
+       // At this point, both s != nil and base != 0, and the heap
+       // lock is no longer held. Initialize the span.
        s.init(base, npages)
        if h.allocNeedsZero(base, npages) {
                s.needzero = 1
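
The restructured allocSpan above boils down to a two-level scheme:
take the heap lock once to refill a P's cache, then serve small
allocations from that cache with no lock at all. A minimal standalone
sketch of that shape, with a bump-pointer heap standing in for the
real page bitmap (all names here are illustrative):

    package main

    import (
        "fmt"
        "math/bits"
        "sync"
    )

    const pageSize = 8192 // assumed 8 KiB runtime page

    // pageCache is owned by a single P, so allocating from it needs
    // no locking.
    type pageCache struct {
        base  uintptr
        cache uint64 // 1 bits are free pages
    }

    func (c *pageCache) empty() bool { return c.cache == 0 }

    func (c *pageCache) alloc() uintptr {
        if c.cache == 0 {
            return 0
        }
        i := uintptr(bits.TrailingZeros64(c.cache))
        c.cache &^= 1 << i
        return c.base + i*pageSize
    }

    // heap stands in for mheap_: still locked, but only on refill.
    type heap struct {
        mu   sync.Mutex
        next uintptr
    }

    // allocToCache hands out a fresh 64-page run under the lock.
    func (h *heap) allocToCache() pageCache {
        h.mu.Lock()
        defer h.mu.Unlock()
        c := pageCache{base: h.next, cache: ^uint64(0)}
        h.next += 64 * pageSize
        return c
    }

    // allocPage mirrors the fast path above: one lock acquisition per
    // 64 pages on refill, then lock-free bit operations per allocation.
    func allocPage(h *heap, c *pageCache) uintptr {
        if c.empty() {
            *c = h.allocToCache() // slow path: refill under the lock
        }
        return c.alloc() // fast path: no lock
    }

    func main() {
        h := &heap{next: 0x100000}
        var c pageCache
        for i := 0; i < 3; i++ {
            fmt.Printf("%#x\n", allocPage(h, &c))
        }
    }
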
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 3c3acf0dd7c655aff2dda5d46d62607299971e5f..67ff556ac4acfc73ca27780caa60145747d997ec 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4088,6 +4088,7 @@ func (pp *p) destroy() {
                        mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
                }
                pp.mspancache.len = 0
+               pp.pcache.flush(&mheap_.pages)
        })
        freemcache(pp.mcache)
        pp.mcache = nil
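
The flush added to destroy above is what keeps PageCachePagesLeaked
at zero: a dying P must hand any cached pages back to the global
bitmap. A toy version of such a flush, assuming a global free bitmap
indexed by 64-page chunk (the real flush would also return the scav
bits and run under the heap lock):

    package main

    import "fmt"

    type pageCache struct {
        idx   int    // which 64-page chunk of the global bitmap this covers
        cache uint64 // free pages still held by this P
    }

    type pageBitmap struct {
        free []uint64 // global free-page bitmap, one word per chunk
    }

    // flush ORs the cache's remaining pages back into the global
    // bitmap and empties the cache, so nothing is lost when the P
    // that owned it goes away.
    func (c *pageCache) flush(b *pageBitmap) {
        b.free[c.idx] |= c.cache
        c.cache = 0
    }

    func main() {
        b := &pageBitmap{free: make([]uint64, 1)}
        c := &pageCache{idx: 0, cache: 0b1101}
        c.flush(b)
        fmt.Printf("global=%#b cache=%#b\n", b.free[0], c.cache)
    }
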
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index eba2aed09283f92bbf7e6a1bdc1dda8da64ba8a8..fe1147e2471baada0b6ea5b8d01fbf0e2a294a18 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -555,6 +555,7 @@ type p struct {
        sysmontick  sysmontick // last tick observed by sysmon
        m           muintptr   // back-link to associated m (nil if idle)
        mcache      *mcache
+       pcache      pageCache
        raceprocctx uintptr
 
        deferpool    [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
@@ -611,7 +612,7 @@ type p struct {
 
        palloc persistentAlloc // per-P to avoid mutex
 
-       // _ uint32 // Alignment for atomic fields below
+       _ uint32 // Alignment for atomic fields below
 
        // Per-P GC state
        gcAssistTime         int64    // Nanoseconds in assistAlloc
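
The re-enabled pad field above matters on 32-bit platforms: 64-bit
atomic operations require 8-byte-aligned addresses there, while Go
only guarantees 4-byte alignment for 64-bit struct fields on those
platforms, so structs pad by hand to keep atomically updated fields
at offsets that are a multiple of 8. A generic illustration of the
pattern (not the runtime's p struct):

    package main

    import (
        "fmt"
        "sync/atomic"
        "unsafe"
    )

    // stats shows the manual-padding idiom: without the pad, nanos
    // could land at a 4-byte offset on 32-bit platforms, and 64-bit
    // atomics on it would panic with an alignment fault.
    type stats struct {
        flags uint32
        _     uint32 // pad so the int64 below is 8-byte aligned
        nanos int64  // updated with atomic operations
    }

    func main() {
        var s stats
        atomic.AddInt64(&s.nanos, 42)
        fmt.Println(atomic.LoadInt64(&s.nanos), unsafe.Offsetof(s.nanos))
    }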