Cypherpunks repositories - gostls13.git/commitdiff
runtime: don't acquirem on malloc fast path
author: Dmitriy Vyukov <dvyukov@google.com>
Mon, 18 Aug 2014 12:33:39 +0000 (16:33 +0400)
committer: Dmitriy Vyukov <dvyukov@google.com>
Mon, 18 Aug 2014 12:33:39 +0000 (16:33 +0400)
Mallocgc must be atomic wrt GC, but for performance reasons
don't acquirem/releasem on fast path. The code does not have
split stack checks, so it can't be preempted by GC.
Functions like roundup/add are inlined. And onM/racemalloc are nosplit.
Also add debug code that checks these assumptions.

benchmark                     old ns/op     new ns/op     delta
BenchmarkMalloc8              20.5          17.2          -16.10%
BenchmarkMalloc16             29.5          27.0          -8.47%
BenchmarkMallocTypeInfo8      31.5          27.6          -12.38%
BenchmarkMallocTypeInfo16     34.7          30.9          -10.95%

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/123100043

src/cmd/api/goapi.go
src/pkg/runtime/malloc.go
src/pkg/runtime/race.c
src/pkg/runtime/stubs.go
src/pkg/runtime/stubs.goc

index 38bf9592f2fa0ee7d5f4b12e11a9ffdc58496e9b..e0d17c9564cbcb04907aa6e5fdff92e2a254981f 100644 (file)
@@ -378,7 +378,10 @@ func (w *Walker) parseFile(dir, file string) (*ast.File, error) {
        }
        if w.context != nil && file == fmt.Sprintf("zruntime_defs_%s_%s.go", w.context.GOOS, w.context.GOARCH) {
                // Just enough to keep the api checker happy.
-               src := "package runtime; type maptype struct{}; type _type struct{}; type alg struct{}; type mspan struct{}; type m struct{}; type lock struct{}; type slicetype struct{}; type iface struct{}; type eface struct{}; type interfacetype struct{}; type itab struct{}"
+               src := "package runtime; type maptype struct{}; type _type struct{}; type alg struct{};" +
+                       " type mspan struct{}; type m struct{}; type lock struct{}; type slicetype struct{};" +
+                       " type iface struct{}; type eface struct{}; type interfacetype struct{}; type itab struct{};" +
+                       " type mcache struct{}"
                f, err = parser.ParseFile(fset, filename, src, 0)
                if err != nil {
                        log.Fatalf("incorrect generated file: %s", err)
index 76c06f314b14ad3c86e8db384daf88b8873e7c4d..f116efaba4db8ec364ea27653adf71817db49163 100644 (file)
@@ -59,14 +59,25 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
        if size == 0 {
                return unsafe.Pointer(&zeroObject)
        }
-       mp := acquirem()
-       if mp.mallocing != 0 {
-               gothrow("malloc/free - deadlock")
-       }
-       mp.mallocing = 1
        size0 := size
 
-       c := mp.mcache
+       // This function must be atomic wrt GC, but for performance reasons
+       // we don't acquirem/releasem on fast path. The code below does not have
+       // split stack checks, so it can't be preempted by GC.
+       // Functions like roundup/add are inlined. And onM/racemalloc are nosplit.
+       // If debugMalloc = true, these assumptions are checked below.
+       if debugMalloc {
+               mp := acquirem()
+               if mp.mallocing != 0 {
+                       gothrow("malloc deadlock")
+               }
+               mp.mallocing = 1
+               if mp.curg != nil {
+                       mp.curg.stackguard0 = ^uint(0xfff) | 0xbad
+               }
+       }
+
+       c := gomcache()
        var s *mspan
        var x unsafe.Pointer
        if size <= maxSmallSize {
@@ -118,8 +129,18 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                                        x = tiny
                                        c.tiny = (*byte)(add(x, size))
                                        c.tinysize -= uint(size1)
-                                       mp.mallocing = 0
-                                       releasem(mp)
+                                       if debugMalloc {
+                                               mp := acquirem()
+                                               if mp.mallocing == 0 {
+                                                       gothrow("bad malloc")
+                                               }
+                                               mp.mallocing = 0
+                                               if mp.curg != nil {
+                                                       mp.curg.stackguard0 = mp.curg.stackguard
+                                               }
+                                               releasem(mp)
+                                               releasem(mp)
+                                       }
                                        return x
                                }
                        }
@@ -127,8 +148,10 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                        s = c.alloc[tinySizeClass]
                        v := s.freelist
                        if v == nil {
+                               mp := acquirem()
                                mp.scalararg[0] = tinySizeClass
                                onM(&mcacheRefill_m)
+                               releasem(mp)
                                s = c.alloc[tinySizeClass]
                                v = s.freelist
                        }
@@ -156,8 +179,10 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                        s = c.alloc[sizeclass]
                        v := s.freelist
                        if v == nil {
+                               mp := acquirem()
                                mp.scalararg[0] = uint(sizeclass)
                                onM(&mcacheRefill_m)
+                               releasem(mp)
                                s = c.alloc[sizeclass]
                                v = s.freelist
                        }
@@ -174,11 +199,13 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                }
                c.local_cachealloc += int(size)
        } else {
+               mp := acquirem()
                mp.scalararg[0] = uint(size)
                mp.scalararg[1] = uint(flags)
                onM(&largeAlloc_m)
                s = (*mspan)(mp.ptrarg[0])
                mp.ptrarg[0] = nil
+               releasem(mp)
                x = unsafe.Pointer(uintptr(s.start << pageShift))
                size = uintptr(s.elemsize)
        }
@@ -221,18 +248,22 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                                        // into the GC bitmap. It's 7 times slower than copying
                                        // from the pre-unrolled mask, but saves 1/16 of type size
                                        // memory for the mask.
+                                       mp := acquirem()
                                        mp.ptrarg[0] = x
                                        mp.ptrarg[1] = unsafe.Pointer(typ)
                                        mp.scalararg[0] = uint(size)
                                        mp.scalararg[1] = uint(size0)
                                        onM(&unrollgcproginplace_m)
+                                       releasem(mp)
                                        goto marked
                                }
                                ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
                                // Check whether the program is already unrolled.
                                if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
+                                       mp := acquirem()
                                        mp.ptrarg[0] = unsafe.Pointer(typ)
                                        onM(&unrollgcprog_m)
+                                       releasem(mp)
                                }
                                ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
                        } else {
@@ -287,11 +318,23 @@ func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer {
                }
        }
 marked:
-       mp.mallocing = 0
-
        if raceenabled {
                racemalloc(x, size)
        }
+
+       if debugMalloc {
+               mp := acquirem()
+               if mp.mallocing == 0 {
+                       gothrow("bad malloc")
+               }
+               mp.mallocing = 0
+               if mp.curg != nil {
+                       mp.curg.stackguard0 = mp.curg.stackguard
+               }
+               releasem(mp)
+               releasem(mp)
+       }
+
        if debug.allocfreetrace != 0 {
                tracealloc(x, size, typ)
        }
@@ -300,12 +343,12 @@ marked:
                if size < uintptr(rate) && int32(size) < c.next_sample {
                        c.next_sample -= int32(size)
                } else {
+                       mp := acquirem()
                        profilealloc(mp, x, size)
+                       releasem(mp)
                }
        }
 
-       releasem(mp)
-
        if memstats.heap_alloc >= memstats.next_gc {
                gogc(0)
        }
index 12cc6a0dd877ac6da9269e632eb39799087c6156..fa04a39310993ad7c6aff049282462902178d61d 100644 (file)
@@ -11,6 +11,7 @@
 #include "race.h"
 #include "type.h"
 #include "typekind.h"
+#include "../../cmd/ld/textflag.h"
 
 // Race runtime functions called via runtime·racecall.
 void __tsan_init(void);
@@ -106,6 +107,7 @@ runtime·racemapshadow(void *addr, uintptr size)
        runtime·racecall(__tsan_map_shadow, addr, size);
 }
 
+#pragma textflag NOSPLIT
 void
 runtime·racemalloc(void *p, uintptr sz)
 {
index 9c18434d5d44ae924d9d7d0bad0ecd2f0e883881..a4ef9d3d55cbd40c5d9e2ed6f8d70fd6e11843e2 100644 (file)
@@ -43,6 +43,7 @@ func roundup(p unsafe.Pointer, n uintptr) unsafe.Pointer {
 // in stubs.goc
 func acquirem() *m
 func releasem(mp *m)
+func gomcache() *mcache
 
 // An mFunction represents a C function that runs on the M stack.  It
 // can be called from Go using mcall or onM.  Through the magic of
index 8a043c63b0d16343b45e0e2e06b34c4b10a8b28d..e646b5518132f8c58971121041f83c9042d00ec3 100644 (file)
@@ -83,6 +83,11 @@ func runtime·releasem(mp *M) {
        }
 }
 
+#pragma textflag NOSPLIT
+func runtime·gomcache() (ret *MCache) {
+       ret = g->m->mcache;
+}
+
 // For testing.
 // TODO: find a better place for this.
 func GCMask(x Eface) (mask Slice) {