]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: increase malloc size classes
authorDmitry Vyukov <dvyukov@google.com>
Mon, 27 Jun 2016 10:23:39 +0000 (12:23 +0200)
committerDmitry Vyukov <dvyukov@google.com>
Fri, 19 Aug 2016 21:24:28 +0000 (21:24 +0000)
When we calculate class sizes, in some cases we discard considerable
amounts of memory without an apparent reason. For example, we choose
size 8448 with 6 objects in 7 pages. But we can well use object
size 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%).

Increase class sizes to the max value that leads to the same
page count/number of objects. Full list of affected size classes:

class 36: pages: 2 size: 1664->1792 +128 (7.69%)
class 39: pages: 1 size: 2560->2688 +128 (5.0%)
class 40: pages: 3 size: 2816->3072 +256 (9.9%)
class 41: pages: 2 size: 3072->3200 +128 (4.16%)
class 42: pages: 3 size: 3328->3456 +128 (3.84%)
class 44: pages: 3 size: 4608->4864 +256 (5.55%)
class 47: pages: 4 size: 6400->6528 +128 (2.0%)
class 48: pages: 5 size: 6656->6784 +128 (1.92%)
class 51: pages: 7 size: 8448->9472 +1024 (12.12%)
class 52: pages: 6 size: 8704->9728 +1024 (11.76%)
class 53: pages: 5 size: 9472->10240 +768 (8.10%)
class 54: pages: 4 size: 10496->10880 +384 (3.65%)
class 57: pages: 7 size: 14080->14336 +256 (1.81%)
class 59: pages: 9 size: 16640->18432 +1792 (10.76%)
class 60: pages: 7 size: 17664->19072 +1408 (7.97%)
class 62: pages: 8 size: 21248->21760 +512 (2.40%)
class 64: pages: 10 size: 24832->27264 +2432 (9.79%)
class 65: pages: 7 size: 28416->28672 +256 (0.90%)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.59s ± 5%     2.52s ± 4%    ~     (p=0.132 n=6+6)
Fannkuch11-12                2.13s ± 3%     2.17s ± 3%    ~     (p=0.180 n=6+6)
FmtFprintfEmpty-12          47.0ns ± 3%    46.6ns ± 1%    ~     (p=0.355 n=6+5)
FmtFprintfString-12          131ns ± 0%     131ns ± 1%    ~     (p=0.476 n=4+6)
FmtFprintfInt-12             121ns ± 6%     122ns ± 2%    ~     (p=0.511 n=6+6)
FmtFprintfIntInt-12          182ns ± 2%     186ns ± 1%  +2.20%  (p=0.015 n=6+6)
FmtFprintfPrefixedInt-12     184ns ± 5%     181ns ± 2%    ~     (p=0.645 n=6+6)
FmtFprintfFloat-12           272ns ± 7%     265ns ± 1%    ~     (p=1.000 n=6+5)
FmtManyArgs-12               783ns ± 2%     802ns ± 2%  +2.38%  (p=0.017 n=6+6)
GobDecode-12                7.04ms ± 4%    7.00ms ± 2%    ~     (p=1.000 n=6+6)
GobEncode-12                6.36ms ± 6%    6.17ms ± 6%    ~     (p=0.240 n=6+6)
Gzip-12                      242ms ±14%     233ms ± 7%    ~     (p=0.310 n=6+6)
Gunzip-12                   36.6ms ±22%    36.0ms ± 9%    ~     (p=0.841 n=5+5)
HTTPClientServer-12         93.1µs ±29%    88.0µs ±32%    ~     (p=0.240 n=6+6)
JSONEncode-12               27.1ms ±39%    26.2ms ±35%    ~     (p=0.589 n=6+6)
JSONDecode-12               71.7ms ±36%    71.5ms ±36%    ~     (p=0.937 n=6+6)
Mandelbrot200-12            4.78ms ±10%    4.70ms ±16%    ~     (p=0.394 n=6+6)
GoParse-12                  4.86ms ±34%    4.95ms ±36%    ~     (p=1.000 n=6+6)
RegexpMatchEasy0_32-12       110ns ±37%     110ns ±36%    ~     (p=0.660 n=6+6)
RegexpMatchEasy0_1K-12       240ns ±38%     234ns ±47%    ~     (p=0.554 n=6+6)
RegexpMatchEasy1_32-12      77.2ns ± 2%    77.2ns ±10%    ~     (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12       337ns ± 5%     331ns ± 4%    ~     (p=0.552 n=6+6)
RegexpMatchMedium_32-12      125ns ±13%     132ns ±26%    ~     (p=0.561 n=6+6)
RegexpMatchMedium_1K-12     35.9µs ± 3%    36.1µs ± 5%    ~     (p=0.818 n=6+6)
RegexpMatchHard_32-12       1.81µs ± 4%    1.82µs ± 5%    ~     (p=0.452 n=5+5)
RegexpMatchHard_1K-12       52.4µs ± 2%    54.4µs ± 3%  +3.84%  (p=0.002 n=6+6)
Revcomp-12                   401ms ± 2%     390ms ± 1%  -2.82%  (p=0.002 n=6+6)
Template-12                 54.5ms ± 3%    54.6ms ± 1%    ~     (p=0.589 n=6+6)
TimeParse-12                 294ns ± 1%     298ns ± 2%    ~     (p=0.160 n=6+6)
TimeFormat-12                323ns ± 4%     318ns ± 5%    ~     (p=0.297 n=6+6)

name                      old speed      new speed      delta
GobDecode-12               109MB/s ± 4%   110MB/s ± 2%    ~     (p=1.000 n=6+6)
GobEncode-12               121MB/s ± 6%   125MB/s ± 6%    ~     (p=0.240 n=6+6)
Gzip-12                   80.4MB/s ±12%  83.3MB/s ± 7%    ~     (p=0.310 n=6+6)
Gunzip-12                  495MB/s ±41%   541MB/s ± 9%    ~     (p=0.931 n=6+5)
JSONEncode-12             80.7MB/s ±39%  82.8MB/s ±34%    ~     (p=0.589 n=6+6)
JSONDecode-12             30.4MB/s ±40%  31.0MB/s ±37%    ~     (p=0.937 n=6+6)
GoParse-12                13.2MB/s ±33%  13.2MB/s ±35%    ~     (p=1.000 n=6+6)
RegexpMatchEasy0_32-12     321MB/s ±34%   326MB/s ±34%    ~     (p=0.699 n=6+6)
RegexpMatchEasy0_1K-12    4.49GB/s ±31%  4.74GB/s ±37%    ~     (p=0.589 n=6+6)
RegexpMatchEasy1_32-12     414MB/s ± 2%   415MB/s ± 9%    ~     (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12    3.03GB/s ± 5%  3.09GB/s ± 4%    ~     (p=0.699 n=6+6)
RegexpMatchMedium_32-12   7.99MB/s ±12%  7.68MB/s ±22%    ~     (p=0.589 n=6+6)
RegexpMatchMedium_1K-12   28.5MB/s ± 3%  28.4MB/s ± 5%    ~     (p=0.818 n=6+6)
RegexpMatchHard_32-12     17.7MB/s ± 4%  17.0MB/s ±15%    ~     (p=0.351 n=5+6)
RegexpMatchHard_1K-12     19.6MB/s ± 2%  18.8MB/s ± 3%  -3.67%  (p=0.002 n=6+6)
Revcomp-12                 634MB/s ± 2%   653MB/s ± 1%  +2.89%  (p=0.002 n=6+6)
Template-12               35.6MB/s ± 3%  35.5MB/s ± 1%    ~     (p=0.615 n=6+6)

Change-Id: I465a47f74227f316e3abea231444f48c7a30ef85
Reviewed-on: https://go-review.googlesource.com/24493
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
src/runtime/malloc.go
src/runtime/msize.go

index b079a07d518456a962634802aa019693c31b9424..38c7a3b8474fa1bfc9657d95451800e0612908cc 100644 (file)
@@ -491,7 +491,7 @@ func nextFreeFast(s *mspan) gclinkptr {
 // weight allocation. If it is a heavy weight allocation the caller must
 // determine whether a new GC cycle needs to be started or if the GC is active
 // whether this goroutine needs to assist the GC.
-func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
+func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
        s = c.alloc[sizeclass]
        shouldhelpgc = false
        freeIndex := s.nextFreeIndex()
@@ -645,11 +645,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
                        }
                        size = maxTinySize
                } else {
-                       var sizeclass int8
-                       if size <= 1024-8 {
-                               sizeclass = size_to_class8[(size+7)>>3]
+                       var sizeclass uint8
+                       if size <= smallSizeMax-8 {
+                               sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]
                        } else {
-                               sizeclass = size_to_class128[(size-1024+127)>>7]
+                               sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
                        }
                        size = uintptr(class_to_size[sizeclass])
                        span := c.alloc[sizeclass]
index 18577b309bda5961d411ea49d85f9f9c92db3315..00c1e9d34081ab97a4253eba32e0f8fde092a219 100644 (file)
@@ -46,21 +46,27 @@ package runtime
 // size divided by 128 (rounded up).  The arrays are filled in
 // by InitSizes.
 
-var class_to_size [_NumSizeClasses]int32
-var class_to_allocnpages [_NumSizeClasses]int32
+const (
+       smallSizeDiv = 8
+       smallSizeMax = 1024
+       largeSizeDiv = 128
+)
+
+var class_to_size [_NumSizeClasses]uint32
+var class_to_allocnpages [_NumSizeClasses]uint32
 var class_to_divmagic [_NumSizeClasses]divMagic
 
-var size_to_class8 [1024/8 + 1]int8
-var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
+var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
 
-func sizeToClass(size int32) int32 {
+func sizeToClass(size uint32) uint32 {
        if size > _MaxSmallSize {
                throw("invalid size")
        }
-       if size > 1024-8 {
-               return int32(size_to_class128[(size-1024+127)>>7])
+       if size > smallSizeMax-8 {
+               return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv])
        }
-       return int32(size_to_class8[(size+7)>>3])
+       return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
 }
 
 func initSizes() {
@@ -97,20 +103,41 @@ func initSizes() {
                // use just this size instead of having two
                // different sizes.
                if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
-                       class_to_size[sizeclass-1] = int32(size)
+                       class_to_size[sizeclass-1] = uint32(size)
                        continue
                }
 
-               class_to_allocnpages[sizeclass] = int32(npages)
-               class_to_size[sizeclass] = int32(size)
+               class_to_allocnpages[sizeclass] = uint32(npages)
+               class_to_size[sizeclass] = uint32(size)
                sizeclass++
        }
        if sizeclass != _NumSizeClasses {
                print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
                throw("bad NumSizeClasses")
        }
+
+       // Increase object sizes if we can fit the same number of larger objects
+       // into the same number of pages. For example, we choose size 8448 above
+       // with 6 objects in 7 pages. But we can well use object size 9472,
+       // which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
+       // We need to preserve at least largeSizeDiv alignment otherwise
+       // sizeToClass won't work.
+       for i := 1; i < _NumSizeClasses; i++ {
+               npages := class_to_allocnpages[i]
+               psize := npages * _PageSize
+               size := class_to_size[i]
+               new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
+               if new_size > size {
+                       class_to_size[i] = new_size
+               }
+       }
+
        // Check maxObjsPerSpan => number of objects invariant.
        for i, size := range class_to_size {
+               if i != 0 && class_to_size[i-1] >= size {
+                       throw("non-monotonic size classes")
+               }
+
                if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
                        throw("span contains too many objects")
                }
@@ -122,18 +149,18 @@ func initSizes() {
        nextsize := 0
        for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
                for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
-                       size_to_class8[nextsize/8] = int8(sizeclass)
+                       size_to_class8[nextsize/8] = uint8(sizeclass)
                }
                if nextsize >= 1024 {
                        for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
-                               size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
+                               size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
                        }
                }
        }
 
        // Double-check SizeToClass.
        if false {
-               for n := int32(0); n < _MaxSmallSize; n++ {
+               for n := uint32(0); n < _MaxSmallSize; n++ {
                        sizeclass := sizeToClass(n)
                        if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
                                print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
@@ -186,10 +213,10 @@ dump:
 // Returns size of the memory block that mallocgc will allocate if you ask for the size.
 func roundupsize(size uintptr) uintptr {
        if size < _MaxSmallSize {
-               if size <= 1024-8 {
-                       return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
+               if size <= smallSizeMax-8 {
+                       return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]])
                } else {
-                       return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
+                       return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]])
                }
        }
        if size+_PageSize < size {