From 14e59511661303ab1406f7c21ee27e58bcd0750e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 27 Jun 2016 12:23:39 +0200 Subject: [PATCH] runtime: increase malloc size classes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When we calculate class sizes, in some cases we discard considerable amounts of memory without an apparent reason. For example, we choose size 8448 with 6 objects in 7 pages. But we can well use object size 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%). Increase class sizes to the max value that leads to the same page count/number of objects. Full list of affected size classes: class 36: pages: 2 size: 1664->1792 +128 (7.69%) class 39: pages: 1 size: 2560->2688 +128 (5.0%) class 40: pages: 3 size: 2816->3072 +256 (9.9%) class 41: pages: 2 size: 3072->3200 +128 (4.16%) class 42: pages: 3 size: 3328->3456 +128 (3.84%) class 44: pages: 3 size: 4608->4864 +256 (5.55%) class 47: pages: 4 size: 6400->6528 +128 (2.0%) class 48: pages: 5 size: 6656->6784 +128 (1.92%) class 51: pages: 7 size: 8448->9472 +1024 (12.12%) class 52: pages: 6 size: 8704->9728 +1024 (11.76%) class 53: pages: 5 size: 9472->10240 +768 (8.10%) class 54: pages: 4 size: 10496->10880 +384 (3.65%) class 57: pages: 7 size: 14080->14336 +256 (1.81%) class 59: pages: 9 size: 16640->18432 +1792 (10.76%) class 60: pages: 7 size: 17664->19072 +1408 (7.97%) class 62: pages: 8 size: 21248->21760 +512 (2.40%) class 64: pages: 10 size: 24832->27264 +2432 (9.79%) class 65: pages: 7 size: 28416->28672 +256 (0.90%) name old time/op new time/op delta BinaryTree17-12 2.59s ± 5% 2.52s ± 4% ~ (p=0.132 n=6+6) Fannkuch11-12 2.13s ± 3% 2.17s ± 3% ~ (p=0.180 n=6+6) FmtFprintfEmpty-12 47.0ns ± 3% 46.6ns ± 1% ~ (p=0.355 n=6+5) FmtFprintfString-12 131ns ± 0% 131ns ± 1% ~ (p=0.476 n=4+6) FmtFprintfInt-12 121ns ± 6% 122ns ± 2% ~ (p=0.511 n=6+6) FmtFprintfIntInt-12 182ns ± 2% 186ns ± 1% +2.20% (p=0.015 n=6+6) FmtFprintfPrefixedInt-12 184ns ± 5% 181ns ± 2% ~ (p=0.645 n=6+6) FmtFprintfFloat-12 272ns ± 7% 265ns ± 1% ~ (p=1.000 n=6+5) FmtManyArgs-12 783ns ± 2% 802ns ± 2% +2.38% (p=0.017 n=6+6) GobDecode-12 7.04ms ± 4% 7.00ms ± 2% ~ (p=1.000 n=6+6) GobEncode-12 6.36ms ± 6% 6.17ms ± 6% ~ (p=0.240 n=6+6) Gzip-12 242ms ±14% 233ms ± 7% ~ (p=0.310 n=6+6) Gunzip-12 36.6ms ±22% 36.0ms ± 9% ~ (p=0.841 n=5+5) HTTPClientServer-12 93.1µs ±29% 88.0µs ±32% ~ (p=0.240 n=6+6) JSONEncode-12 27.1ms ±39% 26.2ms ±35% ~ (p=0.589 n=6+6) JSONDecode-12 71.7ms ±36% 71.5ms ±36% ~ (p=0.937 n=6+6) Mandelbrot200-12 4.78ms ±10% 4.70ms ±16% ~ (p=0.394 n=6+6) GoParse-12 4.86ms ±34% 4.95ms ±36% ~ (p=1.000 n=6+6) RegexpMatchEasy0_32-12 110ns ±37% 110ns ±36% ~ (p=0.660 n=6+6) RegexpMatchEasy0_1K-12 240ns ±38% 234ns ±47% ~ (p=0.554 n=6+6) RegexpMatchEasy1_32-12 77.2ns ± 2% 77.2ns ±10% ~ (p=0.699 n=6+6) RegexpMatchEasy1_1K-12 337ns ± 5% 331ns ± 4% ~ (p=0.552 n=6+6) RegexpMatchMedium_32-12 125ns ±13% 132ns ±26% ~ (p=0.561 n=6+6) RegexpMatchMedium_1K-12 35.9µs ± 3% 36.1µs ± 5% ~ (p=0.818 n=6+6) RegexpMatchHard_32-12 1.81µs ± 4% 1.82µs ± 5% ~ (p=0.452 n=5+5) RegexpMatchHard_1K-12 52.4µs ± 2% 54.4µs ± 3% +3.84% (p=0.002 n=6+6) Revcomp-12 401ms ± 2% 390ms ± 1% -2.82% (p=0.002 n=6+6) Template-12 54.5ms ± 3% 54.6ms ± 1% ~ (p=0.589 n=6+6) TimeParse-12 294ns ± 1% 298ns ± 2% ~ (p=0.160 n=6+6) TimeFormat-12 323ns ± 4% 318ns ± 5% ~ (p=0.297 n=6+6) name old speed new speed delta GobDecode-12 109MB/s ± 4% 110MB/s ± 2% ~ (p=1.000 n=6+6) GobEncode-12 121MB/s ± 6% 125MB/s ± 6% ~ (p=0.240 n=6+6) Gzip-12 80.4MB/s ±12% 83.3MB/s ± 7% ~ (p=0.310 n=6+6) Gunzip-12 495MB/s ±41% 541MB/s ± 9% ~ (p=0.931 n=6+5) JSONEncode-12 80.7MB/s ±39% 82.8MB/s ±34% ~ (p=0.589 n=6+6) JSONDecode-12 30.4MB/s ±40% 31.0MB/s ±37% ~ (p=0.937 n=6+6) GoParse-12 13.2MB/s ±33% 13.2MB/s ±35% ~ (p=1.000 n=6+6) RegexpMatchEasy0_32-12 321MB/s ±34% 326MB/s ±34% ~ (p=0.699 n=6+6) RegexpMatchEasy0_1K-12 4.49GB/s ±31% 4.74GB/s ±37% ~ (p=0.589 n=6+6) RegexpMatchEasy1_32-12 414MB/s ± 2% 415MB/s ± 9% ~ (p=0.699 n=6+6) RegexpMatchEasy1_1K-12 3.03GB/s ± 5% 3.09GB/s ± 4% ~ (p=0.699 n=6+6) RegexpMatchMedium_32-12 7.99MB/s ±12% 7.68MB/s ±22% ~ (p=0.589 n=6+6) RegexpMatchMedium_1K-12 28.5MB/s ± 3% 28.4MB/s ± 5% ~ (p=0.818 n=6+6) RegexpMatchHard_32-12 17.7MB/s ± 4% 17.0MB/s ±15% ~ (p=0.351 n=5+6) RegexpMatchHard_1K-12 19.6MB/s ± 2% 18.8MB/s ± 3% -3.67% (p=0.002 n=6+6) Revcomp-12 634MB/s ± 2% 653MB/s ± 1% +2.89% (p=0.002 n=6+6) Template-12 35.6MB/s ± 3% 35.5MB/s ± 1% ~ (p=0.615 n=6+6) Change-Id: I465a47f74227f316e3abea231444f48c7a30ef85 Reviewed-on: https://go-review.googlesource.com/24493 Run-TryBot: Dmitry Vyukov TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall Reviewed-by: Austin Clements --- src/runtime/malloc.go | 10 +++---- src/runtime/msize.go | 61 +++++++++++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index b079a07d51..38c7a3b847 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -491,7 +491,7 @@ func nextFreeFast(s *mspan) gclinkptr { // weight allocation. If it is a heavy weight allocation the caller must // determine whether a new GC cycle needs to be started or if the GC is active // whether this goroutine needs to assist the GC. -func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) { +func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) { s = c.alloc[sizeclass] shouldhelpgc = false freeIndex := s.nextFreeIndex() @@ -645,11 +645,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } size = maxTinySize } else { - var sizeclass int8 - if size <= 1024-8 { - sizeclass = size_to_class8[(size+7)>>3] + var sizeclass uint8 + if size <= smallSizeMax-8 { + sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv] } else { - sizeclass = size_to_class128[(size-1024+127)>>7] + sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv] } size = uintptr(class_to_size[sizeclass]) span := c.alloc[sizeclass] diff --git a/src/runtime/msize.go b/src/runtime/msize.go index 18577b309b..00c1e9d340 100644 --- a/src/runtime/msize.go +++ b/src/runtime/msize.go @@ -46,21 +46,27 @@ package runtime // size divided by 128 (rounded up). The arrays are filled in // by InitSizes. -var class_to_size [_NumSizeClasses]int32 -var class_to_allocnpages [_NumSizeClasses]int32 +const ( + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 +) + +var class_to_size [_NumSizeClasses]uint32 +var class_to_allocnpages [_NumSizeClasses]uint32 var class_to_divmagic [_NumSizeClasses]divMagic -var size_to_class8 [1024/8 + 1]int8 -var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 +var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8 +var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8 -func sizeToClass(size int32) int32 { +func sizeToClass(size uint32) uint32 { if size > _MaxSmallSize { throw("invalid size") } - if size > 1024-8 { - return int32(size_to_class128[(size-1024+127)>>7]) + if size > smallSizeMax-8 { + return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]) } - return int32(size_to_class8[(size+7)>>3]) + return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) } func initSizes() { @@ -97,20 +103,41 @@ func initSizes() { // use just this size instead of having two // different sizes. if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { - class_to_size[sizeclass-1] = int32(size) + class_to_size[sizeclass-1] = uint32(size) continue } - class_to_allocnpages[sizeclass] = int32(npages) - class_to_size[sizeclass] = int32(size) + class_to_allocnpages[sizeclass] = uint32(npages) + class_to_size[sizeclass] = uint32(size) sizeclass++ } if sizeclass != _NumSizeClasses { print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") throw("bad NumSizeClasses") } + + // Increase object sizes if we can fit the same number of larger objects + // into the same number of pages. For example, we choose size 8448 above + // with 6 objects in 7 pages. But we can well use object size 9472, + // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). + // We need to preserve at least largeSizeDiv alignment otherwise + // sizeToClass won't work. + for i := 1; i < _NumSizeClasses; i++ { + npages := class_to_allocnpages[i] + psize := npages * _PageSize + size := class_to_size[i] + new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1) + if new_size > size { + class_to_size[i] = new_size + } + } + // Check maxObjsPerSpan => number of objects invariant. for i, size := range class_to_size { + if i != 0 && class_to_size[i-1] >= size { + throw("non-monotonic size classes") + } + if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan { throw("span contains too many objects") } @@ -122,18 +149,18 @@ func initSizes() { nextsize := 0 for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { - size_to_class8[nextsize/8] = int8(sizeclass) + size_to_class8[nextsize/8] = uint8(sizeclass) } if nextsize >= 1024 { for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { - size_to_class128[(nextsize-1024)/128] = int8(sizeclass) + size_to_class128[(nextsize-1024)/128] = uint8(sizeclass) } } } // Double-check SizeToClass. if false { - for n := int32(0); n < _MaxSmallSize; n++ { + for n := uint32(0); n < _MaxSmallSize; n++ { sizeclass := sizeToClass(n) if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") @@ -186,10 +213,10 @@ dump: // Returns size of the memory block that mallocgc will allocate if you ask for the size. func roundupsize(size uintptr) uintptr { if size < _MaxSmallSize { - if size <= 1024-8 { - return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) + if size <= smallSizeMax-8 { + return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]]) } else { - return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) + return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]]) } } if size+_PageSize < size { -- 2.48.1