From: Ilya Tocar Date: Fri, 23 Mar 2018 20:45:03 +0000 (-0500) Subject: runtime: avoid division in growslice X-Git-Tag: go1.11beta1~832 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e53cc7ae931eeec0863d066d98972fc52fcd09f0;p=gostls13.git runtime: avoid division in growslice Add a special case for power-of-2 sized elements. We can replace div/mul with left/right shift and avoid expensive operation. growslice is hotter for short slices of small elements, such as int16, so add an int16 version for GrowSlice benchmark. name old time/op new time/op delta GrowSlice/Byte-6 61.3ns ± 3% 60.5ns ± 4% -1.33% (p=0.002 n=30+30) GrowSlice/Int16-6 94.0ns ± 4% 84.7ns ± 2% -9.82% (p=0.000 n=30+30) GrowSlice/Int-6 100ns ± 1% 99ns ± 1% -0.25% (p=0.032 n=29+28) GrowSlice/Ptr-6 197ns ± 2% 195ns ± 2% -0.94% (p=0.001 n=30+29) GrowSlice/Struct/24-6 168ns ± 1% 166ns ± 2% -1.09% (p=0.000 n=25+30) GrowSlice/Struct/32-6 187ns ± 2% 180ns ± 1% -3.59% (p=0.000 n=30+30) GrowSlice/Struct/40-6 241ns ± 2% 238ns ± 2% -1.41% (p=0.000 n=30+30) Change-Id: I31e8388d73fd9356e2dcc091d8d92eef3e3ccdbc Reviewed-on: https://go-review.googlesource.com/102279 Run-TryBot: Ilya Tocar TryBot-Result: Gobot Gobot Reviewed-by: Josh Bleecher Snyder --- diff --git a/src/runtime/slice.go b/src/runtime/slice.go index 9f35a89400..0924d1d8e6 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -128,19 +129,36 @@ func growslice(et *_type, old slice, cap int) slice { var overflow bool var lenmem, newlenmem, capmem uintptr const ptrSize = unsafe.Sizeof((*byte)(nil)) - switch et.size { - case 1: + // Specialize for common values of et.size. + // For 1 we don't need any division/multiplication. + // For ptrSize, compiler will optimize division/multiplication into a shift by a constant. + // For powers of 2, use a variable shift. + switch { + case et.size == 1: lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) overflow = uintptr(newcap) > maxAlloc newcap = int(capmem) - case ptrSize: + case et.size == ptrSize: lenmem = uintptr(old.len) * ptrSize newlenmem = uintptr(cap) * ptrSize capmem = roundupsize(uintptr(newcap) * ptrSize) overflow = uintptr(newcap) > maxAlloc/ptrSize newcap = int(capmem / ptrSize) + case isPowerOfTwo(et.size): + var shift uintptr + if ptrSize == 8 { + // Mask shift for better code generation. + shift = uintptr(sys.Ctz64(uint64(et.size))) & 63 + } else { + shift = uintptr(sys.Ctz32(uint32(et.size))) & 31 + } + lenmem = uintptr(old.len) << shift + newlenmem = uintptr(cap) << shift + capmem = roundupsize(uintptr(newcap) << shift) + overflow = uintptr(newcap) > (maxAlloc >> shift) + newcap = int(capmem >> shift) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size @@ -189,6 +207,10 @@ func growslice(et *_type, old slice, cap int) slice { return slice{p, old.len, newcap} } +func isPowerOfTwo(x uintptr) bool { + return x&(x-1) == 0 +} + func slicecopy(to, fm slice, width uintptr) int { if fm.len == 0 || to.len == 0 { return 0 diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index ef1e812c0d..46db071ebe 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -31,6 +31,12 @@ func BenchmarkGrowSlice(b *testing.B) { _ = append([]byte(nil), x...) } }) + b.Run("Int16", func(b *testing.B) { + x := make([]int16, 9) + for i := 0; i < b.N; i++ { + _ = append([]int16(nil), x...) + } + }) b.Run("Int", func(b *testing.B) { x := make([]int, 9) for i := 0; i < b.N; i++ {