From: Josh Bleecher Snyder Date: Mon, 27 Jan 2020 19:28:05 +0000 (-0800) Subject: runtime: prevent allocation when converting small ints to interfaces X-Git-Tag: go1.15beta1~961 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9828c43288a53d3df75b1f73edad0d037a91dff8;p=gostls13.git runtime: prevent allocation when converting small ints to interfaces Prior to this change, we avoid allocation when converting 0 to an interface. This change extends that optimization to larger value types whose values happens to be in the range 0 to 255. This is marginally more expensive in the case of a 0 value, in that the address is computed rather than fixed. name old time/op new time/op delta ConvT2ESmall-8 2.36ns ± 4% 2.65ns ± 4% +12.23% (p=0.000 n=87+91) ConvT2EUintptr-8 2.36ns ± 4% 2.84ns ± 6% +20.05% (p=0.000 n=96+99) ConvT2ELarge-8 23.8ns ± 2% 23.1ns ± 3% -2.94% (p=0.000 n=93+95) ConvT2ISmall-8 2.67ns ± 5% 2.74ns ±27% ~ (p=0.214 n=99+100) ConvT2IUintptr-8 2.65ns ± 5% 2.46ns ± 5% -7.19% (p=0.000 n=98+98) ConvT2ILarge-8 24.2ns ± 2% 23.5ns ± 4% -3.16% (p=0.000 n=91+97) ConvT2Ezero/zero/16-8 2.79ns ± 6% 2.99ns ± 4% +7.52% (p=0.000 n=94+88) ConvT2Ezero/zero/32-8 2.34ns ± 3% 2.65ns ± 3% +13.06% (p=0.000 n=92+98) ConvT2Ezero/zero/64-8 2.35ns ± 4% 2.65ns ± 6% +12.86% (p=0.000 n=99+94) ConvT2Ezero/zero/str-8 2.55ns ± 4% 2.54ns ± 4% ~ (p=0.063 n=97+99) ConvT2Ezero/zero/slice-8 2.82ns ± 4% 2.85ns ± 5% +1.00% (p=0.000 n=99+95) ConvT2Ezero/zero/big-8 94.3ns ± 5% 93.4ns ± 4% -0.94% (p=0.000 n=88+90) ConvT2Ezero/nonzero/str-8 29.6ns ± 3% 27.7ns ± 3% -6.69% (p=0.000 n=98+97) ConvT2Ezero/nonzero/slice-8 36.6ns ± 2% 37.1ns ± 2% +1.31% (p=0.000 n=94+90) ConvT2Ezero/nonzero/big-8 93.4ns ± 3% 92.7ns ± 3% -0.74% (p=0.000 n=88+84) ConvT2Ezero/smallint/16-8 13.3ns ± 4% 2.7ns ± 6% -79.82% (p=0.000 n=100+97) ConvT2Ezero/smallint/32-8 12.5ns ± 1% 2.9ns ± 5% -77.17% (p=0.000 n=85+96) ConvT2Ezero/smallint/64-8 14.7ns ± 3% 2.6ns ± 3% -82.05% (p=0.000 n=94+94) ConvT2Ezero/largeint/16-8 14.0ns ± 4% 13.2ns ± 7% -5.44% (p=0.000 n=95+99) ConvT2Ezero/largeint/32-8 12.8ns ± 4% 12.9ns ± 3% ~ (p=0.096 n=99+87) ConvT2Ezero/largeint/64-8 15.5ns ± 2% 15.0ns ± 2% -3.46% (p=0.000 n=95+96) An example of a program for which this makes a perceptible difference is running the compiler with the -S flag: name old time/op new time/op delta Template 349ms ± 2% 344ms ± 2% -1.48% (p=0.000 n=23+25) Unicode 138ms ± 4% 136ms ± 3% -1.67% (p=0.003 n=25+25) GoTypes 1.25s ± 2% 1.24s ± 2% -1.11% (p=0.001 n=24+25) Compiler 5.73s ± 2% 5.67s ± 2% -1.09% (p=0.002 n=25+24) SSA 20.2s ± 2% 19.9s ± 2% -1.45% (p=0.000 n=25+23) Flate 216ms ± 4% 210ms ± 2% -2.77% (p=0.000 n=25+24) GoParser 283ms ± 2% 278ms ± 3% -1.58% (p=0.000 n=23+23) Reflect 757ms ± 2% 745ms ± 2% -1.58% (p=0.000 n=25+25) Tar 303ms ± 4% 296ms ± 2% -2.20% (p=0.000 n=22+23) XML 415ms ± 2% 411ms ± 3% -0.94% (p=0.002 n=25+22) [Geo mean] 726ms 715ms -1.59% name old user-time/op new user-time/op delta Template 434ms ± 3% 427ms ± 2% -1.66% (p=0.000 n=23+24) Unicode 204ms ±12% 198ms ±12% -2.83% (p=0.032 n=25+25) GoTypes 1.59s ± 2% 1.56s ± 2% -1.64% (p=0.000 n=22+25) Compiler 7.50s ± 1% 7.40s ± 2% -1.32% (p=0.000 n=25+25) SSA 27.2s ± 2% 26.8s ± 2% -1.50% (p=0.000 n=24+23) Flate 266ms ± 6% 254ms ± 3% -4.38% (p=0.000 n=25+25) GoParser 357ms ± 2% 351ms ± 2% -1.90% (p=0.000 n=24+23) Reflect 966ms ± 2% 947ms ± 2% -1.94% (p=0.000 n=24+25) Tar 387ms ± 2% 380ms ± 3% -1.83% (p=0.000 n=22+24) XML 538ms ± 1% 532ms ± 1% -1.15% (p=0.000 n=24+20) [Geo mean] 942ms 923ms -2.02% name old alloc/op new alloc/op delta Template 54.1MB ± 0% 52.9MB ± 0% -2.26% (p=0.000 n=25+25) Unicode 33.5MB ± 0% 33.1MB ± 0% -1.03% (p=0.000 n=25+24) GoTypes 189MB ± 0% 185MB ± 0% -2.27% (p=0.000 n=25+25) Compiler 875MB ± 0% 858MB ± 0% -1.99% (p=0.000 n=23+25) SSA 3.19GB ± 0% 3.13GB ± 0% -1.95% (p=0.000 n=25+25) Flate 32.9MB ± 0% 32.2MB ± 0% -2.26% (p=0.000 n=25+25) GoParser 44.0MB ± 0% 42.9MB ± 0% -2.33% (p=0.000 n=25+25) Reflect 117MB ± 0% 114MB ± 0% -2.60% (p=0.000 n=25+25) Tar 48.6MB ± 0% 47.5MB ± 0% -2.18% (p=0.000 n=25+24) XML 65.7MB ± 0% 64.4MB ± 0% -1.96% (p=0.000 n=23+25) [Geo mean] 118MB 115MB -2.08% name old allocs/op new allocs/op delta Template 1.07M ± 0% 0.92M ± 0% -14.29% (p=0.000 n=25+25) Unicode 539k ± 0% 494k ± 0% -8.27% (p=0.000 n=25+25) GoTypes 3.97M ± 0% 3.43M ± 0% -13.71% (p=0.000 n=24+25) Compiler 17.6M ± 0% 15.4M ± 0% -12.69% (p=0.000 n=25+24) SSA 66.1M ± 0% 58.1M ± 0% -12.17% (p=0.000 n=25+25) Flate 629k ± 0% 536k ± 0% -14.73% (p=0.000 n=24+24) GoParser 929k ± 0% 799k ± 0% -13.96% (p=0.000 n=25+25) Reflect 2.49M ± 0% 2.11M ± 0% -15.28% (p=0.000 n=25+25) Tar 919k ± 0% 788k ± 0% -14.30% (p=0.000 n=25+25) XML 1.28M ± 0% 1.11M ± 0% -12.85% (p=0.000 n=24+25) [Geo mean] 2.32M 2.01M -13.24% There is a slight increase in binary size from this change: file before after Δ % addr2line 4307728 4307760 +32 +0.001% api 5972680 5972728 +48 +0.001% asm 5114200 5114232 +32 +0.001% buildid 2843720 2847848 +4128 +0.145% cgo 4823736 4827864 +4128 +0.086% compile 24912056 24912104 +48 +0.000% cover 5259800 5259832 +32 +0.001% dist 3665080 3665128 +48 +0.001% doc 4672712 4672744 +32 +0.001% fix 3376952 3376984 +32 +0.001% link 6618008 6622152 +4144 +0.063% nm 4253280 4257424 +4144 +0.097% objdump 4655376 4659504 +4128 +0.089% pack 2294280 2294328 +48 +0.002% pprof 14747476 14751620 +4144 +0.028% test2json 2819320 2823448 +4128 +0.146% trace 11665068 11669212 +4144 +0.036% vet 8342360 8342408 +48 +0.001% Change-Id: I38ef70244e23069bfd14334061d43ae22a294519 Reviewed-on: https://go-review.googlesource.com/c/go/+/216401 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/runtime/iface.go b/src/runtime/iface.go index 05de282aa7..892e5a400f 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -331,8 +331,11 @@ func convT2E(t *_type, elem unsafe.Pointer) (e eface) { } func convT16(val uint16) (x unsafe.Pointer) { - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) + if val < uint16(len(staticuint64s)) { + x = unsafe.Pointer(&staticuint64s[val]) + if sys.BigEndian { + x = add(x, 6) + } } else { x = mallocgc(2, uint16Type, false) *(*uint16)(x) = val @@ -341,8 +344,11 @@ func convT16(val uint16) (x unsafe.Pointer) { } func convT32(val uint32) (x unsafe.Pointer) { - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) + if val < uint32(len(staticuint64s)) { + x = unsafe.Pointer(&staticuint64s[val]) + if sys.BigEndian { + x = add(x, 4) + } } else { x = mallocgc(4, uint32Type, false) *(*uint32)(x) = val @@ -351,8 +357,8 @@ func convT32(val uint32) (x unsafe.Pointer) { } func convT64(val uint64) (x unsafe.Pointer) { - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) + if val < uint64(len(staticuint64s)) { + x = unsafe.Pointer(&staticuint64s[val]) } else { x = mallocgc(8, uint64Type, false) *(*uint64)(x) = val @@ -556,3 +562,39 @@ var staticbytes = [...]byte{ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, } + +// staticuint64s is used to avoid allocating in convTx for small integer values. +var staticuint64s = [...]uint64{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +} diff --git a/src/runtime/iface_test.go b/src/runtime/iface_test.go index 6d8f8614d9..73beebffe2 100644 --- a/src/runtime/iface_test.go +++ b/src/runtime/iface_test.go @@ -311,17 +311,20 @@ var ( eight8 uint8 = 8 eight8I T8 = 8 - zero16 uint16 = 0 - zero16I T16 = 0 - one16 uint16 = 1 + zero16 uint16 = 0 + zero16I T16 = 0 + one16 uint16 = 1 + thousand16 uint16 = 1000 - zero32 uint32 = 0 - zero32I T32 = 0 - one32 uint32 = 1 + zero32 uint32 = 0 + zero32I T32 = 0 + one32 uint32 = 1 + thousand32 uint32 = 1000 - zero64 uint64 = 0 - zero64I T64 = 0 - one64 uint64 = 1 + zero64 uint64 = 0 + zero64I T64 = 0 + one64 uint64 = 1 + thousand64 uint64 = 1000 zerostr string = "" zerostrI Tstr = "" @@ -369,6 +372,23 @@ func BenchmarkConvT2Ezero(b *testing.B) { }) }) b.Run("nonzero", func(b *testing.B) { + b.Run("str", func(b *testing.B) { + for i := 0; i < b.N; i++ { + e = nzstr + } + }) + b.Run("slice", func(b *testing.B) { + for i := 0; i < b.N; i++ { + e = nzslice + } + }) + b.Run("big", func(b *testing.B) { + for i := 0; i < b.N; i++ { + e = nzbig + } + }) + }) + b.Run("smallint", func(b *testing.B) { b.Run("16", func(b *testing.B) { for i := 0; i < b.N; i++ { e = one16 @@ -384,19 +404,21 @@ func BenchmarkConvT2Ezero(b *testing.B) { e = one64 } }) - b.Run("str", func(b *testing.B) { + }) + b.Run("largeint", func(b *testing.B) { + b.Run("16", func(b *testing.B) { for i := 0; i < b.N; i++ { - e = nzstr + e = thousand16 } }) - b.Run("slice", func(b *testing.B) { + b.Run("32", func(b *testing.B) { for i := 0; i < b.N; i++ { - e = nzslice + e = thousand32 } }) - b.Run("big", func(b *testing.B) { + b.Run("64", func(b *testing.B) { for i := 0; i < b.N; i++ { - e = nzbig + e = thousand64 } }) })