]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: prevent allocation when converting small ints to interfaces
authorJosh Bleecher Snyder <josharian@gmail.com>
Mon, 27 Jan 2020 19:28:05 +0000 (11:28 -0800)
committerJosh Bleecher Snyder <josharian@gmail.com>
Mon, 2 Mar 2020 23:14:55 +0000 (23:14 +0000)
Prior to this change, we avoid allocation when
converting 0 to an interface.

This change extends that optimization to larger value types
whose values happens to be in the range 0 to 255.
This is marginally more expensive in the case of a 0 value,
in that the address is computed rather than fixed.

name                         old time/op  new time/op  delta
ConvT2ESmall-8               2.36ns ± 4%  2.65ns ± 4%  +12.23%  (p=0.000 n=87+91)
ConvT2EUintptr-8             2.36ns ± 4%  2.84ns ± 6%  +20.05%  (p=0.000 n=96+99)
ConvT2ELarge-8               23.8ns ± 2%  23.1ns ± 3%   -2.94%  (p=0.000 n=93+95)
ConvT2ISmall-8               2.67ns ± 5%  2.74ns ±27%     ~     (p=0.214 n=99+100)
ConvT2IUintptr-8             2.65ns ± 5%  2.46ns ± 5%   -7.19%  (p=0.000 n=98+98)
ConvT2ILarge-8               24.2ns ± 2%  23.5ns ± 4%   -3.16%  (p=0.000 n=91+97)
ConvT2Ezero/zero/16-8        2.79ns ± 6%  2.99ns ± 4%   +7.52%  (p=0.000 n=94+88)
ConvT2Ezero/zero/32-8        2.34ns ± 3%  2.65ns ± 3%  +13.06%  (p=0.000 n=92+98)
ConvT2Ezero/zero/64-8        2.35ns ± 4%  2.65ns ± 6%  +12.86%  (p=0.000 n=99+94)
ConvT2Ezero/zero/str-8       2.55ns ± 4%  2.54ns ± 4%     ~     (p=0.063 n=97+99)
ConvT2Ezero/zero/slice-8     2.82ns ± 4%  2.85ns ± 5%   +1.00%  (p=0.000 n=99+95)
ConvT2Ezero/zero/big-8       94.3ns ± 5%  93.4ns ± 4%   -0.94%  (p=0.000 n=88+90)
ConvT2Ezero/nonzero/str-8    29.6ns ± 3%  27.7ns ± 3%   -6.69%  (p=0.000 n=98+97)
ConvT2Ezero/nonzero/slice-8  36.6ns ± 2%  37.1ns ± 2%   +1.31%  (p=0.000 n=94+90)
ConvT2Ezero/nonzero/big-8    93.4ns ± 3%  92.7ns ± 3%   -0.74%  (p=0.000 n=88+84)
ConvT2Ezero/smallint/16-8    13.3ns ± 4%   2.7ns ± 6%  -79.82%  (p=0.000 n=100+97)
ConvT2Ezero/smallint/32-8    12.5ns ± 1%   2.9ns ± 5%  -77.17%  (p=0.000 n=85+96)
ConvT2Ezero/smallint/64-8    14.7ns ± 3%   2.6ns ± 3%  -82.05%  (p=0.000 n=94+94)
ConvT2Ezero/largeint/16-8    14.0ns ± 4%  13.2ns ± 7%   -5.44%  (p=0.000 n=95+99)
ConvT2Ezero/largeint/32-8    12.8ns ± 4%  12.9ns ± 3%     ~     (p=0.096 n=99+87)
ConvT2Ezero/largeint/64-8    15.5ns ± 2%  15.0ns ± 2%   -3.46%  (p=0.000 n=95+96)

An example of a program for which this makes a perceptible difference
is running the compiler with the -S flag:

name        old time/op       new time/op       delta
Template          349ms ± 2%        344ms ± 2%   -1.48%  (p=0.000 n=23+25)
Unicode           138ms ± 4%        136ms ± 3%   -1.67%  (p=0.003 n=25+25)
GoTypes           1.25s ± 2%        1.24s ± 2%   -1.11%  (p=0.001 n=24+25)
Compiler          5.73s ± 2%        5.67s ± 2%   -1.09%  (p=0.002 n=25+24)
SSA               20.2s ± 2%        19.9s ± 2%   -1.45%  (p=0.000 n=25+23)
Flate             216ms ± 4%        210ms ± 2%   -2.77%  (p=0.000 n=25+24)
GoParser          283ms ± 2%        278ms ± 3%   -1.58%  (p=0.000 n=23+23)
Reflect           757ms ± 2%        745ms ± 2%   -1.58%  (p=0.000 n=25+25)
Tar               303ms ± 4%        296ms ± 2%   -2.20%  (p=0.000 n=22+23)
XML               415ms ± 2%        411ms ± 3%   -0.94%  (p=0.002 n=25+22)
[Geo mean]        726ms             715ms        -1.59%

name        old user-time/op  new user-time/op  delta
Template          434ms ± 3%        427ms ± 2%   -1.66%  (p=0.000 n=23+24)
Unicode           204ms ±12%        198ms ±12%   -2.83%  (p=0.032 n=25+25)
GoTypes           1.59s ± 2%        1.56s ± 2%   -1.64%  (p=0.000 n=22+25)
Compiler          7.50s ± 1%        7.40s ± 2%   -1.32%  (p=0.000 n=25+25)
SSA               27.2s ± 2%        26.8s ± 2%   -1.50%  (p=0.000 n=24+23)
Flate             266ms ± 6%        254ms ± 3%   -4.38%  (p=0.000 n=25+25)
GoParser          357ms ± 2%        351ms ± 2%   -1.90%  (p=0.000 n=24+23)
Reflect           966ms ± 2%        947ms ± 2%   -1.94%  (p=0.000 n=24+25)
Tar               387ms ± 2%        380ms ± 3%   -1.83%  (p=0.000 n=22+24)
XML               538ms ± 1%        532ms ± 1%   -1.15%  (p=0.000 n=24+20)
[Geo mean]        942ms             923ms        -2.02%

name        old alloc/op      new alloc/op      delta
Template         54.1MB ± 0%       52.9MB ± 0%   -2.26%  (p=0.000 n=25+25)
Unicode          33.5MB ± 0%       33.1MB ± 0%   -1.03%  (p=0.000 n=25+24)
GoTypes           189MB ± 0%        185MB ± 0%   -2.27%  (p=0.000 n=25+25)
Compiler          875MB ± 0%        858MB ± 0%   -1.99%  (p=0.000 n=23+25)
SSA              3.19GB ± 0%       3.13GB ± 0%   -1.95%  (p=0.000 n=25+25)
Flate            32.9MB ± 0%       32.2MB ± 0%   -2.26%  (p=0.000 n=25+25)
GoParser         44.0MB ± 0%       42.9MB ± 0%   -2.33%  (p=0.000 n=25+25)
Reflect           117MB ± 0%        114MB ± 0%   -2.60%  (p=0.000 n=25+25)
Tar              48.6MB ± 0%       47.5MB ± 0%   -2.18%  (p=0.000 n=25+24)
XML              65.7MB ± 0%       64.4MB ± 0%   -1.96%  (p=0.000 n=23+25)
[Geo mean]        118MB             115MB        -2.08%

name        old allocs/op     new allocs/op     delta
Template          1.07M ± 0%        0.92M ± 0%  -14.29%  (p=0.000 n=25+25)
Unicode            539k ± 0%         494k ± 0%   -8.27%  (p=0.000 n=25+25)
GoTypes           3.97M ± 0%        3.43M ± 0%  -13.71%  (p=0.000 n=24+25)
Compiler          17.6M ± 0%        15.4M ± 0%  -12.69%  (p=0.000 n=25+24)
SSA               66.1M ± 0%        58.1M ± 0%  -12.17%  (p=0.000 n=25+25)
Flate              629k ± 0%         536k ± 0%  -14.73%  (p=0.000 n=24+24)
GoParser           929k ± 0%         799k ± 0%  -13.96%  (p=0.000 n=25+25)
Reflect           2.49M ± 0%        2.11M ± 0%  -15.28%  (p=0.000 n=25+25)
Tar                919k ± 0%         788k ± 0%  -14.30%  (p=0.000 n=25+25)
XML               1.28M ± 0%        1.11M ± 0%  -12.85%  (p=0.000 n=24+25)
[Geo mean]        2.32M             2.01M       -13.24%

There is a slight increase in binary size from this change:

file      before    after     Δ       %
addr2line 4307728   4307760   +32     +0.001%
api       5972680   5972728   +48     +0.001%
asm       5114200   5114232   +32     +0.001%
buildid   2843720   2847848   +4128   +0.145%
cgo       4823736   4827864   +4128   +0.086%
compile   24912056  24912104  +48     +0.000%
cover     5259800   5259832   +32     +0.001%
dist      3665080   3665128   +48     +0.001%
doc       4672712   4672744   +32     +0.001%
fix       3376952   3376984   +32     +0.001%
link      6618008   6622152   +4144   +0.063%
nm        4253280   4257424   +4144   +0.097%
objdump   4655376   4659504   +4128   +0.089%
pack      2294280   2294328   +48     +0.002%
pprof     14747476  14751620  +4144   +0.028%
test2json 2819320   2823448   +4128   +0.146%
trace     11665068  11669212  +4144   +0.036%
vet       8342360   8342408   +48     +0.001%

Change-Id: I38ef70244e23069bfd14334061d43ae22a294519
Reviewed-on: https://go-review.googlesource.com/c/go/+/216401
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/runtime/iface.go
src/runtime/iface_test.go

index 05de282aa758eab136e5fd7c88caf15ee675a19b..892e5a400f9cb999f4837c300167937a5e0b0797 100644 (file)
@@ -331,8 +331,11 @@ func convT2E(t *_type, elem unsafe.Pointer) (e eface) {
 }
 
 func convT16(val uint16) (x unsafe.Pointer) {
-       if val == 0 {
-               x = unsafe.Pointer(&zeroVal[0])
+       if val < uint16(len(staticuint64s)) {
+               x = unsafe.Pointer(&staticuint64s[val])
+               if sys.BigEndian {
+                       x = add(x, 6)
+               }
        } else {
                x = mallocgc(2, uint16Type, false)
                *(*uint16)(x) = val
@@ -341,8 +344,11 @@ func convT16(val uint16) (x unsafe.Pointer) {
 }
 
 func convT32(val uint32) (x unsafe.Pointer) {
-       if val == 0 {
-               x = unsafe.Pointer(&zeroVal[0])
+       if val < uint32(len(staticuint64s)) {
+               x = unsafe.Pointer(&staticuint64s[val])
+               if sys.BigEndian {
+                       x = add(x, 4)
+               }
        } else {
                x = mallocgc(4, uint32Type, false)
                *(*uint32)(x) = val
@@ -351,8 +357,8 @@ func convT32(val uint32) (x unsafe.Pointer) {
 }
 
 func convT64(val uint64) (x unsafe.Pointer) {
-       if val == 0 {
-               x = unsafe.Pointer(&zeroVal[0])
+       if val < uint64(len(staticuint64s)) {
+               x = unsafe.Pointer(&staticuint64s[val])
        } else {
                x = mallocgc(8, uint64Type, false)
                *(*uint64)(x) = val
@@ -556,3 +562,39 @@ var staticbytes = [...]byte{
        0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
        0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
 }
+
+// staticuint64s is used to avoid allocating in convTx for small integer values.
+var staticuint64s = [...]uint64{
+       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+       0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+       0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+       0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+       0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+       0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+       0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+       0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+       0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+       0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+       0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+       0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+       0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+       0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+       0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+       0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+       0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+       0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+       0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+       0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+       0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+       0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+       0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+       0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+       0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+       0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+       0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+       0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+       0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+}
index 6d8f8614d985070fcb9cd3774b390498c5475472..73beebffe283f5b64b16d438c8c6746f7c739e63 100644 (file)
@@ -311,17 +311,20 @@ var (
        eight8  uint8 = 8
        eight8I T8    = 8
 
-       zero16  uint16 = 0
-       zero16I T16    = 0
-       one16   uint16 = 1
+       zero16     uint16 = 0
+       zero16I    T16    = 0
+       one16      uint16 = 1
+       thousand16 uint16 = 1000
 
-       zero32  uint32 = 0
-       zero32I T32    = 0
-       one32   uint32 = 1
+       zero32     uint32 = 0
+       zero32I    T32    = 0
+       one32      uint32 = 1
+       thousand32 uint32 = 1000
 
-       zero64  uint64 = 0
-       zero64I T64    = 0
-       one64   uint64 = 1
+       zero64     uint64 = 0
+       zero64I    T64    = 0
+       one64      uint64 = 1
+       thousand64 uint64 = 1000
 
        zerostr  string = ""
        zerostrI Tstr   = ""
@@ -369,6 +372,23 @@ func BenchmarkConvT2Ezero(b *testing.B) {
                })
        })
        b.Run("nonzero", func(b *testing.B) {
+               b.Run("str", func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               e = nzstr
+                       }
+               })
+               b.Run("slice", func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               e = nzslice
+                       }
+               })
+               b.Run("big", func(b *testing.B) {
+                       for i := 0; i < b.N; i++ {
+                               e = nzbig
+                       }
+               })
+       })
+       b.Run("smallint", func(b *testing.B) {
                b.Run("16", func(b *testing.B) {
                        for i := 0; i < b.N; i++ {
                                e = one16
@@ -384,19 +404,21 @@ func BenchmarkConvT2Ezero(b *testing.B) {
                                e = one64
                        }
                })
-               b.Run("str", func(b *testing.B) {
+       })
+       b.Run("largeint", func(b *testing.B) {
+               b.Run("16", func(b *testing.B) {
                        for i := 0; i < b.N; i++ {
-                               e = nzstr
+                               e = thousand16
                        }
                })
-               b.Run("slice", func(b *testing.B) {
+               b.Run("32", func(b *testing.B) {
                        for i := 0; i < b.N; i++ {
-                               e = nzslice
+                               e = thousand32
                        }
                })
-               b.Run("big", func(b *testing.B) {
+               b.Run("64", func(b *testing.B) {
                        for i := 0; i < b.N; i++ {
-                               e = nzbig
+                               e = thousand64
                        }
                })
        })