]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: use shorter MOVs for clears of 9, 10, 11 and 12 bytes
authorJakub Ciolek <jakub@ciolek.dev>
Mon, 21 Aug 2023 07:36:29 +0000 (09:36 +0200)
committerGopher Robot <gobot@golang.org>
Tue, 22 Aug 2023 20:48:57 +0000 (20:48 +0000)
Instead of using two MOVQ, use a smaller MOV(B|W|L) for the remainder.

compilecmp:

vendor/golang.org/x/net/dns/dnsmessage
vendor/golang.org/x/net/dns/dnsmessage.(*SRVResource).GoString 1263 -> 1231  (-2.53%)
vendor/golang.org/x/net/dns/dnsmessage.(*OpCode).GoString 285 -> 279  (-2.11%)
vendor/golang.org/x/net/dns/dnsmessage.(*MXResource).GoString 603 -> 581  (-3.65%)
vendor/golang.org/x/net/dns/dnsmessage.(*Option).GoString 485 -> 478  (-1.44%)

os/signal
os/signal.cancel.func1 378 -> 346  (-8.47%)

fmt
fmt.newPrinter 145 -> 138  (-4.83%)
fmt.(*pp).catchPanic 1369 -> 1357  (-0.88%)
fmt.(*fmt).clearflags 17 -> 13  (-23.53%)
fmt.(*fmt).init 85 -> 81  (-4.71%)

vendor/golang.org/x/crypto/chacha20poly1305
vendor/golang.org/x/crypto/chacha20poly1305.(*xchacha20poly1305).Seal 527 -> 526  (-0.19%)

cmd/vendor/golang.org/x/sys/unix
cmd/vendor/golang.org/x/sys/unix.(*TIPCSocketAddr).tipcAddr 88 -> 86  (-2.27%)
cmd/vendor/golang.org/x/sys/unix.(*TIPCServiceRange).tipcAddr 95 -> 93  (-2.11%)
cmd/vendor/golang.org/x/sys/unix.(*TIPCServiceName).tipcAddr 95 -> 93  (-2.11%)

fmt [cmd/compile]
fmt.(*fmt).clearflags 17 -> 13  (-23.53%)
fmt.newPrinter 304 -> 298  (-1.97%)
fmt.(*pp).catchPanic 1369 -> 1357  (-0.88%)
fmt.Fprint 536 -> 535  (-0.19%)
fmt.Sprintf 5009 -> 4945  (-1.28%)
fmt.(*fmt).init 85 -> 81  (-4.71%)

net/http
net/http.http2FrameHeader.Header 89 -> 88  (-1.12%)
net/http.http2HeadersFrame.Header 95 -> 94  (-1.05%)
net/http.http2RSTStreamFrame.Header 120 -> 119  (-0.83%)
net/http.http2WindowUpdateFrame.Header 120 -> 119  (-0.83%)
net/http.http2Frame.Header 181 -> 179  (-1.10%)
net/http.http2PingFrame.Header 95 -> 94  (-1.05%)
net/http.(*http2FrameHeader).Header 119 -> 113  (-5.04%)
net/http.http2GoAwayFrame.Header 95 -> 94  (-1.05%)
net/http.http2MetaHeadersFrame.Header 133 -> 132  (-0.75%)

cmd/vendor/golang.org/x/term
cmd/vendor/golang.org/x/term.(*Terminal).clearLineToRight 338 -> 337  (-0.30%)

cmd/link/internal/ld
cmd/link/internal/ld.appendString 424 -> 421  (-0.71%)

cmd/compile/internal/reflectdata [cmd/compile]
cmd/compile/internal/reflectdata.dnameData 1574 -> 1561  (-0.83%)

file                                             before   after    Δ       %
vendor/golang.org/x/net/dns/dnsmessage.s         74536    74469    -67     -0.090%
os/signal.s                                      7192     7160     -32     -0.445%
fmt.s                                            70428    70401    -27     -0.038%
vendor/golang.org/x/crypto/chacha20poly1305.s    7103     7102     -1      -0.014%
cmd/vendor/golang.org/x/sys/unix.s               118033   118027   -6      -0.005%
fmt [cmd/compile].s                              83679    83588    -91     -0.109%
net/http.s                                       536737   536722   -15     -0.003%
cmd/vendor/golang.org/x/term.s                   23505    23504    -1      -0.004%
cmd/link/internal/ld.s                           571046   571043   -3      -0.001%
cmd/compile/internal/reflectdata [cmd/compile].s 72645    72632    -13     -0.018%
cmd/compile/internal/ssa.s                       3193233  3193841  +608    +0.019%
cmd/compile/internal/ssa [cmd/compile].s         3362126  3362734  +608    +0.018%
total                                            30810158 30811118 +960    +0.003%

Change-Id: Iade49c590027c0a09a6e546a3b94d2dccd5b7116
Reviewed-on: https://go-review.googlesource.com/c/go/+/521455
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Jakub Ciolek <jakub@ciolek.dev>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go

index c4f74bb0d90ed324a33edf45aa6653439e0fac3b..9ffb196880a1a361a721a4a4127bef8c4e49ed45 100644 (file)
                        (MOVQstoreconst [makeValAndOff(0,8)] destptr
                                (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))))
 
-(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE =>
+(Zero [9] destptr mem) && config.useSSE =>
+       (MOVBstoreconst [makeValAndOff(0,8)] destptr
+               (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+
+(Zero [10] destptr mem) && config.useSSE =>
+       (MOVWstoreconst [makeValAndOff(0,8)] destptr
+               (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+
+(Zero [11] destptr mem) && config.useSSE =>
+       (MOVLstoreconst [makeValAndOff(0,7)] destptr
+               (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+
+(Zero [12] destptr mem) && config.useSSE =>
+       (MOVLstoreconst [makeValAndOff(0,8)] destptr
+               (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+
+(Zero [s] destptr mem) && s > 12 && s < 16 && config.useSSE =>
        (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr
                (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
 
index 979d9be3a726435c601d277f454ac62b4567d164..d3e009d644043b08019a2d3784c8defd1cb8af97 100644 (file)
@@ -29944,14 +29944,94 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
                v.AddArg2(destptr, v0)
                return true
        }
+       // match: (Zero [9] destptr mem)
+       // cond: config.useSSE
+       // result: (MOVBstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 9 {
+                       break
+               }
+               destptr := v_0
+               mem := v_1
+               if !(config.useSSE) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+               v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
+               v0.AddArg2(destptr, mem)
+               v.AddArg2(destptr, v0)
+               return true
+       }
+       // match: (Zero [10] destptr mem)
+       // cond: config.useSSE
+       // result: (MOVWstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 10 {
+                       break
+               }
+               destptr := v_0
+               mem := v_1
+               if !(config.useSSE) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+               v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
+               v0.AddArg2(destptr, mem)
+               v.AddArg2(destptr, v0)
+               return true
+       }
+       // match: (Zero [11] destptr mem)
+       // cond: config.useSSE
+       // result: (MOVLstoreconst [makeValAndOff(0,7)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 11 {
+                       break
+               }
+               destptr := v_0
+               mem := v_1
+               if !(config.useSSE) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 7))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+               v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
+               v0.AddArg2(destptr, mem)
+               v.AddArg2(destptr, v0)
+               return true
+       }
+       // match: (Zero [12] destptr mem)
+       // cond: config.useSSE
+       // result: (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 12 {
+                       break
+               }
+               destptr := v_0
+               mem := v_1
+               if !(config.useSSE) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+               v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
+               v0.AddArg2(destptr, mem)
+               v.AddArg2(destptr, v0)
+               return true
+       }
        // match: (Zero [s] destptr mem)
-       // cond: s > 8 && s < 16 && config.useSSE
+       // cond: s > 12 && s < 16 && config.useSSE
        // result: (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
        for {
                s := auxIntToInt64(v.AuxInt)
                destptr := v_0
                mem := v_1
-               if !(s > 8 && s < 16 && config.useSSE) {
+               if !(s > 12 && s < 16 && config.useSSE) {
                        break
                }
                v.reset(OpAMD64MOVQstoreconst)