From 3d4f41aea8f8b4d5d3548274f9d70620818a379f Mon Sep 17 00:00:00 2001 From: Jakub Ciolek Date: Mon, 21 Aug 2023 09:36:29 +0200 Subject: [PATCH] cmd/compile: use shorter MOVs for clears of 9, 10, 11 and 12 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Instead of using two MOVQ, use a smaller MOV(B|W|L) for the remainder. compilecmp: vendor/golang.org/x/net/dns/dnsmessage vendor/golang.org/x/net/dns/dnsmessage.(*SRVResource).GoString 1263 -> 1231 (-2.53%) vendor/golang.org/x/net/dns/dnsmessage.(*OpCode).GoString 285 -> 279 (-2.11%) vendor/golang.org/x/net/dns/dnsmessage.(*MXResource).GoString 603 -> 581 (-3.65%) vendor/golang.org/x/net/dns/dnsmessage.(*Option).GoString 485 -> 478 (-1.44%) os/signal os/signal.cancel.func1 378 -> 346 (-8.47%) fmt fmt.newPrinter 145 -> 138 (-4.83%) fmt.(*pp).catchPanic 1369 -> 1357 (-0.88%) fmt.(*fmt).clearflags 17 -> 13 (-23.53%) fmt.(*fmt).init 85 -> 81 (-4.71%) vendor/golang.org/x/crypto/chacha20poly1305 vendor/golang.org/x/crypto/chacha20poly1305.(*xchacha20poly1305).Seal 527 -> 526 (-0.19%) cmd/vendor/golang.org/x/sys/unix cmd/vendor/golang.org/x/sys/unix.(*TIPCSocketAddr).tipcAddr 88 -> 86 (-2.27%) cmd/vendor/golang.org/x/sys/unix.(*TIPCServiceRange).tipcAddr 95 -> 93 (-2.11%) cmd/vendor/golang.org/x/sys/unix.(*TIPCServiceName).tipcAddr 95 -> 93 (-2.11%) fmt [cmd/compile] fmt.(*fmt).clearflags 17 -> 13 (-23.53%) fmt.newPrinter 304 -> 298 (-1.97%) fmt.(*pp).catchPanic 1369 -> 1357 (-0.88%) fmt.Fprint 536 -> 535 (-0.19%) fmt.Sprintf 5009 -> 4945 (-1.28%) fmt.(*fmt).init 85 -> 81 (-4.71%) net/http net/http.http2FrameHeader.Header 89 -> 88 (-1.12%) net/http.http2HeadersFrame.Header 95 -> 94 (-1.05%) net/http.http2RSTStreamFrame.Header 120 -> 119 (-0.83%) net/http.http2WindowUpdateFrame.Header 120 -> 119 (-0.83%) net/http.http2Frame.Header 181 -> 179 (-1.10%) net/http.http2PingFrame.Header 95 -> 94 (-1.05%) net/http.(*http2FrameHeader).Header 119 -> 113 (-5.04%) net/http.http2GoAwayFrame.Header 95 -> 94 (-1.05%) net/http.http2MetaHeadersFrame.Header 133 -> 132 (-0.75%) cmd/vendor/golang.org/x/term cmd/vendor/golang.org/x/term.(*Terminal).clearLineToRight 338 -> 337 (-0.30%) cmd/link/internal/ld cmd/link/internal/ld.appendString 424 -> 421 (-0.71%) cmd/compile/internal/reflectdata [cmd/compile] cmd/compile/internal/reflectdata.dnameData 1574 -> 1561 (-0.83%) file before after Δ % vendor/golang.org/x/net/dns/dnsmessage.s 74536 74469 -67 -0.090% os/signal.s 7192 7160 -32 -0.445% fmt.s 70428 70401 -27 -0.038% vendor/golang.org/x/crypto/chacha20poly1305.s 7103 7102 -1 -0.014% cmd/vendor/golang.org/x/sys/unix.s 118033 118027 -6 -0.005% fmt [cmd/compile].s 83679 83588 -91 -0.109% net/http.s 536737 536722 -15 -0.003% cmd/vendor/golang.org/x/term.s 23505 23504 -1 -0.004% cmd/link/internal/ld.s 571046 571043 -3 -0.001% cmd/compile/internal/reflectdata [cmd/compile].s 72645 72632 -13 -0.018% cmd/compile/internal/ssa.s 3193233 3193841 +608 +0.019% cmd/compile/internal/ssa [cmd/compile].s 3362126 3362734 +608 +0.018% total 30810158 30811118 +960 +0.003% Change-Id: Iade49c590027c0a09a6e546a3b94d2dccd5b7116 Reviewed-on: https://go-review.googlesource.com/c/go/+/521455 Reviewed-by: Keith Randall TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Auto-Submit: Matthew Dempsky Run-TryBot: Jakub Ciolek Reviewed-by: Matthew Dempsky --- src/cmd/compile/internal/ssa/_gen/AMD64.rules | 18 +++- src/cmd/compile/internal/ssa/rewriteAMD64.go | 84 ++++++++++++++++++- 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index c4f74bb0d9..9ffb196880 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -380,7 +380,23 @@ (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)))) -(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE => +(Zero [9] destptr mem) && config.useSSE => + (MOVBstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + +(Zero [10] destptr mem) && config.useSSE => + (MOVWstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + +(Zero [11] destptr mem) && config.useSSE => + (MOVLstoreconst [makeValAndOff(0,7)] destptr + (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + +(Zero [12] destptr mem) && config.useSSE => + (MOVLstoreconst [makeValAndOff(0,8)] destptr + (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + +(Zero [s] destptr mem) && s > 12 && s < 16 && config.useSSE => (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 979d9be3a7..d3e009d644 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -29944,14 +29944,94 @@ func rewriteValueAMD64_OpZero(v *Value) bool { v.AddArg2(destptr, v0) return true } + // match: (Zero [9] destptr mem) + // cond: config.useSSE + // result: (MOVBstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 9 { + break + } + destptr := v_0 + mem := v_1 + if !(config.useSSE) { + break + } + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [10] destptr mem) + // cond: config.useSSE + // result: (MOVWstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 10 { + break + } + destptr := v_0 + mem := v_1 + if !(config.useSSE) { + break + } + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [11] destptr mem) + // cond: config.useSSE + // result: (MOVLstoreconst [makeValAndOff(0,7)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 11 { + break + } + destptr := v_0 + mem := v_1 + if !(config.useSSE) { + break + } + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 7)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [12] destptr mem) + // cond: config.useSSE + // result: (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 12 { + break + } + destptr := v_0 + mem := v_1 + if !(config.useSSE) { + break + } + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } // match: (Zero [s] destptr mem) - // cond: s > 8 && s < 16 && config.useSSE + // cond: s > 12 && s < 16 && config.useSSE // result: (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) for { s := auxIntToInt64(v.AuxInt) destptr := v_0 mem := v_1 - if !(s > 8 && s < 16 && config.useSSE) { + if !(s > 12 && s < 16 && config.useSSE) { break } v.reset(OpAMD64MOVQstoreconst) -- 2.48.1