From: Ben Shi Date: Mon, 15 Oct 2018 01:19:48 +0000 (+0000) Subject: cmd/compile: optimize 386's load/store combination X-Git-Tag: go1.12beta1~760 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=4b78fe57a81fe4fd9a1c370c65e3a694066ac1ab;p=gostls13.git cmd/compile: optimize 386's load/store combination This CL adds more combinations of two consecutive MOVBload/MOVBstore into a single MOVWload/MOVWstore. 1. The size of the go executable decreases by about 4KB, and the total size of pkg/linux_386 (excluding cmd/compile) decreases by about 1.5KB. 2. There is no regression in the go1 benchmark result, excluding noise. name old time/op new time/op delta BinaryTree17-4 3.28s ± 2% 3.29s ± 2% ~ (p=0.151 n=40+40) Fannkuch11-4 3.52s ± 1% 3.51s ± 1% -0.28% (p=0.002 n=40+40) FmtFprintfEmpty-4 45.4ns ± 4% 45.0ns ± 4% -0.89% (p=0.019 n=40+40) FmtFprintfString-4 81.9ns ± 7% 81.3ns ± 1% ~ (p=0.660 n=40+25) FmtFprintfInt-4 91.9ns ± 9% 91.4ns ± 9% ~ (p=0.249 n=40+40) FmtFprintfIntInt-4 143ns ± 4% 143ns ± 4% ~ (p=0.760 n=40+40) FmtFprintfPrefixedInt-4 184ns ± 3% 183ns ± 4% ~ (p=0.485 n=40+40) FmtFprintfFloat-4 408ns ± 3% 409ns ± 3% ~ (p=0.961 n=40+40) FmtManyArgs-4 597ns ± 4% 602ns ± 3% ~ (p=0.413 n=40+40) GobDecode-4 7.13ms ± 6% 7.14ms ± 6% ~ (p=0.859 n=40+40) GobEncode-4 6.86ms ± 9% 6.94ms ± 7% ~ (p=0.162 n=40+40) Gzip-4 395ms ± 4% 396ms ± 3% ~ (p=0.099 n=40+40) Gunzip-4 40.9ms ± 4% 41.1ms ± 3% ~ (p=0.064 n=40+40) HTTPClientServer-4 63.6µs ± 2% 63.6µs ± 3% ~ (p=0.832 n=36+39) JSONEncode-4 16.1ms ± 3% 15.8ms ± 3% -1.60% (p=0.001 n=40+40) JSONDecode-4 61.0ms ± 3% 61.5ms ± 4% ~ (p=0.065 n=40+40) Mandelbrot200-4 5.16ms ± 3% 5.18ms ± 3% ~ (p=0.056 n=40+40) GoParse-4 3.25ms ± 2% 3.23ms ± 3% ~ (p=0.727 n=40+40) RegexpMatchEasy0_32-4 90.2ns ± 3% 89.3ns ± 6% -0.98% (p=0.002 n=40+40) RegexpMatchEasy0_1K-4 812ns ± 3% 815ns ± 3% ~ (p=0.309 n=40+40) RegexpMatchEasy1_32-4 103ns ± 6% 103ns ± 5% ~ (p=0.680 n=40+40) RegexpMatchEasy1_1K-4 1.01µs ± 4% 1.02µs ± 3% ~ (p=0.326 n=40+33) 
RegexpMatchMedium_32-4 120ns ± 4% 120ns ± 5% ~ (p=0.834 n=40+40) RegexpMatchMedium_1K-4 40.1µs ± 3% 39.5µs ± 4% -1.35% (p=0.000 n=40+40) RegexpMatchHard_32-4 2.27µs ± 6% 2.23µs ± 4% -1.67% (p=0.011 n=40+40) RegexpMatchHard_1K-4 67.2µs ± 3% 67.2µs ± 3% ~ (p=0.149 n=40+40) Revcomp-4 1.84s ± 2% 1.86s ± 3% +0.70% (p=0.020 n=40+40) Template-4 69.0ms ± 4% 69.8ms ± 3% +1.20% (p=0.003 n=40+40) TimeParse-4 438ns ± 3% 439ns ± 4% ~ (p=0.650 n=40+40) TimeFormat-4 412ns ± 3% 412ns ± 3% ~ (p=0.888 n=40+40) [Geo mean] 65.2µs 65.2µs -0.04% name old speed new speed delta GobDecode-4 108MB/s ± 6% 108MB/s ± 6% ~ (p=0.855 n=40+40) GobEncode-4 112MB/s ± 9% 111MB/s ± 8% ~ (p=0.159 n=40+40) Gzip-4 49.2MB/s ± 4% 49.1MB/s ± 3% ~ (p=0.102 n=40+40) Gunzip-4 474MB/s ± 3% 472MB/s ± 3% ~ (p=0.063 n=40+40) JSONEncode-4 121MB/s ± 3% 123MB/s ± 3% +1.62% (p=0.001 n=40+40) JSONDecode-4 31.9MB/s ± 3% 31.6MB/s ± 4% ~ (p=0.070 n=40+40) GoParse-4 17.9MB/s ± 2% 17.9MB/s ± 3% ~ (p=0.696 n=40+40) RegexpMatchEasy0_32-4 355MB/s ± 3% 358MB/s ± 5% +0.99% (p=0.002 n=40+40) RegexpMatchEasy0_1K-4 1.26GB/s ± 3% 1.26GB/s ± 3% ~ (p=0.381 n=40+40) RegexpMatchEasy1_32-4 310MB/s ± 5% 310MB/s ± 4% ~ (p=0.655 n=40+40) RegexpMatchEasy1_1K-4 1.01GB/s ± 4% 1.01GB/s ± 3% ~ (p=0.351 n=40+33) RegexpMatchMedium_32-4 8.32MB/s ± 4% 8.34MB/s ± 5% ~ (p=0.696 n=40+40) RegexpMatchMedium_1K-4 25.6MB/s ± 3% 25.9MB/s ± 4% +1.36% (p=0.000 n=40+40) RegexpMatchHard_32-4 14.1MB/s ± 6% 14.3MB/s ± 4% +1.64% (p=0.011 n=40+40) RegexpMatchHard_1K-4 15.2MB/s ± 3% 15.2MB/s ± 3% ~ (p=0.147 n=40+40) Revcomp-4 138MB/s ± 2% 137MB/s ± 3% -0.70% (p=0.021 n=40+40) Template-4 28.1MB/s ± 4% 27.8MB/s ± 3% -1.19% (p=0.003 n=40+40) [Geo mean] 83.7MB/s 83.7MB/s +0.03% Change-Id: I2a2b3a942b5c45467491515d201179fd192e65c9 Reviewed-on: https://go-review.googlesource.com/c/141650 Run-TryBot: Ben Shi TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index 
e8d19cf3c9..3a54a2a206 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -1151,10 +1151,14 @@ -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst [1] i) mem) // Combine stores into larger (unaligned) stores. -(MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) +(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) && x.Uses == 1 && clobber(x) -> (MOVWstore [i-1] {s} p w mem) +(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i] {s} p w mem) (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) && x.Uses == 1 && clobber(x) @@ -1168,10 +1172,14 @@ && clobber(x) -> (MOVLstore [i-2] {s} p w0 mem) -(MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) +(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) && x.Uses == 1 && clobber(x) -> (MOVWstoreidx1 [i-1] {s} p idx w mem) +(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstoreidx1 [i] {s} p idx w mem) (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) && x.Uses == 1 && clobber(x) diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index 14784bef3a..2fce9c151e 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -106,13 +106,13 @@ func rewriteValue386(v *Value) bool { case Op386MOVBloadidx1: return rewriteValue386_Op386MOVBloadidx1_0(v) case Op386MOVBstore: - return rewriteValue386_Op386MOVBstore_0(v) + return rewriteValue386_Op386MOVBstore_0(v) || rewriteValue386_Op386MOVBstore_10(v) case Op386MOVBstoreconst: return 
rewriteValue386_Op386MOVBstoreconst_0(v) case Op386MOVBstoreconstidx1: return rewriteValue386_Op386MOVBstoreconstidx1_0(v) case Op386MOVBstoreidx1: - return rewriteValue386_Op386MOVBstoreidx1_0(v) || rewriteValue386_Op386MOVBstoreidx1_10(v) + return rewriteValue386_Op386MOVBstoreidx1_0(v) || rewriteValue386_Op386MOVBstoreidx1_10(v) || rewriteValue386_Op386MOVBstoreidx1_20(v) case Op386MOVLload: return rewriteValue386_Op386MOVLload_0(v) case Op386MOVLloadidx1: @@ -5545,6 +5545,51 @@ func rewriteValue386_Op386MOVBstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386SHRWconst { + break + } + if v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v.Args[2] + if x.Op != Op386MOVBstore { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + if p != x.Args[0] { + break + } + if w != x.Args[1] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVWstore [i-1] {s} p w mem) @@ -5590,6 +5635,99 @@ func rewriteValue386_Op386MOVBstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x := v.Args[2] + if x.Op != Op386MOVBstore { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + if p != x.Args[0] { + break + 
} + x_1 := x.Args[1] + if x_1.Op != Op386SHRWconst { + break + } + if x_1.AuxInt != 8 { + break + } + if w != x_1.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MOVBstore_10(v *Value) bool { + // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[2] + p := v.Args[0] + w := v.Args[1] + x := v.Args[2] + if x.Op != Op386MOVBstore { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != Op386SHRLconst { + break + } + if x_1.AuxInt != 8 { + break + } + if w != x_1.Args[0] { + break + } + mem := x.Args[2] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(w) + v.AddArg(mem) + return true + } // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVWstore [i-1] {s} p w0 mem) @@ -6166,9 +6304,9 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { v.AddArg(mem) return true } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) + // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) for { i := v.AuxInt s := v.Aux @@ -6176,10 +6314,12 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { p := v.Args[0] idx := v.Args[1] v_2 := v.Args[2] - if v_2.Op != Op386SHRLconst { + if v_2.Op != 
Op386SHRWconst { + break + } + if v_2.AuxInt != 8 { break } - j := v_2.AuxInt w := v_2.Args[0] x := v.Args[3] if x.Op != Op386MOVBstoreidx1 { @@ -6198,14 +6338,7 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { if idx != x.Args[1] { break } - w0 := x.Args[2] - if w0.Op != Op386SHRLconst { - break - } - if w0.AuxInt != j-8 { - break - } - if w != w0.Args[0] { + if w != x.Args[2] { break } mem := x.Args[3] @@ -6217,13 +6350,13 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { v.Aux = s v.AddArg(p) v.AddArg(idx) - v.AddArg(w0) + v.AddArg(w) v.AddArg(mem) return true } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem)) + // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem)) // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) for { i := v.AuxInt s := v.Aux @@ -6231,10 +6364,12 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { p := v.Args[0] idx := v.Args[1] v_2 := v.Args[2] - if v_2.Op != Op386SHRLconst { + if v_2.Op != Op386SHRWconst { + break + } + if v_2.AuxInt != 8 { break } - j := v_2.AuxInt w := v_2.Args[0] x := v.Args[3] if x.Op != Op386MOVBstoreidx1 { @@ -6253,14 +6388,60 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { if p != x.Args[1] { break } - w0 := x.Args[2] - if w0.Op != Op386SHRLconst { + if w != x.Args[2] { break } - if w0.AuxInt != j-8 { + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { break } - if w != w0.Args[0] { + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MOVBstoreidx1_10(v *Value) bool { + // match: (MOVBstoreidx1 [i] {s} idx p (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] 
{s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != Op386SHRWconst { + break + } + if v_2.AuxInt != 8 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + if w != x.Args[2] { break } mem := x.Args[3] @@ -6272,13 +6453,573 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool { v.Aux = s v.AddArg(p) v.AddArg(idx) - v.AddArg(w0) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} idx p (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != Op386SHRWconst { + break + } + if v_2.AuxInt != 8 { + break + } + w := v_2.Args[0] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + if w != x.Args[2] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != 
x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + 
v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRLconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRWconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := 
v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRWconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRWconst { + break + } + if x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + idx := v.Args[0] + p := v.Args[1] + w := v.Args[2] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i+1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRWconst { + break + } + if 
x_2.AuxInt != 8 { + break + } + if w != x_2.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MOVBstoreidx1_20(v *Value) bool { + // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != Op386SHRLconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if p != x.Args[0] { + break + } + if idx != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != Op386SHRLconst { + break + } + if w0.AuxInt != j-8 { + break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w0) + v.AddArg(mem) + return true + } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + _ = v.Args[3] + p := v.Args[0] + idx := v.Args[1] + v_2 := v.Args[2] + if v_2.Op != Op386SHRLconst { + break + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v.Args[3] + if x.Op != Op386MOVBstoreidx1 { + break + } + if x.AuxInt != i-1 { + break + } + if x.Aux != s { + break + } + _ = x.Args[3] + if idx != x.Args[0] { + break + } + if p != x.Args[1] { + break + } + w0 := x.Args[2] + if w0.Op != Op386SHRLconst { + break + } + if w0.AuxInt != j-8 { + 
break + } + if w != w0.Args[0] { + break + } + mem := x.Args[3] + if !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg(p) + v.AddArg(idx) + v.AddArg(w0) v.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386MOVBstoreidx1_10(v *Value) bool { // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) // cond: x.Uses == 1 && clobber(x) // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 230aadfb74..d0043da7ef 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -113,11 +113,13 @@ func load_be16_idx(b []byte, idx int) { func load_le_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` return uint16(s[0]) | uint16(s[1])<<8 } func load_le_byte2_uint16_inv(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL` return uint16(s[1])<<8 | uint16(s[0]) } @@ -173,11 +175,13 @@ func load_be_byte8_uint64_inv(s []byte) uint64 { func load_le_byte2_uint16_idx(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` return uint16(s[idx]) | uint16(s[idx+1])<<8 } func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 { // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` return uint16(s[idx+1])<<8 | uint16(s[idx]) } @@ -405,9 +409,16 @@ func store_be16_idx(b []byte, idx int) { func store_le_byte_2(b []byte, val uint16) { _ = b[2] // arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB` + // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` b[1], b[2] = byte(val), byte(val>>8) } +func store_le_byte_2_inv(b []byte, val uint16) { + _ = b[2] + // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + b[2], b[1] = 
byte(val>>8), byte(val) +} + func store_le_byte_4(b []byte, val uint32) { _ = b[4] // arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH` @@ -441,9 +452,16 @@ func store_be_byte_8(b []byte, val uint64) { func store_le_byte_2_idx(b []byte, idx int, val uint16) { _, _ = b[idx+0], b[idx+1] // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` + // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB` b[idx+1], b[idx+0] = byte(val>>8), byte(val) } +func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) { + _, _ = b[idx+0], b[idx+1] + // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB` + b[idx+0], b[idx+1] = byte(val), byte(val>>8) +} + func store_le_byte_4_idx(b []byte, idx int, val uint32) { _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3] // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`