Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize 386's load/store combination
author: Ben Shi <powerman1st@163.com>
Mon, 15 Oct 2018 01:19:48 +0000 (01:19 +0000)
committer: Ben Shi <powerman1st@163.com>
Tue, 16 Oct 2018 07:17:11 +0000 (07:17 +0000)
This CL adds more combinations of two consecutive MOVBload/MOVBstore
into a single MOVWload/MOVWstore.

1. The size of the go executable decreases by about 4KB, and the total
size of pkg/linux_386 (excluding cmd/compile) decreases by about 1.5KB.

2. There is no regression in the go1 benchmark result, excluding noise.
name                     old time/op    new time/op    delta
BinaryTree17-4              3.28s ± 2%     3.29s ± 2%    ~     (p=0.151 n=40+40)
Fannkuch11-4                3.52s ± 1%     3.51s ± 1%  -0.28%  (p=0.002 n=40+40)
FmtFprintfEmpty-4          45.4ns ± 4%    45.0ns ± 4%  -0.89%  (p=0.019 n=40+40)
FmtFprintfString-4         81.9ns ± 7%    81.3ns ± 1%    ~     (p=0.660 n=40+25)
FmtFprintfInt-4            91.9ns ± 9%    91.4ns ± 9%    ~     (p=0.249 n=40+40)
FmtFprintfIntInt-4          143ns ± 4%     143ns ± 4%    ~     (p=0.760 n=40+40)
FmtFprintfPrefixedInt-4     184ns ± 3%     183ns ± 4%    ~     (p=0.485 n=40+40)
FmtFprintfFloat-4           408ns ± 3%     409ns ± 3%    ~     (p=0.961 n=40+40)
FmtManyArgs-4               597ns ± 4%     602ns ± 3%    ~     (p=0.413 n=40+40)
GobDecode-4                7.13ms ± 6%    7.14ms ± 6%    ~     (p=0.859 n=40+40)
GobEncode-4                6.86ms ± 9%    6.94ms ± 7%    ~     (p=0.162 n=40+40)
Gzip-4                      395ms ± 4%     396ms ± 3%    ~     (p=0.099 n=40+40)
Gunzip-4                   40.9ms ± 4%    41.1ms ± 3%    ~     (p=0.064 n=40+40)
HTTPClientServer-4         63.6µs ± 2%    63.6µs ± 3%    ~     (p=0.832 n=36+39)
JSONEncode-4               16.1ms ± 3%    15.8ms ± 3%  -1.60%  (p=0.001 n=40+40)
JSONDecode-4               61.0ms ± 3%    61.5ms ± 4%    ~     (p=0.065 n=40+40)
Mandelbrot200-4            5.16ms ± 3%    5.18ms ± 3%    ~     (p=0.056 n=40+40)
GoParse-4                  3.25ms ± 2%    3.23ms ± 3%    ~     (p=0.727 n=40+40)
RegexpMatchEasy0_32-4      90.2ns ± 3%    89.3ns ± 6%  -0.98%  (p=0.002 n=40+40)
RegexpMatchEasy0_1K-4       812ns ± 3%     815ns ± 3%    ~     (p=0.309 n=40+40)
RegexpMatchEasy1_32-4       103ns ± 6%     103ns ± 5%    ~     (p=0.680 n=40+40)
RegexpMatchEasy1_1K-4      1.01µs ± 4%    1.02µs ± 3%    ~     (p=0.326 n=40+33)
RegexpMatchMedium_32-4      120ns ± 4%     120ns ± 5%    ~     (p=0.834 n=40+40)
RegexpMatchMedium_1K-4     40.1µs ± 3%    39.5µs ± 4%  -1.35%  (p=0.000 n=40+40)
RegexpMatchHard_32-4       2.27µs ± 6%    2.23µs ± 4%  -1.67%  (p=0.011 n=40+40)
RegexpMatchHard_1K-4       67.2µs ± 3%    67.2µs ± 3%    ~     (p=0.149 n=40+40)
Revcomp-4                   1.84s ± 2%     1.86s ± 3%  +0.70%  (p=0.020 n=40+40)
Template-4                 69.0ms ± 4%    69.8ms ± 3%  +1.20%  (p=0.003 n=40+40)
TimeParse-4                 438ns ± 3%     439ns ± 4%    ~     (p=0.650 n=40+40)
TimeFormat-4                412ns ± 3%     412ns ± 3%    ~     (p=0.888 n=40+40)
[Geo mean]                 65.2µs         65.2µs       -0.04%

name                     old speed      new speed      delta
GobDecode-4               108MB/s ± 6%   108MB/s ± 6%    ~     (p=0.855 n=40+40)
GobEncode-4               112MB/s ± 9%   111MB/s ± 8%    ~     (p=0.159 n=40+40)
Gzip-4                   49.2MB/s ± 4%  49.1MB/s ± 3%    ~     (p=0.102 n=40+40)
Gunzip-4                  474MB/s ± 3%   472MB/s ± 3%    ~     (p=0.063 n=40+40)
JSONEncode-4              121MB/s ± 3%   123MB/s ± 3%  +1.62%  (p=0.001 n=40+40)
JSONDecode-4             31.9MB/s ± 3%  31.6MB/s ± 4%    ~     (p=0.070 n=40+40)
GoParse-4                17.9MB/s ± 2%  17.9MB/s ± 3%    ~     (p=0.696 n=40+40)
RegexpMatchEasy0_32-4     355MB/s ± 3%   358MB/s ± 5%  +0.99%  (p=0.002 n=40+40)
RegexpMatchEasy0_1K-4    1.26GB/s ± 3%  1.26GB/s ± 3%    ~     (p=0.381 n=40+40)
RegexpMatchEasy1_32-4     310MB/s ± 5%   310MB/s ± 4%    ~     (p=0.655 n=40+40)
RegexpMatchEasy1_1K-4    1.01GB/s ± 4%  1.01GB/s ± 3%    ~     (p=0.351 n=40+33)
RegexpMatchMedium_32-4   8.32MB/s ± 4%  8.34MB/s ± 5%    ~     (p=0.696 n=40+40)
RegexpMatchMedium_1K-4   25.6MB/s ± 3%  25.9MB/s ± 4%  +1.36%  (p=0.000 n=40+40)
RegexpMatchHard_32-4     14.1MB/s ± 6%  14.3MB/s ± 4%  +1.64%  (p=0.011 n=40+40)
RegexpMatchHard_1K-4     15.2MB/s ± 3%  15.2MB/s ± 3%    ~     (p=0.147 n=40+40)
Revcomp-4                 138MB/s ± 2%   137MB/s ± 3%  -0.70%  (p=0.021 n=40+40)
Template-4               28.1MB/s ± 4%  27.8MB/s ± 3%  -1.19%  (p=0.003 n=40+40)
[Geo mean]               83.7MB/s       83.7MB/s       +0.03%

Change-Id: I2a2b3a942b5c45467491515d201179fd192e65c9
Reviewed-on: https://go-review.googlesource.com/c/141650
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/rewrite386.go
test/codegen/memcombine.go

index e8d19cf3c97c34fb22f19ab1a09e2cd7b6f2f564..3a54a2a2061da7096582ef5ec1af74f5c44de5ad 100644 (file)
   -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst <i.Type> [1] i) mem)
 
 // Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
   && x.Uses == 1
   && clobber(x)
   -> (MOVWstore [i-1] {s} p w mem)
+(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
+  && x.Uses == 1
+  && clobber(x)
+  -> (MOVWstore [i] {s} p w mem)
 (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
   && x.Uses == 1
   && clobber(x)
   && clobber(x)
   -> (MOVLstore [i-2] {s} p w0 mem)
 
-(MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
   && x.Uses == 1
   && clobber(x)
   -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
+(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem))
+  && x.Uses == 1
+  && clobber(x)
+  -> (MOVWstoreidx1 [i] {s} p idx w mem)
 (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
   && x.Uses == 1
   && clobber(x)
index 14784bef3a9797763d8f96de5e089ecf788715df..2fce9c151e3bc567448e2fea72e2af56c3632823 100644 (file)
@@ -106,13 +106,13 @@ func rewriteValue386(v *Value) bool {
        case Op386MOVBloadidx1:
                return rewriteValue386_Op386MOVBloadidx1_0(v)
        case Op386MOVBstore:
-               return rewriteValue386_Op386MOVBstore_0(v)
+               return rewriteValue386_Op386MOVBstore_0(v) || rewriteValue386_Op386MOVBstore_10(v)
        case Op386MOVBstoreconst:
                return rewriteValue386_Op386MOVBstoreconst_0(v)
        case Op386MOVBstoreconstidx1:
                return rewriteValue386_Op386MOVBstoreconstidx1_0(v)
        case Op386MOVBstoreidx1:
-               return rewriteValue386_Op386MOVBstoreidx1_0(v) || rewriteValue386_Op386MOVBstoreidx1_10(v)
+               return rewriteValue386_Op386MOVBstoreidx1_0(v) || rewriteValue386_Op386MOVBstoreidx1_10(v) || rewriteValue386_Op386MOVBstoreidx1_20(v)
        case Op386MOVLload:
                return rewriteValue386_Op386MOVLload_0(v)
        case Op386MOVLloadidx1:
@@ -5545,6 +5545,51 @@ func rewriteValue386_Op386MOVBstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i-1] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386SHRWconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != Op386MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
        // cond: x.Uses == 1 && clobber(x)
        // result: (MOVWstore [i-1] {s} p w mem)
@@ -5590,6 +5635,99 @@ func rewriteValue386_Op386MOVBstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != Op386MOVBstore {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != Op386SHRWconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVBstore_10(v *Value) bool {
+       // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != Op386MOVBstore {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != Op386SHRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstore)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
        // cond: x.Uses == 1 && clobber(x)
        // result: (MOVWstore [i-1] {s} p w0 mem)
@@ -6166,9 +6304,9 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -6176,10 +6314,12 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if v_2.Op != Op386SHRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
                        break
                }
-               j := v_2.AuxInt
                w := v_2.Args[0]
                x := v.Args[3]
                if x.Op != Op386MOVBstoreidx1 {
@@ -6198,14 +6338,7 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
-                       break
-               }
-               if w0.AuxInt != j-8 {
-                       break
-               }
-               if w != w0.Args[0] {
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
@@ -6217,13 +6350,13 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -6231,10 +6364,12 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != Op386SHRLconst {
+               if v_2.Op != Op386SHRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
                        break
                }
-               j := v_2.AuxInt
                w := v_2.Args[0]
                x := v.Args[3]
                if x.Op != Op386MOVBstoreidx1 {
@@ -6253,14 +6388,60 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                if p != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != Op386SHRLconst {
+               if w != x.Args[2] {
                        break
                }
-               if w0.AuxInt != j-8 {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVBstoreidx1_10(v *Value) bool {
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
@@ -6272,13 +6453,573 @@ func rewriteValue386_Op386MOVBstoreidx1_0(v *Value) bool {
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} idx p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRWconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRLconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRLconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRLconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRLconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} idx p w x:(MOVBstoreidx1 [i+1] {s} idx p (SHRWconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               idx := v.Args[0]
+               p := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i+1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               x_2 := x.Args[2]
+               if x_2.Op != Op386SHRWconst {
+                       break
+               }
+               if x_2.AuxInt != 8 {
+                       break
+               }
+               if w != x_2.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MOVBstoreidx1_20(v *Value) bool {
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} idx p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != Op386SHRLconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != Op386MOVBstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if idx != x.Args[0] {
+                       break
+               }
+               if p != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != Op386SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(Op386MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386MOVBstoreidx1_10(v *Value) bool {
        // match: (MOVBstoreidx1 [i] {s} idx p (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
        // cond: x.Uses == 1 && clobber(x)
        // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
index 230aadfb74a3cc890ebb28c46bf1fe7a4a343b8f..d0043da7ef0a56869bd6b8976b6d642ddc78cd78 100644 (file)
@@ -113,11 +113,13 @@ func load_be16_idx(b []byte, idx int) {
 
 func load_le_byte2_uint16(s []byte) uint16 {
        // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+       // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL`
        return uint16(s[0]) | uint16(s[1])<<8
 }
 
 func load_le_byte2_uint16_inv(s []byte) uint16 {
        // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+       // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`ORL`
        return uint16(s[1])<<8 | uint16(s[0])
 }
 
@@ -173,11 +175,13 @@ func load_be_byte8_uint64_inv(s []byte) uint64 {
 
 func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
        // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+       // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
        return uint16(s[idx]) | uint16(s[idx+1])<<8
 }
 
 func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
        // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+       // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
        return uint16(s[idx+1])<<8 | uint16(s[idx])
 }
 
@@ -405,9 +409,16 @@ func store_be16_idx(b []byte, idx int) {
 func store_le_byte_2(b []byte, val uint16) {
        _ = b[2]
        // arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+       // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
        b[1], b[2] = byte(val), byte(val>>8)
 }
 
+func store_le_byte_2_inv(b []byte, val uint16) {
+       _ = b[2]
+       // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+       b[2], b[1] = byte(val>>8), byte(val)
+}
+
 func store_le_byte_4(b []byte, val uint32) {
        _ = b[4]
        // arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
@@ -441,9 +452,16 @@ func store_be_byte_8(b []byte, val uint64) {
 func store_le_byte_2_idx(b []byte, idx int, val uint16) {
        _, _ = b[idx+0], b[idx+1]
        // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+       // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
        b[idx+1], b[idx+0] = byte(val>>8), byte(val)
 }
 
+func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
+       _, _ = b[idx+0], b[idx+1]
+       // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+       b[idx+0], b[idx+1] = byte(val), byte(val>>8)
+}
+
 func store_le_byte_4_idx(b []byte, idx int, val uint32) {
        _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
        // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`