Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize 386 binary operations with a memory operand
author: Ben Shi <powerman1st@163.com>
Thu, 22 Mar 2018 02:18:50 +0000 (02:18 +0000)
committer: Brad Fitzpatrick <bradfitz@golang.org>
Thu, 5 Apr 2018 16:09:32 +0000 (16:09 +0000)
Some integer/float binary operations of 386 can take a direct memory
operand, which is more efficient than loading to a register.

This CL does this optimization by copying the similar solution
of amd64. And the go1 benchmark shows some improvements, especially
the test case Template. (excluding noise)

name                     old time/op    new time/op    delta
BinaryTree17-4              3.42s ± 2%     3.40s ± 2%    ~     (p=0.069 n=38+39)
Fannkuch11-4                3.48s ± 1%     3.53s ± 1%  +1.59%  (p=0.000 n=40+40)
FmtFprintfEmpty-4          46.7ns ± 4%    46.3ns ± 3%  -1.03%  (p=0.001 n=40+40)
FmtFprintfString-4         80.1ns ± 3%    80.6ns ± 3%  +0.56%  (p=0.029 n=40+40)
FmtFprintfInt-4            92.4ns ± 2%    92.3ns ± 3%    ~     (p=0.847 n=40+40)
FmtFprintfIntInt-4          147ns ± 3%     144ns ± 3%  -1.87%  (p=0.000 n=40+40)
FmtFprintfPrefixedInt-4     182ns ± 2%     184ns ± 3%  +0.99%  (p=0.002 n=40+40)
FmtFprintfFloat-4           387ns ± 3%     384ns ± 3%    ~     (p=0.069 n=40+40)
FmtManyArgs-4               619ns ± 3%     616ns ± 3%    ~     (p=0.320 n=40+40)
GobDecode-4                7.28ms ± 6%    7.27ms ± 5%    ~     (p=0.897 n=40+40)
GobEncode-4                7.33ms ± 6%    7.21ms ± 6%  -1.56%  (p=0.022 n=38+40)
Gzip-4                      357ms ± 4%     357ms ± 4%    ~     (p=0.071 n=40+40)
Gunzip-4                   45.3ms ± 3%    45.4ms ± 3%    ~     (p=0.452 n=40+40)
HTTPClientServer-4         63.0µs ± 2%    62.9µs ± 3%    ~     (p=0.760 n=38+39)
JSONEncode-4               22.0ms ± 4%    21.7ms ± 4%  -1.49%  (p=0.000 n=40+40)
JSONDecode-4               67.7ms ± 4%    68.3ms ± 3%  +0.86%  (p=0.039 n=40+40)
Mandelbrot200-4            5.16ms ± 3%    5.17ms ± 3%    ~     (p=0.418 n=40+40)
GoParse-4                  3.30ms ± 2%    3.32ms ± 3%  +0.55%  (p=0.017 n=40+40)
RegexpMatchEasy0_32-4       104ns ± 3%     104ns ± 4%    ~     (p=0.992 n=40+40)
RegexpMatchEasy0_1K-4       852ns ± 3%     851ns ± 2%    ~     (p=0.344 n=40+40)
RegexpMatchEasy1_32-4       113ns ± 4%     113ns ± 5%    ~     (p=0.937 n=40+40)
RegexpMatchEasy1_1K-4      1.03µs ± 5%    1.04µs ± 4%    ~     (p=0.430 n=40+40)
RegexpMatchMedium_32-4      132ns ± 4%     131ns ± 3%  -1.06%  (p=0.027 n=40+40)
RegexpMatchMedium_1K-4     43.0µs ± 3%    43.2µs ± 3%    ~     (p=0.122 n=40+40)
RegexpMatchHard_32-4       2.21µs ± 4%    2.20µs ± 4%    ~     (p=0.146 n=40+40)
RegexpMatchHard_1K-4       67.1µs ± 4%    67.2µs ± 3%    ~     (p=0.859 n=40+40)
Revcomp-4                   1.85s ± 2%     1.85s ± 3%    ~     (p=0.184 n=40+40)
Template-4                 70.1ms ± 4%    67.5ms ± 3%  -3.65%  (p=0.000 n=40+40)
TimeParse-4                 457ns ±16%     439ns ± 4%    ~     (p=0.683 n=40+34)
TimeFormat-4                413ns ± 3%     414ns ± 3%    ~     (p=0.850 n=40+40)
[Geo mean]                 67.5µs         67.3µs       -0.38%

name                     old speed      new speed      delta
GobDecode-4               105MB/s ± 6%   106MB/s ± 5%    ~     (p=0.893 n=40+40)
GobEncode-4               105MB/s ± 6%   107MB/s ± 7%  +1.60%  (p=0.023 n=38+40)
Gzip-4                   54.4MB/s ± 4%  54.5MB/s ± 4%    ~     (p=0.073 n=40+40)
Gunzip-4                  429MB/s ± 3%   428MB/s ± 3%    ~     (p=0.453 n=40+40)
JSONEncode-4             88.3MB/s ± 5%  89.6MB/s ± 4%  +1.51%  (p=0.000 n=40+40)
JSONDecode-4             28.7MB/s ± 4%  28.4MB/s ± 3%  -0.87%  (p=0.039 n=40+40)
GoParse-4                17.6MB/s ± 3%  17.5MB/s ± 3%  -0.55%  (p=0.020 n=40+40)
RegexpMatchEasy0_32-4     308MB/s ± 4%   308MB/s ± 5%    ~     (p=0.988 n=40+40)
RegexpMatchEasy0_1K-4    1.20GB/s ± 3%  1.20GB/s ± 2%    ~     (p=0.329 n=40+40)
RegexpMatchEasy1_32-4     283MB/s ± 4%   283MB/s ± 4%    ~     (p=0.507 n=40+40)
RegexpMatchEasy1_1K-4     991MB/s ± 5%   987MB/s ± 4%    ~     (p=0.446 n=40+40)
RegexpMatchMedium_32-4   7.54MB/s ± 4%  7.63MB/s ± 3%  +1.26%  (p=0.004 n=40+40)
RegexpMatchMedium_1K-4   23.8MB/s ± 3%  23.7MB/s ± 4%    ~     (p=0.121 n=40+40)
RegexpMatchHard_32-4     14.5MB/s ± 4%  14.6MB/s ± 4%    ~     (p=0.145 n=40+40)
RegexpMatchHard_1K-4     15.3MB/s ± 4%  15.2MB/s ± 3%    ~     (p=0.874 n=40+40)
Revcomp-4                 137MB/s ± 2%   137MB/s ± 3%    ~     (p=0.179 n=40+40)
Template-4               27.7MB/s ± 4%  28.7MB/s ± 3%  +3.78%  (p=0.000 n=40+40)
[Geo mean]               78.9MB/s       79.2MB/s       +0.38%

Change-Id: I3ba688c253b665485c1ebdf5a75f4ce82cc3def3
Reviewed-on: https://go-review.googlesource.com/102036
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/x86/ssa.go

index 35db2e216d658b0fe9c6e14f9c3875edb8c1745b..d6533e1772fa4bceae5a19ceeae8ebc8c05f8901 100644 (file)
 (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSSstore [off1+off2] {sym} ptr val mem)
 (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSDstore [off1+off2] {sym} ptr val mem)
 
+((ADD|SUB|AND|OR|XOR)Lmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
+       ((ADD|SUB|AND|OR|XOR)Lmem [off1+off2] {sym} val base mem)
+((ADD|SUB|MUL)SSmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
+       ((ADD|SUB|MUL)SSmem [off1+off2] {sym} val base mem)
+((ADD|SUB|MUL)SDmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
+       ((ADD|SUB|MUL)SDmem [off1+off2] {sym} val base mem)
+
 // Fold constants into stores.
 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
        (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
 (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
        (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
 
+((ADD|SUB|AND|OR|XOR)Lmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
+       ((ADD|SUB|AND|OR|XOR)Lmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+((ADD|SUB|MUL)SSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
+       ((ADD|SUB|MUL)SSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+((ADD|SUB|MUL)SDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
+       ((ADD|SUB|MUL)SDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+
 (MOVBload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx1 [off] {sym} ptr idx mem)
 (MOVWload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVWloadidx1 [off] {sym} ptr idx mem)
 (MOVLload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVLloadidx1 [off] {sym} ptr idx mem)
 (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx1 [int64(int32(c+d))]   {sym} ptr idx val mem)
 (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem)
 
+// Merge load to op
+((ADD|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR)Lmem x [off] {sym} ptr mem)
+((ADD|SUB|MUL)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL)SDmem x [off] {sym} ptr mem)
+((ADD|SUB|MUL)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL)SSmem x [off] {sym} ptr mem)
+
 (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
        (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
 (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
 // For PIC, break floating-point constant loading into two instructions so we have
 // a register to use for holding the address of the constant pool entry.
 (MOVSSconst [c]) && config.ctxt.Flag_shared -> (MOVSSconst2 (MOVSSconst1 [c]))
-(MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c]))
+(MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c]))
\ No newline at end of file
index d1f8bc97889ce07076748370b013eca282b01371..6fed5807834d0dd67423761f01bb2e74f9cd3b6e 100644 (file)
@@ -125,6 +125,7 @@ func init() {
                flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
 
                gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
+               gp21load  = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
                gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
 
                gpstore         = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
@@ -134,6 +135,7 @@ func init() {
 
                fp01     = regInfo{inputs: nil, outputs: fponly}
                fp21     = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
+               fp21load  = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
                fpgp     = regInfo{inputs: fponly, outputs: gponly}
                gpfp     = regInfo{inputs: gponly, outputs: fponly}
                fp11     = regInfo{inputs: fponly, outputs: fponly}
@@ -173,6 +175,13 @@ func init() {
                {name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"},                // fp64 indexed by i store
                {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"},                // fp64 indexed by 8i store
 
+               {name: "ADDSSmem", argLength: 3, reg: fp21load, asm: "ADDSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+               {name: "ADDSDmem", argLength: 3, reg: fp21load, asm: "ADDSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+               {name: "SUBSSmem", argLength: 3, reg: fp21load, asm: "SUBSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+               {name: "SUBSDmem", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+               {name: "MULSSmem", argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+               {name: "MULSDmem", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+
                // binary ops
                {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
                {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint
@@ -258,6 +267,12 @@ func init() {
                {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
                {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true},  // arg0 rotate left auxint, rotate amount 0-7
 
+               {name: "ADDLmem", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
+               {name: "SUBLmem", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
+               {name: "ANDLmem", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
+               {name: "ORLmem", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"},   // arg0 | tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
+               {name: "XORLmem", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
+
                // unary ops
                {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
 
@@ -516,4 +531,4 @@ func init() {
                framepointerreg: int8(num["BP"]),
                linkreg:         -1, // not used
        })
-}
+}
\ No newline at end of file
index ef3875ec0283f1ef2982e05db1c49ad8ed11855c..4b782acfa609705fe14bf8f7d125ea04eb851e90 100644 (file)
@@ -255,6 +255,12 @@ const (
        Op386MOVSSstoreidx4
        Op386MOVSDstoreidx1
        Op386MOVSDstoreidx8
+       Op386ADDSSmem
+       Op386ADDSDmem
+       Op386SUBSSmem
+       Op386SUBSDmem
+       Op386MULSSmem
+       Op386MULSDmem
        Op386ADDL
        Op386ADDLconst
        Op386ADDLcarry
@@ -318,6 +324,11 @@ const (
        Op386ROLLconst
        Op386ROLWconst
        Op386ROLBconst
+       Op386ADDLmem
+       Op386SUBLmem
+       Op386ANDLmem
+       Op386ORLmem
+       Op386XORLmem
        Op386NEGL
        Op386NOTL
        Op386BSFL
@@ -2487,6 +2498,114 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "ADDSSmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AADDSS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
+       {
+               name:           "ADDSDmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AADDSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
+       {
+               name:           "SUBSSmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.ASUBSS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
+       {
+               name:           "SUBSDmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.ASUBSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
+       {
+               name:           "MULSSmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AMULSS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
+       {
+               name:           "MULSDmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AMULSD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
+                       },
+               },
+       },
        {
                name:         "ADDL",
                argLen:       2,
@@ -3437,6 +3556,101 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "ADDLmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               clobberFlags:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AADDL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239},   // AX CX DX BX BP SI DI
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
+       {
+               name:           "SUBLmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               clobberFlags:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.ASUBL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239},   // AX CX DX BX BP SI DI
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
+       {
+               name:           "ANDLmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               clobberFlags:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AANDL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239},   // AX CX DX BX BP SI DI
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
+       {
+               name:           "ORLmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               clobberFlags:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239},   // AX CX DX BX BP SI DI
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
+       {
+               name:           "XORLmem",
+               auxType:        auxSymOff,
+               argLen:         3,
+               resultInArg0:   true,
+               clobberFlags:   true,
+               faultOnNilArg1: true,
+               symEffect:      SymRead,
+               asm:            x86.AXORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239},   // AX CX DX BX BP SI DI
+                               {1, 65791}, // AX CX DX BX SP BP SI DI SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
        {
                name:         "NEGL",
                argLen:       1,
index a1ea307121f93b9a632ca7542555affb797cd438..cebc016486f6e7c424f8c2dbba303f9b56f5f704 100644 (file)
@@ -23,10 +23,22 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_Op386ADDLcarry_0(v)
        case Op386ADDLconst:
                return rewriteValue386_Op386ADDLconst_0(v)
+       case Op386ADDLmem:
+               return rewriteValue386_Op386ADDLmem_0(v)
+       case Op386ADDSD:
+               return rewriteValue386_Op386ADDSD_0(v)
+       case Op386ADDSDmem:
+               return rewriteValue386_Op386ADDSDmem_0(v)
+       case Op386ADDSS:
+               return rewriteValue386_Op386ADDSS_0(v)
+       case Op386ADDSSmem:
+               return rewriteValue386_Op386ADDSSmem_0(v)
        case Op386ANDL:
                return rewriteValue386_Op386ANDL_0(v)
        case Op386ANDLconst:
                return rewriteValue386_Op386ANDLconst_0(v)
+       case Op386ANDLmem:
+               return rewriteValue386_Op386ANDLmem_0(v)
        case Op386CMPB:
                return rewriteValue386_Op386CMPB_0(v)
        case Op386CMPBconst:
@@ -141,6 +153,14 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_Op386MULL_0(v)
        case Op386MULLconst:
                return rewriteValue386_Op386MULLconst_0(v) || rewriteValue386_Op386MULLconst_10(v) || rewriteValue386_Op386MULLconst_20(v) || rewriteValue386_Op386MULLconst_30(v)
+       case Op386MULSD:
+               return rewriteValue386_Op386MULSD_0(v)
+       case Op386MULSDmem:
+               return rewriteValue386_Op386MULSDmem_0(v)
+       case Op386MULSS:
+               return rewriteValue386_Op386MULSS_0(v)
+       case Op386MULSSmem:
+               return rewriteValue386_Op386MULSSmem_0(v)
        case Op386NEGL:
                return rewriteValue386_Op386NEGL_0(v)
        case Op386NOTL:
@@ -149,6 +169,8 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_Op386ORL_0(v) || rewriteValue386_Op386ORL_10(v) || rewriteValue386_Op386ORL_20(v) || rewriteValue386_Op386ORL_30(v) || rewriteValue386_Op386ORL_40(v) || rewriteValue386_Op386ORL_50(v)
        case Op386ORLconst:
                return rewriteValue386_Op386ORLconst_0(v)
+       case Op386ORLmem:
+               return rewriteValue386_Op386ORLmem_0(v)
        case Op386ROLBconst:
                return rewriteValue386_Op386ROLBconst_0(v)
        case Op386ROLLconst:
@@ -213,10 +235,22 @@ func rewriteValue386(v *Value) bool {
                return rewriteValue386_Op386SUBLcarry_0(v)
        case Op386SUBLconst:
                return rewriteValue386_Op386SUBLconst_0(v)
+       case Op386SUBLmem:
+               return rewriteValue386_Op386SUBLmem_0(v)
+       case Op386SUBSD:
+               return rewriteValue386_Op386SUBSD_0(v)
+       case Op386SUBSDmem:
+               return rewriteValue386_Op386SUBSDmem_0(v)
+       case Op386SUBSS:
+               return rewriteValue386_Op386SUBSS_0(v)
+       case Op386SUBSSmem:
+               return rewriteValue386_Op386SUBSSmem_0(v)
        case Op386XORL:
-               return rewriteValue386_Op386XORL_0(v)
+               return rewriteValue386_Op386XORL_0(v) || rewriteValue386_Op386XORL_10(v)
        case Op386XORLconst:
                return rewriteValue386_Op386XORLconst_0(v)
+       case Op386XORLmem:
+               return rewriteValue386_Op386XORLmem_0(v)
        case OpAdd16:
                return rewriteValue386_OpAdd16_0(v)
        case OpAdd32:
@@ -1194,6 +1228,58 @@ func rewriteValue386_Op386ADDL_20(v *Value) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADDL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ADDLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ADDLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ADDL x (NEGL y))
        // cond:
        // result: (SUBL x y)
@@ -1445,6 +1531,296 @@ func rewriteValue386_Op386ADDLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386ADDLmem_0(v *Value) bool { // Tries two address-folding rewrites on an ADDLmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (ADDLmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDLmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386ADDLmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (ADDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386ADDLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ADDSD_0(v *Value) bool { // Tries to fold a MOVSDload operand (in either position) of an ADDSD into an ADDSDmem; returns true if v was rewritten in place.
+       // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSDmem x [off] {sym} ptr mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               _ = v.Args[1] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               x := v.Args[0]
+               l := v.Args[1] // candidate load, here the second operand
+               if l.Op != Op386MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) { // helpers defined elsewhere; clobber(l) is only evaluated after canMergeLoad returned true
+                       break
+               }
+               v.reset(Op386ADDSDmem)
+               v.AuxInt = off // the load's offset and symbol move onto v
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr) // the load's pointer and memory operands become v's own
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDSD l:(MOVSDload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSDmem x [off] {sym} ptr mem)
+       for { // mirror of the rule above with the load as the first operand
+               _ = v.Args[1]
+               l := v.Args[0] // candidate load, here the first operand
+               if l.Op != Op386MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ADDSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x) // x still goes first, so both rules produce the same operand order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ADDSDmem_0(v *Value) bool { // Tries two address-folding rewrites on an ADDSDmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (ADDSDmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDSDmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386ADDSDmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (ADDSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386ADDSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ADDSS_0(v *Value) bool { // Tries to fold a MOVSSload operand (in either position) of an ADDSS into an ADDSSmem; returns true if v was rewritten in place.
+       // match: (ADDSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSSmem x [off] {sym} ptr mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               _ = v.Args[1] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               x := v.Args[0]
+               l := v.Args[1] // candidate load, here the second operand
+               if l.Op != Op386MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) { // helpers defined elsewhere; clobber(l) is only evaluated after canMergeLoad returned true
+                       break
+               }
+               v.reset(Op386ADDSSmem)
+               v.AuxInt = off // the load's offset and symbol move onto v
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr) // the load's pointer and memory operands become v's own
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDSS l:(MOVSSload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ADDSSmem x [off] {sym} ptr mem)
+       for { // mirror of the rule above with the load as the first operand
+               _ = v.Args[1]
+               l := v.Args[0] // candidate load, here the first operand
+               if l.Op != Op386MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ADDSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x) // x still goes first, so both rules produce the same operand order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ADDSSmem_0(v *Value) bool { // Tries two address-folding rewrites on an ADDSSmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (ADDSSmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDSSmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386ADDSSmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (ADDSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386ADDSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_Op386ANDL_0(v *Value) bool {
        // match: (ANDL x (MOVLconst [c]))
        // cond:
@@ -1478,6 +1854,58 @@ func rewriteValue386_Op386ANDL_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ANDL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ANDLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ANDLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ANDLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ANDLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (ANDL x x)
        // cond:
        // result: x
@@ -1553,6 +1981,66 @@ func rewriteValue386_Op386ANDLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386ANDLmem_0(v *Value) bool { // Tries two address-folding rewrites on an ANDLmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (ANDLmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ANDLmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386ANDLmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (ANDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386ANDLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_Op386CMPB_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -8825,6 +9313,236 @@ func rewriteValue386_Op386MULLconst_30(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386MULSD_0(v *Value) bool { // Tries to fold a MOVSDload operand (in either position) of a MULSD into a MULSDmem; returns true if v was rewritten in place.
+       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               _ = v.Args[1] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               x := v.Args[0]
+               l := v.Args[1] // candidate load, here the second operand
+               if l.Op != Op386MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) { // helpers defined elsewhere; clobber(l) is only evaluated after canMergeLoad returned true
+                       break
+               }
+               v.reset(Op386MULSDmem)
+               v.AuxInt = off // the load's offset and symbol move onto v
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr) // the load's pointer and memory operands become v's own
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for { // mirror of the rule above with the load as the first operand
+               _ = v.Args[1]
+               l := v.Args[0] // candidate load, here the first operand
+               if l.Op != Op386MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386MULSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x) // x still goes first, so both rules produce the same operand order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MULSDmem_0(v *Value) bool { // Tries two address-folding rewrites on a MULSDmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (MULSDmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MULSDmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386MULSDmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MULSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386MULSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MULSS_0(v *Value) bool { // Tries to fold a MOVSSload operand (in either position) of a MULSS into a MULSSmem; returns true if v was rewritten in place.
+       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               _ = v.Args[1] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               x := v.Args[0]
+               l := v.Args[1] // candidate load, here the second operand
+               if l.Op != Op386MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) { // helpers defined elsewhere; clobber(l) is only evaluated after canMergeLoad returned true
+                       break
+               }
+               v.reset(Op386MULSSmem)
+               v.AuxInt = off // the load's offset and symbol move onto v
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr) // the load's pointer and memory operands become v's own
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for { // mirror of the rule above with the load as the first operand
+               _ = v.Args[1]
+               l := v.Args[0] // candidate load, here the first operand
+               if l.Op != Op386MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386MULSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x) // x still goes first, so both rules produce the same operand order
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386MULSSmem_0(v *Value) bool { // Tries two address-folding rewrites on a MULSSmem value; returns true if v was rewritten in place, false if neither rule matched.
+       b := v.Block
+       _ = b // blank assignment marks b as used
+       config := b.Func.Config
+       _ = config // blank assignment marks config as used; it is also read by the LEAL rule below
+       // match: (MULSSmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MULSSmem [off1+off2] {sym} val base mem)
+       for { // runs at most once: every path either breaks to the next rule or returns
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // index the last operand up front (presumably a bounds-check hint -- confirm)
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be an ADDLconst
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) { // is32Bit is defined elsewhere; presumably rejects a combined offset that does not fit in 32 bits -- confirm
+                       break
+               }
+               v.reset(Op386MULSSmem) // same opcode; v is rebuilt with the constant folded into AuxInt
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base) // the ADDLconst's own operand replaces the ADDLconst
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MULSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for { // second rule, same run-once shape as the first
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1] // second operand; this rule needs it to be a LEAL
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { // last clause: no fold when base is SB and Flag_shared is set
+                       break
+               }
+               v.reset(Op386MULSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2) // mergeSym is defined elsewhere; only reached after canMergeSym returned true
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_Op386NEGL_0(v *Value) bool {
        // match: (NEGL (MOVLconst [c]))
        // cond:
@@ -8858,10 +9576,6 @@ func rewriteValue386_Op386NOTL_0(v *Value) bool {
        return false
 }
 func rewriteValue386_Op386ORL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
        // match: (ORL x (MOVLconst [c]))
        // cond:
        // result: (ORLconst [c] x)
@@ -9060,6 +9774,65 @@ func rewriteValue386_Op386ORL_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (ORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386ORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386ORL_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
        // match: (ORL x x)
        // cond:
        // result: x
@@ -9123,13 +9896,6 @@ func rewriteValue386_Op386ORL_0(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386ORL_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
        // match: (ORL s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem))
        // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
        // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
@@ -9645,6 +10411,11 @@ func rewriteValue386_Op386ORL_10(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValue386_Op386ORL_20(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)))
        // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
        // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -9753,11 +10524,6 @@ func rewriteValue386_Op386ORL_10(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386ORL_20(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem))
        // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0)
        // result: @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -10340,6 +11106,11 @@ func rewriteValue386_Op386ORL_20(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValue386_Op386ORL_30(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -10508,11 +11279,6 @@ func rewriteValue386_Op386ORL_20(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386ORL_30(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -11185,6 +11951,11 @@ func rewriteValue386_Op386ORL_30(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValue386_Op386ORL_40(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -11353,11 +12124,6 @@ func rewriteValue386_Op386ORL_30(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386ORL_40(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -12030,6 +12796,11 @@ func rewriteValue386_Op386ORL_40(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValue386_Op386ORL_50(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -12198,11 +12969,6 @@ func rewriteValue386_Op386ORL_40(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValue386_Op386ORL_50(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)))
        // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0)
        // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 <v.Type> [i0] {s} p idx mem)
@@ -12668,6 +13434,66 @@ func rewriteValue386_Op386ORLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386ORLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (ORLmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ORLmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386ORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (ORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386ORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_Op386ROLBconst_0(v *Value) bool {
        // match: (ROLBconst [c] (ROLBconst [d] x))
        // cond:
@@ -13961,134 +14787,372 @@ func rewriteValue386_Op386SHRW_0(v *Value) bool {
        // cond: c&31 >= 16
        // result: (MOVLconst [0])
        for {
-               _ = v.Args[1]
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c&31 >= 16) {
+                       break
+               }
+               v.reset(Op386MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386SHRWconst_0(v *Value) bool {
+       // match: (SHRWconst x [0])
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386SUBL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBL x (MOVLconst [c]))
+       // cond:
+       // result: (SUBLconst x [c])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(Op386SUBLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBL (MOVLconst [c]) x)
+       // cond:
+       // result: (NEGL (SUBLconst <v.Type> x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(Op386NEGL)
+               v0 := b.NewValue0(v.Pos, Op386SUBLconst, v.Type)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUBL x x)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(Op386MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386SUBLcarry_0(v *Value) bool {
+       // match: (SUBLcarry x (MOVLconst [c]))
+       // cond:
+       // result: (SUBLconstcarry [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(Op386SUBLconstcarry)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386SUBLconst_0(v *Value) bool {
+       // match: (SUBLconst [c] x)
+       // cond: int32(c) == 0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBLconst [c] x)
+       // cond:
+       // result: (ADDLconst [int64(int32(-c))] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               v.reset(Op386ADDLconst)
+               v.AuxInt = int64(int32(-c))
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValue386_Op386SUBLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SUBLmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBLmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if v_1.Op != Op386LEAL {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 >= 16) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(Op386MOVLconst)
-               v.AuxInt = 0
+               v.reset(Op386SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386SHRWconst_0(v *Value) bool {
-       // match: (SHRWconst x [0])
-       // cond:
-       // result: x
+func rewriteValue386_Op386SUBSD_0(v *Value) bool {
+       // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSDmem x [off] {sym} ptr mem)
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVSDload {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386SUBSDmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386SUBL_0(v *Value) bool {
+func rewriteValue386_Op386SUBSDmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SUBL x (MOVLconst [c]))
-       // cond:
-       // result: (SUBLconst x [c])
+       config := b.Func.Config
+       _ = config
+       // match: (SUBSDmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSDmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               if v_1.Op != Op386ADDLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386SUBLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUBL (MOVLconst [c]) x)
-       // cond:
-       // result: (NEGL (SUBLconst <v.Type> x [c]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != Op386MOVLconst {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(Op386NEGL)
-               v0 := b.NewValue0(v.Pos, Op386SUBLconst, v.Type)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(Op386SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SUBL x x)
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SUBSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386LEAL {
                        break
                }
-               v.reset(Op386MOVLconst)
-               v.AuxInt = 0
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386SUBLcarry_0(v *Value) bool {
-       // match: (SUBLcarry x (MOVLconst [c]))
-       // cond:
-       // result: (SUBLconstcarry [c] x)
+func rewriteValue386_Op386SUBSS_0(v *Value) bool {
+       // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSSmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != Op386MOVLconst {
+               l := v.Args[1]
+               if l.Op != Op386MOVSSload {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(Op386SUBLconstcarry)
-               v.AuxInt = c
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386SUBSSmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValue386_Op386SUBLconst_0(v *Value) bool {
-       // match: (SUBLconst [c] x)
-       // cond: int32(c) == 0
-       // result: x
+func rewriteValue386_Op386SUBSSmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SUBSSmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSSmem [off1+off2] {sym} val base mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SUBLconst [c] x)
-       // cond:
-       // result: (ADDLconst [int64(int32(-c))] x)
+       // match: (SUBSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               v.reset(Op386ADDLconst)
-               v.AuxInt = int64(int32(-c))
-               v.AddArg(x)
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
+       return false
 }
 func rewriteValue386_Op386XORL_0(v *Value) bool {
        // match: (XORL x (MOVLconst [c]))
@@ -14289,6 +15353,61 @@ func rewriteValue386_Op386XORL_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386XORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORL l:(MOVLload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != Op386MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(Op386XORLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValue386_Op386XORL_10(v *Value) bool {
        // match: (XORL x x)
        // cond:
        // result: (MOVLconst [0])
@@ -14351,6 +15470,66 @@ func rewriteValue386_Op386XORLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValue386_Op386XORLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (XORLmem [off1] {sym} val (ADDLconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XORLmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386ADDLconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(Op386XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != Op386LEAL {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(Op386XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValue386_OpAdd16_0(v *Value) bool {
        // match: (Add16 x y)
        // cond:
@@ -20230,4 +21409,4 @@ func rewriteBlock386(b *Block) bool {
                }
        }
        return false
-}
+}
\ No newline at end of file
index 5f456a146d4fd17908f0d4d55808d0ea0736e40c..91cf70ff89219f3428a646388287db32a0b008cb 100644 (file)
@@ -510,6 +510,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                gc.AddAux(&p.From, v)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
+       case ssa.Op386ADDLmem, ssa.Op386SUBLmem, ssa.Op386ANDLmem, ssa.Op386ORLmem, ssa.Op386XORLmem,
+               ssa.Op386ADDSDmem, ssa.Op386ADDSSmem, ssa.Op386SUBSDmem, ssa.Op386SUBSSmem, ssa.Op386MULSDmem, ssa.Op386MULSSmem:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_MEM
+               p.From.Reg = v.Args[1].Reg()
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+               if v.Reg() != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
        case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG