Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: introduce more read-modify-write operations for amd64
author Ben Shi <powerman1st@163.com>
Fri, 29 Jun 2018 02:11:53 +0000 (02:11 +0000)
committer Ben Shi <powerman1st@163.com>
Fri, 24 Aug 2018 23:38:25 +0000 (23:38 +0000)
Add support for read-modify-write ADD/SUB/AND/OR/XOR operations on amd64.

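In source terms, these ops let the compiler collapse a load/op/store
triple on the same address into one memory-destination instruction.
A minimal sketch in the spirit of the test/codegen checks touched by
this CL (the function and the register choices are illustrative, not
the literal test):

	func f(p *int64, v int64) {
		*p += v // before: MOVQ (AX), CX; ADDQ BX, CX; MOVQ CX, (AX)
		        // after:  ADDQ BX, (AX)
	}
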
1. The total size of pkg/linux_amd64 decreases by about 4KB,
excluding cmd/compile.

2. The go1 benchmark shows a small overall improvement, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              2.63s ± 3%     2.65s ± 4%   +1.01%  (p=0.037 n=35+35)
Fannkuch11-4                2.33s ± 2%     2.39s ± 2%   +2.49%  (p=0.000 n=35+35)
FmtFprintfEmpty-4          45.4ns ± 5%    40.8ns ± 6%  -10.09%  (p=0.000 n=35+35)
FmtFprintfString-4         73.3ns ± 4%    70.9ns ± 3%   -3.23%  (p=0.000 n=30+35)
FmtFprintfInt-4            79.9ns ± 4%    79.5ns ± 3%     ~     (p=0.736 n=34+35)
FmtFprintfIntInt-4          126ns ± 4%     125ns ± 4%     ~     (p=0.083 n=35+35)
FmtFprintfPrefixedInt-4     152ns ± 6%     152ns ± 3%     ~     (p=0.855 n=34+35)
FmtFprintfFloat-4           215ns ± 4%     213ns ± 4%     ~     (p=0.066 n=35+35)
FmtManyArgs-4               522ns ± 3%     506ns ± 3%   -3.15%  (p=0.000 n=35+35)
GobDecode-4                6.45ms ± 8%    6.51ms ± 7%   +0.96%  (p=0.026 n=35+35)
GobEncode-4                6.10ms ± 6%    6.02ms ± 8%     ~     (p=0.160 n=35+35)
Gzip-4                      228ms ± 3%     221ms ± 3%   -2.92%  (p=0.000 n=35+35)
Gunzip-4                   37.5ms ± 4%    37.2ms ± 3%   -0.78%  (p=0.036 n=35+35)
HTTPClientServer-4         58.7µs ± 2%    59.2µs ± 1%   +0.80%  (p=0.000 n=33+33)
JSONEncode-4               12.0ms ± 3%    12.2ms ± 3%   +1.84%  (p=0.008 n=35+35)
JSONDecode-4               57.0ms ± 4%    56.6ms ± 3%     ~     (p=0.320 n=35+35)
Mandelbrot200-4            3.82ms ± 3%    3.79ms ± 3%     ~     (p=0.074 n=35+35)
GoParse-4                  3.21ms ± 5%    3.24ms ± 4%     ~     (p=0.119 n=35+35)
RegexpMatchEasy0_32-4      76.3ns ± 4%    75.4ns ± 4%   -1.14%  (p=0.014 n=34+33)
RegexpMatchEasy0_1K-4       251ns ± 4%     254ns ± 3%   +1.28%  (p=0.016 n=35+35)
RegexpMatchEasy1_32-4      69.6ns ± 3%    70.1ns ± 3%   +0.82%  (p=0.005 n=35+35)
RegexpMatchEasy1_1K-4       367ns ± 4%     376ns ± 4%   +2.47%  (p=0.000 n=35+35)
RegexpMatchMedium_32-4      108ns ± 5%     104ns ± 4%   -3.18%  (p=0.000 n=35+35)
RegexpMatchMedium_1K-4     33.8µs ± 3%    32.7µs ± 3%   -3.27%  (p=0.000 n=35+35)
RegexpMatchHard_32-4       1.55µs ± 3%    1.52µs ± 3%   -1.64%  (p=0.000 n=35+35)
RegexpMatchHard_1K-4       46.6µs ± 3%    46.6µs ± 4%     ~     (p=0.149 n=35+35)
Revcomp-4                   416ms ± 7%     412ms ± 6%   -0.95%  (p=0.033 n=33+35)
Template-4                 64.3ms ± 3%    62.4ms ± 7%   -2.94%  (p=0.000 n=35+35)
TimeParse-4                 320ns ± 2%     322ns ± 3%     ~     (p=0.589 n=35+35)
TimeFormat-4                300ns ± 3%     300ns ± 3%     ~     (p=0.597 n=35+35)
[Geo mean]                 47.4µs         47.0µs        -0.86%

name                     old speed      new speed      delta
GobDecode-4               119MB/s ± 7%   118MB/s ± 7%   -0.96%  (p=0.027 n=35+35)
GobEncode-4               126MB/s ± 7%   127MB/s ± 6%     ~     (p=0.157 n=34+34)
Gzip-4                   85.3MB/s ± 3%  87.9MB/s ± 3%   +3.02%  (p=0.000 n=35+35)
Gunzip-4                  518MB/s ± 4%   522MB/s ± 3%   +0.79%  (p=0.037 n=35+35)
JSONEncode-4              162MB/s ± 3%   159MB/s ± 3%   -1.81%  (p=0.009 n=35+35)
JSONDecode-4             34.1MB/s ± 4%  34.3MB/s ± 3%     ~     (p=0.318 n=35+35)
GoParse-4                18.0MB/s ± 5%  17.9MB/s ± 4%     ~     (p=0.117 n=35+35)
RegexpMatchEasy0_32-4     419MB/s ± 3%   425MB/s ± 4%   +1.46%  (p=0.003 n=32+33)
RegexpMatchEasy0_1K-4    4.07GB/s ± 4%  4.02GB/s ± 3%   -1.28%  (p=0.014 n=35+35)
RegexpMatchEasy1_32-4     460MB/s ± 3%   456MB/s ± 4%   -0.82%  (p=0.004 n=35+35)
RegexpMatchEasy1_1K-4    2.79GB/s ± 4%  2.72GB/s ± 4%   -2.39%  (p=0.000 n=35+35)
RegexpMatchMedium_32-4   9.23MB/s ± 4%  9.53MB/s ± 4%   +3.16%  (p=0.000 n=35+35)
RegexpMatchMedium_1K-4   30.3MB/s ± 3%  31.3MB/s ± 3%   +3.38%  (p=0.000 n=35+35)
RegexpMatchHard_32-4     20.7MB/s ± 3%  21.0MB/s ± 3%   +1.67%  (p=0.000 n=35+35)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.9MB/s ± 4%     ~     (p=0.277 n=35+33)
Revcomp-4                 612MB/s ± 7%   618MB/s ± 6%   +0.96%  (p=0.034 n=33+35)
Template-4               30.2MB/s ± 3%  31.1MB/s ± 6%   +3.05%  (p=0.000 n=35+35)
[Geo mean]                123MB/s        124MB/s        +0.64%

Change-Id: Ia025da272e07d0069413824bfff3471b106d6280
Reviewed-on: https://go-review.googlesource.com/121535
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/arithmetic.go

index 4ecdb769f3a46fcf7432044b4e296c032c976845..ae6141dd12ebd3e6e49549537190357ec210e5ba 100644 (file)
@@ -699,7 +699,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                gc.AddAux(&p.From, v)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
-       case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
+       case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
+               ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
+               ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[1].Reg()
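
For the new modify ops this shared path emits the value register as
From; the remainder of the store path (just past the lines shown)
supplies the memory operand. A condensed sketch of the whole
emission, assuming the usual store-emission code in ssaGenValue:

	p := s.Prog(v.Op.Asm())      // e.g. x86.AADDQ for ADDQmodify
	p.From.Type = obj.TYPE_REG
	p.From.Reg = v.Args[1].Reg() // the value operand
	p.To.Type = obj.TYPE_MEM
	p.To.Reg = v.Args[0].Reg()   // the memory base
	gc.AddAux(&p.To, v)          // fold auxint and sym into the address
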
index eab66d17abc34185785cc9d9be5c28678a32f79c..10d917632e0ceee886b28bc8dd9448324aa41e99 100644 (file)
        ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
        ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+       ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
+((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+       ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
 
 // Fold constants into stores.
 (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
        && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
        ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+       ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 
 // generating indexed loads and stores
 (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 ((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
+(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+       ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+       ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
 
 // Merge ADDQconst and LEAQ into atomic loads.
 (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
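
Two rule families are added here: the first folds ADDQconst/LEAQ
address arithmetic into the modify ops' offset (guarded by is32Bit
and canMergeSym), and the second fuses a store of an op whose input
was loaded from the same address into a single modify op, provided
the intermediate values have no other uses. In Go source the fused
shape looks like this hypothetical function:

	func g(p *uint32, m uint32) {
		// MOVLload + ANDL + MOVLstore becomes ANDLmodify once
		// the load and the ANDL each have a single use.
		*p &= m
	}
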
index 4735ea1bc08f49a80152f4f63e4df4726e2756bb..512df99694c9bedcffe33485474b186e918176ae 100644 (file)
@@ -346,6 +346,18 @@ func init() {
                {name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
                {name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from  arg1+auxint+aux, arg2 = mem
 
+               // direct binary-op on memory (read-modify-write)
+               {name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
+               {name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
+               {name: "ANDQmodify", argLength: 3, reg: gpstore, asm: "ANDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem
+               {name: "ORQmodify", argLength: 3, reg: gpstore, asm: "ORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},   // *(arg0+auxint+aux) |= arg1, arg2=mem
+               {name: "XORQmodify", argLength: 3, reg: gpstore, asm: "XORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
+               {name: "ADDLmodify", argLength: 3, reg: gpstore, asm: "ADDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
+               {name: "SUBLmodify", argLength: 3, reg: gpstore, asm: "SUBL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
+               {name: "ANDLmodify", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem
+               {name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},   // *(arg0+auxint+aux) |= arg1, arg2=mem
+               {name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
+
                // unary ops
                {name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0
                {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
index 704792c9af554d6c6d78472fcd0a0ee1c596cc85..374949c60231b6f87935fe7c9434c81a4e91ee18 100644 (file)
@@ -600,6 +600,16 @@ const (
        OpAMD64ORLload
        OpAMD64XORQload
        OpAMD64XORLload
+       OpAMD64ADDQmodify
+       OpAMD64SUBQmodify
+       OpAMD64ANDQmodify
+       OpAMD64ORQmodify
+       OpAMD64XORQmodify
+       OpAMD64ADDLmodify
+       OpAMD64SUBLmodify
+       OpAMD64ANDLmodify
+       OpAMD64ORLmodify
+       OpAMD64XORLmodify
        OpAMD64NEGQ
        OpAMD64NEGL
        OpAMD64NOTQ
@@ -7661,6 +7671,156 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "ADDQmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AADDQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "SUBQmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.ASUBQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ANDQmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AANDQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ORQmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AORQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "XORQmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AXORQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ADDLmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AADDL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "SUBLmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.ASUBL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ANDLmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AANDL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ORLmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "XORLmodify",
+               auxType:        auxSymOff,
+               argLen:         3,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AXORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 65535},      // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
        {
                name:         "NEGQ",
                argLen:       1,
index 245f795d900598224956ac67e7a1b212dfce6f5a..e592610c26577c2798ff45da1fae9f684233a717 100644 (file)
@@ -23,6 +23,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ADDLconstmodify_0(v)
        case OpAMD64ADDLload:
                return rewriteValueAMD64_OpAMD64ADDLload_0(v)
+       case OpAMD64ADDLmodify:
+               return rewriteValueAMD64_OpAMD64ADDLmodify_0(v)
        case OpAMD64ADDQ:
                return rewriteValueAMD64_OpAMD64ADDQ_0(v) || rewriteValueAMD64_OpAMD64ADDQ_10(v) || rewriteValueAMD64_OpAMD64ADDQ_20(v)
        case OpAMD64ADDQconst:
@@ -31,6 +33,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ADDQconstmodify_0(v)
        case OpAMD64ADDQload:
                return rewriteValueAMD64_OpAMD64ADDQload_0(v)
+       case OpAMD64ADDQmodify:
+               return rewriteValueAMD64_OpAMD64ADDQmodify_0(v)
        case OpAMD64ADDSD:
                return rewriteValueAMD64_OpAMD64ADDSD_0(v)
        case OpAMD64ADDSDload:
@@ -47,6 +51,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
        case OpAMD64ANDLload:
                return rewriteValueAMD64_OpAMD64ANDLload_0(v)
+       case OpAMD64ANDLmodify:
+               return rewriteValueAMD64_OpAMD64ANDLmodify_0(v)
        case OpAMD64ANDQ:
                return rewriteValueAMD64_OpAMD64ANDQ_0(v)
        case OpAMD64ANDQconst:
@@ -55,6 +61,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
        case OpAMD64ANDQload:
                return rewriteValueAMD64_OpAMD64ANDQload_0(v)
+       case OpAMD64ANDQmodify:
+               return rewriteValueAMD64_OpAMD64ANDQmodify_0(v)
        case OpAMD64BSFQ:
                return rewriteValueAMD64_OpAMD64BSFQ_0(v)
        case OpAMD64BTLconst:
@@ -224,7 +232,7 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAMD64MOVLloadidx8:
                return rewriteValueAMD64_OpAMD64MOVLloadidx8_0(v)
        case OpAMD64MOVLstore:
-               return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v)
+               return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v) || rewriteValueAMD64_OpAMD64MOVLstore_20(v) || rewriteValueAMD64_OpAMD64MOVLstore_30(v)
        case OpAMD64MOVLstoreconst:
                return rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v)
        case OpAMD64MOVLstoreconstidx1:
@@ -254,7 +262,7 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAMD64MOVQloadidx8:
                return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
        case OpAMD64MOVQstore:
-               return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
+               return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v)
        case OpAMD64MOVQstoreconst:
                return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
        case OpAMD64MOVQstoreconstidx1:
@@ -345,6 +353,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
        case OpAMD64ORLload:
                return rewriteValueAMD64_OpAMD64ORLload_0(v)
+       case OpAMD64ORLmodify:
+               return rewriteValueAMD64_OpAMD64ORLmodify_0(v)
        case OpAMD64ORQ:
                return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
        case OpAMD64ORQconst:
@@ -353,6 +363,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
        case OpAMD64ORQload:
                return rewriteValueAMD64_OpAMD64ORQload_0(v)
+       case OpAMD64ORQmodify:
+               return rewriteValueAMD64_OpAMD64ORQmodify_0(v)
        case OpAMD64ROLB:
                return rewriteValueAMD64_OpAMD64ROLB_0(v)
        case OpAMD64ROLBconst:
@@ -467,12 +479,16 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SUBLconst_0(v)
        case OpAMD64SUBLload:
                return rewriteValueAMD64_OpAMD64SUBLload_0(v)
+       case OpAMD64SUBLmodify:
+               return rewriteValueAMD64_OpAMD64SUBLmodify_0(v)
        case OpAMD64SUBQ:
                return rewriteValueAMD64_OpAMD64SUBQ_0(v)
        case OpAMD64SUBQconst:
                return rewriteValueAMD64_OpAMD64SUBQconst_0(v)
        case OpAMD64SUBQload:
                return rewriteValueAMD64_OpAMD64SUBQload_0(v)
+       case OpAMD64SUBQmodify:
+               return rewriteValueAMD64_OpAMD64SUBQmodify_0(v)
        case OpAMD64SUBSD:
                return rewriteValueAMD64_OpAMD64SUBSD_0(v)
        case OpAMD64SUBSDload:
@@ -513,6 +529,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
        case OpAMD64XORLload:
                return rewriteValueAMD64_OpAMD64XORLload_0(v)
+       case OpAMD64XORLmodify:
+               return rewriteValueAMD64_OpAMD64XORLmodify_0(v)
        case OpAMD64XORQ:
                return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
        case OpAMD64XORQconst:
@@ -521,6 +539,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
        case OpAMD64XORQload:
                return rewriteValueAMD64_OpAMD64XORQload_0(v)
+       case OpAMD64XORQmodify:
+               return rewriteValueAMD64_OpAMD64XORQmodify_0(v)
        case OpAdd16:
                return rewriteValueAMD64_OpAdd16_0(v)
        case OpAdd32:
@@ -2038,6 +2058,62 @@ func rewriteValueAMD64_OpAMD64ADDLload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ADDLmodify_0(v *Value) bool {
+       // match: (ADDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDLmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ADDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ADDQ_0(v *Value) bool {
        // match: (ADDQ x (MOVQconst [c]))
        // cond: is32Bit(c)
@@ -2902,6 +2978,62 @@ func rewriteValueAMD64_OpAMD64ADDQload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ADDQmodify_0(v *Value) bool {
+       // match: (ADDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ADDQmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ADDQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ADDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool {
        // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
@@ -3643,6 +3775,62 @@ func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ANDLmodify_0(v *Value) bool {
+       // match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ANDLmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ANDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -4108,6 +4296,62 @@ func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ANDQmodify_0(v *Value) bool {
+       // match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ANDQmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ANDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -14574,6 +14818,548 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ADDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDLload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ANDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDLload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORLload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (XORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORLload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ADDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDL {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ADDL x l:(MOVLload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ADDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDL {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool {
+       // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (SUBLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SUBL {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ANDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ANDL x l:(MOVLload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ANDLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORL {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(ORL x l:(MOVLload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORL {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (XORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORL {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore {sym} [off] ptr y:(XORL x l:(MOVLload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (XORLmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORL {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
        // result: (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -14691,6 +15477,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool {
        // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
        // result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -16677,6 +17466,551 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDQload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDQload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
+       // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (ORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORQload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem)
+       // cond: y.Uses==1 && clobber(y)
+       // result: (XORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORQload {
+                       break
+               }
+               if y.AuxInt != off {
+                       break
+               }
+               if y.Aux != sym {
+                       break
+               }
+               _ = y.Args[2]
+               x := y.Args[0]
+               if ptr != y.Args[1] {
+                       break
+               }
+               mem := y.Args[2]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && clobber(y)) {
+                       break
+               }
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ADDQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ADDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (SUBQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SUBQ {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDQ {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ANDQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ANDQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ANDQ {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORQ {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(ORQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (ORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64ORQ {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (XORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORQ {
+                       break
+               }
+               _ = y.Args[1]
+               l := y.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               x := y.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
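Note the asymmetry in the rules above and below: for the commutative
ops (ADDQ, ANDQ, ORQ, XORQ) the generator emits mirror rules for both
operand orders of y, while SUBQ is matched only with the load as the
left operand, since x - *p is not expressible as a SUBQmodify. A
hedged sketch of the distinction (names and layout are mine, not from
this CL):

	func subForms(s []int64, x int64) {
		s[0] = s[0] + x // fuses: ADDQmodify
		s[1] = x + s[1] // also fuses: ADDQ is commutative
		s[2] = s[2] - x // fuses: SUBQmodify
		s[3] = x - s[3] // cannot fuse: SUBQmodify needs the memory value as minuend
	}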
+func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool {
+       // match: (MOVQstore {sym} [off] ptr y:(XORQ x l:(MOVQload [off] {sym} ptr mem)) mem)
+       // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+       // result: (XORQmodify [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64XORQ {
+                       break
+               }
+               _ = y.Args[1]
+               x := y.Args[0]
+               l := y.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               if ptr != l.Args[0] {
+                       break
+               }
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
        // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -16755,9 +18089,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
        // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
        // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
@@ -31479,6 +32810,62 @@ func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ORLmodify_0(v *Value) bool {
+       // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ORLmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ORLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
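Each new *modify op also gets the usual pair of addressing rewrites:
an ADDQconst base folds its constant into the displacement (guarded by
is32Bit), and a LEAQ base merges its symbol and offset into the aux
fields (guarded by canMergeSym). The same two rules are emitted,
mutatis mutandis, for the ORQ/SUBL/SUBQ/XORL/XORQ variants below. A
hedged sketch of source that exercises the offset fold; the struct and
the exact assembly are illustrative:

	type pair struct{ a, b uint32 }

	// sketch: t.b |= x addresses the field at offset 4, and the
	// ADDQconst rule lets the ORLmodify carry that offset in its
	// displacement, e.g. ORL AX, 4(BX).
	func orField(t *pair, x uint32) {
		t.b |= x
	}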
 func rewriteValueAMD64_OpAMD64ORQ_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -42440,6 +43827,62 @@ func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ORQmodify_0(v *Value) bool {
+       // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (ORQmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (ORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ORQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ROLB_0(v *Value) bool {
        // match: (ROLB x (NEGQ y))
        // cond:
@@ -51150,6 +52593,62 @@ func rewriteValueAMD64_OpAMD64SUBLload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64SUBLmodify_0(v *Value) bool {
+       // match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBLmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -51388,6 +52887,62 @@ func rewriteValueAMD64_OpAMD64SUBQload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64SUBQmodify_0(v *Value) bool {
+       // match: (SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBQmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
        // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
@@ -52988,6 +54543,62 @@ func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XORLmodify_0(v *Value) bool {
+       // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XORLmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -53454,6 +55065,62 @@ func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XORQmodify_0(v *Value) bool {
+       // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XORQmodify [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORQmodify)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
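For reference, the is32Bit guard used by all of these folds only
checks that the combined displacement still fits amd64's signed 32-bit
offset field. The helper already exists in rewrite.go; this is my
recollection of its shape, shown as an unverified sketch rather than a
quote of this CL:

	// assumed shape of the existing helper (not added by this CL)
	func is32Bit(n int64) bool {
		return n == int64(int32(n))
	}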
 func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
        // match: (Add16 x y)
        // cond:
index 32efcaaa3fb7b48ecd25eec6d1f3f1ea78d1d7b7..09a2fa091e803f50fb85bdd949df2f17bd412d4a 100644 (file)
@@ -16,8 +16,10 @@ package codegen
 
 func SubMem(arr []int, b int) int {
        // 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
+       // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
        arr[2] -= b
        // 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
+       // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
        arr[3] -= b
        // 386:`DECL\s16\([A-Z]+\)`
        arr[4]--
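These asmcheck comments are verified by the codegen harness under
test/ when the compiler tests run. A hypothetical companion check for
the new OR fusion, written in the same style; the function and the
regexp are mine and are not part of this CL:

	func OrMem(arr []int64, b int64) int64 {
		// amd64:`ORQ\s[A-Z]+,\s8\([A-Z]+\)`
		arr[1] |= b
		return arr[1]
	}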