Cypherpunks repositories - gostls13.git / commitdiff
cmd/compile: optimize AMD64 with more read-modify-write operations
author     Ben Shi <powerman1st@163.com>
           Wed, 27 Jun 2018 02:46:17 +0000 (02:46 +0000)
committer  Ben Shi <powerman1st@163.com>
           Mon, 20 Aug 2018 14:18:39 +0000 (14:18 +0000)
This CL adds 6 more read-modify-write operations with a constant
source operand: AND, OR and XOR, each in 32-bit (L) and 64-bit (Q) form.
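
As a rough illustration of the transformation (the register choice is
arbitrary and the slice bounds check is omitted), take the same pattern
the new codegen test below checks:

	a[0] &= 200 // a is a []uint32

	// before: a load/modify/store triple, roughly
	//	MOVL	(AX), CX
	//	ANDL	$200, CX
	//	MOVL	CX, (AX)
	// after: a single read-modify-write instruction with a memory operand
	//	ANDL	$200, (AX)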

1. The total size of pkg/linux_amd64 decreases by about 3KB, excluding
cmd/compile.

2. The go1 benchmark shows a slight improvement overall.
name                     old time/op    new time/op    delta
BinaryTree17-4              2.61s ± 4%     2.67s ± 2%  +2.26%  (p=0.000 n=30+29)
Fannkuch11-4                2.39s ± 2%     2.32s ± 2%  -2.67%  (p=0.000 n=30+30)
FmtFprintfEmpty-4          44.0ns ± 4%    41.7ns ± 4%  -5.15%  (p=0.000 n=30+30)
FmtFprintfString-4         74.2ns ± 4%    72.3ns ± 4%  -2.59%  (p=0.000 n=30+30)
FmtFprintfInt-4            81.7ns ± 3%    78.8ns ± 4%  -3.54%  (p=0.000 n=27+30)
FmtFprintfIntInt-4          130ns ± 4%     124ns ± 5%  -4.60%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4     154ns ± 3%     152ns ± 3%  -1.13%  (p=0.012 n=30+30)
FmtFprintfFloat-4           215ns ± 4%     212ns ± 5%  -1.56%  (p=0.002 n=30+30)
FmtManyArgs-4               522ns ± 3%     512ns ± 3%  -1.84%  (p=0.001 n=30+30)
GobDecode-4                6.42ms ± 5%    6.49ms ± 7%    ~     (p=0.070 n=30+30)
GobEncode-4                6.07ms ± 8%    5.98ms ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                      236ms ± 4%     223ms ± 4%  -5.57%  (p=0.000 n=30+30)
Gunzip-4                   37.4ms ± 3%    36.7ms ± 4%  -2.03%  (p=0.000 n=30+30)
HTTPClientServer-4         58.7µs ± 1%    58.5µs ± 2%  -0.37%  (p=0.018 n=30+29)
JSONEncode-4               12.0ms ± 4%    12.1ms ± 3%    ~     (p=0.112 n=30+30)
JSONDecode-4               54.5ms ± 3%    55.5ms ± 4%  +1.80%  (p=0.006 n=30+30)
Mandelbrot200-4            3.78ms ± 4%    3.78ms ± 4%    ~     (p=0.173 n=30+30)
GoParse-4                  3.16ms ± 5%    3.22ms ± 5%  +1.75%  (p=0.010 n=30+30)
RegexpMatchEasy0_32-4      76.6ns ± 1%    75.9ns ± 3%    ~     (p=0.672 n=25+30)
RegexpMatchEasy0_1K-4       252ns ± 3%     253ns ± 3%  +0.57%  (p=0.027 n=30+30)
RegexpMatchEasy1_32-4      69.8ns ± 4%    70.2ns ± 6%    ~     (p=0.539 n=30+30)
RegexpMatchEasy1_1K-4       374ns ± 3%     373ns ± 5%    ~     (p=0.263 n=30+30)
RegexpMatchMedium_32-4      107ns ± 4%     109ns ± 3%    ~     (p=0.067 n=30+30)
RegexpMatchMedium_1K-4     33.9µs ± 5%    34.1µs ± 4%    ~     (p=0.297 n=30+30)
RegexpMatchHard_32-4       1.54µs ± 3%    1.56µs ± 4%  +1.43%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4       46.6µs ± 3%    47.0µs ± 3%    ~     (p=0.055 n=30+30)
Revcomp-4                   411ms ± 6%     407ms ± 6%    ~     (p=0.219 n=30+30)
Template-4                 66.8ms ± 3%    64.8ms ± 5%  -3.01%  (p=0.000 n=30+30)
TimeParse-4                 312ns ± 2%     319ns ± 3%  +2.50%  (p=0.000 n=30+30)
TimeFormat-4                296ns ± 5%     299ns ± 3%  +0.93%  (p=0.005 n=30+30)
[Geo mean]                 47.5µs         47.1µs       -0.75%

name                     old speed      new speed      delta
GobDecode-4               120MB/s ± 5%   118MB/s ± 6%    ~     (p=0.072 n=30+30)
GobEncode-4               127MB/s ± 8%   129MB/s ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                   82.1MB/s ± 4%  87.0MB/s ± 4%  +5.90%  (p=0.000 n=30+30)
Gunzip-4                  519MB/s ± 4%   529MB/s ± 4%  +2.07%  (p=0.001 n=30+30)
JSONEncode-4              162MB/s ± 4%   161MB/s ± 3%    ~     (p=0.110 n=30+30)
JSONDecode-4             35.6MB/s ± 3%  35.0MB/s ± 4%  -1.77%  (p=0.007 n=30+30)
GoParse-4                18.3MB/s ± 4%  18.0MB/s ± 4%  -1.72%  (p=0.009 n=30+30)
RegexpMatchEasy0_32-4     418MB/s ± 1%   422MB/s ± 3%    ~     (p=0.645 n=25+30)
RegexpMatchEasy0_1K-4    4.06GB/s ± 3%  4.04GB/s ± 3%  -0.57%  (p=0.033 n=30+30)
RegexpMatchEasy1_32-4     459MB/s ± 4%   456MB/s ± 6%    ~     (p=0.530 n=30+30)
RegexpMatchEasy1_1K-4    2.73GB/s ± 3%  2.75GB/s ± 5%    ~     (p=0.279 n=30+30)
RegexpMatchMedium_32-4   9.28MB/s ± 5%  9.18MB/s ± 4%    ~     (p=0.086 n=30+30)
RegexpMatchMedium_1K-4   30.2MB/s ± 4%  30.0MB/s ± 4%    ~     (p=0.300 n=30+30)
RegexpMatchHard_32-4     20.8MB/s ± 3%  20.5MB/s ± 4%  -1.41%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.8MB/s ± 3%    ~     (p=0.051 n=30+30)
Revcomp-4                 619MB/s ± 7%   625MB/s ± 7%    ~     (p=0.219 n=30+30)
Template-4               29.0MB/s ± 3%  29.9MB/s ± 4%  +3.11%  (p=0.000 n=30+30)
[Geo mean]                123MB/s        123MB/s       +0.28%

Change-Id: I850652cfd53329c1af804b7f57f4393d8097bb0d
Reviewed-on: https://go-review.googlesource.com/121135
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bits.go

index de38772cd28442b5feec2d75b8f2176b22d2d6f4..584cc4c4bd552eb7055cab4f5a3570b028b571ad 100644 (file)
@@ -770,6 +770,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                        p.To.Reg = v.Args[0].Reg()
                        gc.AddAux2(&p.To, v, off)
                }
+       case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
+               ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+               sc := v.AuxValAndOff()
+               off := sc.Off()
+               val := sc.Val()
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = val
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = v.Args[0].Reg()
+               gc.AddAux2(&p.To, v, off)
        case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_CONST
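
The new case above decodes the op's AuxInt via AuxValAndOff: a ValAndOff
packs the immediate value and the memory offset into a single int64, and
the emitted instruction takes the constant as source and
base-register-plus-offset memory as destination (roughly ANDQ $7, 8(AX)
for Val=7, Off=8 and the base in AX). A standalone sketch of the packing,
paraphrased from memory rather than copied from the ssa package:

	// Sketch only: value in the upper 32 bits, offset in the lower 32.
	type valAndOff int64

	func makeValAndOffSketch(val, off int32) valAndOff {
		return valAndOff(int64(val)<<32 | int64(uint32(off)))
	}

	func (x valAndOff) val() int64 { return int64(x) >> 32 }
	func (x valAndOff) off() int64 { return int64(int32(x)) }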
index 54de6e055debdc48b2aed8c95035b4cac063ed3e..db6bbfb0606d9c092af20e8062ea62fa1f46bba2 100644 (file)
        ((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
        ((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
-(ADD(L|Q)constmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
-       (ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+       ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+       ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
 
 // Fold constants into stores.
 (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
 ((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
        && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
        ((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-(ADD(L|Q)constmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
        && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
-       (ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
+       ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
 
 // generating indexed loads and stores
 (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
 (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
 
-(MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
        && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
-       (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
        && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
-       (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
 
 // float <-> int register moves, with no conversion.
 // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
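
The two new MOV(L|Q)store rules above fire only under fairly tight
conditions: the load and the store must address the same location
(isSamePtr plus matching off/sym), the ALU op and the load must each have
exactly one use, and the constant/offset pair must fit the ValAndOff
encoding. A hedged source-level sketch of what that means (function names
are illustrative; behavior inferred from the rule conditions):

	func fuses(p *uint64) {
		// the only use of the loaded value is the store back to *p,
		// so this is eligible to become a single ORQ $4, (reg)
		*p |= 4
	}

	func keepsSeparateStore(p *uint64) uint64 {
		// v has a second use (the return), so a.Uses != 1 and the
		// rewrite does not apply: load, OR and store stay separate
		v := *p | 4
		*p = v
		return v
	}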
index 5a8634abd181fafc631aa71b0044e3a67e5e9ab3..1140958670d01150f0b03f61326a208e45ceabeb 100644 (file)
@@ -225,20 +225,26 @@ func init() {
                {name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo)
                {name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true},                // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
 
-               {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-               {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 & arg1
-               {name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-               {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 & auxint
-
-               {name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-               {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 | arg1
-               {name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-               {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 | auxint
-
-               {name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-               {name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 ^ arg1
-               {name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
-               {name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 ^ auxint
+               {name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 & arg1
+               {name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 & arg1
+               {name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 & auxint
+               {name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 & auxint
+               {name: "ANDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+               {name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
+               {name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 | arg1
+               {name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 | arg1
+               {name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 | auxint
+               {name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 | auxint
+               {name: "ORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+               {name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
+               {name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 ^ arg1
+               {name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true},                                                 // arg0 ^ arg1
+               {name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 ^ auxint
+               {name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                 // arg0 ^ auxint
+               {name: "XORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+               {name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
 
                {name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"},                    // arg0 compare to arg1
                {name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"},                    // arg0 compare to arg1
index b479bca7ff470a028a159e5c1689270f62e8c15f..6a14ee080178b744f0eba68cdddcce5032952fc7 100644 (file)
@@ -487,14 +487,20 @@ const (
        OpAMD64ANDL
        OpAMD64ANDQconst
        OpAMD64ANDLconst
+       OpAMD64ANDQconstmodify
+       OpAMD64ANDLconstmodify
        OpAMD64ORQ
        OpAMD64ORL
        OpAMD64ORQconst
        OpAMD64ORLconst
+       OpAMD64ORQconstmodify
+       OpAMD64ORLconstmodify
        OpAMD64XORQ
        OpAMD64XORL
        OpAMD64XORQconst
        OpAMD64XORLconst
+       OpAMD64XORQconstmodify
+       OpAMD64XORLconstmodify
        OpAMD64CMPQ
        OpAMD64CMPL
        OpAMD64CMPW
@@ -5948,6 +5954,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "ANDQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AANDQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ANDLconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AANDL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
        {
                name:         "ORQ",
                argLen:       2,
@@ -6014,6 +6048,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "ORQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AORQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "ORLconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
        {
                name:         "XORQ",
                argLen:       2,
@@ -6080,6 +6142,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:           "XORQconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AXORQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
+       {
+               name:           "XORLconstmodify",
+               auxType:        auxSymValAndOff,
+               argLen:         2,
+               clobberFlags:   true,
+               faultOnNilArg0: true,
+               symEffect:      SymRead | SymWrite,
+               asm:            x86.AXORL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+                       },
+               },
+       },
        {
                name:   "CMPQ",
                argLen: 2,
index 47d3f431ab22ef98f623ef29f65a89b6b794ad87..950b926cc14205e6c2652907e24ff4b80d80add2 100644 (file)
@@ -43,12 +43,16 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ANDL_0(v)
        case OpAMD64ANDLconst:
                return rewriteValueAMD64_OpAMD64ANDLconst_0(v)
+       case OpAMD64ANDLconstmodify:
+               return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
        case OpAMD64ANDLload:
                return rewriteValueAMD64_OpAMD64ANDLload_0(v)
        case OpAMD64ANDQ:
                return rewriteValueAMD64_OpAMD64ANDQ_0(v)
        case OpAMD64ANDQconst:
                return rewriteValueAMD64_OpAMD64ANDQconst_0(v)
+       case OpAMD64ANDQconstmodify:
+               return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
        case OpAMD64ANDQload:
                return rewriteValueAMD64_OpAMD64ANDQload_0(v)
        case OpAMD64BSFQ:
@@ -242,7 +246,7 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAMD64MOVQloadidx8:
                return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
        case OpAMD64MOVQstore:
-               return rewriteValueAMD64_OpAMD64MOVQstore_0(v)
+               return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
        case OpAMD64MOVQstoreconst:
                return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
        case OpAMD64MOVQstoreconstidx1:
@@ -329,12 +333,16 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ORL_0(v) || rewriteValueAMD64_OpAMD64ORL_10(v) || rewriteValueAMD64_OpAMD64ORL_20(v) || rewriteValueAMD64_OpAMD64ORL_30(v) || rewriteValueAMD64_OpAMD64ORL_40(v) || rewriteValueAMD64_OpAMD64ORL_50(v) || rewriteValueAMD64_OpAMD64ORL_60(v) || rewriteValueAMD64_OpAMD64ORL_70(v) || rewriteValueAMD64_OpAMD64ORL_80(v) || rewriteValueAMD64_OpAMD64ORL_90(v) || rewriteValueAMD64_OpAMD64ORL_100(v) || rewriteValueAMD64_OpAMD64ORL_110(v) || rewriteValueAMD64_OpAMD64ORL_120(v) || rewriteValueAMD64_OpAMD64ORL_130(v)
        case OpAMD64ORLconst:
                return rewriteValueAMD64_OpAMD64ORLconst_0(v)
+       case OpAMD64ORLconstmodify:
+               return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
        case OpAMD64ORLload:
                return rewriteValueAMD64_OpAMD64ORLload_0(v)
        case OpAMD64ORQ:
                return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
        case OpAMD64ORQconst:
                return rewriteValueAMD64_OpAMD64ORQconst_0(v)
+       case OpAMD64ORQconstmodify:
+               return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
        case OpAMD64ORQload:
                return rewriteValueAMD64_OpAMD64ORQload_0(v)
        case OpAMD64ROLB:
@@ -493,12 +501,16 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64XORL_0(v) || rewriteValueAMD64_OpAMD64XORL_10(v)
        case OpAMD64XORLconst:
                return rewriteValueAMD64_OpAMD64XORLconst_0(v) || rewriteValueAMD64_OpAMD64XORLconst_10(v)
+       case OpAMD64XORLconstmodify:
+               return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
        case OpAMD64XORLload:
                return rewriteValueAMD64_OpAMD64XORLload_0(v)
        case OpAMD64XORQ:
                return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
        case OpAMD64XORQconst:
                return rewriteValueAMD64_OpAMD64XORQconst_0(v)
+       case OpAMD64XORQconstmodify:
+               return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
        case OpAMD64XORQload:
                return rewriteValueAMD64_OpAMD64XORQload_0(v)
        case OpAdd16:
@@ -3480,6 +3492,58 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v *Value) bool {
+       // match: (ANDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -3893,6 +3957,58 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v *Value) bool {
+       // match: (ANDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ANDQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -14306,6 +14422,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64ORLconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64XORLconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64XORLconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
        // cond:
        // result: (MOVSSstore [off] {sym} ptr val mem)
@@ -16292,6 +16525,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64ANDQconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
+       // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64ORQconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64XORQconst {
+                       break
+               }
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64XORQconstmodify)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
        // cond:
        // result: (MOVSDstore [off] {sym} ptr val mem)
@@ -30779,6 +31132,58 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ORLconstmodify_0(v *Value) bool {
+       // match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ORLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -41688,6 +42093,58 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64ORQconstmodify_0(v *Value) bool {
+       // match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64ORQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -52184,6 +52641,58 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XORLconstmodify_0(v *Value) bool {
+       // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -52598,6 +53107,58 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64XORQconstmodify_0(v *Value) bool {
+       // match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2)
+       // result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+       // result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+       for {
+               valoff1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORQconstmodify)
+               v.AuxInt = ValAndOff(valoff1).add(off2)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
        b := v.Block
        _ = b
index 9de2201cb1b2a4adf6a19ca85dbc34d2077c0617..2d1645b5e32f3ba2f987aab27db413f97f0007f0 100644 (file)
@@ -262,6 +262,16 @@ func bitcompl32(a, b uint32) (n uint32) {
        return n
 }
 
+// check direct operation on memory with constant source
+func bitOpOnMem(a []uint32) {
+       // amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
+       a[0] &= 200
+       // amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
+       a[1] |= 220
+       // amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
+       a[2] ^= 240
+}
+
 // Check AND masking on arm64 (Issue #19857)
 
 func and_mask_1(a uint64) uint64 {
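
The new codegen test only exercises the 32-bit (L) forms. A hypothetical
companion check for the 64-bit (Q) forms might look like the sketch below;
it is not part of this CL, and the regexps are my guess at the expected
patterns:

	func bitOpOnMem64(a []uint64) {
		// amd64:`ANDQ\s[$]200,\s\([A-Z]+\)`
		a[0] &= 200
		// amd64:`ORQ\s[$]220,\s8\([A-Z]+\)`
		a[1] |= 220
		// amd64:`XORQ\s[$]240,\s16\([A-Z]+\)`
		a[2] ^= 240
	}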