From b75c5c5992686a7eac1a33383e8b9b14b579556e Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Wed, 27 Jun 2018 02:46:17 +0000
Subject: [PATCH] cmd/compile: optimize AMD64 with more read-modify-write
 operations
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Six more operations that do read-modify-write with a constant source
operand are added: AND, OR, and XOR, each in 32-bit and 64-bit form.

1. The total size of pkg/linux_amd64 decreases by about 3KB, excluding
   cmd/compile.

2. The go1 benchmark shows a slight overall improvement.

name                     old time/op    new time/op    delta
BinaryTree17-4              2.61s ± 4%     2.67s ± 2%  +2.26%  (p=0.000 n=30+29)
Fannkuch11-4                2.39s ± 2%     2.32s ± 2%  -2.67%  (p=0.000 n=30+30)
FmtFprintfEmpty-4          44.0ns ± 4%    41.7ns ± 4%  -5.15%  (p=0.000 n=30+30)
FmtFprintfString-4         74.2ns ± 4%    72.3ns ± 4%  -2.59%  (p=0.000 n=30+30)
FmtFprintfInt-4            81.7ns ± 3%    78.8ns ± 4%  -3.54%  (p=0.000 n=27+30)
FmtFprintfIntInt-4          130ns ± 4%     124ns ± 5%  -4.60%  (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4     154ns ± 3%     152ns ± 3%  -1.13%  (p=0.012 n=30+30)
FmtFprintfFloat-4           215ns ± 4%     212ns ± 5%  -1.56%  (p=0.002 n=30+30)
FmtManyArgs-4               522ns ± 3%     512ns ± 3%  -1.84%  (p=0.001 n=30+30)
GobDecode-4                6.42ms ± 5%    6.49ms ± 7%    ~     (p=0.070 n=30+30)
GobEncode-4                6.07ms ± 8%    5.98ms ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                      236ms ± 4%     223ms ± 4%  -5.57%  (p=0.000 n=30+30)
Gunzip-4                   37.4ms ± 3%    36.7ms ± 4%  -2.03%  (p=0.000 n=30+30)
HTTPClientServer-4         58.7µs ± 1%    58.5µs ± 2%  -0.37%  (p=0.018 n=30+29)
JSONEncode-4               12.0ms ± 4%    12.1ms ± 3%    ~     (p=0.112 n=30+30)
JSONDecode-4               54.5ms ± 3%    55.5ms ± 4%  +1.80%  (p=0.006 n=30+30)
Mandelbrot200-4            3.78ms ± 4%    3.78ms ± 4%    ~     (p=0.173 n=30+30)
GoParse-4                  3.16ms ± 5%    3.22ms ± 5%  +1.75%  (p=0.010 n=30+30)
RegexpMatchEasy0_32-4      76.6ns ± 1%    75.9ns ± 3%    ~     (p=0.672 n=25+30)
RegexpMatchEasy0_1K-4       252ns ± 3%     253ns ± 3%  +0.57%  (p=0.027 n=30+30)
RegexpMatchEasy1_32-4      69.8ns ± 4%    70.2ns ± 6%    ~     (p=0.539 n=30+30)
RegexpMatchEasy1_1K-4       374ns ± 3%     373ns ± 5%    ~     (p=0.263 n=30+30)
RegexpMatchMedium_32-4      107ns ± 4%     109ns ± 3%    ~     (p=0.067 n=30+30)
RegexpMatchMedium_1K-4     33.9µs ± 5%    34.1µs ± 4%    ~     (p=0.297 n=30+30)
RegexpMatchHard_32-4       1.54µs ± 3%    1.56µs ± 4%  +1.43%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4       46.6µs ± 3%    47.0µs ± 3%    ~     (p=0.055 n=30+30)
Revcomp-4                   411ms ± 6%     407ms ± 6%    ~     (p=0.219 n=30+30)
Template-4                 66.8ms ± 3%    64.8ms ± 5%  -3.01%  (p=0.000 n=30+30)
TimeParse-4                 312ns ± 2%     319ns ± 3%  +2.50%  (p=0.000 n=30+30)
TimeFormat-4                296ns ± 5%     299ns ± 3%  +0.93%  (p=0.005 n=30+30)
[Geo mean]                 47.5µs         47.1µs       -0.75%

name                     old speed      new speed      delta
GobDecode-4               120MB/s ± 5%   118MB/s ± 6%    ~     (p=0.072 n=30+30)
GobEncode-4               127MB/s ± 8%   129MB/s ± 8%    ~     (p=0.150 n=30+30)
Gzip-4                   82.1MB/s ± 4%  87.0MB/s ± 4%  +5.90%  (p=0.000 n=30+30)
Gunzip-4                  519MB/s ± 4%   529MB/s ± 4%  +2.07%  (p=0.001 n=30+30)
JSONEncode-4              162MB/s ± 4%   161MB/s ± 3%    ~     (p=0.110 n=30+30)
JSONDecode-4             35.6MB/s ± 3%  35.0MB/s ± 4%  -1.77%  (p=0.007 n=30+30)
GoParse-4                18.3MB/s ± 4%  18.0MB/s ± 4%  -1.72%  (p=0.009 n=30+30)
RegexpMatchEasy0_32-4     418MB/s ± 1%   422MB/s ± 3%    ~     (p=0.645 n=25+30)
RegexpMatchEasy0_1K-4    4.06GB/s ± 3%  4.04GB/s ± 3%  -0.57%  (p=0.033 n=30+30)
RegexpMatchEasy1_32-4     459MB/s ± 4%   456MB/s ± 6%    ~     (p=0.530 n=30+30)
RegexpMatchEasy1_1K-4    2.73GB/s ± 3%  2.75GB/s ± 5%    ~     (p=0.279 n=30+30)
RegexpMatchMedium_32-4   9.28MB/s ± 5%  9.18MB/s ± 4%    ~     (p=0.086 n=30+30)
RegexpMatchMedium_1K-4   30.2MB/s ± 4%  30.0MB/s ± 4%    ~     (p=0.300 n=30+30)
RegexpMatchHard_32-4     20.8MB/s ± 3%  20.5MB/s ± 4%  -1.41%  (p=0.002 n=30+30)
RegexpMatchHard_1K-4     22.0MB/s ± 3%  21.8MB/s ± 3%    ~     (p=0.051 n=30+30)
Revcomp-4                 619MB/s ± 7%   625MB/s ± 7%    ~     (p=0.219 n=30+30)
Template-4               29.0MB/s ± 3%  29.9MB/s ± 4%  +3.11%  (p=0.000 n=30+30)
[Geo mean]                123MB/s        123MB/s       +0.28%

Change-Id: I850652cfd53329c1af804b7f57f4393d8097bb0d
Reviewed-on: https://go-review.googlesource.com/121135
Run-TryBot: Ben Shi
TryBot-Result: Gobot Gobot
Reviewed-by: Ilya Tocar
---
 src/cmd/compile/internal/amd64/ssa.go        |  11 +
 src/cmd/compile/internal/ssa/gen/AMD64.rules |  21 +-
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |  34 +-
 src/cmd/compile/internal/ssa/opGen.go        |  90 +++
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 563 ++++++++++++++++++-
 test/codegen/bits.go                         |  10 +
 6 files changed, 706 insertions(+), 23 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index de38772cd2..584cc4c4bd 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -770,6 +770,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 			p.To.Reg = v.Args[0].Reg()
 			gc.AddAux2(&p.To, v, off)
 		}
+	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
+		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+		sc := v.AuxValAndOff()
+		off := sc.Off()
+		val := sc.Val()
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = val
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		gc.AddAux2(&p.To, v, off)
 	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_CONST
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 54de6e055d..db6bbfb060 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1039,8 +1039,10 @@
 	((ADD|SUB|MUL)SSload [off1+off2] {sym} val base mem)
 ((ADD|SUB|MUL)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
 	((ADD|SUB|MUL)SDload [off1+off2] {sym} val base mem)
-(ADD(L|Q)constmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
-	(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+	((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
 
 // Fold constants into stores.
 (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
@@ -1081,9 +1083,12 @@
 ((ADD|SUB|MUL)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
 	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 	((ADD|SUB|MUL)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-(ADD(L|Q)constmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
-	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
-	(ADD(L|Q)constmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
+	((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
+	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
 
 // generating indexed loads and stores
 (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -2352,12 +2357,12 @@
 (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
 (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
 
-(MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
 	&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
-	(ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-(MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+	((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
 	&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
-	(ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
 
 // float <-> int register moves, with no conversion.
 // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 5a8634abd1..1140958670 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -225,20 +225,26 @@ func init() {
 		{name: "MULQU2", argLength: 2, reg: regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}, commutative: true, asm: "MULQ", clobberFlags: true}, // arg0 * arg1, returns (hi, lo)
 		{name: "DIVQU2", argLength: 3, reg: regInfo{inputs: []regMask{dx, ax, gpsp}, outputs: []regMask{ax, dx}}, asm: "DIVQ", clobberFlags: true},                // arg0:arg1 / arg2 (128-bit divided by 64-bit), returns (q, r)
 
-		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true},       // arg0 & arg1
-		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true},       // arg0 & arg1
-		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true},       // arg0 & auxint
-		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true},       // arg0 & auxint
-
-		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true},         // arg0 | arg1
-		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true},         // arg0 | arg1
-		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},         // arg0 | auxint
-		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true},         // arg0 | auxint
-
-		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true},       // arg0 ^ arg1
-		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true},       // arg0 ^ arg1
-		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},       // arg0 ^ auxint
-		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true},       // arg0 ^ auxint
+		{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                                // arg0 & arg1
+		{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true, clobberFlags: true},                                                                // arg0 & arg1
+		{name: "ANDQconst", argLength: 1, reg: gp11, asm: "ANDQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                // arg0 & auxint
+		{name: "ANDLconst", argLength: 1, reg: gp11, asm: "ANDL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                // arg0 & auxint
+		{name: "ANDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+		{name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
+		{name: "ORQ", argLength: 2, reg: gp21, asm: "ORQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                                  // arg0 | arg1
+		{name: "ORL", argLength: 2, reg: gp21, asm: "ORL", commutative: true, resultInArg0: true, clobberFlags: true},                                                                  // arg0 | arg1
+		{name: "ORQconst", argLength: 1, reg: gp11, asm: "ORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                  // arg0 | auxint
+		{name: "ORLconst", argLength: 1, reg: gp11, asm: "ORL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                  // arg0 | auxint
+		{name: "ORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                  // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+		{name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                  // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
+		{name: "XORQ", argLength: 2, reg: gp21, asm: "XORQ", commutative: true, resultInArg0: true, clobberFlags: true},                                                                // arg0 ^ arg1
+		{name: "XORL", argLength: 2, reg: gp21, asm: "XORL", commutative: true, resultInArg0: true, clobberFlags: true},                                                                // arg0 ^ arg1
+		{name: "XORQconst", argLength: 1, reg: gp11, asm: "XORQ", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                // arg0 ^ auxint
+		{name: "XORLconst", argLength: 1, reg: gp11, asm: "XORL", aux: "Int32", resultInArg0: true, clobberFlags: true},                                                                // arg0 ^ auxint
+		{name: "XORQconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+		{name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},                // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
 
 		{name: "CMPQ", argLength: 2, reg: gp2flags, asm: "CMPQ", typ: "Flags"}, // arg0 compare to arg1
 		{name: "CMPL", argLength: 2, reg: gp2flags, asm: "CMPL", typ: "Flags"}, // arg0 compare to arg1
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index b479bca7ff..6a14ee0801 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -487,14 +487,20 @@ const (
 	OpAMD64ANDL
 	OpAMD64ANDQconst
 	OpAMD64ANDLconst
+	OpAMD64ANDQconstmodify
+	OpAMD64ANDLconstmodify
 	OpAMD64ORQ
 	OpAMD64ORL
 	OpAMD64ORQconst
 	OpAMD64ORLconst
+	OpAMD64ORQconstmodify
+	OpAMD64ORLconstmodify
 	OpAMD64XORQ
 	OpAMD64XORL
 	OpAMD64XORQconst
 	OpAMD64XORLconst
+	OpAMD64XORQconstmodify
+	OpAMD64XORLconstmodify
 	OpAMD64CMPQ
 	OpAMD64CMPL
 	OpAMD64CMPW
@@ -5948,6 +5954,34 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "ANDQconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AANDQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
+	{
+		name:           "ANDLconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AANDL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
 	{
 		name:   "ORQ",
 		argLen: 2,
@@ -6014,6 +6048,34 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "ORQconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AORQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
+	{
+		name:           "ORLconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AORL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
 	{
 		name:   "XORQ",
 		argLen: 2,
@@ -6080,6 +6142,34 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "XORQconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AXORQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
+	{
+		name:           "XORLconstmodify",
+		auxType:        auxSymValAndOff,
+		argLen:         2,
+		clobberFlags:   true,
+		faultOnNilArg0: true,
+		symEffect:      SymRead | SymWrite,
+		asm:            x86.AXORL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+			},
+		},
+	},
 	{
 		name:   "CMPQ",
 		argLen: 2,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 47d3f431ab..950b926cc1 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -43,12 +43,16 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64ANDL_0(v)
 	case OpAMD64ANDLconst:
 		return rewriteValueAMD64_OpAMD64ANDLconst_0(v)
+	case OpAMD64ANDLconstmodify:
+		return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v)
 	case OpAMD64ANDLload:
 		return rewriteValueAMD64_OpAMD64ANDLload_0(v)
 	case OpAMD64ANDQ:
 		return rewriteValueAMD64_OpAMD64ANDQ_0(v)
 	case OpAMD64ANDQconst:
 		return rewriteValueAMD64_OpAMD64ANDQconst_0(v)
+	case OpAMD64ANDQconstmodify:
+		return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v)
 	case OpAMD64ANDQload:
 		return rewriteValueAMD64_OpAMD64ANDQload_0(v)
 	case OpAMD64BSFQ:
@@ -242,7 +246,7 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpAMD64MOVQloadidx8:
 		return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
 	case OpAMD64MOVQstore:
-		return rewriteValueAMD64_OpAMD64MOVQstore_0(v)
+		return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v)
 	case OpAMD64MOVQstoreconst:
 		return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
 	case OpAMD64MOVQstoreconstidx1:
@@ -329,12 +333,16 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64ORL_0(v) || rewriteValueAMD64_OpAMD64ORL_10(v) || rewriteValueAMD64_OpAMD64ORL_20(v) || rewriteValueAMD64_OpAMD64ORL_30(v) || rewriteValueAMD64_OpAMD64ORL_40(v) || rewriteValueAMD64_OpAMD64ORL_50(v) || rewriteValueAMD64_OpAMD64ORL_60(v) || rewriteValueAMD64_OpAMD64ORL_70(v) || rewriteValueAMD64_OpAMD64ORL_80(v) || rewriteValueAMD64_OpAMD64ORL_90(v) || rewriteValueAMD64_OpAMD64ORL_100(v) || rewriteValueAMD64_OpAMD64ORL_110(v) || rewriteValueAMD64_OpAMD64ORL_120(v) || rewriteValueAMD64_OpAMD64ORL_130(v)
 	case OpAMD64ORLconst:
 		return rewriteValueAMD64_OpAMD64ORLconst_0(v)
+	case OpAMD64ORLconstmodify:
+		return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v)
 	case OpAMD64ORLload:
 		return rewriteValueAMD64_OpAMD64ORLload_0(v)
 	case OpAMD64ORQ:
 		return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v)
 	case OpAMD64ORQconst:
 		return rewriteValueAMD64_OpAMD64ORQconst_0(v)
+	case OpAMD64ORQconstmodify:
+		return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v)
 	case OpAMD64ORQload:
 		return rewriteValueAMD64_OpAMD64ORQload_0(v)
 	case OpAMD64ROLB:
@@ -493,12 +501,16 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64XORL_0(v) || rewriteValueAMD64_OpAMD64XORL_10(v)
 	case OpAMD64XORLconst:
 		return rewriteValueAMD64_OpAMD64XORLconst_0(v) || rewriteValueAMD64_OpAMD64XORLconst_10(v)
+	case OpAMD64XORLconstmodify:
+		return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v)
 	case OpAMD64XORLload:
 		return rewriteValueAMD64_OpAMD64XORLload_0(v)
 	case OpAMD64XORQ:
 		return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v)
 	case OpAMD64XORQconst:
 		return rewriteValueAMD64_OpAMD64XORQconst_0(v)
+	case OpAMD64XORQconstmodify:
+		return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v)
 	case OpAMD64XORQload:
 		return rewriteValueAMD64_OpAMD64XORQload_0(v)
 	case OpAdd16:
@@ -3480,6 +3492,58 @@ func rewriteValueAMD64_OpAMD64ANDLconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v *Value) bool {
+	// match: (ANDLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64ANDLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (ANDLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (ANDLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64ANDLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -3893,6 +3957,58 @@ func rewriteValueAMD64_OpAMD64ANDQconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v *Value) bool {
+	// match: (ANDQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64ANDQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (ANDQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (ANDQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64ANDQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -14306,6 +14422,123 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64ANDLconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVLload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64ANDLconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64ORLconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVLload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64ORLconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64XORLconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVLload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64XORLconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
 	// cond:
 	// result: (MOVSSstore [off] {sym} ptr val mem)
@@ -16292,6 +16525,126 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64ANDQconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVQload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64ANDQconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool {
+	// match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64ORQconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVQload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64ORQconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+	// cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+	// result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[2]
+		ptr := v.Args[0]
+		a := v.Args[1]
+		if a.Op != OpAMD64XORQconst {
+			break
+		}
+		c := a.AuxInt
+		l := a.Args[0]
+		if l.Op != OpAMD64MOVQload {
+			break
+		}
+		if l.AuxInt != off {
+			break
+		}
+		if l.Aux != sym {
+			break
+		}
+		_ = l.Args[1]
+		ptr2 := l.Args[0]
+		mem := l.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+			break
+		}
+		v.reset(OpAMD64XORQconstmodify)
+		v.AuxInt = makeValAndOff(c, off)
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
 	// cond:
 	// result: (MOVSDstore [off] {sym} ptr val mem)
@@ -30779,6 +31132,58 @@ func rewriteValueAMD64_OpAMD64ORLconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64ORLconstmodify_0(v *Value) bool {
+	// match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64ORLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (ORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64ORLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -41688,6 +42093,58 @@ func rewriteValueAMD64_OpAMD64ORQconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64ORQconstmodify_0(v *Value) bool {
+	// match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64ORQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (ORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64ORQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -52184,6 +52641,58 @@ func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64XORLconstmodify_0(v *Value) bool {
+	// match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64XORLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (XORLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64XORLconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -52598,6 +53107,58 @@ func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64XORQconstmodify_0(v *Value) bool {
+	// match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2)
+	// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2)) {
+			break
+		}
+		v.reset(OpAMD64XORQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = sym
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+	// cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+	// result: (XORQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+	for {
+		valoff1 := v.AuxInt
+		sym1 := v.Aux
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		base := v_0.Args[0]
+		mem := v.Args[1]
+		if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpAMD64XORQconstmodify)
+		v.AuxInt = ValAndOff(valoff1).add(off2)
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(base)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool {
 	b := v.Block
 	_ = b
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 9de2201cb1..2d1645b5e3 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -262,6 +262,16 @@ func bitcompl32(a, b uint32) (n uint32) {
 	return n
 }
 
+// check direct operation on memory with constant source
+func bitOpOnMem(a []uint32) {
+	// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
+	a[0] &= 200
+	// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
+	a[1] |= 220
+	// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
+	a[2] ^= 240
+}
+
 // Check AND masking on arm64 (Issue #19857)
 func and_mask_1(a uint64) uint64 {
-- 
2.50.0
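
For illustration, a minimal Go sketch (not part of the commit) of the source
pattern these rules target; the function name is hypothetical and the exact
register in the comments is an assumption:

package example

// With the new constmodify ops, each statement below can compile to a
// single read-modify-write instruction with a constant source, e.g.
// ANDQ $6, (AX), instead of a separate MOVQ load, a register ALU op,
// and a MOVQ store.
func maskFlags(flags *uint64) {
	*flags &= 6 // ANDQ $6, (AX)
	*flags |= 8 // ORQ $8, (AX)
	*flags ^= 1 // XORQ $1, (AX)
}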