From 3bc34385faacbcbefb2b4abc0e280b709aab03c9 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Fri, 29 Jun 2018 02:11:53 +0000
Subject: [PATCH] cmd/compile: introduce more read-modify-write operations for amd64
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Add support for read-modify-write ADD/SUB/AND/OR/XOR on amd64.

1. The total size of pkg/linux_amd64 decreases by about 4KB, excluding cmd/compile.
2. The go1 benchmark shows a little improvement, excluding noise.

name old time/op new time/op delta
BinaryTree17-4 2.63s ± 3% 2.65s ± 4% +1.01% (p=0.037 n=35+35)
Fannkuch11-4 2.33s ± 2% 2.39s ± 2% +2.49% (p=0.000 n=35+35)
FmtFprintfEmpty-4 45.4ns ± 5% 40.8ns ± 6% -10.09% (p=0.000 n=35+35)
FmtFprintfString-4 73.3ns ± 4% 70.9ns ± 3% -3.23% (p=0.000 n=30+35)
FmtFprintfInt-4 79.9ns ± 4% 79.5ns ± 3% ~ (p=0.736 n=34+35)
FmtFprintfIntInt-4 126ns ± 4% 125ns ± 4% ~ (p=0.083 n=35+35)
FmtFprintfPrefixedInt-4 152ns ± 6% 152ns ± 3% ~ (p=0.855 n=34+35)
FmtFprintfFloat-4 215ns ± 4% 213ns ± 4% ~ (p=0.066 n=35+35)
FmtManyArgs-4 522ns ± 3% 506ns ± 3% -3.15% (p=0.000 n=35+35)
GobDecode-4 6.45ms ± 8% 6.51ms ± 7% +0.96% (p=0.026 n=35+35)
GobEncode-4 6.10ms ± 6% 6.02ms ± 8% ~ (p=0.160 n=35+35)
Gzip-4 228ms ± 3% 221ms ± 3% -2.92% (p=0.000 n=35+35)
Gunzip-4 37.5ms ± 4% 37.2ms ± 3% -0.78% (p=0.036 n=35+35)
HTTPClientServer-4 58.7µs ± 2% 59.2µs ± 1% +0.80% (p=0.000 n=33+33)
JSONEncode-4 12.0ms ± 3% 12.2ms ± 3% +1.84% (p=0.008 n=35+35)
JSONDecode-4 57.0ms ± 4% 56.6ms ± 3% ~ (p=0.320 n=35+35)
Mandelbrot200-4 3.82ms ± 3% 3.79ms ± 3% ~ (p=0.074 n=35+35)
GoParse-4 3.21ms ± 5% 3.24ms ± 4% ~ (p=0.119 n=35+35)
RegexpMatchEasy0_32-4 76.3ns ± 4% 75.4ns ± 4% -1.14% (p=0.014 n=34+33)
RegexpMatchEasy0_1K-4 251ns ± 4% 254ns ± 3% +1.28% (p=0.016 n=35+35)
RegexpMatchEasy1_32-4 69.6ns ± 3% 70.1ns ± 3% +0.82% (p=0.005 n=35+35)
RegexpMatchEasy1_1K-4 367ns ± 4% 376ns ± 4% +2.47% (p=0.000 n=35+35)
RegexpMatchMedium_32-4 108ns ± 5% 104ns ± 4% -3.18% (p=0.000 n=35+35)
RegexpMatchMedium_1K-4 33.8µs ± 3% 32.7µs ± 3% -3.27% (p=0.000 n=35+35)
RegexpMatchHard_32-4 1.55µs ± 3% 1.52µs ± 3% -1.64% (p=0.000 n=35+35)
RegexpMatchHard_1K-4 46.6µs ± 3% 46.6µs ± 4% ~ (p=0.149 n=35+35)
Revcomp-4 416ms ± 7% 412ms ± 6% -0.95% (p=0.033 n=33+35)
Template-4 64.3ms ± 3% 62.4ms ± 7% -2.94% (p=0.000 n=35+35)
TimeParse-4 320ns ± 2% 322ns ± 3% ~ (p=0.589 n=35+35)
TimeFormat-4 300ns ± 3% 300ns ± 3% ~ (p=0.597 n=35+35)
[Geo mean] 47.4µs 47.0µs -0.86%

name old speed new speed delta
GobDecode-4 119MB/s ± 7% 118MB/s ± 7% -0.96% (p=0.027 n=35+35)
GobEncode-4 126MB/s ± 7% 127MB/s ± 6% ~ (p=0.157 n=34+34)
Gzip-4 85.3MB/s ± 3% 87.9MB/s ± 3% +3.02% (p=0.000 n=35+35)
Gunzip-4 518MB/s ± 4% 522MB/s ± 3% +0.79% (p=0.037 n=35+35)
JSONEncode-4 162MB/s ± 3% 159MB/s ± 3% -1.81% (p=0.009 n=35+35)
JSONDecode-4 34.1MB/s ± 4% 34.3MB/s ± 3% ~ (p=0.318 n=35+35)
GoParse-4 18.0MB/s ± 5% 17.9MB/s ± 4% ~ (p=0.117 n=35+35)
RegexpMatchEasy0_32-4 419MB/s ± 3% 425MB/s ± 4% +1.46% (p=0.003 n=32+33)
RegexpMatchEasy0_1K-4 4.07GB/s ± 4% 4.02GB/s ± 3% -1.28% (p=0.014 n=35+35)
RegexpMatchEasy1_32-4 460MB/s ± 3% 456MB/s ± 4% -0.82% (p=0.004 n=35+35)
RegexpMatchEasy1_1K-4 2.79GB/s ± 4% 2.72GB/s ± 4% -2.39% (p=0.000 n=35+35)
RegexpMatchMedium_32-4 9.23MB/s ± 4% 9.53MB/s ± 4% +3.16% (p=0.000 n=35+35)
RegexpMatchMedium_1K-4 30.3MB/s ± 3% 31.3MB/s ± 3% +3.38% (p=0.000 n=35+35)
RegexpMatchHard_32-4 20.7MB/s ± 3% 21.0MB/s ± 3% +1.67% (p=0.000 n=35+35)
RegexpMatchHard_1K-4 22.0MB/s ± 3% 21.9MB/s ± 4% ~ (p=0.277 n=35+33)
Revcomp-4 612MB/s ± 7% 618MB/s ± 6% +0.96% (p=0.034 n=33+35)
Template-4 30.2MB/s ± 3% 31.1MB/s ± 6% +3.05% (p=0.000 n=35+35)
[Geo mean] 123MB/s 124MB/s +0.64%

Change-Id: Ia025da272e07d0069413824bfff3471b106d6280
Reviewed-on: https://go-review.googlesource.com/121535
Run-TryBot: Ben Shi
TryBot-Result: Gobot Gobot
Reviewed-by: Ilya Tocar
Reviewed-by: Keith Randall
---
 src/cmd/compile/internal/amd64/ssa.go        |    4 +-
 src/cmd/compile/internal/ssa/gen/AMD64.rules |   16 +
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |   12 +
 src/cmd/compile/internal/ssa/opGen.go        |  160 ++
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 1677 +++++++++++++++++-
 test/codegen/arithmetic.go                   |    2 +
 6 files changed, 1865 insertions(+), 6 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 4ecdb769f3..ae6141dd12 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -699,7 +699,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() - case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore: + case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, + ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, + ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index eab66d17ab..10d917632e 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -1045,6 +1045,10 @@ ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) +((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem) +((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) // Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> @@ -1091,6 +1095,12 @@ ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) +((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) +((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) // generating indexed loads and stores (MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> @@ -2276,6 +2286,12 @@ ((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) +(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) +(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> + ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) +(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) +(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> + ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) // Merge ADDQconst and LEAQ into atomic loads. 
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 4735ea1bc0..512df99694 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -346,6 +346,18 @@ func init() { {name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem {name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + // direct binary-op on memory (read-modify-write) + {name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem + {name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem + {name: "ANDQmodify", argLength: 3, reg: gpstore, asm: "ANDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem + {name: "ORQmodify", argLength: 3, reg: gpstore, asm: "ORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem + {name: "XORQmodify", argLength: 3, reg: gpstore, asm: "XORQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem + {name: "ADDLmodify", argLength: 3, reg: gpstore, asm: "ADDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem + {name: "SUBLmodify", argLength: 3, reg: gpstore, asm: "SUBL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem + {name: "ANDLmodify", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) &= arg1, arg2=mem + {name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem + {name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem + // unary ops {name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0 {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 704792c9af..374949c602 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -600,6 +600,16 @@ const ( OpAMD64ORLload OpAMD64XORQload OpAMD64XORLload + OpAMD64ADDQmodify + OpAMD64SUBQmodify + OpAMD64ANDQmodify + OpAMD64ORQmodify + OpAMD64XORQmodify + OpAMD64ADDLmodify + OpAMD64SUBLmodify + OpAMD64ANDLmodify + OpAMD64ORLmodify + OpAMD64XORLmodify OpAMD64NEGQ 
OpAMD64NEGL OpAMD64NOTQ @@ -7661,6 +7671,156 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AADDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "SUBQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ASUBQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "ANDQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AANDQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "ORQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "XORQmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AXORQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "ADDLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AADDL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "SUBLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.ASUBL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "ANDLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AANDL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "ORLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AORL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, + { + name: "XORLmodify", + auxType: auxSymOff, + argLen: 3, + clobberFlags: true, + faultOnNilArg0: true, + symEffect: SymRead | SymWrite, + asm: x86.AXORL, + reg: regInfo{ + inputs: []inputInfo{ + {1, 65535}, 
// AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + }, + }, { name: "NEGQ", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 245f795d90..e592610c26 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -23,6 +23,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ADDLconstmodify_0(v) case OpAMD64ADDLload: return rewriteValueAMD64_OpAMD64ADDLload_0(v) + case OpAMD64ADDLmodify: + return rewriteValueAMD64_OpAMD64ADDLmodify_0(v) case OpAMD64ADDQ: return rewriteValueAMD64_OpAMD64ADDQ_0(v) || rewriteValueAMD64_OpAMD64ADDQ_10(v) || rewriteValueAMD64_OpAMD64ADDQ_20(v) case OpAMD64ADDQconst: @@ -31,6 +33,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ADDQconstmodify_0(v) case OpAMD64ADDQload: return rewriteValueAMD64_OpAMD64ADDQload_0(v) + case OpAMD64ADDQmodify: + return rewriteValueAMD64_OpAMD64ADDQmodify_0(v) case OpAMD64ADDSD: return rewriteValueAMD64_OpAMD64ADDSD_0(v) case OpAMD64ADDSDload: @@ -47,6 +51,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ANDLconstmodify_0(v) case OpAMD64ANDLload: return rewriteValueAMD64_OpAMD64ANDLload_0(v) + case OpAMD64ANDLmodify: + return rewriteValueAMD64_OpAMD64ANDLmodify_0(v) case OpAMD64ANDQ: return rewriteValueAMD64_OpAMD64ANDQ_0(v) case OpAMD64ANDQconst: @@ -55,6 +61,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ANDQconstmodify_0(v) case OpAMD64ANDQload: return rewriteValueAMD64_OpAMD64ANDQload_0(v) + case OpAMD64ANDQmodify: + return rewriteValueAMD64_OpAMD64ANDQmodify_0(v) case OpAMD64BSFQ: return rewriteValueAMD64_OpAMD64BSFQ_0(v) case OpAMD64BTLconst: @@ -224,7 +232,7 @@ func rewriteValueAMD64(v *Value) bool { case OpAMD64MOVLloadidx8: return rewriteValueAMD64_OpAMD64MOVLloadidx8_0(v) case OpAMD64MOVLstore: - return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v) + return rewriteValueAMD64_OpAMD64MOVLstore_0(v) || rewriteValueAMD64_OpAMD64MOVLstore_10(v) || rewriteValueAMD64_OpAMD64MOVLstore_20(v) || rewriteValueAMD64_OpAMD64MOVLstore_30(v) case OpAMD64MOVLstoreconst: return rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v) case OpAMD64MOVLstoreconstidx1: @@ -254,7 +262,7 @@ func rewriteValueAMD64(v *Value) bool { case OpAMD64MOVQloadidx8: return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v) case OpAMD64MOVQstore: - return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) + return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v) case OpAMD64MOVQstoreconst: return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v) case OpAMD64MOVQstoreconstidx1: @@ -345,6 +353,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ORLconstmodify_0(v) case OpAMD64ORLload: return rewriteValueAMD64_OpAMD64ORLload_0(v) + case OpAMD64ORLmodify: + return rewriteValueAMD64_OpAMD64ORLmodify_0(v) case OpAMD64ORQ: return rewriteValueAMD64_OpAMD64ORQ_0(v) || rewriteValueAMD64_OpAMD64ORQ_10(v) || rewriteValueAMD64_OpAMD64ORQ_20(v) || rewriteValueAMD64_OpAMD64ORQ_30(v) || rewriteValueAMD64_OpAMD64ORQ_40(v) || rewriteValueAMD64_OpAMD64ORQ_50(v) || rewriteValueAMD64_OpAMD64ORQ_60(v) || rewriteValueAMD64_OpAMD64ORQ_70(v) || rewriteValueAMD64_OpAMD64ORQ_80(v) || rewriteValueAMD64_OpAMD64ORQ_90(v) || 
rewriteValueAMD64_OpAMD64ORQ_100(v) || rewriteValueAMD64_OpAMD64ORQ_110(v) || rewriteValueAMD64_OpAMD64ORQ_120(v) || rewriteValueAMD64_OpAMD64ORQ_130(v) || rewriteValueAMD64_OpAMD64ORQ_140(v) || rewriteValueAMD64_OpAMD64ORQ_150(v) || rewriteValueAMD64_OpAMD64ORQ_160(v) case OpAMD64ORQconst: @@ -353,6 +363,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64ORQconstmodify_0(v) case OpAMD64ORQload: return rewriteValueAMD64_OpAMD64ORQload_0(v) + case OpAMD64ORQmodify: + return rewriteValueAMD64_OpAMD64ORQmodify_0(v) case OpAMD64ROLB: return rewriteValueAMD64_OpAMD64ROLB_0(v) case OpAMD64ROLBconst: @@ -467,12 +479,16 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64SUBLconst_0(v) case OpAMD64SUBLload: return rewriteValueAMD64_OpAMD64SUBLload_0(v) + case OpAMD64SUBLmodify: + return rewriteValueAMD64_OpAMD64SUBLmodify_0(v) case OpAMD64SUBQ: return rewriteValueAMD64_OpAMD64SUBQ_0(v) case OpAMD64SUBQconst: return rewriteValueAMD64_OpAMD64SUBQconst_0(v) case OpAMD64SUBQload: return rewriteValueAMD64_OpAMD64SUBQload_0(v) + case OpAMD64SUBQmodify: + return rewriteValueAMD64_OpAMD64SUBQmodify_0(v) case OpAMD64SUBSD: return rewriteValueAMD64_OpAMD64SUBSD_0(v) case OpAMD64SUBSDload: @@ -513,6 +529,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64XORLconstmodify_0(v) case OpAMD64XORLload: return rewriteValueAMD64_OpAMD64XORLload_0(v) + case OpAMD64XORLmodify: + return rewriteValueAMD64_OpAMD64XORLmodify_0(v) case OpAMD64XORQ: return rewriteValueAMD64_OpAMD64XORQ_0(v) || rewriteValueAMD64_OpAMD64XORQ_10(v) case OpAMD64XORQconst: @@ -521,6 +539,8 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64XORQconstmodify_0(v) case OpAMD64XORQload: return rewriteValueAMD64_OpAMD64XORQload_0(v) + case OpAMD64XORQmodify: + return rewriteValueAMD64_OpAMD64XORQmodify_0(v) case OpAdd16: return rewriteValueAMD64_OpAdd16_0(v) case OpAdd32: @@ -2038,6 +2058,62 @@ func rewriteValueAMD64_OpAMD64ADDLload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ADDLmodify_0(v *Value) bool { + // match: (ADDLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ADDLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ADDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ADDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64ADDQ_0(v *Value) bool { // match: (ADDQ x (MOVQconst [c])) // cond: is32Bit(c) @@ -2902,6 +2978,62 @@ func rewriteValueAMD64_OpAMD64ADDQload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ADDQmodify_0(v 
*Value) bool { + // match: (ADDQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ADDQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ADDQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ADDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64ADDSD_0(v *Value) bool { // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) // cond: canMergeLoad(v, l, x) && clobber(l) @@ -3643,6 +3775,62 @@ func rewriteValueAMD64_OpAMD64ANDLload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ANDLmodify_0(v *Value) bool { + // match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ANDLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ANDLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ANDLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64ANDQ_0(v *Value) bool { b := v.Block _ = b @@ -4108,6 +4296,62 @@ func rewriteValueAMD64_OpAMD64ANDQload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ANDQmodify_0(v *Value) bool { + // match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ANDQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ANDQmodify [off1] {sym1} (LEAQ [off2] {sym2} 
base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ANDQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool { b := v.Block _ = b @@ -14574,6 +14818,548 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDLload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDLload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORLload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORLload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && 
clobber(y) && clobber(l) + // result: (ADDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ADDL x l:(MOVLload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ADDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDL { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVLstore_20(v *Value) bool { + // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (SUBLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64SUBL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64SUBLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ANDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ANDL x l:(MOVLload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ANDLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDL { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVLload 
{ + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(ORL x l:(MOVLload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORL { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (XORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORL { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVLstore {sym} [off] ptr y:(XORL x l:(MOVLload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (XORLmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORL { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVLload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) 
+ return true + } // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) // result: (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) @@ -14691,6 +15477,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool { v.AddArg(mem) return true } + return false +} +func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool { // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) // result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem) @@ -16677,6 +17466,551 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { v.AddArg(mem) return true } + // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDQload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDQload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool { + // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORQload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORQload { + break + } + if y.AuxInt != off { + break + } + if y.Aux != sym { + break + } + _ = y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] { + break + } + mem := y.Args[2] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore 
{sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ADDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ADDQ x l:(MOVQload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ADDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ADDQ { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (SUBQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64SUBQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64SUBQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ANDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ANDQ x l:(MOVQload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ANDQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ANDQ { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if 
l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(ORQ x l:(MOVQload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (ORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64ORQ { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (XORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORQ { + break + } + _ = y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + x := y.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVQstore_20(v *Value) bool { + // match: (MOVQstore {sym} [off] ptr y:(XORQ x l:(MOVQload [off] {sym} ptr mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) + // result: (XORQmodify [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + _ = v.Args[2] + ptr := v.Args[0] + y := v.Args[1] + if y.Op != OpAMD64XORQ { + break + } + _ = y.Args[1] + x := y.Args[0] + l := y.Args[1] + if l.Op != OpAMD64MOVQload { + break + } + if l.AuxInt != off { + break + } + if l.Aux != sym { + break + } + _ = l.Args[1] + if ptr != l.Args[0] { + break + } + mem := l.Args[1] + if mem != v.Args[2] { + break + } + if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) { + break + } + 
v.reset(OpAMD64XORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(x) + v.AddArg(mem) + return true + } // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) @@ -16755,9 +18089,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool { v.AddArg(mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstore_10(v *Value) bool { // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) @@ -31479,6 +32810,62 @@ func rewriteValueAMD64_OpAMD64ORLload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ORLmodify_0(v *Value) bool { + // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ORLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64ORQ_0(v *Value) bool { b := v.Block _ = b @@ -42440,6 +43827,62 @@ func rewriteValueAMD64_OpAMD64ORQload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64ORQmodify_0(v *Value) bool { + // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (ORQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (ORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + 
} + return false +} func rewriteValueAMD64_OpAMD64ROLB_0(v *Value) bool { // match: (ROLB x (NEGQ y)) // cond: @@ -51150,6 +52593,62 @@ func rewriteValueAMD64_OpAMD64SUBLload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SUBLmodify_0(v *Value) bool { + // match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (SUBLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64SUBLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (SUBLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SUBLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool { b := v.Block _ = b @@ -51388,6 +52887,62 @@ func rewriteValueAMD64_OpAMD64SUBQload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64SUBQmodify_0(v *Value) bool { + // match: (SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (SUBQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64SUBQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (SUBQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (SUBQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SUBQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool { // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) // cond: canMergeLoad(v, l, x) && clobber(l) @@ -52988,6 +54543,62 @@ func rewriteValueAMD64_OpAMD64XORLload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64XORLmodify_0(v *Value) bool { + // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (XORLmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := 
v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (XORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (XORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool { b := v.Block _ = b @@ -53454,6 +55065,62 @@ func rewriteValueAMD64_OpAMD64XORQload_0(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64XORQmodify_0(v *Value) bool { + // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(off1+off2) + // result: (XORQmodify [off1+off2] {sym} base val mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (XORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (XORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + return false +} func rewriteValueAMD64_OpAdd16_0(v *Value) bool { // match: (Add16 x y) // cond: diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 32efcaaa3f..09a2fa091e 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -16,8 +16,10 @@ package codegen func SubMem(arr []int, b int) int { // 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)` + // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)` arr[2] -= b // 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)` + // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)` arr[3] -= b // 386:`DECL\s16\([A-Z]+\)` arr[4]-- -- 2.50.0
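
For illustration (not part of the patch): the new rewrite rules fire on the common Go pattern of loading a value from memory, combining it with another value, and storing the result back to the same address. Below is a minimal sketch in the style of the codegen test extended above. The function name RMWMem and the concrete register/offset choices in the comments are assumptions; the expected instruction shapes follow the amd64 checks added to test/codegen/arithmetic.go.

package codegen

// RMWMem is a hypothetical example of the read-modify-write candidates
// handled by this change. With these rules, each statement below is
// expected to compile on amd64 to a single memory-operand instruction
// (e.g. SUBQ CX, 16(AX)) instead of a separate MOVQ load, a register
// ALU op, and a MOVQ store.
func RMWMem(arr []int64, b int64) {
	arr[0] += b // ADDQmodify: ADDQ reg, (ptr)
	arr[1] -= b // SUBQmodify: SUBQ reg, 8(ptr)
	arr[2] &= b // ANDQmodify: ANDQ reg, 16(ptr)
	arr[3] |= b // ORQmodify:  ORQ reg, 24(ptr)
	arr[4] ^= b // XORQmodify: XORQ reg, 32(ptr)
}

The L-suffixed ops (ADDLmodify, SUBLmodify, ANDLmodify, ORLmodify, XORLmodify) cover the same pattern for 32-bit elements such as []int32.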