From c785633941299f5a0f76b5689b3becf47e1239b2 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 24 Mar 2020 16:48:05 +0000 Subject: [PATCH] Revert "cmd/compile: convert 386 port to use addressing modes pass" This reverts commit CL 222782. Reason for revert: Reverting to see if 386 errors go away Update #37881 Change-Id: I74f287404c52414db1b6ff1649effa4ed9e5cc0c Reviewed-on: https://go-review.googlesource.com/c/go/+/225218 Reviewed-by: Bryan C. Mills --- .../compile/internal/ssa/addressingmodes.go | 83 +- src/cmd/compile/internal/ssa/gen/386.rules | 289 +- src/cmd/compile/internal/ssa/rewrite386.go | 6442 +++++++++++++---- test/codegen/memops.go | 78 +- 4 files changed, 5358 insertions(+), 1534 deletions(-) diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go index 2af8a4d1fc..8874b56a9b 100644 --- a/src/cmd/compile/internal/ssa/addressingmodes.go +++ b/src/cmd/compile/internal/ssa/addressingmodes.go @@ -11,8 +11,8 @@ func addressingModes(f *Func) { default: // Most architectures can't do this. return - case "amd64", "386": - // TODO: s390x? + case "amd64": + // TODO: 386, s390x? } var tmp []*Value @@ -21,17 +21,7 @@ func addressingModes(f *Func) { if !combineFirst[v.Op] { continue } - // All matched operations have the pointer in arg[0]. - // All results have the pointer in arg[0] and the index in arg[1]. - // *Except* for operations which update a register, - // which are marked with resultInArg0. Those have - // the pointer in arg[1], and the corresponding result op - // has the pointer in arg[1] and the index in arg[2]. - ptrIndex := 0 - if opcodeTable[v.Op].resultInArg0 { - ptrIndex = 1 - } - p := v.Args[ptrIndex] + p := v.Args[0] c, ok := combine[[2]Op{v.Op, p.Op}] if !ok { continue @@ -81,11 +71,10 @@ func addressingModes(f *Func) { f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op) } // Combine the operations. - tmp = append(tmp[:0], v.Args[:ptrIndex]...) - tmp = append(tmp, p.Args...) - tmp = append(tmp, v.Args[ptrIndex+1:]...) + tmp = append(tmp[:0], v.Args[1:]...) v.resetArgs() v.Op = c + v.AddArgs(p.Args...) v.AddArgs(tmp...) } } @@ -108,7 +97,6 @@ func init() { // x.Args[0].Args + x.Args[1:] // Additionally, the Aux/AuxInt from x.Args[0] is merged into x. var combine = map[[2]Op]Op{ - // amd64 [2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1, [2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1, [2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1, @@ -162,64 +150,5 @@ var combine = map[[2]Op]Op{ [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1, [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8, - // 386 - [2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1, - [2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1, - [2]Op{Op386MOVLload, Op386ADDL}: Op386MOVLloadidx1, - [2]Op{Op386MOVSSload, Op386ADDL}: Op386MOVSSloadidx1, - [2]Op{Op386MOVSDload, Op386ADDL}: Op386MOVSDloadidx1, - - [2]Op{Op386MOVBstore, Op386ADDL}: Op386MOVBstoreidx1, - [2]Op{Op386MOVWstore, Op386ADDL}: Op386MOVWstoreidx1, - [2]Op{Op386MOVLstore, Op386ADDL}: Op386MOVLstoreidx1, - [2]Op{Op386MOVSSstore, Op386ADDL}: Op386MOVSSstoreidx1, - [2]Op{Op386MOVSDstore, Op386ADDL}: Op386MOVSDstoreidx1, - - [2]Op{Op386MOVBstoreconst, Op386ADDL}: Op386MOVBstoreconstidx1, - [2]Op{Op386MOVWstoreconst, Op386ADDL}: Op386MOVWstoreconstidx1, - [2]Op{Op386MOVLstoreconst, Op386ADDL}: Op386MOVLstoreconstidx1, - - [2]Op{Op386MOVBload, Op386LEAL1}: Op386MOVBloadidx1, - [2]Op{Op386MOVWload, Op386LEAL1}: Op386MOVWloadidx1, - [2]Op{Op386MOVWload, Op386LEAL2}: Op386MOVWloadidx2, - [2]Op{Op386MOVLload, Op386LEAL1}: Op386MOVLloadidx1, - [2]Op{Op386MOVLload, Op386LEAL4}: Op386MOVLloadidx4, - [2]Op{Op386MOVSSload, Op386LEAL1}: Op386MOVSSloadidx1, - [2]Op{Op386MOVSSload, Op386LEAL4}: Op386MOVSSloadidx4, - [2]Op{Op386MOVSDload, Op386LEAL1}: Op386MOVSDloadidx1, - [2]Op{Op386MOVSDload, Op386LEAL8}: Op386MOVSDloadidx8, - - [2]Op{Op386MOVBstore, Op386LEAL1}: Op386MOVBstoreidx1, - [2]Op{Op386MOVWstore, Op386LEAL1}: Op386MOVWstoreidx1, - [2]Op{Op386MOVWstore, Op386LEAL2}: Op386MOVWstoreidx2, - [2]Op{Op386MOVLstore, Op386LEAL1}: Op386MOVLstoreidx1, - [2]Op{Op386MOVLstore, Op386LEAL4}: Op386MOVLstoreidx4, - [2]Op{Op386MOVSSstore, Op386LEAL1}: Op386MOVSSstoreidx1, - [2]Op{Op386MOVSSstore, Op386LEAL4}: Op386MOVSSstoreidx4, - [2]Op{Op386MOVSDstore, Op386LEAL1}: Op386MOVSDstoreidx1, - [2]Op{Op386MOVSDstore, Op386LEAL8}: Op386MOVSDstoreidx8, - - [2]Op{Op386MOVBstoreconst, Op386LEAL1}: Op386MOVBstoreconstidx1, - [2]Op{Op386MOVWstoreconst, Op386LEAL1}: Op386MOVWstoreconstidx1, - [2]Op{Op386MOVWstoreconst, Op386LEAL2}: Op386MOVWstoreconstidx2, - [2]Op{Op386MOVLstoreconst, Op386LEAL1}: Op386MOVLstoreconstidx1, - [2]Op{Op386MOVLstoreconst, Op386LEAL4}: Op386MOVLstoreconstidx4, - - [2]Op{Op386ADDLload, Op386LEAL4}: Op386ADDLloadidx4, - [2]Op{Op386SUBLload, Op386LEAL4}: Op386SUBLloadidx4, - [2]Op{Op386MULLload, Op386LEAL4}: Op386MULLloadidx4, - [2]Op{Op386ANDLload, Op386LEAL4}: Op386ANDLloadidx4, - [2]Op{Op386ORLload, Op386LEAL4}: Op386ORLloadidx4, - [2]Op{Op386XORLload, Op386LEAL4}: Op386XORLloadidx4, - - [2]Op{Op386ADDLmodify, Op386LEAL4}: Op386ADDLmodifyidx4, - [2]Op{Op386SUBLmodify, Op386LEAL4}: Op386SUBLmodifyidx4, - [2]Op{Op386ANDLmodify, Op386LEAL4}: Op386ANDLmodifyidx4, - [2]Op{Op386ORLmodify, Op386LEAL4}: Op386ORLmodifyidx4, - [2]Op{Op386XORLmodify, Op386LEAL4}: Op386XORLmodifyidx4, - - [2]Op{Op386ADDLconstmodify, Op386LEAL4}: Op386ADDLconstmodifyidx4, - [2]Op{Op386ANDLconstmodify, Op386LEAL4}: Op386ANDLconstmodifyidx4, - [2]Op{Op386ORLconstmodify, Op386LEAL4}: Op386ORLconstmodifyidx4, - [2]Op{Op386XORLconstmodify, Op386LEAL4}: Op386XORLconstmodifyidx4, + // TODO: 386 } diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index 47984734b3..64a6cbaf84 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -588,6 +588,10 @@ (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload [off] {sym} ptr mem) (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload [off] {sym} ptr mem) +(MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) +(MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) +(MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) + // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x) @@ -607,22 +611,34 @@ // fold constants into memory operations // Note that this is not always a good idea because if not all the uses of -// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now -// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one. +// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now +// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one. // Nevertheless, let's do it! (MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem) (MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem) ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem) +((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) -> + ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem) +((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) -> + ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem) +((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem) +((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) -> + ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem) +((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) -> + ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) +((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) -> + ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) // Fold constants into stores. (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> @@ -636,7 +652,7 @@ (MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem) -// We need to fold LEAL into the MOVx ops so that the live variable analysis knows +// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows // what variables are being read/written by the ops. // Note: we turn off this merging for operations on globals when building // position-independent code (when Flag_shared is set). @@ -656,9 +672,31 @@ && (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> (MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +// generating indexed loads and stores +(MOV(B|W|L|SS|SD)load [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOV(B|W|L|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) +(MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) +(MOV(L|SS)load [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) +(MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + +(MOV(B|W|L|SS|SD)store [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOV(B|W|L|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) +(MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) +(MOV(L|SS)store [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) +(MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem) +((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) @@ -668,20 +706,97 @@ ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) +((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem) && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem) +((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) + && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) + +(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem) +(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem) + +(MOV(B|W|L)storeconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> + (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) +(MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> + (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) +(MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> + (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + +(MOV(B|W|L)storeconst [x] {sym} (ADDL ptr idx) mem) -> (MOV(B|W|L)storeconstidx1 [x] {sym} ptr idx mem) + +// combine SHLL into indexed loads and stores +(MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem) +(MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLloadidx4 [c] {sym} ptr idx mem) +(MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem) +(MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) -> (MOVLstoreidx4 [c] {sym} ptr idx val mem) +(MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem) +(MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem) + +// combine ADDL into indexed loads and stores +(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem) +(MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem) +(MOV(L|SS)loadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+d))] {sym} ptr idx mem) +(MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) -> (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem) + +(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem) +(MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem) +(MOV(L|SS)storeidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+d))] {sym} ptr idx val mem) +(MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem) + +(MOV(B|W|L|SS|SD)loadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(B|W|L|SS|SD)loadidx1 [int64(int32(c+d))] {sym} ptr idx mem) +(MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem) +(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOV(L|SS)loadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) +(MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) -> (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem) + +(MOV(B|W|L|SS|SD)storeidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(B|W|L|SS|SD)storeidx1 [int64(int32(c+d))] {sym} ptr idx val mem) +(MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem) +(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOV(L|SS)storeidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem) +(MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem) // Merge load/store to op ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem) +((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> + ((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem) +((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + ((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) -> ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) +(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) -> + ((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem) +(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) -> + ((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) -> ((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem) +(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) + && y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) -> + ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) +((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) -> + ((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) +(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) -> + (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem) + +(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) -> + (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) +(MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) -> + (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) +(MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) -> + (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) + +(MOV(B|W|L)storeconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) -> + (MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) +(MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) -> + (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) +(MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) -> + (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) // fold LEALs together (LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> @@ -711,16 +826,6 @@ (LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y) -// LEAL[1248] into LEAL[1248]. Only some such merges are possible. -(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y) -(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x) -(LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil -> - (LEAL4 [off1+2*off2] {sym1} x y) -(LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil -> - (LEAL8 [off1+4*off2] {sym1} x y) - // Absorb InvertFlags into branches. (LT (InvertFlags cmp) yes no) -> (GT cmp yes no) (GT (InvertFlags cmp) yes no) -> (LT cmp yes no) @@ -934,27 +1039,23 @@ // TEST %reg,%reg is shorter than CMP (CMP(L|W|B)const x [0]) -> (TEST(L|W|B) x x) -// Convert LEAL1 back to ADDL if we can -(LEAL1 [0] x y) && v.Aux == nil -> (ADDL x y) - // Combining byte loads into larger (unaligned) loads. // There are many ways these combinations could occur. This is // designed to match the way encoding/binary.LittleEndian does it. -(ORL x0:(MOVBload [i0] {s} p0 mem) - s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) +(ORL x0:(MOVBload [i0] {s} p mem) + s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) (ORL o0:(ORL - x0:(MOVWload [i0] {s} p0 mem) - s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem))) - s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem))) + x0:(MOVWload [i0] {s} p mem) + s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) + s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem))) && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 @@ -963,84 +1064,126 @@ && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 - && same(p0, p1, 1) - && same(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) - -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem) + +(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) + s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) + && i1==i0+1 + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, s0) + -> @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) + +(ORL o0:(ORL + x0:(MOVWloadidx1 [i0] {s} p idx mem) + s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) + s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem))) + && i2 == i0+2 + && i3 == i0+3 + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && o0.Uses == 1 + && mergePoint(b,x0,x1,x2) != nil + && clobber(x0, x1, x2, s0, s1, o0) + -> @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) // Combine constant stores into larger (unaligned) stores. -(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) +(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) && x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() - && same(p0, p1, 1) && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) && x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() - && same(p0, p1, 1) && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) && x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() - && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) && x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() - && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + +(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() + && clobber(x) + -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) +(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && clobber(x) + -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) + +(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) + && x.Uses == 1 + && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() + && clobber(x) + -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst [1] i) mem) // Combine stores into larger (unaligned) stores. -(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) +(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w mem) +(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i] {s} p w mem) +(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w0 mem) +(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w mem) -(MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHR(W|L)const [8] w) mem)) + -> (MOVLstore [i-2] {s} p w mem) +(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVWstore [i] {s} p0 w mem) -(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem)) + -> (MOVLstore [i-2] {s} p w0 mem) + +(MOVBstoreidx1 [i] {s} p idx (SHR(L|W)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w0 mem) -(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) + -> (MOVWstoreidx1 [i-1] {s} p idx w mem) +(MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHR(L|W)const [8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstoreidx1 [i] {s} p idx w mem) +(MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) +(MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p idx w mem) +(MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + +(MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w mem) -(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem)) + -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w mem) +(MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w0 mem) - -// Move constant offsets from LEALx up into load. This lets the above combining -// rules discover indexed load-combining instances. -(MOV(B|W|L)load [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)load [i0+i1] {s0} (LEAL1 [0] {s1} x y) mem) -(MOV(B|W|L)load [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)load [i0+i1] {s0} (LEAL2 [0] {s1} x y) mem) -(MOV(B|W|L)load [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)load [i0+i1] {s0} (LEAL4 [0] {s1} x y) mem) -(MOV(B|W|L)load [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)load [i0+i1] {s0} (LEAL8 [0] {s1} x y) mem) - -(MOV(B|W|L)store [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)store [i0+i1] {s0} (LEAL1 [0] {s1} x y) val mem) -(MOV(B|W|L)store [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)store [i0+i1] {s0} (LEAL2 [0] {s1} x y) val mem) -(MOV(B|W|L)store [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)store [i0+i1] {s0} (LEAL4 [0] {s1} x y) val mem) -(MOV(B|W|L)store [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L)store [i0+i1] {s0} (LEAL8 [0] {s1} x y) val mem) + -> (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w0 mem) // For PIC, break floating-point constant loading into two instructions so we have // a register to use for holding the address of the constant pool entry. diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index e9a4b6601d..8b2da94c13 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -18,10 +18,16 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ADDLconst(v) case Op386ADDLconstmodify: return rewriteValue386_Op386ADDLconstmodify(v) + case Op386ADDLconstmodifyidx4: + return rewriteValue386_Op386ADDLconstmodifyidx4(v) case Op386ADDLload: return rewriteValue386_Op386ADDLload(v) + case Op386ADDLloadidx4: + return rewriteValue386_Op386ADDLloadidx4(v) case Op386ADDLmodify: return rewriteValue386_Op386ADDLmodify(v) + case Op386ADDLmodifyidx4: + return rewriteValue386_Op386ADDLmodifyidx4(v) case Op386ADDSD: return rewriteValue386_Op386ADDSD(v) case Op386ADDSDload: @@ -36,10 +42,16 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ANDLconst(v) case Op386ANDLconstmodify: return rewriteValue386_Op386ANDLconstmodify(v) + case Op386ANDLconstmodifyidx4: + return rewriteValue386_Op386ANDLconstmodifyidx4(v) case Op386ANDLload: return rewriteValue386_Op386ANDLload(v) + case Op386ANDLloadidx4: + return rewriteValue386_Op386ANDLloadidx4(v) case Op386ANDLmodify: return rewriteValue386_Op386ANDLmodify(v) + case Op386ANDLmodifyidx4: + return rewriteValue386_Op386ANDLmodifyidx4(v) case Op386CMPB: return rewriteValue386_Op386CMPB(v) case Op386CMPBconst: @@ -84,28 +96,62 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386MOVBLZX(v) case Op386MOVBload: return rewriteValue386_Op386MOVBload(v) + case Op386MOVBloadidx1: + return rewriteValue386_Op386MOVBloadidx1(v) case Op386MOVBstore: return rewriteValue386_Op386MOVBstore(v) case Op386MOVBstoreconst: return rewriteValue386_Op386MOVBstoreconst(v) + case Op386MOVBstoreconstidx1: + return rewriteValue386_Op386MOVBstoreconstidx1(v) + case Op386MOVBstoreidx1: + return rewriteValue386_Op386MOVBstoreidx1(v) case Op386MOVLload: return rewriteValue386_Op386MOVLload(v) + case Op386MOVLloadidx1: + return rewriteValue386_Op386MOVLloadidx1(v) + case Op386MOVLloadidx4: + return rewriteValue386_Op386MOVLloadidx4(v) case Op386MOVLstore: return rewriteValue386_Op386MOVLstore(v) case Op386MOVLstoreconst: return rewriteValue386_Op386MOVLstoreconst(v) + case Op386MOVLstoreconstidx1: + return rewriteValue386_Op386MOVLstoreconstidx1(v) + case Op386MOVLstoreconstidx4: + return rewriteValue386_Op386MOVLstoreconstidx4(v) + case Op386MOVLstoreidx1: + return rewriteValue386_Op386MOVLstoreidx1(v) + case Op386MOVLstoreidx4: + return rewriteValue386_Op386MOVLstoreidx4(v) case Op386MOVSDconst: return rewriteValue386_Op386MOVSDconst(v) case Op386MOVSDload: return rewriteValue386_Op386MOVSDload(v) + case Op386MOVSDloadidx1: + return rewriteValue386_Op386MOVSDloadidx1(v) + case Op386MOVSDloadidx8: + return rewriteValue386_Op386MOVSDloadidx8(v) case Op386MOVSDstore: return rewriteValue386_Op386MOVSDstore(v) + case Op386MOVSDstoreidx1: + return rewriteValue386_Op386MOVSDstoreidx1(v) + case Op386MOVSDstoreidx8: + return rewriteValue386_Op386MOVSDstoreidx8(v) case Op386MOVSSconst: return rewriteValue386_Op386MOVSSconst(v) case Op386MOVSSload: return rewriteValue386_Op386MOVSSload(v) + case Op386MOVSSloadidx1: + return rewriteValue386_Op386MOVSSloadidx1(v) + case Op386MOVSSloadidx4: + return rewriteValue386_Op386MOVSSloadidx4(v) case Op386MOVSSstore: return rewriteValue386_Op386MOVSSstore(v) + case Op386MOVSSstoreidx1: + return rewriteValue386_Op386MOVSSstoreidx1(v) + case Op386MOVSSstoreidx4: + return rewriteValue386_Op386MOVSSstoreidx4(v) case Op386MOVWLSX: return rewriteValue386_Op386MOVWLSX(v) case Op386MOVWLSXload: @@ -114,16 +160,30 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386MOVWLZX(v) case Op386MOVWload: return rewriteValue386_Op386MOVWload(v) + case Op386MOVWloadidx1: + return rewriteValue386_Op386MOVWloadidx1(v) + case Op386MOVWloadidx2: + return rewriteValue386_Op386MOVWloadidx2(v) case Op386MOVWstore: return rewriteValue386_Op386MOVWstore(v) case Op386MOVWstoreconst: return rewriteValue386_Op386MOVWstoreconst(v) + case Op386MOVWstoreconstidx1: + return rewriteValue386_Op386MOVWstoreconstidx1(v) + case Op386MOVWstoreconstidx2: + return rewriteValue386_Op386MOVWstoreconstidx2(v) + case Op386MOVWstoreidx1: + return rewriteValue386_Op386MOVWstoreidx1(v) + case Op386MOVWstoreidx2: + return rewriteValue386_Op386MOVWstoreidx2(v) case Op386MULL: return rewriteValue386_Op386MULL(v) case Op386MULLconst: return rewriteValue386_Op386MULLconst(v) case Op386MULLload: return rewriteValue386_Op386MULLload(v) + case Op386MULLloadidx4: + return rewriteValue386_Op386MULLloadidx4(v) case Op386MULSD: return rewriteValue386_Op386MULSD(v) case Op386MULSDload: @@ -142,10 +202,16 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ORLconst(v) case Op386ORLconstmodify: return rewriteValue386_Op386ORLconstmodify(v) + case Op386ORLconstmodifyidx4: + return rewriteValue386_Op386ORLconstmodifyidx4(v) case Op386ORLload: return rewriteValue386_Op386ORLload(v) + case Op386ORLloadidx4: + return rewriteValue386_Op386ORLloadidx4(v) case Op386ORLmodify: return rewriteValue386_Op386ORLmodify(v) + case Op386ORLmodifyidx4: + return rewriteValue386_Op386ORLmodifyidx4(v) case Op386ROLBconst: return rewriteValue386_Op386ROLBconst(v) case Op386ROLLconst: @@ -212,8 +278,12 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386SUBLconst(v) case Op386SUBLload: return rewriteValue386_Op386SUBLload(v) + case Op386SUBLloadidx4: + return rewriteValue386_Op386SUBLloadidx4(v) case Op386SUBLmodify: return rewriteValue386_Op386SUBLmodify(v) + case Op386SUBLmodifyidx4: + return rewriteValue386_Op386SUBLmodifyidx4(v) case Op386SUBSD: return rewriteValue386_Op386SUBSD(v) case Op386SUBSDload: @@ -228,10 +298,16 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386XORLconst(v) case Op386XORLconstmodify: return rewriteValue386_Op386XORLconstmodify(v) + case Op386XORLconstmodifyidx4: + return rewriteValue386_Op386XORLconstmodifyidx4(v) case Op386XORLload: return rewriteValue386_Op386XORLload(v) + case Op386XORLloadidx4: + return rewriteValue386_Op386XORLloadidx4(v) case Op386XORLmodify: return rewriteValue386_Op386XORLmodify(v) + case Op386XORLmodifyidx4: + return rewriteValue386_Op386XORLmodifyidx4(v) case OpAdd16: v.Op = Op386ADDL return true @@ -966,6 +1042,32 @@ func rewriteValue386_Op386ADDL(v *Value) bool { } break } + // match: (ADDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (ADDLloadidx4 x [off] {sym} ptr idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + continue + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(Op386ADDLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } + break + } // match: (ADDL x (NEGL y)) // result: (SUBL x y) for { @@ -1214,6 +1316,81 @@ func rewriteValue386_Op386ADDLconstmodify(v *Value) bool { } return false } +func rewriteValue386_Op386ADDLconstmodifyidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ADDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ADDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) + // cond: ValAndOff(valoff1).canAdd(off2*4) + // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2 * 4)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2 * 4) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ADDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, idx, mem) + return true + } + return false +} func rewriteValue386_Op386ADDLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -1265,6 +1442,109 @@ func rewriteValue386_Op386ADDLload(v *Value) bool { v.AddArg3(val, base, mem) return true } + // match: (ADDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386ADDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386ADDLloadidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ADDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // cond: is32Bit(off1+off2) + // result: (ADDLloadidx4 [off1+off2] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ADDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (ADDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (ADDLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ADDLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (ADDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, base, idx, mem) + return true + } return false } func rewriteValue386_Op386ADDLmodify(v *Value) bool { @@ -1320,6 +1600,107 @@ func rewriteValue386_Op386ADDLmodify(v *Value) bool { } return false } +func rewriteValue386_Op386ADDLmodifyidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ADDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) + // cond: is32Bit(off1+off2) + // result: (ADDLmodifyidx4 [off1+off2] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ADDLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ADDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) + // cond: is32Bit(off1+off2*4) + // result: (ADDLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ADDLmodifyidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ADDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ADDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) + // cond: validValAndOff(c,off) + // result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + if v_2.Op != Op386MOVLconst { + break + } + c := v_2.AuxInt + mem := v_3 + if !(validValAndOff(c, off)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} func rewriteValue386_Op386ADDSD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -1534,6 +1915,32 @@ func rewriteValue386_Op386ANDL(v *Value) bool { } break } + // match: (ANDL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (ANDLloadidx4 x [off] {sym} ptr idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + continue + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(Op386ANDLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } + break + } // match: (ANDL x x) // result: x for { @@ -1650,30 +2057,105 @@ func rewriteValue386_Op386ANDLconstmodify(v *Value) bool { } return false } -func rewriteValue386_Op386ANDLload(v *Value) bool { +func rewriteValue386_Op386ANDLconstmodifyidx4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (ANDLload [off1] {sym} val (ADDLconst [off2] base) mem) - // cond: is32Bit(off1+off2) - // result: (ANDLload [off1+off2] {sym} val base mem) + // match: (ANDLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) for { - off1 := v.AuxInt + valoff1 := v.AuxInt sym := v.Aux - val := v_0 - if v_1.Op != Op386ADDLconst { + if v_0.Op != Op386ADDLconst { break } - off2 := v_1.AuxInt - base := v_1.Args[0] + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 mem := v_2 - if !(is32Bit(off1 + off2)) { + if !(ValAndOff(valoff1).canAdd(off2)) { break } - v.reset(Op386ANDLload) - v.AuxInt = off1 + off2 + v.reset(Op386ANDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ANDLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) + // cond: ValAndOff(valoff1).canAdd(off2*4) + // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2 * 4)) { + break + } + v.reset(Op386ANDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2 * 4) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ANDLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ANDLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ANDLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386ANDLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ANDLload [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ANDLload [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ANDLload) + v.AuxInt = off1 + off2 v.Aux = sym v.AddArg3(val, base, mem) return true @@ -1701,6 +2183,109 @@ func rewriteValue386_Op386ANDLload(v *Value) bool { v.AddArg3(val, base, mem) return true } + // match: (ANDLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386ANDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386ANDLloadidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ANDLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // cond: is32Bit(off1+off2) + // result: (ANDLloadidx4 [off1+off2] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ANDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (ANDLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (ANDLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ANDLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (ANDLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ANDLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ANDLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, base, idx, mem) + return true + } return false } func rewriteValue386_Op386ANDLmodify(v *Value) bool { @@ -1756,6 +2341,107 @@ func rewriteValue386_Op386ANDLmodify(v *Value) bool { } return false } +func rewriteValue386_Op386ANDLmodifyidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ANDLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) + // cond: is32Bit(off1+off2) + // result: (ANDLmodifyidx4 [off1+off2] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ANDLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ANDLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) + // cond: is32Bit(off1+off2*4) + // result: (ANDLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ANDLmodifyidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ANDLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ANDLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ANDLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ANDLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) + // cond: validValAndOff(c,off) + // result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + if v_2.Op != Op386MOVLconst { + break + } + c := v_2.AuxInt + mem := v_3 + if !(validValAndOff(c, off)) { + break + } + v.reset(Op386ANDLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} func rewriteValue386_Op386CMPB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -3082,109 +3768,35 @@ func rewriteValue386_Op386LEAL1(v *Value) bool { } break } - // match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y) + return false +} +func rewriteValue386_Op386LEAL2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(c+d) && x.Op != OpSB + // result: (LEAL2 [c+d] {s} x y) for { - off1 := v.AuxInt - sym1 := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != Op386LEAL1 { - continue - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - y := v_1.Args[1] - if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - continue - } - v.reset(Op386LEAL2) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(x, y) - return true + c := v.AuxInt + s := v.Aux + if v_0.Op != Op386ADDLconst { + break } - break + d := v_0.AuxInt + x := v_0.Args[0] + y := v_1 + if !(is32Bit(c+d) && x.Op != OpSB) { + break + } + v.reset(Op386LEAL2) + v.AuxInt = c + d + v.Aux = s + v.AddArg2(x, y) + return true } - // match: (LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x) - for { - off1 := v.AuxInt - sym1 := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != Op386LEAL1 { - continue - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { - if x != v_1_0 { - continue - } - y := v_1_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - continue - } - v.reset(Op386LEAL2) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(y, x) - return true - } - } - break - } - // match: (LEAL1 [0] x y) - // cond: v.Aux == nil - // result: (ADDL x y) - for { - if v.AuxInt != 0 { - break - } - x := v_0 - y := v_1 - if !(v.Aux == nil) { - break - } - v.reset(Op386ADDL) - v.AddArg2(x, y) - return true - } - return false -} -func rewriteValue386_Op386LEAL2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) - // cond: is32Bit(c+d) && x.Op != OpSB - // result: (LEAL2 [c+d] {s} x y) - for { - c := v.AuxInt - s := v.Aux - if v_0.Op != Op386ADDLconst { - break - } - d := v_0.AuxInt - x := v_0.Args[0] - y := v_1 - if !(is32Bit(c+d) && x.Op != OpSB) { - break - } - v.reset(Op386LEAL2) - v.AuxInt = c + d - v.Aux = s - v.AddArg2(x, y) - return true - } - // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) - // cond: is32Bit(c+2*d) && y.Op != OpSB - // result: (LEAL2 [c+2*d] {s} x y) + // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(c+2*d) && y.Op != OpSB + // result: (LEAL2 [c+2*d] {s} x y) for { c := v.AuxInt s := v.Aux @@ -3257,28 +3869,6 @@ func rewriteValue386_Op386LEAL2(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAL2 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) - // cond: is32Bit(off1+2*off2) && sym2 == nil - // result: (LEAL4 [off1+2*off2] {sym1} x y) - for { - off1 := v.AuxInt - sym1 := v.Aux - x := v_0 - if v_1.Op != Op386LEAL1 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - y := v_1.Args[1] - if y != v_1.Args[0] || !(is32Bit(off1+2*off2) && sym2 == nil) { - break - } - v.reset(Op386LEAL4) - v.AuxInt = off1 + 2*off2 - v.Aux = sym1 - v.AddArg2(x, y) - return true - } return false } func rewriteValue386_Op386LEAL4(v *Value) bool { @@ -3364,28 +3954,6 @@ func rewriteValue386_Op386LEAL4(v *Value) bool { v.AddArg2(x, y) return true } - // match: (LEAL4 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) - // cond: is32Bit(off1+4*off2) && sym2 == nil - // result: (LEAL8 [off1+4*off2] {sym1} x y) - for { - off1 := v.AuxInt - sym1 := v.Aux - x := v_0 - if v_1.Op != Op386LEAL1 { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - y := v_1.Args[1] - if y != v_1.Args[0] || !(is32Bit(off1+4*off2) && sym2 == nil) { - break - } - v.reset(Op386LEAL8) - v.AuxInt = off1 + 4*off2 - v.Aux = sym1 - v.AddArg2(x, y) - return true - } return false } func rewriteValue386_Op386LEAL8(v *Value) bool { @@ -3578,6 +4146,30 @@ func rewriteValue386_Op386MOVBLZX(v *Value) bool { v0.AddArg2(ptr, mem) return true } + // match: (MOVBLZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) + for { + x := v_0 + if x.Op != Op386MOVBloadidx1 { + break + } + off := x.AuxInt + sym := x.Aux + mem := x.Args[2] + ptr := x.Args[0] + idx := x.Args[1] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, Op386MOVBloadidx1, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + return true + } // match: (MOVBLZX (ANDLconst [c] x)) // result: (ANDLconst [c & 0xff] x) for { @@ -3662,117 +4254,55 @@ func rewriteValue386_Op386MOVBload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVBload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAL1 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAL2 [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAL4 [0] {s1} x y) mem) + // match: (MOVBload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVBloadidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVBload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAL8 [0] {s1} x y) mem) + // match: (MOVBload [off] {sym} (ADDL ptr idx) mem) + // cond: ptr.Op != OpSB + // result: (MOVBloadidx1 [off] {sym} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + mem := v_1 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVBloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true } - v.reset(Op386MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true + break } // match: (MOVBload [off] {sym} (SB) _) // cond: symIsRO(sym) @@ -3789,6 +4319,54 @@ func rewriteValue386_Op386MOVBload(v *Value) bool { } return false } +func rewriteValue386_Op386MOVBloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVBloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + // match: (MOVBloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVBloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVBloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + return false +} func rewriteValue386_Op386MOVBstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -3895,13 +4473,65 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVBstoreidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVBstore [off] {sym} (ADDL ptr idx) val mem) + // cond: ptr.Op != OpSB + // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + val := v_1 + mem := v_2 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVBstoreidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 if v_1.Op != Op386SHRWconst || v_1.AuxInt != 8 { break } @@ -3911,23 +4541,22 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { break } mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { break } v.reset(Op386MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p0, w, mem) + v.AddArg3(p, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 if v_1.Op != Op386SHRLconst || v_1.AuxInt != 8 { break } @@ -3937,71 +4566,74 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { break } mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { break } v.reset(Op386MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p0, w, mem) + v.AddArg3(p, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHRWconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i] {s} p0 w mem) + // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 w := v_1 x := v_2 if x.Op != Op386MOVBstore || x.AuxInt != i+1 || x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] + if p != x.Args[0] { + break + } x_1 := x.Args[1] - if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != Op386SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { break } v.reset(Op386MOVWstore) v.AuxInt = i v.Aux = s - v.AddArg3(p0, w, mem) + v.AddArg3(p, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore {s} [i+1] p0 (SHRLconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i] {s} p0 w mem) + // match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 w := v_1 x := v_2 if x.Op != Op386MOVBstore || x.AuxInt != i+1 || x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] + if p != x.Args[0] { + break + } x_1 := x.Args[1] - if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != Op386SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { break } v.reset(Op386MOVWstore) v.AuxInt = i v.Aux = s - v.AddArg3(p0, w, mem) + v.AddArg3(p, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w0 mem) + // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 if v_1.Op != Op386SHRLconst { break } @@ -4012,131 +4644,17 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool { break } mem := x.Args[2] - p0 := x.Args[0] + if p != x.Args[0] { + break + } w0 := x.Args[1] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } v.reset(Op386MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p0, w0, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAL1 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAL2 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAL4 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAL8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.AddArg3(p, w0, mem) return true } return false @@ -4189,13 +4707,53 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVBstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + for { + x := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVBstoreconstidx1) + v.AuxInt = ValAndOff(x).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVBstoreconst [x] {sym} (ADDL ptr idx) mem) + // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + v.reset(Op386MOVBstoreconstidx1) + v.AuxInt = x + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != Op386MOVBstoreconst { break @@ -4205,23 +4763,22 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(Op386MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } - // match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != Op386MOVBstoreconst { break @@ -4231,18 +4788,308 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(Op386MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVBstoreconstidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) + // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + c := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVBstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) + // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + c := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVBstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) + for { + c := v.AuxInt + s := v.Aux + p := v_0 + i := v_1 + x := v_2 + if x.Op != Op386MOVBstoreconstidx1 { + break + } + a := x.AuxInt + if x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(Op386MOVWstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) + v.Aux = s + v.AddArg3(p, i, mem) return true } return false } +func rewriteValue386_Op386MOVBstoreidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVBstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + // match: (MOVBstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVBstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVBstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst || v_2.AuxInt != 8 { + continue + } + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg4(p, idx, w, mem) + return true + } + } + break + } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRWconst || v_2.AuxInt != 8 { + continue + } + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg4(p, idx, w, mem) + return true + } + } + break + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + w := v_2 + x := v_3 + if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 { + continue + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRLconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg4(p, idx, w, mem) + return true + } + } + break + } + // match: (MOVBstoreidx1 [i] {s} p idx w x:(MOVBstoreidx1 [i+1] {s} p idx (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i] {s} p idx w mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + w := v_2 + x := v_3 + if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i+1 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 { + continue + } + x_2 := x.Args[2] + if x_2.Op != Op386SHRWconst || x_2.AuxInt != 8 || w != x_2.Args[0] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i + v.Aux = s + v.AddArg4(p, idx, w, mem) + return true + } + } + break + } + // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst { + continue + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 { + continue + } + w0 := x.Args[2] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg4(p, idx, w0, mem) + return true + } + } + break + } + return false +} func rewriteValue386_Op386MOVLload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -4311,129 +5158,200 @@ func rewriteValue386_Op386MOVLload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVLload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAL1 [0] {s1} x y) mem) + // match: (MOVLload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVLloadidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVLload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAL2 [0] {s1} x y) mem) + // match: (MOVLload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL4 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVLload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAL4 [0] {s1} x y) mem) + // match: (MOVLload [off] {sym} (ADDL ptr idx) mem) + // cond: ptr.Op != OpSB + // result: (MOVLloadidx1 [off] {sym} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + mem := v_1 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVLloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + // match: (MOVLload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))]) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - v.reset(Op386MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVLconst) + v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder))) return true } - // match: (MOVLload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAL8 [0] {s1} x y) mem) + return false +} +func rewriteValue386_Op386MOVLloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLloadidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) + // result: (MOVLloadidx4 [c] {sym} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { - break + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { + continue + } + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLloadidx4) + v.AuxInt = c + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + // match: (MOVLloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVLloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + // match: (MOVLloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVLloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVLloadidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVLloadidx4 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { break } - v.reset(Op386MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVLloadidx4) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVLload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))]) + // match: (MOVLloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVLloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) for { - off := v.AuxInt + c := v.AuxInt sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { + ptr := v_0 + if v_1.Op != Op386ADDLconst { break } - v.reset(Op386MOVLconst) - v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder))) + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLloadidx4) + v.AuxInt = int64(int32(c + 4*d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) return true } return false @@ -4510,6 +5428,82 @@ func rewriteValue386_Op386MOVLstore(v *Value) bool { v.AddArg3(base, val, mem) return true } + // match: (MOVLstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVLstoreidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVLstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL4 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVLstoreidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVLstore [off] {sym} (ADDL ptr idx) val mem) + // cond: ptr.Op != OpSB + // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + val := v_1 + mem := v_2 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVLstoreidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem) // cond: y.Uses==1 && clobber(y) // result: (ADDLmodify [off] {sym} ptr x mem) @@ -4868,122 +5862,6 @@ func rewriteValue386_Op386MOVLstore(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVLstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAL1 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAL2 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAL4 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAL8 [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } return false } func rewriteValue386_Op386MOVLstoreconst(v *Value) bool { @@ -5034,36 +5912,1765 @@ func rewriteValue386_Op386MOVLstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - return false -} -func rewriteValue386_Op386MOVSDconst(v *Value) bool { - b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVSDconst [c]) - // cond: config.ctxt.Flag_shared - // result: (MOVSDconst2 (MOVSDconst1 [c])) + // match: (MOVLstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) for { - c := v.AuxInt - if !(config.ctxt.Flag_shared) { + x := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { break } - v.reset(Op386MOVSDconst2) - v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, typ.UInt32) - v0.AuxInt = c - v.AddArg(v0) + off := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = ValAndOff(x).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreconst [x] {sym1} (LEAL4 [off] {sym2} ptr idx) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + for { + x := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL4 { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVLstoreconstidx4) + v.AuxInt = ValAndOff(x).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreconst [x] {sym} (ADDL ptr idx) mem) + // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = x + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVLstoreconstidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLLconst [2] idx) mem) + // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { + break + } + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLstoreconstidx4) + v.AuxInt = c + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) + // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + c := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) + // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + c := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVLstoreconstidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLstoreconstidx4 [x] {sym} (ADDLconst [c] ptr) idx mem) + // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + c := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVLstoreconstidx4) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDLconst [c] idx) mem) + // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + c := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVLstoreconstidx4) + v.AuxInt = ValAndOff(x).add(4 * c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVLstoreidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLstoreidx1 [c] {sym} ptr (SHLLconst [2] idx) val mem) + // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 2 { + continue + } + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVLstoreidx4) + v.AuxInt = c + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + // match: (MOVLstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVLstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + // match: (MOVLstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVLstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVLstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVLstoreidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVLstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVLstoreidx4) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVLstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVLstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVLstoreidx4) + v.AuxInt = int64(int32(c + 4*d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLloadidx4 x [off] {sym} ptr idx mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ADDLloadidx4 || y.AuxInt != off || y.Aux != sym { + break + } + mem := y.Args[3] + x := y.Args[0] + if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(Op386ADDLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLloadidx4 x [off] {sym} ptr idx mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ANDLloadidx4 || y.AuxInt != off || y.Aux != sym { + break + } + mem := y.Args[3] + x := y.Args[0] + if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(Op386ANDLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLloadidx4 x [off] {sym} ptr idx mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ORLloadidx4 || y.AuxInt != off || y.Aux != sym { + break + } + mem := y.Args[3] + x := y.Args[0] + if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(Op386ORLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLloadidx4 x [off] {sym} ptr idx mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386XORLloadidx4 || y.AuxInt != off || y.Aux != sym { + break + } + mem := y.Args[3] + x := y.Args[0] + if ptr != y.Args[1] || idx != y.Args[2] || mem != v_3 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(Op386XORLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ADDLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ADDL { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + continue + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] { + continue + } + x := y_1 + if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(Op386ADDLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + break + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(SUBL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (SUBLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386SUBL { + break + } + x := y.Args[1] + l := y.Args[0] + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + break + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + break + } + v.reset(Op386SUBLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ANDLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ANDL { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + continue + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] { + continue + } + x := y_1 + if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(Op386ANDLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + break + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ORLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ORL { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + continue + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] { + continue + } + x := y_1 + if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(Op386ORLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + break + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORL l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (XORLmodifyidx4 [off] {sym} ptr idx x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386XORL { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + continue + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] { + continue + } + x := y_1 + if mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(Op386XORLmodifyidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, x, mem) + return true + } + break + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ADDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) + // result: (ADDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ADDLconst { + break + } + c := y.AuxInt + l := y.Args[0] + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + break + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ANDLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) + // result: (ANDLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ANDLconst { + break + } + c := y.AuxInt + l := y.Args[0] + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + break + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { + break + } + v.reset(Op386ANDLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(ORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) + // result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386ORLconst { + break + } + c := y.AuxInt + l := y.Args[0] + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + break + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { + break + } + v.reset(Op386ORLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVLstoreidx4 {sym} [off] ptr idx y:(XORLconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) + // result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + y := v_2 + if y.Op != Op386XORLconst { + break + } + c := y.AuxInt + l := y.Args[0] + if l.Op != Op386MOVLloadidx4 || l.AuxInt != off || l.Aux != sym { + break + } + mem := l.Args[2] + if ptr != l.Args[0] || idx != l.Args[1] || mem != v_3 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l) && validValAndOff(c, off)) { + break + } + v.reset(Op386XORLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSDconst(v *Value) bool { + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types + // match: (MOVSDconst [c]) + // cond: config.ctxt.Flag_shared + // result: (MOVSDconst2 (MOVSDconst1 [c])) + for { + c := v.AuxInt + if !(config.ctxt.Flag_shared) { + break + } + v.reset(Op386MOVSDconst2) + v0 := b.NewValue0(v.Pos, Op386MOVSDconst1, typ.UInt32) + v0.AuxInt = c + v.AddArg(v0) + return true + } + return false +} +func rewriteValue386_Op386MOVSDload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSDload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSDload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) + return true + } + // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSDload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + // match: (MOVSDload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSDloadidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSDload [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL8 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSDloadidx8) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSDload [off] {sym} (ADDL ptr idx) mem) + // cond: ptr.Op != OpSB + // result: (MOVSDloadidx1 [off] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + mem := v_1 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVSDloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVSDloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSDloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVSDloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSDloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVSDloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVSDloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSDloadidx8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSDloadidx8 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVSDloadidx8 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVSDloadidx8) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSDloadidx8 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVSDloadidx8 [int64(int32(c+8*d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVSDloadidx8) + v.AuxInt = int64(int32(c + 8*d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSDstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVSDstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSDstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(ptr, val, mem) + return true + } + // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSDstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) + return true + } + // match: (MOVSDstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSDstoreidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL8 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSDstoreidx8) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSDstore [off] {sym} (ADDL ptr idx) val mem) + // cond: ptr.Op != OpSB + // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + val := v_1 + mem := v_2 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVSDstoreidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVSDstoreidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSDstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVSDstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVSDstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSDstoreidx8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSDstoreidx8 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVSDstoreidx8 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVSDstoreidx8) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVSDstoreidx8) + v.AuxInt = int64(int32(c + 8*d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSSconst(v *Value) bool { + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types + // match: (MOVSSconst [c]) + // cond: config.ctxt.Flag_shared + // result: (MOVSSconst2 (MOVSSconst1 [c])) + for { + c := v.AuxInt + if !(config.ctxt.Flag_shared) { + break + } + v.reset(Op386MOVSSconst2) + v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, typ.UInt32) + v0.AuxInt = c + v.AddArg(v0) + return true + } + return false +} +func rewriteValue386_Op386MOVSSload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSSload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) + return true + } + // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSSload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + // match: (MOVSSload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSSloadidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSSload [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL4 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSSloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSSload [off] {sym} (ADDL ptr idx) mem) + // cond: ptr.Op != OpSB + // result: (MOVSSloadidx1 [off] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + mem := v_1 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVSSloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVSSloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVSSloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSSloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVSSloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVSSloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSSloadidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSloadidx4 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVSSloadidx4 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVSSloadidx4) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVSSloadidx4 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVSSloadidx4 [int64(int32(c+4*d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVSSloadidx4) + v.AuxInt = int64(int32(c + 4*d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSSstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MOVSSstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(ptr, val, mem) + return true + } + // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVSSstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) + return true + } + // match: (MOVSSstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSSstoreidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSSstore [off1] {sym1} (LEAL4 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL4 { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MOVSSstoreidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSSstore [off] {sym} (ADDL ptr idx) val mem) + // cond: ptr.Op != OpSB + // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + val := v_1 + mem := v_2 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVSSstoreidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break + } + return false +} +func rewriteValue386_Op386MOVSSstoreidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVSSstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVSSstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVSSstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVSSstoreidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSstoreidx4 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVSSstoreidx4 [int64(int32(c+d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVSSstoreidx4) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVSSstoreidx4 [int64(int32(c+4*d))] {sym} ptr idx val mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { + break + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVSSstoreidx4) + v.AuxInt = int64(int32(c + 4*d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVWLSX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWLSXload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != Op386MOVWload { + break + } + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, Op386MOVWLSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVWLSX (ANDLconst [c] x)) + // cond: c & 0x8000 == 0 + // result: (ANDLconst [c & 0x7fff] x) + for { + if v_0.Op != Op386ANDLconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c&0x8000 == 0) { + break + } + v.reset(Op386ANDLconst) + v.AuxInt = c & 0x7fff + v.AddArg(x) + return true + } + return false +} +func rewriteValue386_Op386MOVWLSXload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWLSX x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVWstore { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(Op386MOVWLSX) + v.AddArg(x) + return true + } + // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MOVWLSXload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + return false +} +func rewriteValue386_Op386MOVWLZX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != Op386MOVWload { + break + } + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, Op386MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVWLZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) + for { + x := v_0 + if x.Op != Op386MOVWloadidx1 { + break + } + off := x.AuxInt + sym := x.Aux + mem := x.Args[2] + ptr := x.Args[0] + idx := x.Args[1] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVWLZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) + for { + x := v_0 + if x.Op != Op386MOVWloadidx2 { + break + } + off := x.AuxInt + sym := x.Aux + mem := x.Args[2] + ptr := x.Args[0] + idx := x.Args[1] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(v.Pos, Op386MOVWloadidx2, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVWLZX (ANDLconst [c] x)) + // result: (ANDLconst [c & 0xffff] x) + for { + if v_0.Op != Op386ANDLconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + v.reset(Op386ANDLconst) + v.AuxInt = c & 0xffff + v.AddArg(x) return true } return false } -func rewriteValue386_Op386MOVSDload(v *Value) bool { +func rewriteValue386_Op386MOVWload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) + // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWLZX x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVWstore { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(Op386MOVWLZX) + v.AddArg(x) + return true + } + // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) // cond: is32Bit(off1+off2) - // result: (MOVSDload [off1+off2] {sym} ptr mem) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { off1 := v.AuxInt sym := v.Aux @@ -5076,15 +7683,15 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool { if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVSDload) + v.reset(Op386MOVWload) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg2(ptr, mem) return true } - // match: (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -5098,146 +7705,253 @@ func rewriteValue386_Op386MOVSDload(v *Value) bool { if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MOVSDload) + v.reset(Op386MOVWload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) v.AddArg2(base, mem) return true } - return false -} -func rewriteValue386_Op386MOVSDstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVSDstore [off1+off2] {sym} ptr val mem) + // match: (MOVWload [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) for { off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { break } off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVSDstore) + v.reset(Op386MOVWloadidx1) v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVWload [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) for { off1 := v.AuxInt sym1 := v.Aux - if v_0.Op != Op386LEAL { + if v_0.Op != Op386LEAL2 { break } off2 := v_0.AuxInt sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVSDstore) + v.reset(Op386MOVWloadidx2) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVWload [off] {sym} (ADDL ptr idx) mem) + // cond: ptr.Op != OpSB + // result: (MOVWloadidx1 [off] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + mem := v_1 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVWloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + // match: (MOVWload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { + break + } + v.reset(Op386MOVLconst) + v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) return true } return false } -func rewriteValue386_Op386MOVSSconst(v *Value) bool { - b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVSSconst [c]) - // cond: config.ctxt.Flag_shared - // result: (MOVSSconst2 (MOVSSconst1 [c])) +func rewriteValue386_Op386MOVWloadidx1(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWloadidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) + // result: (MOVWloadidx2 [c] {sym} ptr idx mem) for { c := v.AuxInt - if !(config.ctxt.Flag_shared) { - break + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { + continue + } + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWloadidx2) + v.AuxInt = c + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true } - v.reset(Op386MOVSSconst2) - v0 := b.NewValue0(v.Pos, Op386MOVSSconst1, typ.UInt32) - v0.AuxInt = c - v.AddArg(v0) - return true + break + } + // match: (MOVWloadidx1 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVWloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break + } + // match: (MOVWloadidx1 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVWloadidx1 [int64(int32(c+d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWloadidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + break } return false } -func rewriteValue386_Op386MOVSSload(v *Value) bool { +func rewriteValue386_Op386MOVWloadidx2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVSSload [off1+off2] {sym} ptr mem) + // match: (MOVWloadidx2 [c] {sym} (ADDLconst [d] ptr) idx mem) + // result: (MOVWloadidx2 [int64(int32(c+d))] {sym} ptr idx mem) for { - off1 := v.AuxInt + c := v.AuxInt sym := v.Aux if v_0.Op != Op386ADDLconst { break } - off2 := v_0.AuxInt + d := v_0.AuxInt ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { + idx := v_1 + mem := v_2 + v.reset(Op386MOVWloadidx2) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVWloadidx2 [c] {sym} ptr (ADDLconst [d] idx) mem) + // result: (MOVWloadidx2 [int64(int32(c+2*d))] {sym} ptr idx mem) + for { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { break } - v.reset(Op386MOVSSload) - v.AuxInt = off1 + off2 + d := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWloadidx2) + v.AuxInt = int64(int32(c + 2*d)) v.Aux = sym - v.AddArg2(ptr, mem) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) + return false +} +func rewriteValue386_Op386MOVWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVWLSX { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + x := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) + return true + } + // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVWLZX { break } - v.reset(Op386MOVSSload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + x := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValue386_Op386MOVSSstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) - // result: (MOVSSstore [off1+off2] {sym} ptr val mem) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt sym := v.Aux @@ -5251,15 +7965,36 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool { if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MOVSSstore) + v.reset(Op386MOVWstore) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg3(ptr, val, mem) return true } - // match: (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) + // cond: validOff(off) + // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386MOVLconst { + break + } + c := v_1.AuxInt + mem := v_2 + if !(validOff(off)) { + break + } + v.reset(Op386MOVWstoreconst) + v.AuxInt = makeValAndOff(int64(int16(c)), off) + v.Aux = sym + v.AddArg2(ptr, mem) + return true + } + // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -5274,722 +8009,713 @@ func rewriteValue386_Op386MOVSSstore(v *Value) bool { if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MOVSSstore) + v.reset(Op386MOVWstore) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValue386_Op386MOVWLSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWLSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWLSXload [off] {sym} ptr mem) + // match: (MOVWstore [off1] {sym1} (LEAL1 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) for { - x := v_0 - if x.Op != Op386MOVWload { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL1 { break } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, Op386MOVWLSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) + v.reset(Op386MOVWstoreidx1) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) return true } - // match: (MOVWLSX (ANDLconst [c] x)) - // cond: c & 0x8000 == 0 - // result: (ANDLconst [c & 0x7fff] x) + // match: (MOVWstore [off1] {sym1} (LEAL2 [off2] {sym2} ptr idx) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) for { - if v_0.Op != Op386ANDLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL2 { break } - c := v_0.AuxInt - x := v_0.Args[0] - if !(c&0x8000 == 0) { + off2 := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(Op386ANDLconst) - v.AuxInt = c & 0x7fff - v.AddArg(x) + v.reset(Op386MOVWstoreidx2) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(ptr, idx, val, mem) return true } - return false -} -func rewriteValue386_Op386MOVWLSXload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWLSX x) + // match: (MOVWstore [off] {sym} (ADDL ptr idx) val mem) + // cond: ptr.Op != OpSB + // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem) for { off := v.AuxInt sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWstore { + if v_0.Op != Op386ADDL { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + ptr := v_0_0 + idx := v_0_1 + val := v_1 + mem := v_2 + if !(ptr.Op != OpSB) { + continue + } + v.reset(Op386MOVWstoreidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true } - v.reset(Op386MOVWLSX) - v.AddArg(x) - return true + break } - // match: (MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(Op386MOVWLSXload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w, mem) return true } - return false -} -func rewriteValue386_Op386MOVWLZX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWLZX x:(MOVWload [off] {sym} ptr mem)) + // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - x := v_0 - if x.Op != Op386MOVWload { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != Op386SHRLconst { break } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - b = x.Block - v0 := b.NewValue0(x.Pos, Op386MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWLZX (ANDLconst [c] x)) - // result: (ANDLconst [c & 0xffff] x) - for { - if v_0.Op != Op386ANDLconst { + mem := x.Args[2] + if p != x.Args[0] { break } - c := v_0.AuxInt - x := v_0.Args[0] - v.reset(Op386ANDLconst) - v.AuxInt = c & 0xffff - v.AddArg(x) + w0 := x.Args[1] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) return true } return false } -func rewriteValue386_Op386MOVWload(v *Value) bool { +func rewriteValue386_Op386MOVWstoreconst(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWLZX x) + // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWstore { + sc := v.AuxInt + s := v.Aux + if v_0.Op != Op386ADDLconst { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { break } - v.reset(Op386MOVWLZX) - v.AddArg(x) + v.reset(Op386MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg2(ptr, mem) return true } - // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVWload [off1+off2] {sym} ptr mem) + // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != Op386ADDLconst { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { break } - off2 := v_0.AuxInt + off := v_0.AuxInt + sym2 := v_0.Aux ptr := v_0.Args[0] mem := v_1 - if !(is32Bit(off1 + off2)) { + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MOVWload) - v.AuxInt = off1 + off2 - v.Aux = sym + v.reset(Op386MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) v.AddArg2(ptr, mem) return true } - // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (MOVWstoreconst [x] {sym1} (LEAL1 [off] {sym2} ptr idx) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) for { - off1 := v.AuxInt + x := v.AuxInt sym1 := v.Aux - if v_0.Op != Op386LEAL { + if v_0.Op != Op386LEAL1 { break } - off2 := v_0.AuxInt + off := v_0.AuxInt sym2 := v_0.Aux - base := v_0.Args[0] + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + if !(canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVWload) - v.AuxInt = off1 + off2 + v.reset(Op386MOVWstoreconstidx1) + v.AuxInt = ValAndOff(x).add(off) v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAL1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAL1 [0] {s1} x y) mem) + // match: (MOVWstoreconst [x] {sym1} (LEAL2 [off] {sym2} ptr idx) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { + x := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL2 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] + off := v_0.AuxInt + sym2 := v_0.Aux + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + if !(canMergeSym(sym1, sym2)) { break } - v.reset(Op386MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVWstoreconstidx2) + v.AuxInt = ValAndOff(x).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAL2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAL2 [0] {s1} x y) mem) + // match: (MOVWstoreconst [x] {sym} (ADDL ptr idx) mem) + // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDL { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] + idx := v_0.Args[1] + ptr := v_0.Args[0] mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + v.reset(Op386MOVWstoreconstidx1) + v.AuxInt = x + v.Aux = sym + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAL4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAL4 [0] {s1} x y) mem) + // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { + c := v.AuxInt + s := v.Aux + p := v_0 + x := v_1 + if x.Op != Op386MOVWstoreconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + a := x.AuxInt + if x.Aux != s { break } - v.reset(Op386MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) + mem := x.Args[1] + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg2(p, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAL8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAL8 [0] {s1} x y) mem) + // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { + a := v.AuxInt + s := v.Aux + p := v_0 + x := v_1 + if x.Op != Op386MOVWstoreconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { + c := x.AuxInt + if x.Aux != s { break } - v.reset(Op386MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { + mem := x.Args[1] + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - v.reset(Op386MOVLconst) - v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) + v.reset(Op386MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg2(p, mem) return true } return false } -func rewriteValue386_Op386MOVWstore(v *Value) bool { +func rewriteValue386_Op386MOVWstoreconstidx1(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWstore [off] {sym} ptr (MOVWLSX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != Op386MOVWLSX { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(Op386MOVWstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVWstore [off] {sym} ptr (MOVWLZX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) + // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLLconst [1] idx) mem) + // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem) for { - off := v.AuxInt + c := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != Op386MOVWLZX { + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { break } - x := v_1.Args[0] + idx := v_1.Args[0] mem := v_2 - v.reset(Op386MOVWstore) - v.AuxInt = off + v.reset(Op386MOVWstoreconstidx2) + v.AuxInt = c v.Aux = sym - v.AddArg3(ptr, x, mem) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) + // match: (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) + // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) for { - off1 := v.AuxInt + x := v.AuxInt sym := v.Aux if v_0.Op != Op386ADDLconst { break } - off2 := v_0.AuxInt + c := v_0.AuxInt ptr := v_0.Args[0] - val := v_1 + idx := v_1 mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(Op386MOVWstore) - v.AuxInt = off1 + off2 + v.reset(Op386MOVWstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) v.Aux = sym - v.AddArg3(ptr, val, mem) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validOff(off) - // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) + // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDLconst [c] idx) mem) + // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) for { - off := v.AuxInt + x := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != Op386MOVLconst { + if v_1.Op != Op386ADDLconst { break } c := v_1.AuxInt + idx := v_1.Args[0] mem := v_2 - if !(validOff(off)) { - break - } - v.reset(Op386MOVWstoreconst) - v.AuxInt = makeValAndOff(int64(int16(c)), off) + v.reset(Op386MOVWstoreconstidx1) + v.AuxInt = ValAndOff(x).add(c) v.Aux = sym - v.AddArg2(ptr, mem) + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { + c := v.AuxInt + s := v.Aux + p := v_0 + i := v_1 + x := v_2 + if x.Op != Op386MOVWstoreconstidx1 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + a := x.AuxInt + if x.Aux != s { break } - v.reset(Op386MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + mem := x.Args[2] + if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.Aux = s + v.AddArg3(p, i, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w mem) + return false +} +func rewriteValue386_Op386MOVWstoreconstidx2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MOVWstoreconstidx2 [x] {sym} (ADDLconst [c] ptr) idx mem) + // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - if v_1.Op != Op386SHRLconst || v_1.AuxInt != 16 { - break - } - w := v_1.Args[0] - x := v_2 - if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { + x := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { break } - mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + c := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + mem := v_2 + v.reset(Op386MOVWstoreconstidx2) + v.AuxInt = ValAndOff(x).add(c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDLconst [c] idx) mem) + // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) + for { + x := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { break } - v.reset(Op386MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p0, w, mem) + c := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + v.reset(Op386MOVWstoreconstidx2) + v.AuxInt = ValAndOff(x).add(2 * c) + v.Aux = sym + v.AddArg3(ptr, idx, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w0 mem) + // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLLconst [1] i) mem) for { - i := v.AuxInt + c := v.AuxInt s := v.Aux - p1 := v_0 - if v_1.Op != Op386SHRLconst { + p := v_0 + i := v_1 + x := v_2 + if x.Op != Op386MOVWstoreconstidx2 { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v_2 - if x.Op != Op386MOVWstore || x.AuxInt != i-2 || x.Aux != s { + a := x.AuxInt + if x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] - w0 := x.Args[1] - if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - v.reset(Op386MOVLstore) - v.AuxInt = i - 2 + v.reset(Op386MOVLstoreconstidx1) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v.AddArg3(p0, w0, mem) + v0 := b.NewValue0(v.Pos, Op386SHLLconst, i.Type) + v0.AuxInt = 1 + v0.AddArg(i) + v.AddArg3(p, v0, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAL1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAL1 [0] {s1} x y) val mem) + return false +} +func rewriteValue386_Op386MOVWstoreidx1(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWstoreidx1 [c] {sym} ptr (SHLLconst [1] idx) val mem) + // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(Op386MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386SHLLconst || v_1.AuxInt != 1 { + continue + } + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVWstoreidx2) + v.AuxInt = c + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true + } + break } - // match: (MOVWstore [i0] {s0} l:(LEAL2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAL2 [0] {s1} x y) val mem) + // match: (MOVWstoreidx1 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != Op386ADDLconst { + continue + } + d := v_0.AuxInt + ptr := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVWstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true } - v.reset(Op386MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true + break } - // match: (MOVWstore [i0] {s0} l:(LEAL4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAL4 [0] {s1} x y) val mem) + // match: (MOVWstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVWstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break + c := v.AuxInt + sym := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + ptr := v_0 + if v_1.Op != Op386ADDLconst { + continue + } + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVWstoreidx1) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) + return true } - v.reset(Op386MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true + break } - // match: (MOVWstore [i0] {s0} l:(LEAL8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAL8 [0] {s1} x y) val mem) + // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p idx w mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != Op386LEAL8 { - break + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 { + continue + } + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg4(p, idx, w, mem) + return true + } } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break + break + } + // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + for { + i := v.AuxInt + s := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst { + continue + } + j := v_2.AuxInt + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { + continue + } + mem := x.Args[3] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + if p != x_0 || idx != x_1 { + continue + } + w0 := x.Args[2] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + continue + } + v.reset(Op386MOVLstoreidx1) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg4(p, idx, w0, mem) + return true + } } - v.reset(Op386MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, Op386LEAL8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true + break } return false } -func rewriteValue386_Op386MOVWstoreconst(v *Value) bool { +func rewriteValue386_Op386MOVWstoreidx2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - config := b.Func.Config - // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + // match: (MOVWstoreidx2 [c] {sym} (ADDLconst [d] ptr) idx val mem) + // result: (MOVWstoreidx2 [int64(int32(c+d))] {sym} ptr idx val mem) for { - sc := v.AuxInt - s := v.Aux + c := v.AuxInt + sym := v.Aux if v_0.Op != Op386ADDLconst { break } - off := v_0.AuxInt + d := v_0.AuxInt ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { - break - } - v.reset(Op386MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s - v.AddArg2(ptr, mem) + idx := v_1 + val := v_2 + mem := v_3 + v.reset(Op386MOVWstoreidx2) + v.AuxInt = int64(int32(c + d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) return true } - // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (MOVWstoreidx2 [c] {sym} ptr (ADDLconst [d] idx) val mem) + // result: (MOVWstoreidx2 [int64(int32(c+2*d))] {sym} ptr idx val mem) for { - sc := v.AuxInt - sym1 := v.Aux - if v_0.Op != Op386LEAL { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { + c := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != Op386ADDLconst { break } - v.reset(Op386MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) + d := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(Op386MOVWstoreidx2) + v.AuxInt = int64(int32(c + 2*d)) + v.Aux = sym + v.AddArg4(ptr, idx, val, mem) return true } - // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w mem) for { - c := v.AuxInt + i := v.AuxInt s := v.Aux - p1 := v_0 - x := v_1 - if x.Op != Op386MOVWstoreconst { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst || v_2.AuxInt != 16 { break } - a := x.AuxInt - if x.Aux != s { + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { break } - mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) { + mem := x.Args[3] + if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(Op386MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + v.reset(Op386MOVLstoreidx1) + v.AuxInt = i - 2 v.Aux = s - v.AddArg2(p0, mem) + v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type) + v0.AuxInt = 1 + v0.AddArg(idx) + v.AddArg4(p, v0, w, mem) return true } - // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstoreidx1 [i-2] {s} p (SHLLconst [1] idx) w0 mem) for { - a := v.AuxInt + i := v.AuxInt s := v.Aux - p1 := v_0 - x := v_1 - if x.Op != Op386MOVWstoreconst { + p := v_0 + idx := v_1 + if v_2.Op != Op386SHRLconst { break } - c := x.AuxInt - if x.Aux != s { + j := v_2.AuxInt + w := v_2.Args[0] + x := v_3 + if x.Op != Op386MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { break } - mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && same(p0, p1, 1) && clobber(x)) { + mem := x.Args[3] + if p != x.Args[0] || idx != x.Args[1] { break } - v.reset(Op386MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) + w0 := x.Args[2] + if w0.Op != Op386SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(Op386MOVLstoreidx1) + v.AuxInt = i - 2 v.Aux = s - v.AddArg2(p0, mem) + v0 := b.NewValue0(v.Pos, Op386SHLLconst, idx.Type) + v0.AuxInt = 1 + v0.AddArg(idx) + v.AddArg4(p, v0, w0, mem) return true } return false @@ -6038,6 +8764,32 @@ func rewriteValue386_Op386MULL(v *Value) bool { } break } + // match: (MULL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (MULLloadidx4 x [off] {sym} ptr idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + continue + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(Op386MULLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } + break + } return false } func rewriteValue386_Op386MULLconst(v *Value) bool { @@ -6427,52 +9179,130 @@ func rewriteValue386_Op386MULLconst(v *Value) bool { if !(c%5 == 0 && isPowerOfTwo(c/5)) { break } - v.reset(Op386SHLLconst) - v.AuxInt = log2(c / 5) - v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) - return true - } - // match: (MULLconst [c] x) - // cond: c%9 == 0 && isPowerOfTwo(c/9) - // result: (SHLLconst [log2(c/9)] (LEAL8 x x)) - for { - c := v.AuxInt - x := v_0 - if !(c%9 == 0 && isPowerOfTwo(c/9)) { + v.reset(Op386SHLLconst) + v.AuxInt = log2(c / 5) + v0 := b.NewValue0(v.Pos, Op386LEAL4, v.Type) + v0.AddArg2(x, x) + v.AddArg(v0) + return true + } + // match: (MULLconst [c] x) + // cond: c%9 == 0 && isPowerOfTwo(c/9) + // result: (SHLLconst [log2(c/9)] (LEAL8 x x)) + for { + c := v.AuxInt + x := v_0 + if !(c%9 == 0 && isPowerOfTwo(c/9)) { + break + } + v.reset(Op386SHLLconst) + v.AuxInt = log2(c / 9) + v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type) + v0.AddArg2(x, x) + v.AddArg(v0) + return true + } + // match: (MULLconst [c] (MOVLconst [d])) + // result: (MOVLconst [int64(int32(c*d))]) + for { + c := v.AuxInt + if v_0.Op != Op386MOVLconst { + break + } + d := v_0.AuxInt + v.reset(Op386MOVLconst) + v.AuxInt = int64(int32(c * d)) + return true + } + return false +} +func rewriteValue386_Op386MULLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MULLload [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (MULLload [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MULLload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(val, base, mem) + return true + } + // match: (MULLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MULLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386SHLLconst) - v.AuxInt = log2(c / 9) - v0 := b.NewValue0(v.Pos, Op386LEAL8, v.Type) - v0.AddArg2(x, x) - v.AddArg(v0) + v.reset(Op386MULLload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(val, base, mem) return true } - // match: (MULLconst [c] (MOVLconst [d])) - // result: (MOVLconst [int64(int32(c*d))]) + // match: (MULLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) for { - c := v.AuxInt - if v_0.Op != Op386MOVLconst { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { break } - d := v_0.AuxInt - v.reset(Op386MOVLconst) - v.AuxInt = int64(int32(c * d)) + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386MULLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, ptr, idx, mem) return true } return false } -func rewriteValue386_Op386MULLload(v *Value) bool { +func rewriteValue386_Op386MULLloadidx4(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (MULLload [off1] {sym} val (ADDLconst [off2] base) mem) + // match: (MULLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) // cond: is32Bit(off1+off2) - // result: (MULLload [off1+off2] {sym} val base mem) + // result: (MULLloadidx4 [off1+off2] {sym} val base idx mem) for { off1 := v.AuxInt sym := v.Aux @@ -6482,19 +9312,43 @@ func rewriteValue386_Op386MULLload(v *Value) bool { } off2 := v_1.AuxInt base := v_1.Args[0] - mem := v_2 + idx := v_2 + mem := v_3 if !(is32Bit(off1 + off2)) { break } - v.reset(Op386MULLload) + v.reset(Op386MULLloadidx4) v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(val, base, mem) + v.AddArg4(val, base, idx, mem) return true } - // match: (MULLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // match: (MULLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (MULLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386MULLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (MULLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (MULLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // result: (MULLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -6505,14 +9359,15 @@ func rewriteValue386_Op386MULLload(v *Value) bool { off2 := v_1.AuxInt sym2 := v_1.Aux base := v_1.Args[0] - mem := v_2 + idx := v_2 + mem := v_3 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386MULLload) + v.reset(Op386MULLloadidx4) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(val, base, mem) + v.AddArg4(val, base, idx, mem) return true } return false @@ -6837,6 +9692,32 @@ func rewriteValue386_Op386ORL(v *Value) bool { } break } + // match: (ORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (ORLloadidx4 x [off] {sym} ptr idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + continue + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(Op386ORLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } + break + } // match: (ORL x x) // result: x for { @@ -6847,9 +9728,9 @@ func rewriteValue386_Op386ORL(v *Value) bool { v.copyOf(x) return true } - // match: (ORL x0:(MOVBload [i0] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + // match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -6859,7 +9740,7 @@ func rewriteValue386_Op386ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] s0 := v_1 if s0.Op != Op386SHLLconst || s0.AuxInt != 8 { continue @@ -6873,8 +9754,7 @@ func rewriteValue386_Op386ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) { continue } b = mergePoint(b, x0, x1) @@ -6882,14 +9762,14 @@ func rewriteValue386_Op386ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p0, mem) + v0.AddArg2(p, mem) return true } break } - // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p2 mem))) - // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) - // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p0 mem) + // match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem))) + // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) + // result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { o0 := v_0 @@ -6907,7 +9787,7 @@ func rewriteValue386_Op386ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] s0 := o0_1 if s0.Op != Op386SHLLconst || s0.AuxInt != 16 { continue @@ -6921,8 +9801,7 @@ func rewriteValue386_Op386ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] { + if p != x1.Args[0] || mem != x1.Args[1] { continue } s1 := v_1 @@ -6938,8 +9817,7 @@ func rewriteValue386_Op386ORL(v *Value) bool { continue } _ = x2.Args[1] - p2 := x2.Args[0] - if mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) { + if p != x2.Args[0] || mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) { continue } b = mergePoint(b, x0, x1, x2) @@ -6947,12 +9825,137 @@ func rewriteValue386_Op386ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p0, mem) + v0.AddArg2(p, mem) return true } } break } + // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) + // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0) + // result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != Op386MOVBloadidx1 { + continue + } + i0 := x0.AuxInt + s := x0.Aux + mem := x0.Args[2] + x0_0 := x0.Args[0] + x0_1 := x0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { + p := x0_0 + idx := x0_1 + s0 := v_1 + if s0.Op != Op386SHLLconst || s0.AuxInt != 8 { + continue + } + x1 := s0.Args[0] + if x1.Op != Op386MOVBloadidx1 { + continue + } + i1 := x1.AuxInt + if x1.Aux != s { + continue + } + _ = x1.Args[2] + x1_0 := x1.Args[0] + x1_1 := x1.Args[1] + for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { + if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(v.Pos, Op386MOVWloadidx1, v.Type) + v.copyOf(v0) + v0.AuxInt = i0 + v0.Aux = s + v0.AddArg3(p, idx, mem) + return true + } + } + } + break + } + // match: (ORL o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem))) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem))) + // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0) + // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + o0 := v_0 + if o0.Op != Op386ORL { + continue + } + _ = o0.Args[1] + o0_0 := o0.Args[0] + o0_1 := o0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 { + x0 := o0_0 + if x0.Op != Op386MOVWloadidx1 { + continue + } + i0 := x0.AuxInt + s := x0.Aux + mem := x0.Args[2] + x0_0 := x0.Args[0] + x0_1 := x0.Args[1] + for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 { + p := x0_0 + idx := x0_1 + s0 := o0_1 + if s0.Op != Op386SHLLconst || s0.AuxInt != 16 { + continue + } + x1 := s0.Args[0] + if x1.Op != Op386MOVBloadidx1 { + continue + } + i2 := x1.AuxInt + if x1.Aux != s { + continue + } + _ = x1.Args[2] + x1_0 := x1.Args[0] + x1_1 := x1.Args[1] + for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 { + if p != x1_0 || idx != x1_1 || mem != x1.Args[2] { + continue + } + s1 := v_1 + if s1.Op != Op386SHLLconst || s1.AuxInt != 24 { + continue + } + x2 := s1.Args[0] + if x2.Op != Op386MOVBloadidx1 { + continue + } + i3 := x2.AuxInt + if x2.Aux != s { + continue + } + _ = x2.Args[2] + x2_0 := x2.Args[0] + x2_1 := x2.Args[1] + for _i4 := 0; _i4 <= 1; _i4, x2_0, x2_1 = _i4+1, x2_1, x2_0 { + if p != x2_0 || idx != x2_1 || mem != x2.Args[2] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) { + continue + } + b = mergePoint(b, x0, x1, x2) + v0 := b.NewValue0(v.Pos, Op386MOVLloadidx1, v.Type) + v.copyOf(v0) + v0.AuxInt = i0 + v0.Aux = s + v0.AddArg3(p, idx, mem) + return true + } + } + } + } + } + break + } return false } func rewriteValue386_Op386ORLconst(v *Value) bool { @@ -7037,23 +10040,176 @@ func rewriteValue386_Op386ORLconstmodify(v *Value) bool { if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386ORLconstmodify) - v.AuxInt = ValAndOff(valoff1).add(off2) + v.reset(Op386ORLconstmodify) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + return false +} +func rewriteValue386_Op386ORLconstmodifyidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(Op386ORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) + // cond: ValAndOff(valoff1).canAdd(off2*4) + // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2 * 4)) { + break + } + v.reset(Op386ORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2 * 4) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (ORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386ORLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ORLload [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ORLload [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ORLload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(val, base, mem) + return true + } + // match: (ORLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ORLload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(val, base, mem) + return true + } + // match: (ORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386ORLloadidx4) + v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + v.AddArg4(val, ptr, idx, mem) return true } return false } -func rewriteValue386_Op386ORLload(v *Value) bool { +func rewriteValue386_Op386ORLloadidx4(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block config := b.Func.Config - // match: (ORLload [off1] {sym} val (ADDLconst [off2] base) mem) + // match: (ORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) // cond: is32Bit(off1+off2) - // result: (ORLload [off1+off2] {sym} val base mem) + // result: (ORLloadidx4 [off1+off2] {sym} val base idx mem) for { off1 := v.AuxInt sym := v.Aux @@ -7063,19 +10219,43 @@ func rewriteValue386_Op386ORLload(v *Value) bool { } off2 := v_1.AuxInt base := v_1.Args[0] - mem := v_2 + idx := v_2 + mem := v_3 if !(is32Bit(off1 + off2)) { break } - v.reset(Op386ORLload) + v.reset(Op386ORLloadidx4) v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(val, base, mem) + v.AddArg4(val, base, idx, mem) return true } - // match: (ORLload [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // match: (ORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (ORLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ORLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (ORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // result: (ORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -7086,14 +10266,15 @@ func rewriteValue386_Op386ORLload(v *Value) bool { off2 := v_1.AuxInt sym2 := v_1.Aux base := v_1.Args[0] - mem := v_2 + idx := v_2 + mem := v_3 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386ORLload) + v.reset(Op386ORLloadidx4) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(val, base, mem) + v.AddArg4(val, base, idx, mem) return true } return false @@ -7151,6 +10332,107 @@ func rewriteValue386_Op386ORLmodify(v *Value) bool { } return false } +func rewriteValue386_Op386ORLmodifyidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (ORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) + // cond: is32Bit(off1+off2) + // result: (ORLmodifyidx4 [off1+off2] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ORLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) + // cond: is32Bit(off1+off2*4) + // result: (ORLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386ORLmodifyidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ORLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(base, idx, val, mem) + return true + } + // match: (ORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) + // cond: validValAndOff(c,off) + // result: (ORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + if v_2.Op != Op386MOVLconst { + break + } + c := v_2.AuxInt + mem := v_3 + if !(validValAndOff(c, off)) { + break + } + v.reset(Op386ORLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} func rewriteValue386_Op386ROLBconst(v *Value) bool { v_0 := v.Args[0] // match: (ROLBconst [c] (ROLBconst [d] x)) @@ -8347,6 +11629,29 @@ func rewriteValue386_Op386SUBL(v *Value) bool { v.AddArg3(x, ptr, mem) return true } + // match: (SUBL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (SUBLloadidx4 x [off] {sym} ptr idx mem) + for { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + break + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + break + } + v.reset(Op386SUBLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } // match: (SUBL x x) // result: (MOVLconst [0]) for { @@ -8454,6 +11759,109 @@ func rewriteValue386_Op386SUBLload(v *Value) bool { v.AddArg3(val, base, mem) return true } + // match: (SUBLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386SUBLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386SUBLloadidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (SUBLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // cond: is32Bit(off1+off2) + // result: (SUBLloadidx4 [off1+off2] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (SUBLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (SUBLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386SUBLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (SUBLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (SUBLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386SUBLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, base, idx, mem) + return true + } return false } func rewriteValue386_Op386SUBLmodify(v *Value) bool { @@ -8468,25 +11876,103 @@ func rewriteValue386_Op386SUBLmodify(v *Value) bool { for { off1 := v.AuxInt sym := v.Aux - if v_0.Op != Op386ADDLconst { + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBLmodify) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(base, val, mem) + return true + } + // match: (SUBLmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386SUBLmodify) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) + return true + } + return false +} +func rewriteValue386_Op386SUBLmodifyidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (SUBLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) + // cond: is32Bit(off1+off2) + // result: (SUBLmodifyidx4 [off1+off2] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (SUBLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) + // cond: is32Bit(off1+off2*4) + // result: (SUBLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { break } - off2 := v_0.AuxInt - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { + off2 := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2*4)) { break } - v.reset(Op386SUBLmodify) - v.AuxInt = off1 + off2 + v.reset(Op386SUBLmodifyidx4) + v.AuxInt = off1 + off2*4 v.Aux = sym - v.AddArg3(base, val, mem) + v.AddArg4(base, idx, val, mem) return true } - // match: (SUBLmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // match: (SUBLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) - // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (SUBLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -8496,15 +11982,38 @@ func rewriteValue386_Op386SUBLmodify(v *Value) bool { off2 := v_0.AuxInt sym2 := v_0.Aux base := v_0.Args[0] - val := v_1 - mem := v_2 + idx := v_1 + val := v_2 + mem := v_3 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { break } - v.reset(Op386SUBLmodify) + v.reset(Op386SUBLmodifyidx4) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + v.AddArg4(base, idx, val, mem) + return true + } + // match: (SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) + // cond: validValAndOff(-c,off) + // result: (ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + if v_2.Op != Op386MOVLconst { + break + } + c := v_2.AuxInt + mem := v_3 + if !(validValAndOff(-c, off)) { + break + } + v.reset(Op386ADDLconstmodifyidx4) + v.AuxInt = makeValAndOff(-c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) return true } return false @@ -8791,6 +12300,32 @@ func rewriteValue386_Op386XORL(v *Value) bool { } break } + // match: (XORL x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (XORLloadidx4 x [off] {sym} ptr idx mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != Op386MOVLloadidx4 { + continue + } + off := l.AuxInt + sym := l.Aux + mem := l.Args[2] + ptr := l.Args[0] + idx := l.Args[1] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(Op386XORLloadidx4) + v.AuxInt = off + v.Aux = sym + v.AddArg4(x, ptr, idx, mem) + return true + } + break + } // match: (XORL x x) // result: (MOVLconst [0]) for { @@ -8896,6 +12431,81 @@ func rewriteValue386_Op386XORLconstmodify(v *Value) bool { } return false } +func rewriteValue386_Op386XORLconstmodifyidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (XORLconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) + // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2)) { + break + } + v.reset(Op386XORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (XORLconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) + // cond: ValAndOff(valoff1).canAdd(off2*4) + // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem) + for { + valoff1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2 * 4)) { + break + } + v.reset(Op386XORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2 * 4) + v.Aux = sym + v.AddArg3(base, idx, mem) + return true + } + // match: (XORLconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem) + // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (XORLconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem) + for { + valoff1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + mem := v_2 + if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386XORLconstmodifyidx4) + v.AuxInt = ValAndOff(valoff1).add(off2) + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, idx, mem) + return true + } + return false +} func rewriteValue386_Op386XORLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -8947,6 +12557,109 @@ func rewriteValue386_Op386XORLload(v *Value) bool { v.AddArg3(val, base, mem) return true } + // match: (XORLload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL4 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + idx := v_1.Args[1] + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(Op386XORLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, ptr, idx, mem) + return true + } + return false +} +func rewriteValue386_Op386XORLloadidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (XORLloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) + // cond: is32Bit(off1+off2) + // result: (XORLloadidx4 [off1+off2] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386XORLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (XORLloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) + // cond: is32Bit(off1+off2*4) + // result: (XORLloadidx4 [off1+off2*4] {sym} val base idx mem) + for { + off1 := v.AuxInt + sym := v.Aux + val := v_0 + base := v_1 + if v_2.Op != Op386ADDLconst { + break + } + off2 := v_2.AuxInt + idx := v_2.Args[0] + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386XORLloadidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(val, base, idx, mem) + return true + } + // match: (XORLloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (XORLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + val := v_0 + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + idx := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386XORLloadidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(val, base, idx, mem) + return true + } return false } func rewriteValue386_Op386XORLmodify(v *Value) bool { @@ -9002,6 +12715,107 @@ func rewriteValue386_Op386XORLmodify(v *Value) bool { } return false } +func rewriteValue386_Op386XORLmodifyidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (XORLmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) + // cond: is32Bit(off1+off2) + // result: (XORLmodifyidx4 [off1+off2] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != Op386ADDLconst { + break + } + off2 := v_0.AuxInt + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386XORLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (XORLmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) + // cond: is32Bit(off1+off2*4) + // result: (XORLmodifyidx4 [off1+off2*4] {sym} base idx val mem) + for { + off1 := v.AuxInt + sym := v.Aux + base := v_0 + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + idx := v_1.Args[0] + val := v_2 + mem := v_3 + if !(is32Bit(off1 + off2*4)) { + break + } + v.reset(Op386XORLmodifyidx4) + v.AuxInt = off1 + off2*4 + v.Aux = sym + v.AddArg4(base, idx, val, mem) + return true + } + // match: (XORLmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (XORLmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != Op386LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + idx := v_1 + val := v_2 + mem := v_3 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386XORLmodifyidx4) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg4(base, idx, val, mem) + return true + } + // match: (XORLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) + // cond: validValAndOff(c,off) + // result: (XORLconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + idx := v_1 + if v_2.Op != Op386MOVLconst { + break + } + c := v_2.AuxInt + mem := v_3 + if !(validValAndOff(c, off)) { + break + } + v.reset(Op386XORLconstmodifyidx4) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg3(ptr, idx, mem) + return true + } + return false +} func rewriteValue386_OpConstNil(v *Value) bool { // match: (ConstNil) // result: (MOVLconst [0]) diff --git a/test/codegen/memops.go b/test/codegen/memops.go index 0df191480d..9d18153a29 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -99,61 +99,46 @@ func compMem3(x, y *int) (int, bool) { func idxInt8(x, y []int8, i int) { var t int8 // amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` - // 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` - // 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` y[i+1] = t // amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` - // 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` x[i+1] = 77 } func idxInt16(x, y []int16, i int) { var t int16 // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` - // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` - // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` y[i+1] = t // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` - // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` t = x[16*i+1] // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` - // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` y[16*i+1] = t // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` - // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` x[i+1] = 77 // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` - // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` x[16*i+1] = 77 } func idxInt32(x, y []int32, i int) { var t int32 // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` - // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` t = x[i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` - // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` y[i+1] = t // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` t = x[2*i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` y[2*i+1] = t // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` - // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` t = x[16*i+1] // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` - // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` y[16*i+1] = t // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` - // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` x[i+1] = 77 // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` - // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` x[16*i+1] = 77 } @@ -175,71 +160,24 @@ func idxInt64(x, y []int64, i int) { func idxFloat32(x, y []float32, i int) { var t float32 - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` - // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` t = x[i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` - // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` y[i+1] = t - // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` - // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` - // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` y[16*i+1] = t } func idxFloat64(x, y []float64, i int) { var t float64 - // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` - // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` t = x[i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` - // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` y[i+1] = t - // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` - // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` t = x[16*i+1] - // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` - // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` y[16*i+1] = t } - -func idxLoadPlusOp(x []int32, i int) int32 { - s := x[0] - // 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s += x[i+1] - // 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s -= x[i+2] - // 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s *= x[i+3] - // 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s &= x[i+4] - // 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s |= x[i+5] - // 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` - s ^= x[i+6] - return s -} - -func idxStorePlusOp(x []int32, i int, v int32) { - // 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)` - x[i+1] += v - // 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)` - x[i+2] -= v - // 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)` - x[i+3] &= v - // 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)` - x[i+4] |= v - // 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)` - x[i+5] ^= v - - // 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)` - x[i+6] += 77 - // 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)` - x[i+7] &= 77 - // 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)` - x[i+8] |= 77 - // 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)` - x[i+9] ^= 77 -} -- 2.50.0